In [2]:
import warnings
warnings.simplefilter('ignore')
from sklearn.tree import DecisionTreeClassifier
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
from sklearn.externals import joblib



class PandasDummies(BaseEstimator, TransformerMixin):
    def transform(self, X, *_):
        return pd.get_dummies(X)
    
    def fit(self, *_):
        return self

In [3]:
teamData=pd.read_csv("../team/cleaned_team_stats.csv")
teamData.columns



Index(['team_id', 'variable', 'first_downs', 'first_downs_by_penalty',
       'third_down_percentage', 'fourth_down_percentage',
       'average_interception_yards', 'average_kickoff_return_yards',
       'average_punt_return_yards', 'interceptions', 'net_average_punt_yards',
       'net_passing_yards', 'net_passing_yards_per_game',
       'passing_first_downs', 'passing_touchdowns', 'rushing_first_downs',
       'rushing_attempts', 'rushing_touchdowns', 'rushing_yards',
       'rushing_yards_per_game', 'total_offensive_plays', 'total_points',
       'total_points_per_game', 'total_touchdowns', 'total_offensive_yards',
       'yards_per_game', 'yards_per_pass_attempt', 'yards_per_rush_attempt',
       'completed_passes', 'attempted_passes', 'field_goals_completed',
       'field_goals_attempted', 'total_fumbles', 'defensive_interception',
       'yards_after_interception', 'total_kickoffs_received',
       'yards_off_kickoff_received', 'total_punts_received',
       'yards_off_punts_re

In [26]:
X = teamData[['rushing_first_downs',
              'passing_first_downs',
  'passing_touchdowns',
  'rushing_touchdowns',
  'third_down_percentage',
  'total_offensive_plays',
   ]]

y= teamData['total_offensive_yards'].values.reshape(-1, 1)

In [15]:
def linerregressionRun(X, y, times):
    
    MSEresults=[]
    R2results=[]
    rffit=[]
    rfscore=[]
    i=0
    rfFeatures=[]
    steps=[]
   
    while i<times:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=42)
        model = make_pipeline(PandasDummies(), StandardScaler(), LinearRegression())
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        MSE = mean_squared_error(y_test, predictions)
        r2 = model.score(X_test, y_test)
        print(f"MSE: {MSE}, R2: {r2}")
        MSEresults.append(MSE)
        R2results.append(r2)
        joblib.dump(model, F"{i}my_model_yards.pkl")
        rf = RandomForestRegressor(n_estimators=200, max_leaf_nodes=5)
        rf = rf.fit(X_train, y_train)
        acc = rf.score(X_test, y_test)
        rfF=sorted(zip(rf.feature_importances_, X.columns), reverse=True)
        rfFeatures.append(rfF)
        steps.append(model.steps)
        rffit.append(rf)
        
        
        rfscore.append(acc)
        print(f"accuracy = {acc}")
        i=i+1
    results={"MSE":MSEresults, 
             "r2":R2results,
             "RandomForest":rffit,
             "RFScore":rfscore,
             "features":rfFeatures,
             "steps":steps
             }
    return pd.DataFrame(results)
    


In [27]:
TUNNING = linerregressionRun(X, y, 20)

MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7449314970074464
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7463108681679639
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7496987077639297
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7449052913954497
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7382335564626743
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7405188210381504
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7435928742373548
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7464820242788408
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7605810385569569
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7381116536183994
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7343511531438904
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.7480747434922441
MSE: 43788.736115904605, R2: 0.9490930548424777
accuracy = 0.756

In [21]:
TUNNING.columns

Index(['MSE', 'r2', 'RandomForest', 'RFScore', 'features', 'steps'], dtype='object')

In [28]:
TUNNING["features"][0]

[(0.40292706973322623, 'third_down_percentage'),
 (0.26886419819996366, 'total_offensive_plays'),
 (0.18050503380581878, 'passing_touchdowns'),
 (0.0712233230462737, 'passing_first_downs'),
 (0.04130186552869967, 'rushing_touchdowns'),
 (0.03517850968601788, 'rushing_first_downs')]

In [111]:
TUNNING["RandomForest"][0].base_estimator_


DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [134]:
X2 = teamData[[ 'first_downs_by_penalty',
       'third_down_percentage', 'fourth_down_percentage',
       'average_interception_yards', 'average_kickoff_return_yards',
       'average_punt_return_yards', 'interceptions', 'net_average_punt_yards',
       'net_passing_yards', 'net_passing_yards_per_game',
       'passing_first_downs', 'passing_touchdowns', 'rushing_first_downs',
       'rushing_attempts', 'rushing_touchdowns', 'rushing_yards',
       'rushing_yards_per_game', 'total_offensive_plays', 
       'yards_per_pass_attempt',
       'yards_per_rush_attempt', 'total_penalties', 'total_yards_penalized',
       'total_defensive_sacks', 'yards_lost_from_sacks', 'total_punts_kicked',
       'total_punt_yards', 'total_team_penalties']]

y2= teamData['total_points'].values.reshape(-1, 1)
              

In [135]:

some_totally_random_model = joblib.load("0my_model.pkl")

In [137]:

X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.40, random_state=42)

In [161]:
results=some_totally_random_model.predict(X2.mean)

TypeError: 'DataFrame' objects are mutable, thus they cannot be hashed

In [159]:
results


array([[306.88831043]])

In [160]:
teamData['total_points'][60]

324