In [112]:
import warnings
warnings.simplefilter('ignore')
from sklearn.tree import DecisionTreeClassifier
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
from sklearn.externals import joblib



class PandasDummies(BaseEstimator, TransformerMixin):
    def transform(self, X, *_):
        return pd.get_dummies(X)
    
    def fit(self, *_):
        return self

In [113]:
teamData=pd.read_csv("./team/clean_team_stats.csv")
teamData.drop('Unnamed: 0', axis=1, inplace=True)
X = teamData[[ 'first_downs_by_penalty',
       'third_down_percentage', 'fourth_down_percentage',
        'average_kickoff_return_yards',
       'average_punt_return_yards', 'interceptions', 'net_average_punt_yards',
       'net_passing_yards', 'net_passing_yards_per_game',
       'passing_first_downs', 'passing_touchdowns', 'rushing_first_downs',
       'rushing_attempts', 'rushing_touchdowns', 'rushing_yards',
       , 'total_offensive_plays', 
       'yards_per_pass_attempt',
       'yards_per_rush_attempt', 'total_penalties', 'total_yards_penalized',
       'total_defensive_sacks', 'yards_lost_from_sacks', 'total_punts_kicked',
       'total_punt_yards', 'total_team_penalties']]

y= teamData['total_points'].values.reshape(-1, 1)


In [117]:
def linerregressionRun(X, y, times):
    
    MSEresults=[]
    R2results=[]
    rffit=[]
    rfscore=[]
    i=0
    rfFeatures=[]
    steps=[]
   
    while i<times:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=42)
        model = make_pipeline(PandasDummies(), StandardScaler(), LinearRegression())
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        MSE = mean_squared_error(y_test, predictions)
        r2 = model.score(X_test, y_test)
        print(f"MSE: {MSE}, R2: {r2}")
        MSEresults.append(MSE)
        R2results.append(r2)
        joblib.dump(model, F"{i}my_model.pkl")
        rf = RandomForestRegressor(n_estimators=200, max_leaf_nodes=5)
        rf = rf.fit(X_train, y_train)
        acc = rf.score(X_test, y_test)
        rfF=sorted(zip(rf.feature_importances_, X.columns), reverse=True)
        rfFeatures.append(rfF)
        steps.append(model.steps)
        rffit.append(rf)
        
        
        rfscore.append(acc)
        print(f"accuracy = {acc}")
        i=i+1
    results={"MSE":MSEresults, 
             "r2":R2results,
             "RandomForest":rffit,
             "RFScore":rfscore,
             "features":rfFeatures,
             "steps":steps
             }
    return pd.DataFrame(results)
    


In [118]:
TUNNING = linerregressionRun(X, y, 20)

MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7769670493955096
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.779029973549006
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7820744950716557
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7687489093482573
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7796632699420702
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7785120069743038
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7893498990155187
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7758668003009017
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7866864054510758
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7829215342357744
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7897037630657369
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7613890043825722
MSE: 376.95102575665754, R2: 0.9646660225195112
accuracy = 0.7788

In [119]:
TUNNING.columns

Index(['MSE', 'r2', 'RandomForest', 'RFScore', 'features', 'steps'], dtype='object')

In [120]:
TUNNING["features"][0]

[(0.29118823956183243, 'third_down_percentage'),
 (0.17182913042454054, 'yards_per_pass_attempt'),
 (0.14856909209951744, 'passing_touchdowns'),
 (0.11507859625315613, 'rushing_touchdowns'),
 (0.09038059665280274, 'total_offensive_plays'),
 (0.0758681022796706, 'net_passing_yards'),
 (0.05310080553952706, 'total_punts_kicked'),
 (0.009331673507695447, 'rushing_first_downs'),
 (0.00695250968201114, 'total_punt_yards'),
 (0.006534083088571214, 'rushing_yards'),
 (0.004774943750746577, 'average_punt_return_yards'),
 (0.004320320831464841, 'net_passing_yards_per_game'),
 (0.0037086340090284374, 'yards_per_rush_attempt'),
 (0.0030915289707397326, 'total_defensive_sacks'),
 (0.0026315281368179903, 'average_kickoff_return_yards'),
 (0.0020182110476301028, 'total_team_penalties'),
 (0.0020127653050985113, 'rushing_attempts'),
 (0.0019219778900766912, 'fourth_down_percentage'),
 (0.00171856886106225, 'total_yards_penalized'),
 (0.0012586464605442548, 'yards_lost_from_sacks'),
 (0.00081240453883

In [111]:
TUNNING["RandomForest"][0].base_estimator_


DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [134]:
X2 = teamData[[ 'first_downs_by_penalty',
       'third_down_percentage', 'fourth_down_percentage',
       'average_interception_yards', 'average_kickoff_return_yards',
       'average_punt_return_yards', 'interceptions', 'net_average_punt_yards',
       'net_passing_yards', 'net_passing_yards_per_game',
       'passing_first_downs', 'passing_touchdowns', 'rushing_first_downs',
       'rushing_attempts', 'rushing_touchdowns', 'rushing_yards',
       'rushing_yards_per_game', 'total_offensive_plays', 
       'yards_per_pass_attempt',
       'yards_per_rush_attempt', 'total_penalties', 'total_yards_penalized',
       'total_defensive_sacks', 'yards_lost_from_sacks', 'total_punts_kicked',
       'total_punt_yards', 'total_team_penalties']]

y2= teamData['total_points'].values.reshape(-1, 1)
              

In [135]:

some_totally_random_model = joblib.load("0my_model.pkl")

In [137]:

X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.40, random_state=42)

In [161]:
results=some_totally_random_model.predict(X2.mean)

TypeError: 'DataFrame' objects are mutable, thus they cannot be hashed

In [159]:
results


array([[306.88831043]])

In [160]:
teamData['total_points'][60]

324