In [2]:
import pickle
import pandas as pd
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC

from sklearn.externals import joblib

In [6]:
def games_up_to_2018_season_filter(df):
    '''Filter for games up to 2018 season'''
    notourney2018 = (df['GameType'] != 'tourney2018')
    noseason2018 = (df['GameType'] != 'season2018')
    games_up_to_2018_season = df[notourney2018 & noseason2018]
    return games_up_to_2018_season

def season2018_filter(df):
    '''Filter for games up to 2018 season'''
    season2018cond = (df['GameType'] == 'season2018')
    season2018 = df[season2018cond]
    return season2018

def data_for_model(df):
    '''
    Inputs: Model DataFrame
    Outputs: Vectors for model
    '''

    games_up_to_2018_season = games_up_to_2018_season_filter(df)
    season2018 = season2018_filter(df)

    Xy_train = games_up_to_2018_season[['W', 'Wp', 'ppg', 'pApg', 'FGp',
        '3Pp', 'FTp', 'ORBpg', 'RBpg', 'ASTpg', 'STLpg', 'BLKpg', 'TOpg',
        'PFpg', 'sos', 'exp_factor', 'C0', 'C1', 'C2', 'F0', 'F1', 'F2',
        'G0', 'G1', 'G2', 'G3', 'OPWp', 'OPppg', 'OPpApg', 'OPFGp', 'OP3Pp',
        'OPFTp', 'OPORBpg', 'OPRBpg', 'OPASTpg', 'OPSTLpg', 'OPBLKpg',
        'OPTOpg', 'OPPFpg', 'OPsos', 'OPexp_factor', 'C0', 'C1', 'C2',
        'F0', 'F1', 'F2', 'G0', 'G1', 'G2', 'G3']]

    Xy_test = season2018[['W', 'Wp', 'ppg', 'pApg', 'FGp', '3Pp', 'FTp',
        'ORBpg', 'RBpg', 'ASTpg', 'STLpg', 'BLKpg', 'TOpg', 'PFpg', 'sos',
        'exp_factor', 'C0', 'C1', 'C2', 'F0', 'F1', 'F2', 'G0', 'G1', 'G2',
        'G3', 'OPWp', 'OPppg', 'OPpApg', 'OPFGp', 'OP3Pp', 'OPFTp',
        'OPORBpg', 'OPRBpg', 'OPASTpg', 'OPSTLpg', 'OPBLKpg', 'OPTOpg',
        'OPPFpg', 'OPsos', 'OPexp_factor', 'C0', 'C1', 'C2', 'F0', 'F1',
        'F2', 'G0', 'G1', 'G2', 'G3']]

    '''Set up features and targets'''
    X_train = Xy_train.iloc[:, 1:].as_matrix()
    y_train = Xy_train.iloc[:, 0].as_matrix()
    X_test = Xy_test.iloc[:, 1:].as_matrix()
    y_test = Xy_test.iloc[:, 0].as_matrix()

    '''Standardize data'''
    scale = StandardScaler()
    scale.fit(X_train)
    X_train = scale.transform(X_train)
    X_test = scale.transform(X_test)

    return X_train, y_train, X_test, y_test

In [8]:
#load data
'''Read in model data.'''
data_df = pd.read_pickle('../model_data/gamelog_5_exp_clust.pkl')
X_train, y_train, X_test, y_test = data_for_model(data_df)
data = (X_train, y_train, X_test, y_test)

In [13]:
# load the model from disk
filename = 'lr_finalized_model.sav'
loaded_model = joblib.load(filename)
result = loaded_model.score(X_test, y_test)
print(result) 

0.6660951564509215


In [15]:
pkl_filename = 'lr_best_params.pkl'

with open(pkl_filename, 'rb') as file:  
    pickle_model = pickle.load(file)

# Calculate the accuracy score and predict target values
score = pickle_model.score(X_test, y_test)  
print("Test score: {0:.2f} %".format(100 * score))  
Ypredict = pickle_model.predict(X_test)  

Test score: 66.61 %


In [18]:
pkl_filename = 'lr_best_params.pkl'

with open(pkl_filename, 'rb') as file:  
    pickle_params = pickle.load(file)

In [19]:
pickle_params

GridSearchCV(cv=5, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6, 0.7000000000000001, 0.8, 0.9]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=1)

In [48]:
filepath = 'lr_best_params.txt'

# with open(filepath, "r") as text_file:
text_file = open(filepath, 'r')
 
params = text_file.read()
print(params)

{'C': 0.1, 'penalty': 'l1'}



In [49]:
params = params.strip()
params = params.replace("{", "")
params = params.replace("}", "")
params = params.replace(" ", "")

In [50]:
params

"'C':0.1,'penalty':'l1'"

In [33]:
params_dict = dict(params)

ValueError: dictionary update sequence element #0 has length 1; 2 is required