In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score


import pandas as pd 
import numpy as np
import os


In [2]:
import warnings
warnings.filterwarnings('ignore') 
os.environ['PYTHONWARNINGS']='ignore'

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [3]:
dataframe = pd.read_csv("processed.csv")
features = dataframe.columns.to_list()
features.remove('Unnamed: 0')
features.remove('HOME_TEAM_WINS')

data = dataframe.values
X = dataframe[features]
y = dataframe.HOME_TEAM_WINS

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [6]:


model = LogisticRegression()

#Search Space 
space = dict()
space['solver'] = ['newton-cg', 'lbfgs', 'liblinear',]
space['penalty'] = ['none', 'l1', 'l2', 'elasticnet']
space['C'] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100, 500, 1000]
space['max_iter']: [100, 500, 1000, 2000]

#Grid Search 
grid_search = GridSearchCV(model, space, scoring='accuracy', n_jobs=-1)
grid_result = grid_search.fit(X, y) #Replace x and y with input output variables later 

print('Best Score: %s' % grid_result.best_score_)
print('Best Hyperparameters: %s' % grid_result.best_params_)

#Random Search 
random_search = RandomizedSearchCV(model, space, scoring='accuracy', n_jobs=-1)
random_result = random_search.fit(X, y) #Replace x and y with input output variables later 

print('Best Score: %s' % random_result.best_score_)
print('Best Hyperparameters: %s' % random_result.best_params_)

Best Score: 0.5712040272589736
Best Hyperparameters: {'C': 1e-05, 'penalty': 'none', 'solver': 'lbfgs'}
Best Score: 0.5670589495387663
Best Hyperparameters: {'solver': 'liblinear', 'penalty': 'l2', 'C': 100}


In [5]:
param_grid = [
    {
        'solver': ['newton-cg', 'lbfgs', 'liblinear',],
        'penalty':['none', 'l1', 'l2', 'elasticnet'],
        'C': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100, 500, 1000],
        'max_iter': [100, 500, 1000, 2000]
    }
#     {
#         "preprocesser__num_transform__imputer__strategy": ["mean", "median"],
#         "classifier__n_estimators": [10, 100, 1000],
#         "classifier": [RandomForestClassifier()]
#     }
]

# model = LogisticRegression()
# grid_search = GridSearchCV(model, param_grid, cv=10, verbose=1,n_jobs=-1)
# grid_search.fit(X_train, y_train)

# model = RandomForestClassifier()
# grid_search = GridSearchCV(model, param_grid, cv=10, verbose=1,n_jobs=-1)
# grid_search.fit(X_train, y_train)

#Grid Search 
grid_search = GridSearchCV(model, param_grid, scoring='accuracy', n_jobs=-1)
grid_result = grid_search.fit(X_train, y_train) #Replace x and y with input output variables later 

print('Best Score: %s' % grid_result.best_score_)
print('Best Hyperparameters: %s' % grid_result.best_params_)

#Random Search 
random_search = RandomizedSearchCV(model, param_grid, scoring='accuracy', n_jobs=-1)
random_result = random_search.fit(X_train, y_train) #Replace x and y with input output variables later 

print('Best Score: %s' % random_result.best_score_)
print('Best Hyperparameters: %s' % random_result.best_params_)

KeyboardInterrupt: 

In [16]:
ct = make_column_transformer(
       (OneHotEncoder(handle_unknown='ignore', sparse=False), features),
       remainder = 'passthrough'
)

# lgr = LogisticRegression(C=1e-05, penalty='none', solver='lbfgs')
lgr = LogisticRegression()

p1 = make_pipeline(ct, lgr)

cross_val_score( p1, X_train , y_train, cv=2, scoring='accuracy').mean()

0.6010362694300517