# CFB Machine Learning with Scikit-learn

## Imports 

In [1]:
import numpy as np
import pandas as pd
import csv

## Data

In [2]:
data = pd.read_csv('power5.csv', sep=',',header=0)
data = data.drop(columns="Unnamed: 0")

#for now, remove polls because this nn is not built to handle NaN values
data = data.drop(columns = ["H_Poll_curr", "H_Poll_pre", "H_Poll_high","A_Poll_curr", "A_Poll_pre", "A_Poll_high", "Home_score", "Away_score"])

#remove and simplify unecessary data
data = data.drop(columns = ['H_Overall_Pct', 'H_Conf_Pct', 'H_SOS', 'H_Offensive_Pass_Pct', 'H_Offensive_RusH_Yards_Average', 
                            'H_Offensive_Total_Average', 'H_Offensive_First_Downs_Total', 'H_Offensive_Turnovers_Total',
                           'H_Defensive_Pass_Pct', 'H_Defensive_RusH_Yards_Average', 'H_Defensive_Total_Average',
                           'H_Defensive_First_Downs_Total', 'H_Defensive_Turnovers_Total', 
                            'H_Offensive_Total_Plays', 'H_Offensive_Total_Yards',
                           'H_Defensive_Total_Plays', 'H_Defensive_Total_Yards',
                           'A_Overall_Pct', 'A_Conf_Pct', 'A_SOS', 'A_Offensive_Pass_Pct', 'A_Offensive_RusA_Yards_Average', 
                            'A_Offensive_Total_Average', 'A_Offensive_First_Downs_Total', 'A_Offensive_Turnovers_Total',
                           'A_Defensive_Pass_Pct', 'A_Defensive_RusA_Yards_Average', 'A_Defensive_Total_Average',
                           'A_Defensive_First_Downs_Total', 'A_Defensive_Turnovers_Total', 
                            'A_Offensive_Total_Plays', 'A_Offensive_Total_Yards',
                           'A_Defensive_Total_Plays', 'A_Defensive_Total_Yards'])

cols = list(data.columns.values) #Make a list of all of the columns in the df
cols.pop(cols.index('Winner')) #Remove b from list
data = data[cols+['Winner']] #Create new dataframe with columns in the order you want




# MLP Classifier

## Assign data

In [3]:
#assign data for columns 2:109 to X 
X = data.iloc[:, 2:76]

# assign y as winner
y = data.iloc[:,76]

## Training and testing set

In [4]:
from sklearn.model_selection import train_test_split  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)   

In [5]:
from sklearn.preprocessing import StandardScaler  
scaler = StandardScaler()  
scaler.fit(X_train)

X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test)  

  return self.partial_fit(X, y)
  """
  


## Build classifier

In [6]:
from sklearn.neural_network import MLPClassifier  
mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10, 10, 10, 10), max_iter=1000)  
mlp.fit(X_train, y_train.values.ravel())  

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10, 10, 10, 10, 10, 10),
       learning_rate='constant', learning_rate_init=0.001, max_iter=1000,
       momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
       power_t=0.5, random_state=None, shuffle=True, solver='adam',
       tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

## Test

In [7]:
predictions = mlp.predict(X_test)  

In [8]:
from sklearn.metrics import classification_report, confusion_matrix  
print(confusion_matrix(y_test,predictions))  
print(classification_report(y_test,predictions))  

[[28  6]
 [ 6 15]]
              precision    recall  f1-score   support

           0       0.82      0.82      0.82        34
           1       0.71      0.71      0.71        21

   micro avg       0.78      0.78      0.78        55
   macro avg       0.77      0.77      0.77        55
weighted avg       0.78      0.78      0.78        55



## Test Conference Championships

In [9]:
conf = pd.read_csv('conf_champs.csv', sep=',',header=0)
conf = conf.drop(columns="Unnamed: 0")

#for now, remove polls because this nn is not built to handle NaN values
conf = conf.drop(columns = ["H_Poll_curr", "H_Poll_pre", "H_Poll_high","A_Poll_curr", "A_Poll_pre", "A_Poll_high"])

#remove and simplify unecessary data
conf = conf.drop(columns = ['H_Overall_Pct', 'H_Conf_Pct', 'H_SOS', 'H_Offensive_Pass_Pct', 'H_Offensive_RusH_Yards_Average', 
                            'H_Offensive_Total_Average', 'H_Offensive_First_Downs_Total', 'H_Offensive_Turnovers_Total',
                           'H_Defensive_Pass_Pct', 'H_Defensive_RusH_Yards_Average', 'H_Defensive_Total_Average',
                           'H_Defensive_First_Downs_Total', 'H_Defensive_Turnovers_Total', 
                            'H_Offensive_Total_Plays', 'H_Offensive_Total_Yards',
                           'H_Defensive_Total_Plays', 'H_Defensive_Total_Yards',
                           'A_Overall_Pct', 'A_Conf_Pct', 'A_SOS', 'A_Offensive_Pass_Pct', 'A_Offensive_RusA_Yards_Average', 
                            'A_Offensive_Total_Average', 'A_Offensive_First_Downs_Total', 'A_Offensive_Turnovers_Total',
                           'A_Defensive_Pass_Pct', 'A_Defensive_RusA_Yards_Average', 'A_Defensive_Total_Average',
                           'A_Defensive_First_Downs_Total', 'A_Defensive_Turnovers_Total', 
                            'A_Offensive_Total_Plays', 'A_Offensive_Total_Yards',
                           'A_Defensive_Total_Plays', 'A_Defensive_Total_Yards'])


conf_cols = list(conf.columns.values) #Make a list of all of the columns in the df
#conf_cols.pop(conf_cols.index('Winner')) #Remove b from list
#conf = conf[conf_cols+['Winner']] #Create new dataframe with columns in the order you want

X_conf = conf.iloc[:, 2:76]
X_c =  X_conf.values
X_c = scaler.transform(X_c) 

In [10]:
conf_predictions = mlp.predict(X_c)
conf_predictions

array([0, 0, 1, 0, 1, 0, 1, 1, 0])

In [11]:
real_conf_champs = np.array([0,0,1,0,1])

In [12]:
print(confusion_matrix(real_conf_champs,conf_predictions))  

ValueError: Found input variables with inconsistent numbers of samples: [5, 9]

## Predict Bowl Games

In [None]:
bowl = pd.read_csv('bowl_champs.csv', sep=',',header=0)
bowl = bowl.drop(columns="Unnamed: 0")

#for now, remove polls because this nn is not built to handle NaN values
bowl = bowl.drop(columns = ["H_Poll_curr", "H_Poll_pre", "H_Poll_high","A_Poll_curr", "A_Poll_pre", "A_Poll_high"])

#remove and simplify unecessary data
bowl = bowl.drop(columns = ['H_Overall_Pct', 'H_Conf_Pct', 'H_SOS', 'H_Offensive_Pass_Pct', 'H_Offensive_RusH_Yards_Average', 
                            'H_Offensive_Total_Average', 'H_Offensive_First_Downs_Total', 'H_Offensive_Turnovers_Total',
                           'H_Defensive_Pass_Pct', 'H_Defensive_RusH_Yards_Average', 'H_Defensive_Total_Average',
                           'H_Defensive_First_Downs_Total', 'H_Defensive_Turnovers_Total', 
                            'H_Offensive_Total_Plays', 'H_Offensive_Total_Yards',
                           'H_Defensive_Total_Plays', 'H_Defensive_Total_Yards',
                           'A_Overall_Pct', 'A_Conf_Pct', 'A_SOS', 'A_Offensive_Pass_Pct', 'A_Offensive_RusA_Yards_Average', 
                            'A_Offensive_Total_Average', 'A_Offensive_First_Downs_Total', 'A_Offensive_Turnovers_Total',
                           'A_Defensive_Pass_Pct', 'A_Defensive_RusA_Yards_Average', 'A_Defensive_Total_Average',
                           'A_Defensive_First_Downs_Total', 'A_Defensive_Turnovers_Total', 
                            'A_Offensive_Total_Plays', 'A_Offensive_Total_Yards',
                           'A_Defensive_Total_Plays', 'A_Defensive_Total_Yards'])


bowl_cols = list(bowl.columns.values) #Make a list of all of the columns in the df
#bowl_cols.pop(bowl_cols.index('Winner')) #Remove b from list
#bowl = conf[bowl_cols+['Winner']] #Create new dataframe with columns in the order you want

X_bowl = bowl.iloc[:, 2:76]
X_b =  X_bowl.values
X_b = scaler.transform(X_b) 

In [None]:
bowl_predictions = mlp.predict(X_b)
bowl_predictions