# CFB Machine Learning with Scikit-learn

## Imports 

In [None]:
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt

## Data

In [None]:
# Load the Power 5 game table (first CSV row is the header) and strip the
# columns the models should not see.
data = (
    pd.read_csv('power5.csv', sep=',', header=0)
    # "Unnamed: 0" is presumably a saved index column -- confirm against the CSV
    .drop(columns="Unnamed: 0")
    # For now the poll columns are removed because the network is not built
    # to handle NaN values; the final scores are dropped in the same call.
    .drop(columns=[
        "H_Poll_curr", "H_Poll_pre", "H_Poll_high",
        "A_Poll_curr", "A_Poll_pre", "A_Poll_high",
        "Home_score", "Away_score",
    ])
    # Remove stat columns considered unnecessary, for the home (H_) and
    # away (A_) side alike.
    .drop(columns=[
        'H_Overall_Pct', 'H_Conf_Pct', 'H_SOS',
        'H_Offensive_Pass_Pct', 'H_Offensive_RusH_Yards_Average',
        'H_Offensive_Total_Average', 'H_Offensive_First_Downs_Total',
        'H_Offensive_Turnovers_Total',
        'H_Defensive_Pass_Pct', 'H_Defensive_RusH_Yards_Average',
        'H_Defensive_Total_Average', 'H_Defensive_First_Downs_Total',
        'H_Defensive_Turnovers_Total',
        'H_Offensive_Total_Plays', 'H_Offensive_Total_Yards',
        'H_Defensive_Total_Plays', 'H_Defensive_Total_Yards',
        'A_Overall_Pct', 'A_Conf_Pct', 'A_SOS',
        'A_Offensive_Pass_Pct', 'A_Offensive_RusA_Yards_Average',
        'A_Offensive_Total_Average', 'A_Offensive_First_Downs_Total',
        'A_Offensive_Turnovers_Total',
        'A_Defensive_Pass_Pct', 'A_Defensive_RusA_Yards_Average',
        'A_Defensive_Total_Average', 'A_Defensive_First_Downs_Total',
        'A_Defensive_Turnovers_Total',
        'A_Offensive_Total_Plays', 'A_Offensive_Total_Yards',
        'A_Defensive_Total_Plays', 'A_Defensive_Total_Yards',
    ])
)

# Move the 'Winner' column to the far right: `cols` keeps every other column
# name in its current order, and the frame is rebuilt with 'Winner' last.
cols = data.columns.tolist()
cols.remove('Winner')
data = data[cols + ['Winner']]



In [None]:
#data.info()
#cols

In [None]:
# Number of columns left in the training table after the drops above.
data.shape[1]

# MLP Classifier

## Assign data

In [None]:
# Features: every column after the first two and before the trailing 'Winner'
# column (the data-prep cell moves 'Winner' to the end of the frame). Slicing
# relative to the end avoids a hard-coded column count that has to be edited
# by hand whenever the set of dropped columns changes.
# NOTE(review): the two skipped leading columns are presumably identifiers
# (e.g. team names) -- confirm against the CSV.
X = data.iloc[:, 2:-1]

# Target: the game winner, selected by name rather than by position.
y = data['Winner']

## Training and testing set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the games for testing. random_state pins the shuffle so the
# same rows land in each split on every run; without it the test set (and
# every metric computed on it) changes each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits. `scaler` is reused later for the conference and
# bowl tables.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Build classifier

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with six hidden layers of 10 units each, allowed up
# to 1000 training iterations. random_state fixes the weight initialisation
# so refitting on the same training data yields the same model.
mlp = MLPClassifier(
    hidden_layer_sizes=(10, 10, 10, 10, 10, 10),
    max_iter=1000,
    random_state=42,
)
# y_train is flattened to a plain 1-D array of labels before fitting.
mlp.fit(X_train, y_train.values.ravel())

### Lime

In [None]:
from lime import lime_text
from sklearn.pipeline import make_pipeline
import sklearn.preprocessing as preprocessing
import lime
import lime.lime_tabular
from lime.lime_text import LimeTextExplainer

In [None]:
# Build the LIME explainer over the scaled training matrix.
# feature_names must line up one-to-one with the columns of X_train, so they
# are taken from X (the frame X_train was split from) rather than from `cols`,
# which also lists the two leading columns that were excluded from X.
# class_names expects one name per class in the classifier's own order, not
# the per-row training labels.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    feature_names=list(X.columns),
    class_names=[str(label) for label in mlp.classes_],
    discretize_continuous=True,
)

In [None]:
# Pick one row of the test split at random and have LIME explain the MLP's
# predicted probabilities for it, limited to 2 features and to the single
# top label.
i = np.random.randint(0, len(X_test))
exp = explainer.explain_instance(
    X_test[i],
    mlp.predict_proba,
    num_features=2,
    top_labels=1,
)

In [None]:
# Render the explanation inline in the notebook. show_table=True adds the
# table of feature values; show_all=False presumably limits that table to the
# features used in the explanation -- confirm against the LIME docs.
exp.show_in_notebook(show_table=True, show_all=False)

## Test

In [None]:
# Predicted class for every row of the scaled held-out test split.
predictions = mlp.predict(X_test)  

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the MLP on the held-out split: the confusion matrix first, then the
# per-class classification report, one after the other.
print(
    confusion_matrix(y_test, predictions),
    classification_report(y_test, predictions),
    sep='\n',
)

## Test Conference Championships

In [None]:
# Conference-championship games, stripped of the same poll and stat columns
# as the training table so the fitted scaler and model can be applied.
conf = (
    pd.read_csv('conf_champs.csv', sep=',', header=0)
    .drop(columns="Unnamed: 0")
    # For now the poll columns are removed because the network is not built
    # to handle NaN values.
    .drop(columns=[
        "H_Poll_curr", "H_Poll_pre", "H_Poll_high",
        "A_Poll_curr", "A_Poll_pre", "A_Poll_high",
    ])
    # Remove stat columns considered unnecessary, for the home (H_) and
    # away (A_) side alike.
    .drop(columns=[
        'H_Overall_Pct', 'H_Conf_Pct', 'H_SOS',
        'H_Offensive_Pass_Pct', 'H_Offensive_RusH_Yards_Average',
        'H_Offensive_Total_Average', 'H_Offensive_First_Downs_Total',
        'H_Offensive_Turnovers_Total',
        'H_Defensive_Pass_Pct', 'H_Defensive_RusH_Yards_Average',
        'H_Defensive_Total_Average', 'H_Defensive_First_Downs_Total',
        'H_Defensive_Turnovers_Total',
        'H_Offensive_Total_Plays', 'H_Offensive_Total_Yards',
        'H_Defensive_Total_Plays', 'H_Defensive_Total_Yards',
        'A_Overall_Pct', 'A_Conf_Pct', 'A_SOS',
        'A_Offensive_Pass_Pct', 'A_Offensive_RusA_Yards_Average',
        'A_Offensive_Total_Average', 'A_Offensive_First_Downs_Total',
        'A_Offensive_Turnovers_Total',
        'A_Defensive_Pass_Pct', 'A_Defensive_RusA_Yards_Average',
        'A_Defensive_Total_Average', 'A_Defensive_First_Downs_Total',
        'A_Defensive_Turnovers_Total',
        'A_Offensive_Total_Plays', 'A_Offensive_Total_Yards',
        'A_Defensive_Total_Plays', 'A_Defensive_Total_Yards',
    ])
)

# Remaining column names, in order. No 'Winner' column is moved to the end
# for this table.
conf_cols = conf.columns.tolist()

# Take the feature columns by position and scale them with the scaler fitted
# on the training split.
# NOTE(review): assumes this file shares the training table's column layout --
# confirm.
X_conf = conf.iloc[:, 2:76]
X_c = scaler.transform(X_conf.values)

In [None]:
# Number of columns left in the conference table after the drops above.
conf.shape[1]

In [None]:
# Display the prepared conference-championship frame for a visual check.
conf

In [None]:
# Run the fitted MLP on the scaled conference-championship features; the bare
# name on the last line displays the predicted labels in the notebook.
conf_predictions = mlp.predict(X_c)
conf_predictions

In [None]:
# Hand-entered actual outcomes of the conference-championship games.
# NOTE(review): presumably one entry per row of `conf`, using the same label
# encoding as the training 'Winner' column -- confirm.
real_conf_champs = np.array(
    [0, 0, 1, 0, 1, 0, 1, 1, 0]
)

In [None]:
# Compare the MLP's conference-championship predictions with the real
# outcomes; rows are the actual labels, columns the predicted labels.
print(confusion_matrix(real_conf_champs,conf_predictions))  

## Predict Bowl Games

In [None]:
# Bowl games, stripped of the same poll and stat columns as the training
# table so the fitted scaler and model can be applied.
bowl = (
    pd.read_csv('bowl_champs.csv', sep=',', header=0)
    .drop(columns="Unnamed: 0")
    # For now the poll columns are removed because the network is not built
    # to handle NaN values.
    .drop(columns=[
        "H_Poll_curr", "H_Poll_pre", "H_Poll_high",
        "A_Poll_curr", "A_Poll_pre", "A_Poll_high",
    ])
    # Remove stat columns considered unnecessary, for the home (H_) and
    # away (A_) side alike.
    .drop(columns=[
        'H_Overall_Pct', 'H_Conf_Pct', 'H_SOS',
        'H_Offensive_Pass_Pct', 'H_Offensive_RusH_Yards_Average',
        'H_Offensive_Total_Average', 'H_Offensive_First_Downs_Total',
        'H_Offensive_Turnovers_Total',
        'H_Defensive_Pass_Pct', 'H_Defensive_RusH_Yards_Average',
        'H_Defensive_Total_Average', 'H_Defensive_First_Downs_Total',
        'H_Defensive_Turnovers_Total',
        'H_Offensive_Total_Plays', 'H_Offensive_Total_Yards',
        'H_Defensive_Total_Plays', 'H_Defensive_Total_Yards',
        'A_Overall_Pct', 'A_Conf_Pct', 'A_SOS',
        'A_Offensive_Pass_Pct', 'A_Offensive_RusA_Yards_Average',
        'A_Offensive_Total_Average', 'A_Offensive_First_Downs_Total',
        'A_Offensive_Turnovers_Total',
        'A_Defensive_Pass_Pct', 'A_Defensive_RusA_Yards_Average',
        'A_Defensive_Total_Average', 'A_Defensive_First_Downs_Total',
        'A_Defensive_Turnovers_Total',
        'A_Offensive_Total_Plays', 'A_Offensive_Total_Yards',
        'A_Defensive_Total_Plays', 'A_Defensive_Total_Yards',
    ])
)

# Remaining column names, in order. No 'Winner' column is moved to the end
# for this table.
bowl_cols = bowl.columns.tolist()

# Take the feature columns by position and scale them with the scaler fitted
# on the training split.
# NOTE(review): assumes this file shares the training table's column layout --
# confirm.
X_bowl = bowl.iloc[:, 2:76]
X_b = scaler.transform(X_bowl.values)

In [None]:
# Run the fitted MLP on the scaled bowl-game features; the bare name on the
# last line displays the predicted labels in the notebook.
bowl_predictions = mlp.predict(X_b)
bowl_predictions

# SVM

In [None]:
from sklearn import svm

In [None]:
# Support-vector classifier trained on the same scaled training split that
# the MLP was fitted on. gamma='scale' is passed explicitly rather than
# relying on the library default.
clf = svm.SVC(gamma='scale')
clf.fit(X_train, y_train)

In [None]:
# SVM predictions for the scaled held-out test split.
clf_predictions = clf.predict(X_test)

In [None]:
# Score the SVM on the held-out split: the confusion matrix first, then the
# per-class classification report.
print(
    confusion_matrix(y_test, clf_predictions),
    classification_report(y_test, clf_predictions),
    sep='\n',
)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

In [None]:
# Random forest of 200 trees. random_state fixes the bootstrap sampling and
# feature selection so refitting on the same training data yields the same
# forest.
rf = RandomForestClassifier(n_estimators=200, random_state=42)

In [None]:
# Train the forest on the same scaled training split used for the MLP and SVM.
rf.fit(X_train, y_train)

In [None]:
# Random-forest predictions for the scaled held-out test split.
rf_predictions = rf.predict(X_test)

In [None]:
# Score the random forest on the held-out split: the confusion matrix first,
# then the per-class classification report.
print(
    confusion_matrix(y_test, rf_predictions),
    classification_report(y_test, rf_predictions),
    sep='\n',
)