## Import Pandas and the classifiers to experiment with

In [1]:
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

## Evaluate models on test data

In [4]:
from sklearn import metrics

def evaluate_model(model, row_name, training_file):
    training_df = pd.read_csv(training_file, index_col=0)

    target_feature = 'won_match'
    training_columns = [col for col in training_df.columns if col != target_feature]

    misclassification =  1 - cross_val_score(
        model, training_df[training_columns], training_df[target_feature], scoring='accuracy', cv=10
    ).mean()
    recall = cross_val_score(
        model, training_df[training_columns], training_df[target_feature], scoring='recall', cv=10
    ).mean()
    precision = cross_val_score(
        model, training_df[training_columns], training_df[target_feature], scoring='precision', cv=10
    ).mean()
    f1 = cross_val_score(
        model, training_df[training_columns], training_df[target_feature], scoring='f1', cv=10
    ).mean()
    
    return [
        row_name,
        misclassification,
        recall,
        precision,
        f1,
    ]


def evaluate_model_by_season(model):
    data= [
        evaluate_model(model, '2012/13', 'data/2012-2013.csv'),
        evaluate_model(model, '2013/14', 'data/2013-2014.csv'),
        evaluate_model(model, '2014/15', 'data/2014-2015.csv'),
        evaluate_model(model, '2015/16', 'data/2015-2016.csv'),
    ]
    
    return pd.DataFrame(data, columns=[
            'Season', 
            'Misclassification', 
            'Recall', 
            'Precision', 
            'F1',
        ])

### Logistic Regression

In [5]:
evaluate_model_by_season(LogisticRegression(solver='lbfgs'))

Unnamed: 0,Season,Misclassification,Recall,Precision,F1
0,2012/13,0.210325,0.651209,0.747915,0.694524
1,2013/14,0.180101,0.716418,0.794463,0.752941
2,2014/15,0.187747,0.694081,0.775789,0.732477
3,2015/16,0.194222,0.686926,0.766362,0.723397


### Gradient Boosting

In [6]:
evaluate_model_by_season(GradientBoostingClassifier(n_estimators=55, learning_rate=0.1))

Unnamed: 0,Season,Misclassification,Recall,Precision,F1
0,2012/13,0.214244,0.643189,0.742402,0.687313
1,2013/14,0.188679,0.696269,0.787931,0.740448
2,2014/15,0.193164,0.665637,0.781217,0.718521
3,2015/16,0.201913,0.676534,0.754464,0.712734


### Random Forest

In [7]:
evaluate_model_by_season(RandomForestClassifier())

Unnamed: 0,Season,Misclassification,Recall,Precision,F1
0,2012/13,0.234496,0.56875,0.741409,0.644698
1,2013/14,0.205839,0.639552,0.766519,0.708187
2,2014/15,0.220221,0.614157,0.756675,0.681297
3,2015/16,0.230485,0.598657,0.729701,0.648354


### Random Forest

In [8]:
evaluate_model_by_season(DecisionTreeClassifier())

Unnamed: 0,Season,Misclassification,Recall,Precision,F1
0,2012/13,0.305357,0.602449,0.585126,0.588298
1,2013/14,0.257293,0.653731,0.658147,0.656145
2,2014/15,0.280913,0.635696,0.618415,0.629134
3,2015/16,0.275535,0.606827,0.628975,0.626158


### Support Vector Machine

In [6]:
evaluate_model_by_season(SVC(kernel='linear'))

Unnamed: 0,Season,Misclassification,Recall,Precision,F1,TPR,FPR,TNR,FNR
0,2012/13,0.229508,0.762781,0.57473,0.655536,0.762781,0.226415,0.773585,0.237219
1,2013/14,0.196721,0.780969,0.670262,0.721393,0.780969,0.185925,0.814075,0.219031
2,2014/15,0.235948,0.752049,0.565485,0.645558,0.752049,0.231148,0.768852,0.247951
3,2015/16,0.235948,0.752049,0.565485,0.645558,0.752049,0.231148,0.768852,0.247951


### Stochastic Gradient Descent

In [9]:
evaluate_model_by_season(SGDClassifier(loss='log'))

Unnamed: 0,Season,Misclassification,Recall,Precision,F1
0,2012/13,0.304674,0.365242,0.688457,0.620947
1,2013/14,0.281879,0.708209,0.739534,0.630904
2,2014/15,0.325085,0.822413,0.783632,0.581389
3,2015/16,0.245066,0.466048,0.73731,0.515083


### Multi-layer Perceptron

In [10]:
evaluate_model_by_season(MLPClassifier())

Unnamed: 0,Season,Misclassification,Recall,Precision,F1
0,2012/13,0.214915,0.677987,0.764227,0.685281
1,2013/14,0.181235,0.716418,0.783967,0.743967
2,2014/15,0.192872,0.710182,0.790251,0.72647
3,2015/16,0.195599,0.689132,0.760986,0.710399
