## Import Pandas and the classifiers to experiment with

In [2]:
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing

## Evaluate models on test data

In [3]:
from sklearn import metrics

def evaluate_model(model, row_name):
    training_df = pd.read_csv('data/individual_teams.csv', index_col=0)
    test_df = pd.read_csv('data/test_data.csv', index_col=0)

    target_feature = 'won_match'
    training_columns = [col for col in training_df.columns if col != target_feature]

    model.fit(training_df[training_columns], training_df[target_feature])
    predictions = model.predict(test_df[training_columns])
    
    misclassification = 1 - metrics.accuracy_score(predictions, test_df[target_feature])
    recall = metrics.recall_score(predictions, test_df[target_feature])
    precision = metrics.precision_score(predictions, test_df[target_feature])
    f1 = metrics.f1_score(predictions, test_df[target_feature])
    
    confusion_matrix = metrics.confusion_matrix(predictions, test_df[target_feature])
    true_positives = confusion_matrix[1][1]
    true_negatives = confusion_matrix[0][0]
    false_positives = confusion_matrix[0][1]
    false_negatives = confusion_matrix[1][0]
    true_positive_rate = true_positives / (true_positives + false_negatives)
    true_negative_rate = true_negatives / (true_negatives + false_positives)
    false_positive_rate = false_positives / (true_negatives + false_positives)
    false_negative_rate = false_negatives / (true_positives + false_negatives)
    
    return [
        row_name,
        misclassification,
        recall,
        precision,
        f1,
        true_positive_rate,
        false_positive_rate,
        true_negative_rate,
        false_negative_rate,
    ]

In [4]:
data = [
    evaluate_model(GradientBoostingClassifier(n_estimators=55, learning_rate=0.1), 'Gradient Boost'),
    evaluate_model(RandomForestClassifier(), 'Random Forest'),
    evaluate_model(DecisionTreeClassifier(), 'Decision Tree'),
    evaluate_model(SVC(kernel='linear'), 'Linear SVM'),
    evaluate_model(SGDClassifier(loss='log', n_iter=60), 'SGD'),
    evaluate_model(LogisticRegression(solver='lbfgs'), 'Regression'),
    evaluate_model(MLPClassifier(hidden_layer_sizes=(90,), activation='logistic', max_iter=200), 'Neural Net'),
]

results = pd.DataFrame(data, columns=[
            'Classifier', 
            'Misclassification', 
            'Recall', 
            'Precision', 
            'F1', 
            'TPR', 
            'FPR', 
            'TNR', 
            'FNR'
        ])

results

Unnamed: 0,Classifier,Misclassification,Recall,Precision,F1,TPR,FPR,TNR,FNR
0,Gradient Boost,0.196136,0.779359,0.674884,0.723369,0.779359,0.184119,0.815881,0.220641
1,Random Forest,0.221311,0.753271,0.620955,0.680743,0.753271,0.209719,0.790281,0.246729
2,Decision Tree,0.278103,0.638095,0.619414,0.628616,0.638095,0.229128,0.770872,0.361905
3,Linear SVM,0.229508,0.758551,0.580894,0.657941,0.758551,0.224608,0.775392,0.241449
4,SGD,0.203162,0.761246,0.677966,0.717196,0.761246,0.184956,0.815044,0.238754
5,Regression,0.192623,0.78169,0.684129,0.729663,0.78169,0.179825,0.820175,0.21831
6,Neural Net,0.190281,0.78125,0.693374,0.734694,0.78125,0.175795,0.824205,0.21875
