In [1]:
import pandas as pd
import numpy as np
from statistics import mode
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import scale

In [2]:
ufc = pd.read_csv('../../Data/UFCdata.csv')

In [3]:
ufc = ufc.dropna()
ufc = ufc.drop(['R_fighter','B_fighter','Referee','date','location'],axis=1)

In [4]:
num_cols = ufc.select_dtypes('number').columns
ufc[num_cols] = scale(ufc[num_cols])

In [5]:
ufc_dummy = pd.get_dummies(ufc.loc[:, ufc.columns != 'Winner'])
ufc_dummy['Winner'] = ufc.Winner
ufc = ufc_dummy

In [6]:
ufc_train,ufc_test = train_test_split(ufc,test_size=0.2,random_state=321,stratify=ufc.Winner)

In [7]:
features_train = ufc_train.drop('Winner',axis=1)
features_test = ufc_test.drop('Winner',axis=1)
target_train = ufc_train.Winner
target_test = ufc_test.Winner

In [8]:
k = np.arange(1,1000)
params = {'n_neighbors':k}

In [9]:
knn = KNeighborsClassifier()
knnCV = GridSearchCV(knn,param_grid=params,return_train_score=True,n_jobs=-1)
knnCV.fit(features_train,target_train)

GridSearchCV(estimator=KNeighborsClassifier(), n_jobs=-1,
             param_grid={'n_neighbors': array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,...
       911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923,
       924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936,
       937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949,
       950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962,
       963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975,
       976, 977, 978, 979, 980, 981, 982

In [10]:
optimal_index = knnCV.cv_results_['mean_test_score'].argmax()
train_score = knnCV.cv_results_['mean_train_score'][optimal_index]
validation_score = knnCV.cv_results_['mean_test_score'][optimal_index]
test_score = knnCV.best_estimator_.score(features_test,target_test)
print("Optimal n_neighbors:",knnCV.best_params_['n_neighbors'])
print("Optimal Train Accuracy:",round(train_score,3))
print("Optimal Validation Accuracy:",round(validation_score,3))
print("Optimal Test Accuracy:",round(test_score,3))

Optimal n_neighbors: 115
Optimal Train Accuracy: 0.628
Optimal Validation Accuracy: 0.625
Optimal Test Accuracy: 0.627


In [11]:
baseline_score = sum(target_test == "Red") / len(target_test)
print("Baseline Accuracy:",round(baseline_score,3))

Baseline Accuracy: 0.616


In [12]:
ufc = pd.read_csv('../../Data/UFCDiffData.csv')

In [13]:
ufc = ufc.drop(ufc.columns[0],axis=1)
num_cols = ufc.select_dtypes('number').columns
ufc[num_cols] = scale(ufc[num_cols])

In [14]:
ufc_dummy = pd.get_dummies(ufc.loc[:, ufc.columns != 'Result'])
ufc_dummy['Result'] = ufc.Result
ufc = ufc_dummy

In [15]:
ufc_train,ufc_test = train_test_split(ufc,test_size=0.2,random_state=321,stratify=ufc.Result)

In [16]:
features_train = ufc_train.drop('Result',axis=1)
features_test = ufc_test.drop('Result',axis=1)
target_train = ufc_train.Result
target_test = ufc_test.Result

In [17]:
k = np.arange(1,1000)
params = {'n_neighbors':k}

In [18]:
knn = KNeighborsClassifier()
knnCV = GridSearchCV(knn,param_grid=params,return_train_score=True,n_jobs=-1)
knnCV.fit(features_train,target_train)

GridSearchCV(estimator=KNeighborsClassifier(), n_jobs=-1,
             param_grid={'n_neighbors': array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,...
       911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923,
       924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936,
       937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949,
       950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962,
       963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975,
       976, 977, 978, 979, 980, 981, 982

In [19]:
optimal_index = knnCV.cv_results_['mean_test_score'].argmax()
train_score = knnCV.cv_results_['mean_train_score'][optimal_index]
validation_score = knnCV.cv_results_['mean_test_score'][optimal_index]
test_score = knnCV.best_estimator_.score(features_test,target_test)
print("Optimal n_neighbors:",knnCV.best_params_['n_neighbors'])
print("Optimal Train Accuracy:",round(train_score,3))
print("Optimal Validation Accuracy:",round(validation_score,3))
print("Optimal Test Accuracy:",round(test_score,3))

Optimal n_neighbors: 473
Optimal Train Accuracy: 0.584
Optimal Validation Accuracy: 0.582
Optimal Test Accuracy: 0.566


In [20]:
baseline_score = sum(target_test == "Win") / len(target_test)
print("Baseline Accuracy:",round(baseline_score,3))

Baseline Accuracy: 0.491
