In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Read the per-fight UFC table from disk into the working frame `ufc`.
ufc = pd.read_csv(filepath_or_buffer='../../Data/UFCdata.csv')

In [3]:
# Keep only complete rows, then discard the identifier / bookkeeping columns
# that are not used as model inputs.
# NOTE(review): dropna() runs before the column drop, so a row whose only
# missing value sits in one of the discarded columns is removed as well —
# confirm that this is intended.
ufc = (
    ufc
    .dropna()
    .drop(columns=['R_fighter', 'B_fighter', 'Referee', 'date', 'location'])
)

In [4]:
# One-hot encode every column except the target, then put the untouched
# target column back so it stays a single label column.
ufc_dummy = pd.get_dummies(ufc.drop(columns=['Winner']))
ufc_dummy['Winner'] = ufc['Winner']
ufc = ufc_dummy

In [5]:
# 80/20 split, stratified on the target so both parts keep the same class mix.
ufc_train, ufc_test = train_test_split(
    ufc,
    stratify=ufc['Winner'],
    test_size=0.2,
    random_state=321,
)

In [6]:
# Separate the label column from the predictors for each side of the split.
target_train, target_test = ufc_train['Winner'], ufc_test['Winner']
features_train = ufc_train.drop(columns='Winner')
features_test = ufc_test.drop(columns='Winner')

In [7]:
# Accumulators filled by the per-feature loop, plus the forest reused for every fit.
feature_names, accs = [], []
rf = RandomForestClassifier(
    n_estimators=270,
    max_depth=15,
    random_state=123,
)

In [8]:
# Score every feature on its own: fit the forest on one column at a time and
# record the accuracy it reaches on the held-out rows.
#
# The accumulators are (re)created here so that running this cell again does
# not append a second copy of every feature to the lists.
feature_names = []
accs = []
for k, column_name in enumerate(features_train.columns):
    # The estimator expects 2-D input, so the single column becomes (n_rows, 1).
    feature_train = features_train.iloc[:, k].to_numpy().reshape(-1, 1)
    feature_test = features_test.iloc[:, k].to_numpy().reshape(-1, 1)
    rf.fit(feature_train, target_train)
    accuracy = rf.score(feature_test, target_test)
    # Append name and score together so the two lists can never get out of step.
    feature_names.append(column_name)
    accs.append(accuracy)

In [9]:
# Collect the per-feature scores in one table and show it best-first.
results = pd.DataFrame({'feature': feature_names, 'accuracy': accs})
results.sort_values(by='accuracy', ascending=False).round(3)

Unnamed: 0,feature,accuracy
134,R_age,0.627
67,R_avg_KD,0.622
125,R_win_by_Decision_Split,0.621
121,R_wins,0.621
126,R_win_by_Decision_Unanimous,0.618
...,...,...
15,B_avg_TOTAL_STR_att,0.555
11,B_avg_SIG_STR_att,0.549
83,R_avg_opp_TOTAL_STR_att,0.544
47,B_avg_CTRL_time(seconds),0.540


In [10]:
# Majority-class baseline: the accuracy obtained on the test rows by always
# predicting the label that is most frequent in the training rows. Deriving
# the label (rather than hard-coding one) keeps the baseline correct even if
# the class balance of the data changes.
majority_class = target_train.mode().iloc[0]
baseline_score = (target_test == majority_class).mean()
print("Baseline Accuracy:",round(baseline_score,3))
print("Features Better Than Baseline")
# Only the features whose single-column model beats that baseline, best-first.
results[results.accuracy > baseline_score].sort_values('accuracy',ascending=False).round(3)

Baseline Accuracy: 0.616
Features Better Than Baseline


Unnamed: 0,feature,accuracy
134,R_age,0.627
67,R_avg_KD,0.622
121,R_wins,0.621
125,R_win_by_Decision_Split,0.621
126,R_win_by_Decision_Unanimous,0.618
68,R_avg_opp_KD,0.617


In [11]:
# Read the fighter-difference table from disk; `ufc` now refers to this frame.
ufc = pd.read_csv(filepath_or_buffer='../../Data/UFCDiffData.csv')

In [12]:
# Discard the first column by position, then one-hot encode every remaining
# column except the target and re-attach the untouched target column.
# NOTE(review): the positional drop removes whatever column is first at the
# time the cell runs, so running this cell twice drops a further column —
# presumably the first column is a saved index; confirm.
ufc = ufc.drop(columns=ufc.columns[0])
ufc_dummy = pd.get_dummies(ufc.drop(columns=['Result']))
ufc_dummy['Result'] = ufc['Result']
ufc = ufc_dummy

In [13]:
# 80/20 split, stratified on the target so both parts keep the same class mix.
ufc_train, ufc_test = train_test_split(
    ufc,
    stratify=ufc['Result'],
    test_size=0.2,
    random_state=321,
)

In [14]:
# Separate the label column from the predictors for each side of the split.
target_train, target_test = ufc_train['Result'], ufc_test['Result']
features_train = ufc_train.drop(columns='Result')
features_test = ufc_test.drop(columns='Result')

In [15]:
# Accumulators filled by the per-feature loop, plus the forest reused for every fit.
feature_names, accs = [], []
rf = RandomForestClassifier(
    n_estimators=850,
    max_depth=20,
    random_state=123,
)

In [16]:
# Score every feature on its own: fit the forest on one column at a time and
# record the accuracy it reaches on the held-out rows.
#
# The accumulators are (re)created here so that running this cell again does
# not append a second copy of every feature to the lists.
feature_names = []
accs = []
for k, column_name in enumerate(features_train.columns):
    # The estimator expects 2-D input, so the single column becomes (n_rows, 1).
    feature_train = features_train.iloc[:, k].to_numpy().reshape(-1, 1)
    feature_test = features_test.iloc[:, k].to_numpy().reshape(-1, 1)
    rf.fit(feature_train, target_train)
    accuracy = rf.score(feature_test, target_test)
    # Append name and score together so the two lists can never get out of step.
    feature_names.append(column_name)
    accs.append(accuracy)

In [17]:
# Collect the per-feature scores in one table and show it best-first.
results = pd.DataFrame({'feature': feature_names, 'accuracy': accs})
results.sort_values(by='accuracy', ascending=False).round(3)

Unnamed: 0,feature,accuracy
65,Diff_age,0.546
57,DIFF_win_by_Decision_Split,0.514
18,DIFF_avg_TD_att,0.513
26,DIFF_avg_BODY_att,0.513
19,DIFF_avg_TD_landed,0.512
...,...,...
27,DIFF_avg_BODY_landed,0.473
0,DIFF_avg_KD,0.470
21,DIFF_avg_opp_TD_landed,0.467
13,DIFF_avg_opp_SIG_STR_landed,0.467


In [18]:
# Majority-class baseline: the accuracy obtained on the test rows by always
# predicting the label that is most frequent in the training rows. A fixed
# label is only the majority baseline when it happens to be the most frequent
# one; deriving it from the training labels avoids understating the bar that
# a feature has to clear.
majority_class = target_train.mode().iloc[0]
baseline_score = (target_test == majority_class).mean()
print("Baseline Accuracy:",round(baseline_score,3))
print("Features Better Than Baseline")
# Only the features whose single-column model beats that baseline, best-first.
results[results.accuracy > baseline_score].sort_values('accuracy',ascending=False).round(3)

Baseline Accuracy: 0.491
Features Better Than Baseline


Unnamed: 0,feature,accuracy
65,Diff_age,0.546
57,DIFF_win_by_Decision_Split,0.514
18,DIFF_avg_TD_att,0.513
26,DIFF_avg_BODY_att,0.513
19,DIFF_avg_TD_landed,0.512
36,DIFF_avg_opp_DISTANCE_att,0.511
43,DIFF_avg_GROUND_landed,0.508
55,DIFF_losses,0.508
50,DIFF_total_title_bouts,0.508
1,DIFF_avg_opp_KD,0.507
