In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [2]:
ufc = pd.read_csv('../../Data/UFCdata.csv')

In [3]:
ufc.date = pd.to_datetime(ufc.date)

In [4]:
drop_columns = ['B_fighter', 'R_fighter', 'Referee','date','location', "weight_class", "title_bout",
               'R_avg_KD', 'R_avg_opp_KD', 'R_avg_SIG_STR_pct', 'R_avg_opp_SIG_STR_pct',
               'R_avg_TD_pct', 'R_avg_opp_TD_pct', 'R_avg_SUB_ATT',
               'R_avg_opp_SUB_ATT', 'R_avg_REV', 'R_avg_opp_REV',
               'R_avg_SIG_STR_att', 'R_avg_SIG_STR_landed',
               'R_avg_opp_SIG_STR_att', 'R_avg_opp_SIG_STR_landed',
               'R_avg_TOTAL_STR_att', 'R_avg_TOTAL_STR_landed',
               'R_avg_opp_TOTAL_STR_att', 'R_avg_opp_TOTAL_STR_landed',
               'R_avg_TD_att', 'R_avg_TD_landed', 'R_avg_opp_TD_att',
               'R_avg_opp_TD_landed', 'R_avg_HEAD_att', 'R_avg_HEAD_landed',
               'R_avg_opp_HEAD_att', 'R_avg_opp_HEAD_landed', 'R_avg_BODY_att',
               'R_avg_BODY_landed', 'R_avg_opp_BODY_att', 'R_avg_opp_BODY_landed',
               'R_avg_LEG_att', 'R_avg_LEG_landed', 'R_avg_opp_LEG_att',
               'R_avg_opp_LEG_landed', 'R_avg_DISTANCE_att',
               'R_avg_DISTANCE_landed', 'R_avg_opp_DISTANCE_att',
               'R_avg_opp_DISTANCE_landed', 'R_avg_CLINCH_att',
               'R_avg_CLINCH_landed', 'R_avg_opp_CLINCH_att',
               'R_avg_opp_CLINCH_landed', 'R_avg_GROUND_att',
               'R_avg_GROUND_landed', 'R_avg_opp_GROUND_att',
               'R_avg_opp_GROUND_landed', 'R_avg_CTRL_time(seconds)',
               'R_avg_opp_CTRL_time(seconds)', 'R_total_time_fought(seconds)',
               'R_total_rounds_fought', 'R_total_title_bouts',
               'R_current_win_streak', 'R_current_lose_streak',
               'R_longest_win_streak', 'R_wins', 'R_losses', 'R_draw',
               'R_win_by_Decision_Majority', 'R_win_by_Decision_Split',
               'R_win_by_Decision_Unanimous', 'R_win_by_KO/TKO',
               'R_win_by_Submission', 'R_win_by_TKO_Doctor_Stoppage', 'R_Stance',
               'R_Height_cms', 'R_Reach_cms', 'R_Weight_lbs', 'R_age']
stat_columns = ['B_avg_KD', 'B_avg_opp_KD',
               'B_avg_SIG_STR_pct', 'B_avg_opp_SIG_STR_pct', 'B_avg_TD_pct',
               'B_avg_opp_TD_pct', 'B_avg_SUB_ATT', 'B_avg_opp_SUB_ATT',
               'B_avg_REV', 'B_avg_opp_REV', 'B_avg_SIG_STR_att',
               'B_avg_SIG_STR_landed', 'B_avg_opp_SIG_STR_att',
               'B_avg_opp_SIG_STR_landed', 'B_avg_TOTAL_STR_att',
               'B_avg_TOTAL_STR_landed', 'B_avg_opp_TOTAL_STR_att',
               'B_avg_opp_TOTAL_STR_landed', 'B_avg_TD_att', 'B_avg_TD_landed',
               'B_avg_opp_TD_att', 'B_avg_opp_TD_landed', 'B_avg_HEAD_att',
               'B_avg_HEAD_landed', 'B_avg_opp_HEAD_att', 'B_avg_opp_HEAD_landed',
               'B_avg_BODY_att', 'B_avg_BODY_landed', 'B_avg_opp_BODY_att',
               'B_avg_opp_BODY_landed', 'B_avg_LEG_att', 'B_avg_LEG_landed',
               'B_avg_opp_LEG_att', 'B_avg_opp_LEG_landed', 'B_avg_DISTANCE_att',
               'B_avg_DISTANCE_landed', 'B_avg_opp_DISTANCE_att',
               'B_avg_opp_DISTANCE_landed', 'B_avg_CLINCH_att',
               'B_avg_CLINCH_landed', 'B_avg_opp_CLINCH_att',
               'B_avg_opp_CLINCH_landed', 'B_avg_GROUND_att',
               'B_avg_GROUND_landed', 'B_avg_opp_GROUND_att',
               'B_avg_opp_GROUND_landed', 'B_avg_CTRL_time(seconds)',
               'B_avg_opp_CTRL_time(seconds)', 'B_total_time_fought(seconds)',
               'B_total_rounds_fought', 'B_total_title_bouts',
               'B_current_win_streak', 'B_current_lose_streak',
               'B_longest_win_streak', 'B_wins', 'B_losses', 'B_draw',
               'B_win_by_Decision_Majority', 'B_win_by_Decision_Split',
               'B_win_by_Decision_Unanimous', 'B_win_by_KO/TKO',
               'B_win_by_Submission', 'B_win_by_TKO_Doctor_Stoppage', 'B_Stance',
               'B_Height_cms', 'B_Reach_cms', 'B_Weight_lbs', "B_age"]
keep_columns = list(set(ufc.columns) - set(drop_columns) - set(stat_columns))
num_cols_iprocess = ["total_rounds_fought","total_title_bouts","current_win_streak",
                     "current_lose_streak","wins","losses","win_by_Decision_Majority",
                     "win_by_Decision_Split","win_by_Decision_Unanimous","win_by_KO/TKO",
                     "win_by_Submission","win_by_TKO_Doctor_Stoppage","longest_win_streak",
                     "draw"]

In [5]:
ufc_name_list = np.unique(np.append(ufc.R_fighter.values,ufc.B_fighter.values))
ufc_test = pd.DataFrame(columns=keep_columns+[s[2:] for s in stat_columns])
for fighter in ufc_name_list:
    fights = ufc[(ufc.R_fighter == fighter) | (ufc.B_fighter == fighter)]
    last_fight = fights.sort_values(by="date",ascending=False).head(1)
    last_fight.index = [fighter]
    last_fight = last_fight.drop(drop_columns,axis=1)
    last_fight.columns = ['Winner'] + list(last_fight[stat_columns].columns.str[2:])
    ufc_test = ufc_test.append(last_fight)

In [6]:
ufc_test["Stance_Open Stance"] = 0
ufc_test[num_cols_iprocess] = ufc_test[num_cols_iprocess].astype(int)

In [7]:
ufc_test = ufc_test.dropna()
ufc_test_dummy = pd.get_dummies(ufc_test.loc[:, ufc_test.columns!='Winner'])
ufc_test_dummy['Winner'] = ufc_test.Winner
ufc_test = ufc_test_dummy

In [8]:
features_test = ufc_test.drop('Winner',axis=1)
target_test = ufc_test.Winner

In [9]:
ufc_train = ufc.dropna()
ufc_train = ufc_train.drop(['R_fighter','B_fighter','Referee','date','location',"weight_class","title_bout"],axis=1)
ufc_train_dummy = pd.get_dummies(ufc_train.loc[:, ufc_train.columns!='Winner'])
ufc_train_dummy['Winner'] = ufc_train.Winner
ufc_train = ufc_train_dummy
ufc_train = ufc_train.reindex(sorted(ufc_train.columns), axis=1)

In [10]:
features_train = ufc_train.drop('Winner',axis=1)
target_train = ufc_train.Winner

In [11]:
rf = RandomForestClassifier(n_estimators=301,max_depth=21,random_state=321)
rf.fit(features_train,target_train)

RandomForestClassifier(max_depth=21, n_estimators=301, random_state=321)

In [12]:
def predict_fight(fighter1,fighter2):
    red = features_test[features_test.index==fighter1]
    red.columns = ["R_"+i for i in red.columns]
    red.index = [0]
    blue = features_test[features_test.index==fighter2]
    blue.columns = ["B_"+i for i in blue.columns]
    blue.index = [0]
    fight = pd.concat([red,blue],axis=1)
    fight = fight.reindex(sorted(fight.columns), axis=1)
    color = rf.predict(fight)[0]
    if color == "Red":
        return fighter1
    else:
        return fighter2

In [14]:
print(predict_fight("Khabib Nurmagomedov","Conor McGregor"))
print(predict_fight("Jan Blachowicz","Aleksandar Rakic"))
print(predict_fight("Ryan Spann","Ryan Spann"))
print(predict_fight("Katlyn Chookagian","Amanda Ribas"))

Khabib Nurmagomedov
Aleksandar Rakic
Ryan Spann
Katlyn Chookagian


In [15]:
print(predict_fight("Max Holloway","Dustin Poirier"))

Max Holloway
