In [1]:
import pickle
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [2]:
with open('trained_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

In [3]:
#we have an excel sheet with the upcoming fights
#let's use that and our current_fighters csv to create a new dataframe to test our model with!

fights = pd.read_excel('prediction_testing.xlsx')
fighters = pd.read_csv('current_fighters_processed.csv')

In [4]:
fights.head()

Unnamed: 0,fighter1,fighter2,weight_class,date,outcome
0,Holly Holm,Mayra Bueno Silva,Women's Bantamweight,2023-08-15,Mayra Bueno Silva
1,Jack Della Maddalena,Bassil Hafez,Welterweight,2023-08-15,Jack Della Maddalena
2,Francisco Prado,Ottman Azaitar,Lightweight,2023-08-15,Francisco Prado
3,JunYong Park,Albert Duraev,Middleweight,2023-08-15,JunYong Park
4,Norma Dumont,Chelsea Chandler,Women's Featherweight,2023-08-15,Norma Dumont


In [5]:
fighters.head()

Unnamed: 0.1,Unnamed: 0,full_nm,nickname,ht,wt,reach,w,l,d,belt,country,age_in_2023,sig_str_pm,str_acc_percentage,str_abs_pm,str_def_percentage,td_avg_15m,td_acc_percentage,td_def_percentage,sub_avg_15m,win_percentage,Orthodox,Southpaw,Switch,Bantamweight,Catch Weight,Featherweight,Flyweight,Heavyweight,Light Heavyweight,Lightweight,Middleweight,Welterweight,Women's Bantamweight,Women's Featherweight,Women's Flyweight,Women's Strawweight
0,0,Shamil Abdurakhimov,Abrek,75,235,76,20,8,0,0,RUS,29,2.41,0.44,3.02,0.55,1.01,0.23,0.45,0.1,0.714286,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,1,Mariya Agapova,Demonslayer,66,125,68,10,4,0,0,KAZ,27,4.67,0.55,3.82,0.52,0.59,0.66,0.47,0.9,0.714286,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,2,Kevin Aguilar,Angel of Death,67,155,73,17,5,0,0,USA,27,3.96,0.4,4.81,0.52,0.16,0.16,0.78,0.0,0.772727,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,3,Amir Albazi,The Prince,65,125,68,17,1,0,0,RUS,31,2.8,0.39,3.07,0.63,1.71,0.33,0.4,0.7,0.944444,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
4,4,Irene Aldana,,69,135,68,14,7,0,0,MEX,31,4.86,0.39,5.71,0.59,0.17,0.5,0.75,0.3,0.666667,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [6]:
#we'll do some quick cleaning before we merge them together

fighters.drop(['Unnamed: 0', 'nickname'], axis=1, inplace=True)

#for the model, we'll only need the fighter names from fights

new_fights = fights.drop(['weight_class', 'date', 'outcome'], axis=1)

In [7]:
#now we can merge!

prediction_df = new_fights.merge(fighters, left_on='fighter1', right_on='full_nm', suffixes=('', '_f1'))
prediction_df.drop('full_nm', axis=1, inplace=True)

prediction_df = prediction_df.merge(fighters, left_on='fighter2', right_on='full_nm', suffixes=('', '_f2'))
prediction_df.drop(['full_nm','Bantamweight_f2', 'Catch Weight_f2', 'Featherweight_f2',
       'Flyweight_f2', 'Heavyweight_f2', 'Light Heavyweight_f2',
       'Lightweight_f2', 'Middleweight_f2', 'Welterweight_f2',
       "Women's Bantamweight_f2", "Women's Featherweight_f2",
       "Women's Flyweight_f2", "Women's Strawweight_f2"], axis=1, inplace=True)

#let's also one-hot code the weight_class column

#prediction_df_pp = pd.get_dummies(prediction_df['weight_class'])

#prediction_df = pd.concat([prediction_df, prediction_df_pp], axis=1)
#prediction_df = prediction_df.drop('weight_class', axis=1)

#and also rename our fighter1 cols to include _f1 at the end

fighter1_cols = ['ht', 'wt', 'reach', 'w', 'l', 'd', 'belt',
       'country', 'age_in_2023', 'win_percentage', 'Orthodox',
       'Southpaw', 'Switch']

new_cols = {col: col + '_f1' for col in fighter1_cols}
prediction_df.rename(columns = new_cols, inplace=True)


In [8]:
#perfect!
prediction_df

Unnamed: 0,fighter1,fighter2,ht_f1,wt_f1,reach_f1,w_f1,l_f1,d_f1,belt_f1,country_f1,age_in_2023_f1,sig_str_pm,str_acc_percentage,str_abs_pm,str_def_percentage,td_avg_15m,td_acc_percentage,td_def_percentage,sub_avg_15m,win_percentage_f1,Orthodox_f1,Southpaw_f1,Switch_f1,Bantamweight,Catch Weight,Featherweight,Flyweight,Heavyweight,Light Heavyweight,Lightweight,Middleweight,Welterweight,Women's Bantamweight,Women's Featherweight,Women's Flyweight,Women's Strawweight,ht_f2,wt_f2,reach_f2,w_f2,l_f2,d_f2,belt_f2,country_f2,age_in_2023_f2,sig_str_pm_f2,str_acc_percentage_f2,str_abs_pm_f2,str_def_percentage_f2,td_avg_15m_f2,td_acc_percentage_f2,td_def_percentage_f2,sub_avg_15m_f2,win_percentage_f2,Orthodox_f2,Southpaw_f2,Switch_f2
0,Holly Holm,Mayra Bueno Silva,68,135,69,15,6,0,1,USA,31,3.17,0.4,2.77,0.56,0.92,0.31,0.78,0.1,0.714286,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,66,135,66,10,2,1,0,BRA,27,4.2,0.54,4.71,0.58,0.38,0.5,0.67,1.5,0.769231,1,0,0
1,Tucker Lutz,Melsik Baghdasaryan,68,145,72,12,3,0,0,USA,28,4.04,0.49,3.02,0.45,1.77,0.5,0.69,0.2,0.8,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,69,145,70,7,2,0,0,ARM,27,5.63,0.61,3.28,0.55,0.0,0.0,0.76,0.0,0.777778,0,1,0
2,Brandon Moreno,Alexandre Pantoja,67,125,70,21,6,2,1,MEX,31,3.55,0.4,3.19,0.56,1.8,0.45,0.67,0.6,0.724138,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,65,125,67,25,5,0,0,BRA,31,4.25,0.48,3.38,0.54,1.37,0.4,0.67,1.2,0.833333,1,0,0
3,Dricus Du Plessis,Robert Whittaker,73,185,76,19,2,0,0,USA,31,6.72,0.55,3.73,0.53,2.83,0.47,0.5,1.3,0.904762,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,72,185,73,25,6,0,1,AUS,29,4.48,0.42,3.26,0.61,0.82,0.37,0.84,0.0,0.806452,1,0,0
4,Jalin Turner,Dan Hooker,75,155,77,13,6,0,0,USA,31,5.63,0.46,3.79,0.43,0.89,0.66,0.74,1.6,0.684211,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,72,155,75,22,12,0,0,NZL,31,4.83,0.47,4.61,0.51,0.85,0.36,0.81,0.3,0.647059,0,0,1
5,Alonzo Menifield,Jimmy Crute,72,205,76,13,3,1,0,USA,31,3.07,0.27,7.33,0.52,0.0,0.0,0.0,0.0,0.764706,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,74,205,74,12,3,1,0,AUS,31,3.67,0.55,3.04,0.45,5.19,0.64,0.6,2.3,0.75,1,0,0


In [9]:
#as stated before, our dataset is for scientific exploration only, not gambling so it's okay that
#our current_fighters database doesn't include some of the newer fighters (hence the shortened dataset)
#but we can still use our model to see what we can predict!


In [10]:
#let's make some predictions!
#first, we need to make sure our columns match our model

prediction = prediction_df[['ht_f1', 'wt_f1','reach_f1', 'w_f1', 'l_f1', 'd_f1','belt_f1', 'sig_str_pm', 'str_acc_percentage',
                                                        'str_abs_pm', 'str_def_percentage',
                                                        'td_def_percentage', 'sub_avg_15m', 'win_percentage_f1','ht_f2', 'wt_f2','reach_f2', 'w_f2', 'l_f2', 'd_f2','belt_f2', 'sig_str_pm', 'str_acc_percentage',
                                                        'str_abs_pm_f2', 'str_def_percentage_f2',
                                                        'td_def_percentage_f2', 'sub_avg_15m_f2', 'win_percentage_f2','Bantamweight', 'Catch Weight', 'Featherweight',
       'Flyweight', 'Heavyweight', 'Light Heavyweight', 'Lightweight',
       'Middleweight', 'Welterweight', "Women's Bantamweight",
       "Women's Featherweight", "Women's Flyweight", "Women's Strawweight", 'Orthodox_f1','Southpaw_f1', "Orthodox_f2", "Southpaw_f2"]]

prediction

Unnamed: 0,ht_f1,wt_f1,reach_f1,w_f1,l_f1,d_f1,belt_f1,sig_str_pm,str_acc_percentage,str_abs_pm,str_def_percentage,td_def_percentage,sub_avg_15m,win_percentage_f1,ht_f2,wt_f2,reach_f2,w_f2,l_f2,d_f2,belt_f2,sig_str_pm.1,str_acc_percentage.1,str_abs_pm_f2,str_def_percentage_f2,td_def_percentage_f2,sub_avg_15m_f2,win_percentage_f2,Bantamweight,Catch Weight,Featherweight,Flyweight,Heavyweight,Light Heavyweight,Lightweight,Middleweight,Welterweight,Women's Bantamweight,Women's Featherweight,Women's Flyweight,Women's Strawweight,Orthodox_f1,Southpaw_f1,Orthodox_f2,Southpaw_f2
0,68,135,69,15,6,0,1,3.17,0.4,2.77,0.56,0.78,0.1,0.714286,66,135,66,10,2,1,0,3.17,0.4,4.71,0.58,0.67,1.5,0.769231,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0
1,68,145,72,12,3,0,0,4.04,0.49,3.02,0.45,0.69,0.2,0.8,69,145,70,7,2,0,0,4.04,0.49,3.28,0.55,0.76,0.0,0.777778,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1
2,67,125,70,21,6,2,1,3.55,0.4,3.19,0.56,0.67,0.6,0.724138,65,125,67,25,5,0,0,3.55,0.4,3.38,0.54,0.67,1.2,0.833333,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0
3,73,185,76,19,2,0,0,6.72,0.55,3.73,0.53,0.5,1.3,0.904762,72,185,73,25,6,0,1,6.72,0.55,3.26,0.61,0.84,0.0,0.806452,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0
4,75,155,77,13,6,0,0,5.63,0.46,3.79,0.43,0.74,1.6,0.684211,72,155,75,22,12,0,0,5.63,0.46,4.61,0.51,0.81,0.3,0.647059,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
5,72,205,76,13,3,1,0,3.07,0.27,7.33,0.52,0.0,0.0,0.764706,74,205,74,12,3,1,0,3.07,0.27,3.04,0.45,0.6,2.3,0.75,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0


In [11]:
predictions = loaded_model.predict(prediction)

In [12]:
prediction = prediction.copy()
prediction_df['predicted_outcome'] = predictions

In [13]:
prediction_df

Unnamed: 0,fighter1,fighter2,ht_f1,wt_f1,reach_f1,w_f1,l_f1,d_f1,belt_f1,country_f1,age_in_2023_f1,sig_str_pm,str_acc_percentage,str_abs_pm,str_def_percentage,td_avg_15m,td_acc_percentage,td_def_percentage,sub_avg_15m,win_percentage_f1,Orthodox_f1,Southpaw_f1,Switch_f1,Bantamweight,Catch Weight,Featherweight,Flyweight,Heavyweight,Light Heavyweight,Lightweight,Middleweight,Welterweight,Women's Bantamweight,Women's Featherweight,Women's Flyweight,Women's Strawweight,ht_f2,wt_f2,reach_f2,w_f2,l_f2,d_f2,belt_f2,country_f2,age_in_2023_f2,sig_str_pm_f2,str_acc_percentage_f2,str_abs_pm_f2,str_def_percentage_f2,td_avg_15m_f2,td_acc_percentage_f2,td_def_percentage_f2,sub_avg_15m_f2,win_percentage_f2,Orthodox_f2,Southpaw_f2,Switch_f2,predicted_outcome
0,Holly Holm,Mayra Bueno Silva,68,135,69,15,6,0,1,USA,31,3.17,0.4,2.77,0.56,0.92,0.31,0.78,0.1,0.714286,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,66,135,66,10,2,1,0,BRA,27,4.2,0.54,4.71,0.58,0.38,0.5,0.67,1.5,0.769231,1,0,0,fighter2_wins
1,Tucker Lutz,Melsik Baghdasaryan,68,145,72,12,3,0,0,USA,28,4.04,0.49,3.02,0.45,1.77,0.5,0.69,0.2,0.8,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,69,145,70,7,2,0,0,ARM,27,5.63,0.61,3.28,0.55,0.0,0.0,0.76,0.0,0.777778,0,1,0,fighter1_wins
2,Brandon Moreno,Alexandre Pantoja,67,125,70,21,6,2,1,MEX,31,3.55,0.4,3.19,0.56,1.8,0.45,0.67,0.6,0.724138,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,65,125,67,25,5,0,0,BRA,31,4.25,0.48,3.38,0.54,1.37,0.4,0.67,1.2,0.833333,1,0,0,fighter2_wins
3,Dricus Du Plessis,Robert Whittaker,73,185,76,19,2,0,0,USA,31,6.72,0.55,3.73,0.53,2.83,0.47,0.5,1.3,0.904762,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,72,185,73,25,6,0,1,AUS,29,4.48,0.42,3.26,0.61,0.82,0.37,0.84,0.0,0.806452,1,0,0,fighter1_wins
4,Jalin Turner,Dan Hooker,75,155,77,13,6,0,0,USA,31,5.63,0.46,3.79,0.43,0.89,0.66,0.74,1.6,0.684211,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,72,155,75,22,12,0,0,NZL,31,4.83,0.47,4.61,0.51,0.85,0.36,0.81,0.3,0.647059,0,0,1,fighter1_wins
5,Alonzo Menifield,Jimmy Crute,72,205,76,13,3,1,0,USA,31,3.07,0.27,7.33,0.52,0.0,0.0,0.0,0.0,0.764706,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,74,205,74,12,3,1,0,AUS,31,3.67,0.55,3.04,0.45,5.19,0.64,0.6,2.3,0.75,1,0,0,fighter2_wins


In [37]:
#now we can compare the predictions with the actual outcomes

comparison = fights.copy()
comparison['predicted_outcome'] = prediction_df['predicted_outcome']

for index, row in comparison.iterrows():
    if row['predicted_outcome'] == 'fighter2_wins':
        comparison.loc[index, 'predicted_outcome'] = row['fighter2']
    if row['predicted_outcome'] == 'fighter1_wins':
        comparison.loc[index, 'predicted_outcome'] = row['fighter1']

comparison.head(6)

Unnamed: 0,fighter1,fighter2,weight_class,date,outcome,predicted_outcome
0,Holly Holm,Mayra Bueno Silva,Women's Bantamweight,2023-08-15,Mayra Bueno Silva,Mayra Bueno Silva
1,Jack Della Maddalena,Bassil Hafez,Welterweight,2023-08-15,Jack Della Maddalena,Jack Della Maddalena
2,Francisco Prado,Ottman Azaitar,Lightweight,2023-08-15,Francisco Prado,Ottman Azaitar
3,JunYong Park,Albert Duraev,Middleweight,2023-08-15,JunYong Park,JunYong Park
4,Norma Dumont,Chelsea Chandler,Women's Featherweight,2023-08-15,Norma Dumont,Norma Dumont
5,Terrance McKinney,Nazim Sadykhov,Lightweight,2023-08-15,Nazim Sadykhov,Nazim Sadykhov


In [36]:
#let's show off the accuracy of the model

accuracy = len(comparison[comparison['predicted_outcome'] == comparison['outcome']]) / comparison['predicted_outcome'].count()

print(accuracy)

0.8333333333333334


In [38]:
#wow! An 83% accuracy is a huge jump up from our model's base accuracy of 71%!