# UFC Logit Model
## Marshall Ferguson 8/2021

This notebook contains a logit model to predict the winners of UFC bouts. The data is from Kaggle and ranges from 3/21/10 to 7/10/21.

In [27]:
# Imports

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [3]:
# Read the historical bout data from a CSV located relative to the working
# directory. The notebook intro describes it as Kaggle data from 3/21/10 to 7/10/21.

df = pd.read_csv('ufc-master.csv')

# Show the raw frame as the cell output.
df

Unnamed: 0,R_fighter,B_fighter,R_odds,B_odds,R_ev,B_ev,date,location,country,Winner,...,R_pass_bout,B_pass_bout,R_rev_bout,B_rev_bout,r_dec_odds,b_dec_odds,r_sub_odds,b_sub_odds,r_ko_odds,b_ko_odds
0,Dustin Poirier,Conor McGregor,-129,100,77.519380,100.000000,7/10/2021,"Las Vegas, Nevada, USA",USA,Red,...,,,,,550.0,700.0,575.0,2200.0,165.0,150.0
1,Gilbert Burns,Stephen Thompson,130,-162,130.000000,61.728395,7/10/2021,"Las Vegas, Nevada, USA",USA,Red,...,,,,,400.0,150.0,475.0,2500.0,525.0,250.0
2,Tai Tuivasa,Greg Hardy,-130,105,76.923077,105.000000,7/10/2021,"Las Vegas, Nevada, USA",USA,Red,...,,,,,350.0,350.0,1600.0,1800.0,160.0,265.0
3,Irene Aldana,Yana Kunitskaya,-107,-115,93.457944,86.956522,7/10/2021,"Las Vegas, Nevada, USA",USA,Red,...,,,,,160.0,150.0,1200.0,1200.0,425.0,800.0
4,Sean O'Malley,Kris Moutinho,-1000,580,10.000000,580.000000,7/10/2021,"Las Vegas, Nevada, USA",USA,Red,...,,,,,400.0,1400.0,800.0,1600.0,-300.0,1200.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4778,Duane Ludwig,Darren Elkins,-155,135,64.516129,135.000000,3/21/2010,"Broomfield, Colorado, USA",USA,Blue,...,0.0,0.0,0.0,0.0,,,,,,
4779,John Howard,Daniel Roberts,-210,175,47.619048,175.000000,3/21/2010,"Broomfield, Colorado, USA",USA,Red,...,0.0,1.0,0.0,1.0,,,,,,
4780,Brendan Schaub,Chase Gormley,-260,220,38.461538,220.000000,3/21/2010,"Broomfield, Colorado, USA",USA,Red,...,0.0,0.0,0.0,0.0,,,,,,
4781,Mike Pierce,Julio Paulino,-420,335,23.809524,335.000000,3/21/2010,"Broomfield, Colorado, USA",USA,Red,...,6.0,0.0,0.0,0.0,,,,,,


In [5]:
# Candidate predictor columns; every name ends in '_dif'.
# NOTE(review): 'streak_dif' does not appear among the columns of the displayed
# result, and DataFrame.filter silently skips names that are not present —
# check whether a different column name was intended.
vars_of_interest = [
    'streak_dif',
    'win_streak_dif',
    'longest_win_streak_dif',
    'win_dif',
    'loss_dif',
    'total_round_dif',
    'total_title_bout_dif',
    'ko_dif',
    'sub_dif',
    'height_dif',
    'reach_dif',
    'age_dif',
    'sig_str_dif',
    'avg_sub_att_dif',
    'avg_td_dif',
]

# Keep only the listed columns that exist in df, in the order listed above.
df_dif = df.filter(items=vars_of_interest, axis=1)

df_dif

Unnamed: 0,win_streak_dif,longest_win_streak_dif,win_dif,loss_dif,total_round_dif,total_title_bout_dif,ko_dif,sub_dif,height_dif,reach_dif,age_dif,sig_str_dif,avg_sub_att_dif,avg_td_dif
0,-2,2,-10,-3,-37,2,-2,-3,0.00,5.08,0,-0.300000,-1.100000,-0.830000
1,2,1,-1,0,9,1,2,-4,5.08,10.16,4,0.940000,-0.600000,-1.860000
2,-2,-1,1,0,5,0,1,0,7.62,12.70,4,0.360000,0.000000,0.200000
3,2,-1,-1,-2,-11,1,-1,-1,-7.62,0.00,-2,-1.330000,0.100000,1.310000
4,-1,-5,-6,-1,-13,0,-4,0,-10.16,-10.16,2,-3.440000,-0.600000,-0.630000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4778,0,-2,-2,1,-5,0,-1,0,0.00,2.54,6,-13.666667,0.000000,0.000000
4779,-3,-3,-3,0,-9,0,-1,0,7.62,7.62,-2,-18.000000,-1.000000,-4.666667
4780,0,0,0,0,0,-1,0,0,-2.54,-2.12,0,-4.000000,1.000000,1.000000
4781,0,-1,-1,1,-6,0,0,0,10.16,7.62,-5,-40.500000,0.000000,-3.500000


In [33]:
# Binary target: 1 when the Blue corner won, 0 for every other value of
# 'Winner' (including missing values, which compare unequal to 'Blue').
# A vectorized comparison replaces the per-row `cond and 1 or 0` lambda, an
# idiom that silently misbehaves if the "true" value is ever falsy.
# The explicit 'int64' keeps the dtype the same on every platform.
y = df['Winner'].eq('Blue').astype('int64')
y.name = 'B_win'

In [40]:
# Hold out 33% of the bouts as a test set; the fixed random_state makes the
# shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(df_dif, y, test_size=0.33,
                                                    random_state=42)

In [36]:
# statsmodels does not add an intercept by itself, so add a constant column.
# Note that this rebinds X_train to the augmented frame.
X_train = sm.add_constant(X_train)

# Fit the logit model; disp=0 suppresses the optimizer's convergence printout.
sm_logit_model = sm.Logit(endog=y_train, exog=X_train).fit(disp=0)

# Coefficient table and fit statistics as the cell output.
sm_logit_model.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,B_win,No. Observations:,3204.0
Model:,Logit,Df Residuals:,3189.0
Method:,MLE,Df Model:,14.0
Date:,"Sat, 21 Aug 2021",Pseudo R-squ.:,0.02632
Time:,14:46:05,Log-Likelihood:,-2096.5
converged:,True,LL-Null:,-2153.2
Covariance Type:,nonrobust,LLR p-value:,1.249e-17

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.4103,0.040,-10.249,0.000,-0.489,-0.332
win_streak_dif,0.0442,0.024,1.864,0.062,-0.002,0.091
longest_win_streak_dif,0.0614,0.029,2.109,0.035,0.004,0.118
win_dif,-0.0366,0.026,-1.397,0.162,-0.088,0.015
loss_dif,0.0043,0.015,0.280,0.780,-0.026,0.034
total_round_dif,-0.0020,0.004,-0.496,0.620,-0.010,0.006
total_title_bout_dif,-0.0119,0.025,-0.475,0.635,-0.061,0.037
ko_dif,0.0170,0.028,0.610,0.542,-0.038,0.071
sub_dif,0.0286,0.030,0.956,0.339,-0.030,0.087


In [26]:
# Discard the shuffled row labels so the true outcomes line up positionally
# with the predictions below.
y_train = y_train.reset_index(drop=True)

# In-sample predictions from the fitted statsmodels logit, re-indexed from 0.
# The displayed values are fractional, i.e. probabilities rather than labels.
sm_yhat = pd.Series(sm_logit_model.predict(X_train)).reset_index(drop=True)

# Side-by-side table of actual vs. predicted values, tagged as training rows.
sm_train_preds = pd.concat(
    [y_train, sm_yhat],
    axis=1,
    keys=['True Result', 'Predicted Result'],
).assign(Subset='Training')

sm_train_preds

Unnamed: 0,True Result,Predicted Result,Subset
0,0,0.374246,Training
1,0,0.336623,Training
2,1,0.379847,Training
3,1,0.407528,Training
4,1,0.401227,Training
...,...,...,...
3199,0,0.648163,Training
3200,0,0.349002,Training
3201,0,0.407474,Training
3202,0,0.395481,Training


In [41]:
# Split the data again before fitting the scikit-learn model. This rebinds
# X_train, which the statsmodels cell replaced with a version that has a
# constant column added.
# NOTE(review): random_state is 5 here but 42 in the earlier split, so when the
# notebook is run top to bottom the two models are fit on different
# partitions — confirm this is intentional.
X_train, X_test, y_train, y_test = train_test_split(df_dif, y, test_size=0.33,
                                                    random_state=5)

In [48]:
# Discard the shuffled row labels so the true outcomes line up positionally
# with the predictions below.
y_train = y_train.reset_index(drop=True)

# Fit scikit-learn's logistic regression on the training features
# (fit() returns the estimator itself, so it is called as a separate step).
skl_logit_model = LogisticRegression(random_state=5)
skl_logit_model.fit(X_train, y_train)

# In-sample class labels predicted by the fitted model, re-indexed from 0.
skl_yhat = pd.Series(skl_logit_model.predict(X_train)).reset_index(drop=True)

# Side-by-side table of actual vs. predicted labels, tagged as training rows.
skl_train_preds = pd.concat(
    [y_train, skl_yhat],
    axis=1,
    keys=['True Result', 'Predicted Result'],
).assign(Subset='Training')

skl_train_preds

Unnamed: 0,True Result,Predicted Result,Subset
0,0,0,Training
1,0,0,Training
2,1,0,Training
3,1,0,Training
4,1,0,Training
...,...,...,...
3199,0,1,Training
3200,0,0,Training
3201,0,0,Training
3202,0,0,Training


In [54]:
# Mean accuracy of the scikit-learn model on the held-out test split.
skl_logit_model.score(X_test, y_test)

0.5794806839772008

In [56]:
# Read in upcoming event data
# NOTE(review): the path contains a doubled separator ('data//'); presumably
# 'data/' was intended — confirm before changing the literal.

df_upcoming = pd.read_csv('data//upcoming-event-8.21.21.csv')

df_upcoming

Unnamed: 0,R_fighter,B_fighter,R_odds,B_odds,R_ev,B_ev,date,location,country,Winner,...,finish_details,finish_round,finish_round_time,total_fight_time_secs,r_dec_odds,b_dec_odds,r_sub_odds,b_sub_odds,r_ko_odds,b_ko_odds
0,Jared Cannonier,Kelvin Gastelum,-150.0,130.0,66.666667,130.0,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,180.0,200.0,2200.0,1400.0,275.0,475.0
1,Clay Guida,Mark Madsen,145.0,-165.0,145.0,60.606061,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,260.0,125.0,675.0,525.0,725.0,825.0
2,Parker Porter,Chase Sherman,160.0,-190.0,160.0,52.631579,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,425.0,290.0,550.0,1800.0,700.0,115.0
3,Mana Martinez,Trevin Jones,,,,,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,,,,,,
4,Vinc Pichel,Austin Hubbard,-115.0,-105.0,86.956522,95.238095,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,150.0,195.0,575.0,1000.0,1100.0,450.0
5,Alexandre Pantoja,Brandon Royval,-165.0,145.0,60.606061,145.0,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,165.0,300.0,575.0,700.0,375.0,700.0
6,Austin Lingo,Luis Saldana,105.0,-125.0,105.0,80.0,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,320.0,210.0,1200.0,550.0,400.0,300.0
7,Brian Kelleher,Domingo Pilarte,-180.0,155.0,55.555556,155.0,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,350.0,380.0,,,,
8,Bea Malecki,Josiane Nunes,-155.0,135.0,64.516129,135.0,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,165.0,315.0,800.0,900.0,350.0,425.0
9,William Knight,Fabio Cherant,-190.0,160.0,52.631579,160.0,2021-08-21,"Las Vegas, Nevada, USA",USA,,...,,,,,350.0,475.0,750.0,675.0,125.0,550.0


In [57]:
# Select the same predictor columns that were used to build the training
# features; names missing from the frame are skipped by filter().
df_upcoming_dif = df_upcoming.filter(items=vars_of_interest, axis=1)

df_upcoming_dif

Unnamed: 0,win_streak_dif,longest_win_streak_dif,win_dif,loss_dif,total_round_dif,total_title_bout_dif,ko_dif,sub_dif,height_dif,reach_dif,age_dif,sig_str_dif,avg_sub_att_dif,avg_td_dif
0,0,2,5,2,29,1,-2,2,-5.08,-15.24,-8,-0.18,0.1,1.0
1,1,-2,-16,-15,-86,0,-1,-4,2.54,5.08,-3,-0.16,-0.7,4.97
2,-1,1,2,5,18,0,2,0,10.16,7.62,-5,-1.69,-1.1,-1.65
3,1,1,1,-1,3,0,1,0,-7.62,0.0,6,1.73,-6.3,1.17
4,-1,-3,-3,1,-4,0,0,0,0.0,-2.54,-9,0.46,-0.3,-3.02
5,-1,-1,-5,-2,-19,0,-2,0,10.16,2.54,-2,-0.53,1.5,-0.15
6,1,1,1,-1,0,0,1,0,2.54,7.62,3,0.82,0.6,-1.0
7,0,-1,-5,-4,-16,0,-2,-2,15.24,17.78,-4,-1.88,0.6,1.0
8,-2,-2,-2,0,-5,0,0,-1,-17.78,-17.78,-2,-6.78,-1.4,0.0
9,0,-3,-3,1,-7,0,-2,0,7.62,0.0,-7,-2.56,-0.3,-2.56


In [58]:
# Predict a class label for each upcoming bout. The target was encoded as 1
# when Winner == 'Blue' and 0 otherwise, so 1 means a predicted Blue win.
# NOTE(review): the saved output lists 12 labels while the displayed upcoming
# frame has 10 rows — re-run to confirm the output is current.
upcoming_preds = skl_logit_model.predict(df_upcoming_dif)

upcoming_preds

array([0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)