# UFC Fight Prediction Project (Michael and Shayan)

### Importing and Cleaning the Data

In [28]:
import pandas as pd                  # Pandas
import numpy as np                   # Numpy
from matplotlib import pyplot as plt # Matplotlib

# Package to implement Random Forest
import sklearn
from sklearn.ensemble import RandomForestClassifier

# Package to implement Grid Search Cross Validation
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import KFold 

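# Package for generating confusion matrix
# NOTE: plot_confusion_matrix was removed in scikit-learn 1.2; on newer versions use
# sklearn.metrics.ConfusionMatrixDisplay.from_estimator instead.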
from sklearn.metrics import plot_confusion_matrix

# Package for generating classification report
from sklearn.metrics import classification_report

# Package to record time
import time

# Package for Data pretty printer
from pprint import pprint

%matplotlib inline

In [29]:
# Load the raw UFC fight data from the CSV file in the working directory.
ufc_df = pd.read_csv(filepath_or_buffer='ufc-master.csv')

# Preview the first five rows to sanity-check the load.
ufc_df.head(n=5)

Unnamed: 0,R_fighter,B_fighter,R_odds,B_odds,R_ev,B_ev,date,location,country,Winner,...,finish_details,finish_round,finish_round_time,total_fight_time_secs,r_dec_odds,b_dec_odds,r_sub_odds,b_sub_odds,r_ko_odds,b_ko_odds
0,Thiago Santos,Johnny Walker,-150.0,130,66.666667,130.0,2021-10-02,"Las Vegas, Nevada, USA",USA,Red,...,,5.0,5:00,1500.0,800.0,900.0,2000.0,1600.0,-110.0,175.0
1,Alex Oliveira,Niko Price,170.0,-200,170.0,50.0,2021-10-02,"Las Vegas, Nevada, USA",USA,Blue,...,,3.0,5:00,900.0,450.0,350.0,700.0,1100.0,550.0,120.0
2,Misha Cirkunov,Krzysztof Jotko,110.0,-130,110.0,76.923077,2021-10-02,"Las Vegas, Nevada, USA",USA,Blue,...,,3.0,5:00,900.0,550.0,275.0,275.0,1400.0,600.0,185.0
3,Alexander Hernandez,Mike Breeden,-675.0,475,14.814815,475.0,2021-10-02,"Las Vegas, Nevada, USA",USA,Red,...,Punch,1.0,1:20,80.0,175.0,900.0,500.0,3500.0,110.0,1100.0
4,Joe Solecki,Jared Gordon,-135.0,115,74.074074,115.0,2021-10-02,"Las Vegas, Nevada, USA",USA,Blue,...,,3.0,5:00,900.0,165.0,200.0,400.0,1200.0,900.0,600.0


In [30]:
# Drop every column that is a string, is unnecessary for the analysis, or
# describes how the fight ended (finish, round, time, ...). Outcome details are
# only known after the fact, so they cannot be used when predicting a future
# fight for moneyline (winner / loser) betting.
fight_info_cols = [
    'R_fighter', 'B_fighter',
    'date', 'location', 'country',
    'gender',
    'finish', 'finish_details',
    'empty_arena', 'constant_1',
    'finish_round', 'finish_round_time', 'total_fight_time_secs',
]

# Match-level rank columns plus the per-division ranks of the red corner.
red_rank_cols = [
    'B_match_weightclass_rank', 'R_match_weightclass_rank',
    "R_Women's Flyweight_rank", "R_Women's Featherweight_rank",
    "R_Women's Strawweight_rank", "R_Women's Bantamweight_rank",
    'R_Heavyweight_rank', 'R_Light Heavyweight_rank',
    'R_Middleweight_rank', 'R_Welterweight_rank',
    'R_Lightweight_rank', 'R_Featherweight_rank',
    'R_Bantamweight_rank', 'R_Flyweight_rank',
    'R_Pound-for-Pound_rank',
]

# Per-division ranks of the blue corner.
blue_rank_cols = [
    "B_Women's Flyweight_rank", "B_Women's Featherweight_rank",
    "B_Women's Strawweight_rank", "B_Women's Bantamweight_rank",
    'B_Heavyweight_rank', 'B_Light Heavyweight_rank',
    'B_Middleweight_rank', 'B_Welterweight_rank',
    'B_Lightweight_rank', 'B_Featherweight_rank',
    'B_Bantamweight_rank', 'B_Flyweight_rank',
    'B_Pound-for-Pound_rank',
]

# Drop each group in turn; a missing column raises KeyError, as before.
ufc_df = ufc_df.drop(columns=fight_info_cols)
ufc_df = ufc_df.drop(columns=red_rank_cols)
ufc_df = ufc_df.drop(columns=blue_rank_cols)

ufc_df


Unnamed: 0,R_odds,B_odds,R_ev,B_ev,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,...,sig_str_dif,avg_sub_att_dif,avg_td_dif,better_rank,r_dec_odds,b_dec_odds,r_sub_odds,b_sub_odds,r_ko_odds,b_ko_odds
0,-150.0,130,66.666667,130.000000,Red,False,Light Heavyweight,5,0,1,...,-0.530000,0.600000,-0.370000,Red,800.0,900.0,2000.0,1600.0,-110.0,175.0
1,170.0,-200,170.000000,50.000000,Blue,False,Welterweight,3,2,0,...,2.190000,0.300000,-1.480000,neither,450.0,350.0,700.0,1100.0,550.0,120.0
2,110.0,-130,110.000000,76.923077,Blue,False,Middleweight,3,1,0,...,-0.850000,-1.600000,-3.330000,neither,550.0,275.0,275.0,1400.0,600.0,185.0
3,-675.0,475,14.814815,475.000000,Red,False,Lightweight,3,1,0,...,0.250000,0.000000,-1.570000,neither,175.0,900.0,500.0,3500.0,110.0,1100.0
4,-135.0,115,74.074074,115.000000,Blue,False,Lightweight,3,0,2,...,2.580000,-0.600000,-0.310000,neither,165.0,200.0,400.0,1200.0,900.0,600.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4891,-155.0,135,64.516129,135.000000,Blue,False,Lightweight,3,0,0,...,-13.666667,0.000000,0.000000,neither,,,,,,
4892,-210.0,175,47.619048,175.000000,Red,False,Welterweight,3,0,0,...,-18.000000,-1.000000,-4.666667,neither,,,,,,
4893,-260.0,220,38.461538,220.000000,Red,False,Heavyweight,3,1,0,...,-4.000000,1.000000,1.000000,neither,,,,,,
4894,-420.0,335,23.809524,335.000000,Red,False,Welterweight,3,0,0,...,-40.500000,0.000000,-3.500000,neither,,,,,,


In [31]:
# Number of fights recorded in each weight class, most frequent first.
ufc_df.loc[:, 'weight_class'].value_counts()

Lightweight              855
Welterweight             830
Middleweight             593
Featherweight            566
Bantamweight             493
Light Heavyweight        394
Heavyweight              385
Flyweight                235
Women's Strawweight      203
Women's Bantamweight     156
Women's Flyweight        136
Catch Weight              33
Women's Featherweight     17
Name: weight_class, dtype: int64

In [32]:
# Encode the categorical string columns as integer codes.
#
# Each encoded column is assigned back to `ufc_df`. The previous form,
# `ufc_df[col].replace(..., inplace=True)`, is a chained in-place assignment:
# pandas 2.x warns about it, and with Copy-on-Write enabled it modifies a
# temporary object and leaves `ufc_df` unchanged.
winner_codes = {'Red': 0, 'Blue': 1}

# 'Switch ' (with a trailing space) is mapped to the same code as 'Switch'.
stance_codes = {'Orthodox': 0, 'Southpaw': 1, 'Switch': 2, 'Switch ': 2}

better_rank_codes = {'Red': 0, 'Blue': 1, 'neither': 2}

weight_class_codes = {
    'Flyweight': 0,
    'Bantamweight': 1,
    'Featherweight': 2,
    'Lightweight': 3,
    'Welterweight': 4,
    'Middleweight': 5,
    'Light Heavyweight': 6,
    'Heavyweight': 7,
    "Women's Strawweight": 8,
    "Women's Bantamweight": 9,
    "Women's Flyweight": 10,
    "Catch Weight": 11,
    "Women's Featherweight": 12,
}

# Column -> mapping, applied in the same order as before. Values that are not
# listed in a mapping are left untouched by `replace`.
column_codes = [
    ('Winner', winner_codes),
    ('B_Stance', stance_codes),
    ('R_Stance', stance_codes),
    ('better_rank', better_rank_codes),
    ('weight_class', weight_class_codes),
]
for column, codes in column_codes:
    ufc_df[column] = ufc_df[column].replace(codes)

# Keep only the rows that have a value in every remaining column.
ufc_df = ufc_df.dropna()

In [33]:
# SELECT THE WEIGHT CLASS (optional, currently disabled so every weight class is kept).
# Uncomment the three lines below to restrict the data to the listed weight
# classes and then drop the now-redundant `weight_class` column.
# NOTE(review): `weight_class` is integer-encoded by the earlier replace step
# ('Heavyweight' -> 7), so the string label below would presumably match no
# rows; confirm and use the integer code before enabling.
#weightclass = ['Heavyweight']
#ufc_df = ufc_df[ufc_df['weight_class'].isin(weightclass)]
#ufc_df = ufc_df.drop(['weight_class'], axis = 1)
ufc_df


Unnamed: 0,R_odds,B_odds,R_ev,B_ev,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,...,sig_str_dif,avg_sub_att_dif,avg_td_dif,better_rank,r_dec_odds,b_dec_odds,r_sub_odds,b_sub_odds,r_ko_odds,b_ko_odds
0,-150.0,130,66.666667,130.000000,0,False,6,5,0,1,...,-0.530000,0.600000,-0.370000,0,800.0,900.0,2000.0,1600.0,-110.0,175.0
1,170.0,-200,170.000000,50.000000,1,False,4,3,2,0,...,2.190000,0.300000,-1.480000,2,450.0,350.0,700.0,1100.0,550.0,120.0
2,110.0,-130,110.000000,76.923077,1,False,5,3,1,0,...,-0.850000,-1.600000,-3.330000,2,550.0,275.0,275.0,1400.0,600.0,185.0
3,-675.0,475,14.814815,475.000000,0,False,3,3,1,0,...,0.250000,0.000000,-1.570000,2,175.0,900.0,500.0,3500.0,110.0,1100.0
4,-135.0,115,74.074074,115.000000,1,False,3,3,0,2,...,2.580000,-0.600000,-0.310000,2,165.0,200.0,400.0,1200.0,900.0,600.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4275,130.0,-140,130.000000,71.428571,0,False,6,3,1,0,...,63.333333,1.833333,0.166667,2,378.0,411.0,600.0,1640.0,450.0,150.0
4283,150.0,-160,150.000000,62.500000,0,False,3,5,0,1,...,-20.733333,1.050000,0.650000,2,309.0,135.0,475.0,485.0,1330.0,632.0
4284,123.0,-133,123.000000,75.187970,1,False,4,3,0,3,...,9.150000,0.275000,0.950000,2,240.0,200.0,1275.0,1615.0,575.0,265.0
4285,-210.0,190,47.619048,190.000000,1,False,5,3,0,3,...,12.527778,-1.027778,-1.111111,2,325.0,665.0,105.0,940.0,1195.0,460.0


In [34]:
# Save the cleaned data set to disk. The DataFrame index is written as well
# (pandas default), so it appears as the first, unnamed column of the file.
ufc_df.to_csv(path_or_buf='ALL_Weightclasses.csv')