In [64]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import log_loss, make_scorer
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn import preprocessing

In [101]:
# Show every column when a DataFrame is displayed (no truncation with "...").
pd.options.display.max_columns = None

In [102]:
# Load the fight table from the CSV file, path relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/data.csv')

In [103]:
# Derive string "year" (YYYY) and "month" (MM) columns from the ISO-style
# "date" column (values look like "2014-08-23").
# The vectorised .str accessor replaces a per-row lambda: it propagates
# missing dates as NaN instead of raising TypeError, and the slices are
# anchored on the start of the string rather than counted from its end.
df["year"] = df["date"].str[:4]
df["month"] = df["date"].str[5:7]

In [104]:
# Number of rows recorded for each (year, month) pair.
df.groupby(by=["year", "month"])["year"].agg("count")

year  month
1993  11        8
1994  03       15
      09        6
      12        8
1995  04        9
      07        8
      09        9
      12        7
1996  02        8
      05        7
      07        8
      09        6
      12        7
1997  02        7
      05        9
      07        9
      10        7
      12        7
1998  03        6
      05        8
      10        6
1999  01        7
      03        7
      05        7
      07        7
      09        8
      11        7
2000  03        7
      04        6
      06        6
               ..
2017  01       24
      02       33
      03       34
      04       39
      05       24
      06       43
      07       58
      08       12
      09       43
      10       34
      11       49
      12       58
2018  01       34
      02       47
      03       22
      04       34
      05       37
      06       38
      07       62
      08       25
      09       39
      10       25
      11       61
      12       5

In [105]:
# Number of rows in each weight class.
df.groupby(by="weight_class")["weight_class"].agg("count")

weight_class
Bantamweight             379
Catch Weight              38
Featherweight            442
Flyweight                187
Heavyweight              507
Light Heavyweight        502
Lightweight              989
Middleweight             725
Open Weight               92
Welterweight             969
Women's Bantamweight     111
Women's Featherweight     10
Women's Flyweight         50
Women's Strawweight      143
Name: weight_class, dtype: int64

In [106]:
# Number of rows in which each fighter is listed in the R_fighter column.
df.groupby(by="R_fighter")["R_fighter"].agg("count")

R_fighter
Aaron Phillips            1
Aaron Riley               4
Aaron Rosa                1
Aaron Simpson             8
Abdul Razak Alhassan      2
Abel Trujillo             4
Adam Milstead             2
Adam Wieczorek            1
Adam Yandiev              1
Adriano Martins           3
Aiemann Zahabi            3
Aisling Daly              2
Akbarh Arreola            1
Akihiro Gono              1
Akira Corassani           4
Al Iaquinta               6
Alan Belcher              7
Alan Jouban               7
Alan Omer                 2
Alan Patrick              4
Albert Cheng              1
Albert Morales            3
Albert Tumenov            2
Alberto Mina              2
Alberto Uda               1
Alejandro Perez           6
Aleksei Kunchenko         1
Aleksei Oleinik           5
Alessio Di Chirico        3
Alessio Sakara            9
                         ..
William Patolino          4
Wilson Gouveia            6
Wilson Reis               6
Wu Yanan                  2
Wuliji Bur

In [109]:
# Rows where Aaron Phillips is listed in the R_fighter column.
df.loc[df["R_fighter"].eq("Aaron Phillips")]

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_KD,B_avg_LEG_att,B_avg_LEG_landed,B_avg_PASS,B_avg_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_att,B_avg_TD_landed,B_avg_TD_pct,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_longest_win_streak,B_losses,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_opp_KD,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_opp_PASS,B_avg_opp_REV,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_opp_SIG_STR_pct,B_avg_opp_SUB_ATT,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_opp_TD_pct,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_total_rounds_fought,B_total_time_fought(seconds),B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_BODY_att,R_avg_BODY_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_KD,R_avg_LEG_att,R_avg_LEG_landed,R_avg_PASS,R_avg_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_att,R_avg_TD_landed,R_avg_TD_pct,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_longest_win_streak,R_losses,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_av
g_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_opp_KD,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_opp_PASS,R_avg_opp_REV,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_opp_SIG_STR_pct,R_avg_opp_SUB_ATT,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_opp_TD_pct,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_total_rounds_fought,R_total_time_fought(seconds),R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,year,month
2297,Aaron Phillips,Matt Hobar,Kevin Nix,2014-08-23,"Tulsa, Oklahoma, USA",Blue,False,Bantamweight,3,1.0,0.0,0.0,2.0,1.0,2.0,2.0,25.0,5.0,0.0,0.0,21.0,2.0,0.0,4.0,4.0,0.0,0.0,27.0,7.0,0.25,0.0,0.0,0.0,0.0,27.0,7.0,0.0,1.0,4.0,1.0,2.0,2.0,35.0,16.0,6.0,6.0,39.0,23.0,0.0,0.0,0.0,0.0,0.0,43.0,24.0,0.55,1.0,2.0,2.0,1.0,46.0,27.0,1.0,167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Southpaw,177.8,177.8,135.0,1.0,0.0,0.0,14.0,12.0,6.0,3.0,26.0,9.0,8.0,6.0,23.0,5.0,0.0,3.0,1.0,1.0,1.0,40.0,18.0,0.45,1.0,0.0,0.0,0.0,137.0,109.0,0.0,1.0,13.0,8.0,6.0,4.0,31.0,12.0,31.0,21.0,53.0,28.0,0.0,2.0,1.0,7.0,1.0,68.0,37.0,0.54,1.0,8.0,5.0,0.62,129.0,95.0,3.0,900.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Southpaw,175.26,177.8,135.0,27.0,25.0,2014,8


In [110]:
# Rows where Matt Hobar is listed in the R_fighter column.
# Only R_fighter is checked; rows that list him as B_fighter are not matched.
df.loc[df["R_fighter"].eq("Matt Hobar")]

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_KD,B_avg_LEG_att,B_avg_LEG_landed,B_avg_PASS,B_avg_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_att,B_avg_TD_landed,B_avg_TD_pct,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_longest_win_streak,B_losses,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_opp_KD,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_opp_PASS,B_avg_opp_REV,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_opp_SIG_STR_pct,B_avg_opp_SUB_ATT,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_opp_TD_pct,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_total_rounds_fought,B_total_time_fought(seconds),B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_BODY_att,R_avg_BODY_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_KD,R_avg_LEG_att,R_avg_LEG_landed,R_avg_PASS,R_avg_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_att,R_avg_TD_landed,R_avg_TD_pct,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_longest_win_streak,R_losses,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_av
g_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_opp_KD,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_opp_PASS,R_avg_opp_REV,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_opp_SIG_STR_pct,R_avg_opp_SUB_ATT,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_opp_TD_pct,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_total_rounds_fought,R_total_time_fought(seconds),R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,year,month


In [113]:
# Full list of column labels, displayed as a plain Python list.
df.columns.tolist()


['R_fighter',
 'B_fighter',
 'Referee',
 'date',
 'location',
 'Winner',
 'title_bout',
 'weight_class',
 'no_of_rounds',
 'B_current_lose_streak',
 'B_current_win_streak',
 'B_draw',
 'B_avg_BODY_att',
 'B_avg_BODY_landed',
 'B_avg_CLINCH_att',
 'B_avg_CLINCH_landed',
 'B_avg_DISTANCE_att',
 'B_avg_DISTANCE_landed',
 'B_avg_GROUND_att',
 'B_avg_GROUND_landed',
 'B_avg_HEAD_att',
 'B_avg_HEAD_landed',
 'B_avg_KD',
 'B_avg_LEG_att',
 'B_avg_LEG_landed',
 'B_avg_PASS',
 'B_avg_REV',
 'B_avg_SIG_STR_att',
 'B_avg_SIG_STR_landed',
 'B_avg_SIG_STR_pct',
 'B_avg_SUB_ATT',
 'B_avg_TD_att',
 'B_avg_TD_landed',
 'B_avg_TD_pct',
 'B_avg_TOTAL_STR_att',
 'B_avg_TOTAL_STR_landed',
 'B_longest_win_streak',
 'B_losses',
 'B_avg_opp_BODY_att',
 'B_avg_opp_BODY_landed',
 'B_avg_opp_CLINCH_att',
 'B_avg_opp_CLINCH_landed',
 'B_avg_opp_DISTANCE_att',
 'B_avg_opp_DISTANCE_landed',
 'B_avg_opp_GROUND_att',
 'B_avg_opp_GROUND_landed',
 'B_avg_opp_HEAD_att',
 'B_avg_opp_HEAD_landed',
 'B_avg_opp_KD',
 'B_av

In [114]:
# Preview the first five rows of the table.
df.head(n=5)

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_KD,B_avg_LEG_att,B_avg_LEG_landed,B_avg_PASS,B_avg_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_att,B_avg_TD_landed,B_avg_TD_pct,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_longest_win_streak,B_losses,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_opp_KD,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_opp_PASS,B_avg_opp_REV,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_opp_SIG_STR_pct,B_avg_opp_SUB_ATT,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_opp_TD_pct,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_total_rounds_fought,B_total_time_fought(seconds),B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_BODY_att,R_avg_BODY_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_KD,R_avg_LEG_att,R_avg_LEG_landed,R_avg_PASS,R_avg_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_att,R_avg_TD_landed,R_avg_TD_pct,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_longest_win_streak,R_losses,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_av
g_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_opp_KD,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_opp_PASS,R_avg_opp_REV,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_opp_SIG_STR_pct,R_avg_opp_SUB_ATT,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_opp_TD_pct,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_total_rounds_fought,R_total_time_fought(seconds),R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,year,month
0,Henry Cejudo,Marlon Moraes,Marc Goddard,2019-06-08,"Chicago, Illinois, USA",Red,True,Bantamweight,5,0.0,4.0,0.0,9.2,6.0,0.2,0.0,62.6,20.6,2.6,2.0,48.6,11.2,0.8,7.6,5.4,0.4,0.0,65.4,22.6,0.466,0.4,0.8,0.2,0.1,66.4,23.6,4.0,1.0,6.4,4.0,1.0,0.6,51.2,17.4,0.6,0.2,39.6,9.4,0.2,6.8,4.8,0.0,0.0,52.8,18.2,0.236,0.0,1.0,0.4,0.1,53.8,19.2,9.0,419.4,0.0,0.0,1.0,0.0,2.0,1.0,0.0,4.0,Orthodox,167.64,170.18,135.0,0.0,4.0,0.0,21.9,16.4,17.0,11.0,75.0,26.5,9.4,6.5,74.2,23.9,0.4,5.3,3.7,1.2,0.0,101.4,44.0,0.466,0.1,5.3,1.9,0.458,129.9,69.1,4.0,2.0,13.3,8.8,7.5,5.1,90.5,26.8,0.8,0.3,76.1,17.3,0.1,9.4,6.1,0.0,0.0,98.8,32.2,0.336,0.0,0.9,0.1,0.05,110.5,43.3,27.0,742.6,3.0,0.0,2.0,4.0,2.0,0.0,0.0,8.0,Orthodox,162.56,162.56,135.0,31.0,32.0,2019,6
1,Valentina Shevchenko,Jessica Eye,Robert Madrigal,2019-06-08,"Chicago, Illinois, USA",Red,True,Women's Flyweight,5,0.0,3.0,0.0,14.6,9.1,11.8,7.3,124.7,42.1,2.4,1.9,112.0,32.0,0.0,12.3,10.2,0.8,0.0,138.9,51.3,0.399,0.7,1.0,0.5,0.225,158.7,69.6,3.0,6.0,13.0,9.3,12.8,9.6,101.7,32.0,8.1,6.9,97.7,30.8,0.1,11.9,8.4,1.4,0.0,122.6,48.5,0.408,0.7,2.3,0.9,0.231,151.5,75.4,29.0,849.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,4.0,Orthodox,167.64,167.64,125.0,0.0,2.0,0.0,12.0,7.714286,9.285714,6.857143,88.142857,36.142857,18.428571,16.428571,84.571429,37.0,0.0,19.285714,14.714286,1.714286,0.142857,115.857143,59.428571,0.575714,0.428571,5.142857,2.428571,0.601429,161.571429,102.857143,2.0,2.0,24.571429,14.142857,10.571429,7.857143,98.571429,32.571429,6.428571,4.285714,61.857143,12.428571,0.0,29.142857,18.142857,1.142857,0.0,115.571429,44.714286,0.437143,0.285714,3.285714,0.857143,0.147143,158.142857,82.285714,25.0,1062.0,2.0,0.0,1.0,2.0,0.0,2.0,0.0,5.0,Southpaw,165.1,167.64,125.0,32.0,31.0,2019,6
2,Tony Ferguson,Donald Cerrone,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Red,False,Lightweight,3,0.0,3.0,0.0,15.354839,11.322581,6.741935,4.387097,84.741935,38.580645,5.516129,3.806452,67.645161,23.258065,0.645161,14.0,12.193548,0.935484,0.096774,97.0,46.774194,0.496129,0.354839,2.16129,0.677419,0.295484,103.709677,52.548387,8.0,8.0,17.903226,11.870968,8.419355,5.83871,84.548387,38.064516,1.741935,0.935484,67.645161,25.483871,0.225806,9.16129,7.483871,0.032258,0.032258,94.709677,44.83871,0.453226,0.096774,2.096774,0.225806,0.063548,100.387097,49.774194,68.0,581.870968,1.0,0.0,0.0,7.0,10.0,6.0,0.0,23.0,Orthodox,185.42,185.42,155.0,0.0,11.0,0.0,13.866667,8.666667,2.866667,1.733333,116.133333,49.466667,5.333333,4.266667,96.733333,35.6,0.2,13.733333,11.2,0.333333,0.133333,124.333333,55.466667,0.43,1.0,0.933333,0.4,0.277333,133.0,63.4,11.0,1.0,14.466667,8.133333,2.8,0.733333,91.066667,32.2,4.866667,2.8,78.266667,23.2,0.266667,6.0,4.4,0.333333,0.133333,98.733333,35.733333,0.34,0.066667,2.866667,0.666667,0.131333,102.133333,38.6,33.0,604.4,2.0,0.0,1.0,3.0,3.0,6.0,1.0,14.0,Orthodox,180.34,193.04,155.0,36.0,35.0,2019,6
3,Jimmie Rivera,Petr Yan,Kevin MacDonald,2019-06-08,"Chicago, Illinois, USA",Blue,False,Bantamweight,3,0.0,4.0,0.0,17.0,14.0,13.75,11.0,109.5,48.75,13.0,10.5,116.25,53.75,0.5,3.0,2.5,0.5,0.25,136.25,70.25,0.55,0.25,2.5,1.25,0.2875,154.75,86.75,4.0,0.0,12.25,6.0,6.0,3.75,94.25,26.75,1.75,1.25,82.5,21.5,0.25,7.25,4.25,0.0,0.0,102.0,31.75,0.3375,0.0,4.5,0.75,0.0975,104.75,34.25,9.0,652.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,4.0,Switch,170.18,170.18,135.0,1.0,0.0,0.0,18.25,10.25,5.875,4.125,104.875,41.0,1.0,0.625,80.5,24.0,0.375,13.0,11.5,0.125,0.0,111.75,45.75,0.36625,0.0,2.25,0.625,0.10375,117.375,50.75,5.0,2.0,20.25,13.375,6.875,5.625,103.125,38.5,0.875,0.75,77.375,20.375,0.125,13.25,11.125,0.0,0.0,110.875,44.875,0.44625,0.0,2.375,0.0,0.0,115.125,48.875,20.0,690.25,0.0,0.0,1.0,4.0,1.0,0.0,0.0,6.0,Orthodox,162.56,172.72,135.0,26.0,29.0,2019,6
4,Tai Tuivasa,Blagoy Ivanov,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Blue,False,Heavyweight,3,0.0,1.0,0.0,17.0,14.5,2.5,2.0,201.0,59.5,0.0,0.0,184.5,45.0,0.0,2.0,2.0,0.0,0.0,203.5,61.5,0.31,0.0,0.0,0.0,0.0,204.0,62.0,1.0,1.0,42.5,23.5,0.5,0.5,205.0,89.5,0.0,0.0,152.5,56.5,0.0,10.5,10.0,0.0,0.0,205.5,90.0,0.43,0.0,0.5,0.0,0.0,205.5,90.0,8.0,1200.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,Southpaw,180.34,185.42,250.0,1.0,0.0,0.0,7.75,6.75,11.0,7.25,50.75,24.75,0.5,0.5,50.75,22.75,0.5,3.75,3.0,0.25,0.0,62.25,32.5,0.545,0.0,0.5,0.0,0.0,63.5,32.75,3.0,1.0,6.25,4.75,4.5,3.5,42.75,16.25,7.75,2.75,43.25,14.0,0.25,5.5,3.75,0.75,0.0,55.0,22.5,0.3975,0.0,1.0,0.0,0.0,60.5,27.75,7.0,440.75,0.0,0.0,0.0,1.0,2.0,0.0,0.0,3.0,Southpaw,187.96,190.5,264.0,32.0,26.0,2019,6


In [6]:
# Keep the column labels in a plain list and display it.
cols = [*df.columns]
cols

['R_fighter',
 'B_fighter',
 'Referee',
 'date',
 'location',
 'Winner',
 'title_bout',
 'weight_class',
 'no_of_rounds',
 'B_current_lose_streak',
 'B_current_win_streak',
 'B_draw',
 'B_avg_BODY_att',
 'B_avg_BODY_landed',
 'B_avg_CLINCH_att',
 'B_avg_CLINCH_landed',
 'B_avg_DISTANCE_att',
 'B_avg_DISTANCE_landed',
 'B_avg_GROUND_att',
 'B_avg_GROUND_landed',
 'B_avg_HEAD_att',
 'B_avg_HEAD_landed',
 'B_avg_KD',
 'B_avg_LEG_att',
 'B_avg_LEG_landed',
 'B_avg_PASS',
 'B_avg_REV',
 'B_avg_SIG_STR_att',
 'B_avg_SIG_STR_landed',
 'B_avg_SIG_STR_pct',
 'B_avg_SUB_ATT',
 'B_avg_TD_att',
 'B_avg_TD_landed',
 'B_avg_TD_pct',
 'B_avg_TOTAL_STR_att',
 'B_avg_TOTAL_STR_landed',
 'B_longest_win_streak',
 'B_losses',
 'B_avg_opp_BODY_att',
 'B_avg_opp_BODY_landed',
 'B_avg_opp_CLINCH_att',
 'B_avg_opp_CLINCH_landed',
 'B_avg_opp_DISTANCE_att',
 'B_avg_opp_DISTANCE_landed',
 'B_avg_opp_GROUND_att',
 'B_avg_opp_GROUND_landed',
 'B_avg_opp_HEAD_att',
 'B_avg_opp_HEAD_landed',
 'B_avg_opp_KD',
 'B_av

# Modeling Test
- Define X, y

- Split

- Model

- Evaluate

In [None]:
# Define X, y.
# No other cell defines X or y, so this cell previously failed with NameError
# on a fresh kernel. The target is the "Winner" column; the features are every
# numeric column of df (text columns such as fighter names, referee, location,
# stance, and the string year/month columns are left out).
# NOTE(review): this is a baseline feature set -- confirm which columns should
# really be used for modelling.
y = df["Winner"]
X = df.select_dtypes(include="number")

# Split with a fixed seed so the partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fill any missing feature values with medians computed on the training
# split only, so no information from the test rows leaks into training.
# NOTE(review): assumes missing values may be present and that median
# imputation is acceptable -- confirm against the data.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

In [None]:
# Baseline random forest with default hyperparameters.
# random_state is fixed (same seed as the train/test split) so that refitting
# on the same data gives the same forest and therefore the same score.
clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)

In [None]:
# Mean accuracy of the fitted classifier on the held-out test split.
clf.score(X=X_test, y=y_test)