In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Raise pandas' display limits (rows, columns, line width) for the wide frames shown below.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

## Objectives: find the following information
- The chances of fights finishing via decision/knockout/submission
- The chances of getting knocked out or submitted round by round
- Where most submissions happen (neck, arms or legs)
- What the most successful finish of all time in the UFC is
- What the current trends are for submission and knockout finishes
- Fighter performance by age
- Analysis of individual fighter performance
- Analysis of overall MMA 

In [3]:
# Load the fighter attributes and the match records from the local dataset folder,
# then report each frame's (rows, columns).
fighters_df = pd.read_csv("../datasets/01-ufc-fight/fighters.csv")
matches_df = pd.read_csv("../datasets/01-ufc-fight/matches.csv")

print(fighters_df.shape, matches_df.shape, sep="\n")

(3313, 6)
(5144, 145)


In [4]:
fighters_df.head()

Unnamed: 0,fighter_name,Height,Weight,Reach,Stance,DOB
0,AJ Fonseca,"5' 4""",145 lbs.,,,
1,AJ Matthews,"5' 11""",185 lbs.,,,
2,AJ McKee,"5' 10""",145 lbs.,,,
3,AJ Siscoe,"5' 7""",135 lbs.,,,
4,Aalon Cruz,"6' 0""",145 lbs.,,,


### Data Cleaning:
1. For fighters dataset
- Convert Height to cm and Weight to kg
- Convert Reach and Stance to numerical values
- Extract the birth year from DOB --> derive the age of the fighter

2. For matches dataset:
- Convert date to datetime type

3. Merge fighters data to match data

In [58]:
# Height and Weight are stored as strings (object dtype), so they have to be converted to numbers.
# TODO: convert height to cm and weight to kg
fighters_df.dtypes

fighter_name    object
Height          object
Weight          object
Reach           object
Stance          object
DOB             object
dtype: object

In [69]:
def convert_height(string):
    """
    Convert a feet-and-inches height string to centimetres.

    input: a string such as 5' 10" (5'10" and a bare 6' are also accepted),
           or a missing value (None / NaN)
    output: height in cms as a float, or None when the input is missing

    Raises ValueError when the string has no feet marker (') or when the
    feet / inches part is not a whole number.
    """
    if pd.isna(string):
        return None
    # Split on the feet marker instead of on a single space, so the amount of
    # whitespace between the feet and inches parts does not matter.
    feet_part, marker, inches_part = string.strip().partition("'")
    if not marker:
        raise ValueError("unrecognised height format: {!r}".format(string))
    feet = int(feet_part)
    inches_part = inches_part.replace('"', '').strip()
    # A bare feet value such as 6' carries no inches component.
    inches = int(inches_part) if inches_part else 0
    return feet * 30.48 + inches * 2.54
    
#convert_height('''5' 10"''') # output should be 177.8

In [70]:
fighters_df.Height[0]

'5\' 4"'

In [76]:
# Replace the raw feet/inches strings with centimetre values in place.
# convert_height expects the raw strings, so run this once per freshly loaded frame.
fighters_df["Height"] = fighters_df["Height"].apply(convert_height)
fighters_df.dtypes

fighter_name     object
Height          float64
Weight           object
Reach            object
Stance           object
DOB              object
dtype: object

In [77]:
# Number of missing values per column (absolute counts, not percentages)
fighters_df.isnull().sum() 

fighter_name       0
Height           263
Weight            75
Reach           1744
Stance           874
DOB              740
dtype: int64

In [85]:
#### Convert weight to kg
def convert_weight(string):
    """
    Convert a weight string in pounds to kilograms.

    input: string that starts with the weight in lbs, e.g. "145 lbs."
           (fractional values such as "155.5 lbs." are accepted), or a
           missing value (None / NaN)
    output: weight in kg as a float, or None when the input is missing

    Raises ValueError when the string is blank or its first token is not
    a number.
    """
    if pd.isna(string):
        return None
    # split() with no argument ignores leading and repeated whitespace.
    tokens = string.split()
    if not tokens:
        raise ValueError("unrecognised weight format: {!r}".format(string))
    # float() accepts whole and fractional pound values alike; for whole
    # numbers the product is identical to the previous int-based result.
    pounds = float(tokens[0])
    # Same rounded lb -> kg factor as before, so previously computed values
    # are reproduced exactly.
    return pounds * 0.453592

In [87]:
# Replace the raw "<n> lbs." strings with kilogram values in place (run once per loaded frame).
fighters_df["Weight"] = fighters_df["Weight"].apply(convert_weight)

In [88]:
fighters_df.dtypes

fighter_name     object
Height          float64
Weight          float64
Reach            object
Stance           object
DOB              object
dtype: object

In [90]:
def convert_reach(string):
    """
    Convert a reach given in inches to centimetres.

    input: a string such as 76" (whole inches followed by a double quote),
           or a missing value (None / NaN)
    output: reach in cms as a float; a missing input is returned unchanged
    """
    if not pd.isna(string):
        # Drop the inch mark, then scale whole inches to centimetres.
        inches = int(string.replace('"', ""))
        return inches * 2.54
    return string
    

In [93]:
# Replace the raw inch strings with centimetre values in place (run once per loaded frame).
fighters_df["Reach"] = fighters_df["Reach"].apply(convert_reach)

In [94]:
fighters_df.dtypes

fighter_name     object
Height          float64
Weight          float64
Reach           float64
Stance           object
DOB              object
dtype: object

In [100]:
# Extract year from DOB
def extract_year(string):
    """
    Pull the year out of a date-of-birth string.

    input: a string whose third space-separated token is the year,
           e.g. "Nov 12, 1974", or a missing value (None / NaN)
    output: the year as an int; a missing input is returned unchanged
    """
    return string if pd.isna(string) else int(string.split(" ")[2])

In [104]:
# Store the birth year in its own column; the DOB column itself is left untouched.
fighters_df["year"] = fighters_df["DOB"].apply(extract_year)

In [105]:
fighters_df.dtypes

fighter_name     object
Height          float64
Weight          float64
Reach           float64
Stance           object
DOB              object
year            float64
dtype: object

In [107]:
fighters_df.head(20)

Unnamed: 0,fighter_name,Height,Weight,Reach,Stance,DOB,year
0,AJ Fonseca,162.56,65.77084,,,,
1,AJ Matthews,180.34,83.91452,,,,
2,AJ McKee,177.8,65.77084,,,,
3,AJ Siscoe,170.18,61.23492,,,,
4,Aalon Cruz,182.88,65.77084,,,,
5,Aaron Brink,190.5,92.98636,,Orthodox,"Nov 12, 1974",1974.0
6,Aaron Ely,172.72,61.23492,,,"Mar 18, 1989",1989.0
7,Aaron Jeffery,187.96,83.91452,,,"Nov 14, 1992",1992.0
8,Aaron Lanfranco,,70.30676,,,"Aug 26, 1986",1986.0
9,Aaron Miller,175.26,65.77084,,,,


In [109]:
# save cleaned fighters_df to csv file
fighters_df.to_csv("cleaned_fighters.csv", index=False)

In [75]:
matches_df.dtypes

R_fighter                        object
B_fighter                        object
Referee                          object
date                             object
location                         object
Winner                           object
title_bout                         bool
weight_class                     object
no_of_rounds                      int64
B_current_lose_streak           float64
B_current_win_streak            float64
B_draw                          float64
B_avg_BODY_att                  float64
B_avg_BODY_landed               float64
B_avg_CLINCH_att                float64
B_avg_CLINCH_landed             float64
B_avg_DISTANCE_att              float64
B_avg_DISTANCE_landed           float64
B_avg_GROUND_att                float64
B_avg_GROUND_landed             float64
B_avg_HEAD_att                  float64
B_avg_HEAD_landed               float64
B_avg_KD                        float64
B_avg_LEG_att                   float64
B_avg_LEG_landed                float64


In [6]:
matches_df.head(10)

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_KD,B_avg_LEG_att,B_avg_LEG_landed,B_avg_PASS,B_avg_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_att,B_avg_TD_landed,B_avg_TD_pct,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_longest_win_streak,B_losses,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_opp_KD,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_opp_PASS,B_avg_opp_REV,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_opp_SIG_STR_pct,B_avg_opp_SUB_ATT,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_opp_TD_pct,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_total_rounds_fought,B_total_time_fought(seconds),B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_BODY_att,R_avg_BODY_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_KD,R_avg_LEG_att,R_avg_LEG_landed,R_avg_PASS,R_avg_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_att,R_avg_TD_landed,R_avg_TD_pct,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_longest_win_streak,R_losses,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_av
g_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_opp_KD,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_opp_PASS,R_avg_opp_REV,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_opp_SIG_STR_pct,R_avg_opp_SUB_ATT,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_opp_TD_pct,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_total_rounds_fought,R_total_time_fought(seconds),R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Henry Cejudo,Marlon Moraes,Marc Goddard,2019-06-08,"Chicago, Illinois, USA",Red,True,Bantamweight,5,0.0,4.0,0.0,9.2,6.0,0.2,0.0,62.6,20.6,2.6,2.0,48.6,11.2,0.8,7.6,5.4,0.4,0.0,65.4,22.6,0.466,0.4,0.8,0.2,0.1,66.4,23.6,4.0,1.0,6.4,4.0,1.0,0.6,51.2,17.4,0.6,0.2,39.6,9.4,0.2,6.8,4.8,0.0,0.0,52.8,18.2,0.236,0.0,1.0,0.4,0.1,53.8,19.2,9.0,419.4,0.0,0.0,1.0,0.0,2.0,1.0,0.0,4.0,Orthodox,167.64,170.18,135.0,0.0,4.0,0.0,21.9,16.4,17.0,11.0,75.0,26.5,9.4,6.5,74.2,23.9,0.4,5.3,3.7,1.2,0.0,101.4,44.0,0.466,0.1,5.3,1.9,0.458,129.9,69.1,4.0,2.0,13.3,8.8,7.5,5.1,90.5,26.8,0.8,0.3,76.1,17.3,0.1,9.4,6.1,0.0,0.0,98.8,32.2,0.336,0.0,0.9,0.1,0.05,110.5,43.3,27.0,742.6,3.0,0.0,2.0,4.0,2.0,0.0,0.0,8.0,Orthodox,162.56,162.56,135.0,31.0,32.0
1,Valentina Shevchenko,Jessica Eye,Robert Madrigal,2019-06-08,"Chicago, Illinois, USA",Red,True,Women's Flyweight,5,0.0,3.0,0.0,14.6,9.1,11.8,7.3,124.7,42.1,2.4,1.9,112.0,32.0,0.0,12.3,10.2,0.8,0.0,138.9,51.3,0.399,0.7,1.0,0.5,0.225,158.7,69.6,3.0,6.0,13.0,9.3,12.8,9.6,101.7,32.0,8.1,6.9,97.7,30.8,0.1,11.9,8.4,1.4,0.0,122.6,48.5,0.408,0.7,2.3,0.9,0.231,151.5,75.4,29.0,849.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,4.0,Orthodox,167.64,167.64,125.0,0.0,2.0,0.0,12.0,7.714286,9.285714,6.857143,88.142857,36.142857,18.428571,16.428571,84.571429,37.0,0.0,19.285714,14.714286,1.714286,0.142857,115.857143,59.428571,0.575714,0.428571,5.142857,2.428571,0.601429,161.571429,102.857143,2.0,2.0,24.571429,14.142857,10.571429,7.857143,98.571429,32.571429,6.428571,4.285714,61.857143,12.428571,0.0,29.142857,18.142857,1.142857,0.0,115.571429,44.714286,0.437143,0.285714,3.285714,0.857143,0.147143,158.142857,82.285714,25.0,1062.0,2.0,0.0,1.0,2.0,0.0,2.0,0.0,5.0,Southpaw,165.1,167.64,125.0,32.0,31.0
2,Tony Ferguson,Donald Cerrone,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Red,False,Lightweight,3,0.0,3.0,0.0,15.354839,11.322581,6.741935,4.387097,84.741935,38.580645,5.516129,3.806452,67.645161,23.258065,0.645161,14.0,12.193548,0.935484,0.096774,97.0,46.774194,0.496129,0.354839,2.16129,0.677419,0.295484,103.709677,52.548387,8.0,8.0,17.903226,11.870968,8.419355,5.83871,84.548387,38.064516,1.741935,0.935484,67.645161,25.483871,0.225806,9.16129,7.483871,0.032258,0.032258,94.709677,44.83871,0.453226,0.096774,2.096774,0.225806,0.063548,100.387097,49.774194,68.0,581.870968,1.0,0.0,0.0,7.0,10.0,6.0,0.0,23.0,Orthodox,185.42,185.42,155.0,0.0,11.0,0.0,13.866667,8.666667,2.866667,1.733333,116.133333,49.466667,5.333333,4.266667,96.733333,35.6,0.2,13.733333,11.2,0.333333,0.133333,124.333333,55.466667,0.43,1.0,0.933333,0.4,0.277333,133.0,63.4,11.0,1.0,14.466667,8.133333,2.8,0.733333,91.066667,32.2,4.866667,2.8,78.266667,23.2,0.266667,6.0,4.4,0.333333,0.133333,98.733333,35.733333,0.34,0.066667,2.866667,0.666667,0.131333,102.133333,38.6,33.0,604.4,2.0,0.0,1.0,3.0,3.0,6.0,1.0,14.0,Orthodox,180.34,193.04,155.0,36.0,35.0
3,Jimmie Rivera,Petr Yan,Kevin MacDonald,2019-06-08,"Chicago, Illinois, USA",Blue,False,Bantamweight,3,0.0,4.0,0.0,17.0,14.0,13.75,11.0,109.5,48.75,13.0,10.5,116.25,53.75,0.5,3.0,2.5,0.5,0.25,136.25,70.25,0.55,0.25,2.5,1.25,0.2875,154.75,86.75,4.0,0.0,12.25,6.0,6.0,3.75,94.25,26.75,1.75,1.25,82.5,21.5,0.25,7.25,4.25,0.0,0.0,102.0,31.75,0.3375,0.0,4.5,0.75,0.0975,104.75,34.25,9.0,652.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,4.0,Switch,170.18,170.18,135.0,1.0,0.0,0.0,18.25,10.25,5.875,4.125,104.875,41.0,1.0,0.625,80.5,24.0,0.375,13.0,11.5,0.125,0.0,111.75,45.75,0.36625,0.0,2.25,0.625,0.10375,117.375,50.75,5.0,2.0,20.25,13.375,6.875,5.625,103.125,38.5,0.875,0.75,77.375,20.375,0.125,13.25,11.125,0.0,0.0,110.875,44.875,0.44625,0.0,2.375,0.0,0.0,115.125,48.875,20.0,690.25,0.0,0.0,1.0,4.0,1.0,0.0,0.0,6.0,Orthodox,162.56,172.72,135.0,26.0,29.0
4,Tai Tuivasa,Blagoy Ivanov,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Blue,False,Heavyweight,3,0.0,1.0,0.0,17.0,14.5,2.5,2.0,201.0,59.5,0.0,0.0,184.5,45.0,0.0,2.0,2.0,0.0,0.0,203.5,61.5,0.31,0.0,0.0,0.0,0.0,204.0,62.0,1.0,1.0,42.5,23.5,0.5,0.5,205.0,89.5,0.0,0.0,152.5,56.5,0.0,10.5,10.0,0.0,0.0,205.5,90.0,0.43,0.0,0.5,0.0,0.0,205.5,90.0,8.0,1200.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,Southpaw,180.34,185.42,250.0,1.0,0.0,0.0,7.75,6.75,11.0,7.25,50.75,24.75,0.5,0.5,50.75,22.75,0.5,3.75,3.0,0.25,0.0,62.25,32.5,0.545,0.0,0.5,0.0,0.0,63.5,32.75,3.0,1.0,6.25,4.75,4.5,3.5,42.75,16.25,7.75,2.75,43.25,14.0,0.25,5.5,3.75,0.75,0.0,55.0,22.5,0.3975,0.0,1.0,0.0,0.0,60.5,27.75,7.0,440.75,0.0,0.0,0.0,1.0,2.0,0.0,0.0,3.0,Southpaw,187.96,190.5,264.0,32.0,26.0
5,Tatiana Suarez,Nina Ansaroff,Robert Madrigal,2019-06-08,"Chicago, Illinois, USA",Red,False,Women's Strawweight,3,0.0,4.0,0.0,19.5,12.333333,11.833333,7.166667,142.333333,63.833333,6.0,4.166667,117.833333,42.666667,0.0,22.833333,20.166667,1.333333,0.166667,160.166667,75.166667,0.47,0.666667,0.833333,0.333333,0.25,183.5,95.666667,4.0,2.0,12.0,7.333333,9.666667,7.0,95.166667,38.333333,5.166667,3.5,86.666667,33.166667,0.0,11.333333,8.333333,1.5,0.166667,110.0,48.833333,0.426667,0.0,6.0,1.166667,0.14,131.5,68.666667,18.0,886.5,0.0,0.0,0.0,3.0,0.0,1.0,0.0,4.0,Orthodox,165.1,162.56,115.0,0.0,4.0,0.0,8.75,7.5,3.0,2.25,12.75,4.75,42.25,35.75,44.75,31.25,0.0,4.5,4.0,7.75,0.0,58.0,42.75,0.6375,0.5,5.5,4.5,0.8175,101.5,80.5,4.0,0.0,3.0,2.25,3.5,3.0,5.75,2.0,2.0,1.5,8.0,4.0,0.0,0.25,0.25,0.0,0.5,11.25,6.5,0.54,0.75,0.5,0.0,0.0,38.0,26.5,8.0,540.0,1.0,0.0,0.0,1.0,1.0,2.0,0.0,4.0,,165.1,167.64,115.0,33.0,28.0
6,Aljamain Sterling,Pedro Munhoz,Marc Goddard,2019-06-08,"Chicago, Illinois, USA",Red,False,Bantamweight,3,0.0,3.0,0.0,15.0,7.416667,6.083333,3.416667,84.916667,34.666667,5.083333,3.416667,66.583333,24.0,0.416667,14.5,10.083333,0.583333,0.083333,96.083333,41.5,0.499167,0.666667,2.75,0.583333,0.233333,102.666667,47.5,4.0,4.0,13.166667,7.25,2.666667,1.333333,103.166667,41.666667,1.666667,1.166667,88.833333,32.333333,0.0,5.5,4.583333,0.083333,0.0,107.5,44.166667,0.4025,0.0,2.166667,0.5,0.194167,110.333333,46.5,23.0,495.25,0.0,0.0,0.0,2.0,3.0,3.0,0.0,8.0,Orthodox,167.64,165.1,135.0,0.0,3.0,0.0,25.166667,18.25,14.0,11.083333,72.583333,29.416667,16.083333,11.75,63.0,23.333333,0.0,14.5,10.666667,1.5,0.083333,102.666667,52.25,0.49,0.75,5.333333,1.75,0.4175,138.333333,85.75,4.0,3.0,8.916667,4.666667,5.75,4.0,51.0,12.916667,1.833333,1.166667,43.416667,8.916667,0.25,6.25,4.5,0.833333,0.083333,58.583333,18.083333,0.305833,0.083333,2.25,1.083333,0.251667,80.583333,37.833333,32.0,750.666667,0.0,0.0,0.0,5.0,1.0,3.0,0.0,9.0,Orthodox,170.18,180.34,135.0,32.0,29.0
7,Karolina Kowalkiewicz,Alexa Grasso,Kevin MacDonald,2019-06-08,"Chicago, Illinois, USA",Blue,False,Women's Strawweight,3,1.0,0.0,0.0,16.25,11.0,17.5,11.25,105.75,40.0,3.0,2.5,94.25,29.75,0.0,15.75,13.0,0.25,0.25,126.25,53.75,0.5025,0.25,0.5,0.25,0.125,165.5,87.5,1.0,2.0,18.75,10.5,8.0,6.25,101.0,27.0,4.75,3.5,90.0,23.5,0.0,5.0,2.75,2.75,0.25,113.75,36.75,0.325,0.25,5.5,2.0,0.38,132.75,53.75,10.0,716.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,Orthodox,165.1,167.64,115.0,2.0,0.0,0.0,25.222222,17.555556,27.555556,20.777778,155.777778,49.777778,3.777778,3.0,149.0,46.777778,0.0,12.888889,9.222222,0.777778,0.111111,187.111111,73.555556,0.38,0.0,0.777778,0.111111,0.055556,211.222222,95.0,3.0,4.0,26.222222,14.666667,20.555556,14.111111,133.111111,51.666667,1.666667,1.555556,110.111111,38.222222,0.111111,19.0,14.444444,0.555556,0.111111,155.333333,67.333333,0.414444,0.222222,4.777778,0.777778,0.271111,170.777778,81.888889,25.0,800.111111,1.0,0.0,2.0,3.0,0.0,0.0,0.0,5.0,Orthodox,160.02,162.56,115.0,25.0,33.0
8,Ricardo Lamas,Calvin Kattar,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Blue,False,Featherweight,3,0.0,1.0,0.0,7.25,4.75,1.75,0.5,125.0,50.75,8.75,5.0,122.25,46.5,0.5,6.0,5.0,0.0,0.0,135.5,56.25,0.42,0.0,1.5,0.5,0.165,139.25,59.0,2.0,1.0,20.75,13.25,4.75,2.0,162.5,72.5,0.0,0.0,123.5,42.75,0.0,23.0,18.5,0.0,0.0,167.25,74.5,0.4125,0.0,1.0,0.25,0.125,168.75,75.75,10.0,670.75,0.0,0.0,0.0,1.0,2.0,0.0,0.0,3.0,Orthodox,180.34,182.88,145.0,0.0,1.0,0.0,8.333333,6.733333,7.2,6.4,54.266667,21.2,6.933333,4.666667,45.933333,15.266667,0.133333,14.133333,10.266667,0.666667,0.4,68.4,32.266667,0.478,0.666667,3.733333,1.066667,0.301333,80.0,43.533333,4.0,5.0,9.0,5.8,7.466667,5.133333,59.866667,21.266667,3.466667,2.866667,56.133333,19.266667,0.2,5.666667,4.2,0.933333,0.0,70.8,29.266667,0.402,0.466667,2.0,1.133333,0.38,88.666667,45.2,34.0,624.066667,1.0,0.0,0.0,3.0,4.0,3.0,0.0,10.0,Orthodox,172.72,180.34,145.0,31.0,37.0
9,Yan Xiaonan,Angela Hill,Robert Madrigal,2019-06-08,"Chicago, Illinois, USA",Red,False,Women's Strawweight,3,0.0,1.0,0.0,25.4,17.9,22.5,16.8,121.1,55.6,2.6,1.9,108.3,45.3,0.2,12.5,11.1,0.1,0.1,146.2,74.3,0.48,0.0,0.8,0.3,0.25,159.0,85.8,1.0,6.0,29.7,16.3,21.3,11.9,168.2,52.0,1.7,1.2,149.5,38.7,0.0,12.0,10.1,1.0,0.0,191.2,65.1,0.362,0.2,4.7,1.2,0.364,208.6,80.2,26.0,763.1,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,Orthodox,160.02,162.56,115.0,0.0,3.0,0.0,9.333333,5.333333,10.666667,7.333333,244.333333,97.666667,1.333333,0.666667,213.666667,79.0,0.0,33.333333,21.333333,0.0,0.0,256.333333,105.666667,0.403333,0.0,0.333333,0.0,0.0,260.666667,109.0,3.0,0.0,21.333333,11.333333,10.0,6.0,175.333333,53.0,3.666667,1.666667,160.0,42.333333,0.0,7.666667,7.0,0.666667,0.0,189.0,60.666667,0.32,0.0,2.0,0.666667,0.11,222.0,89.666667,9.0,900.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,Orthodox,165.1,160.02,115.0,34.0,29.0


In [112]:
#### Convert datetime column to datetime type
matches_df['date'] = pd.to_datetime(matches_df['date'])

In [113]:
matches_df.isnull().sum().sort_values(ascending=False)

B_avg_opp_DISTANCE_att          1265
B_avg_TD_pct                    1265
B_avg_opp_TD_att                1265
B_avg_opp_TD_landed             1265
B_avg_opp_TOTAL_STR_att         1265
B_avg_opp_TOTAL_STR_landed      1265
B_avg_opp_CLINCH_att            1265
B_total_time_fought(seconds)    1265
B_avg_opp_BODY_landed           1265
B_avg_opp_BODY_att              1265
B_avg_TOTAL_STR_landed          1265
B_avg_TOTAL_STR_att             1265
B_avg_TD_landed                 1265
B_avg_opp_SIG_STR_pct           1265
B_avg_LEG_att                   1265
B_avg_LEG_landed                1265
B_avg_TD_att                    1265
B_avg_SUB_ATT                   1265
B_avg_SIG_STR_pct               1265
B_avg_SIG_STR_landed            1265
B_avg_SIG_STR_att               1265
B_avg_REV                       1265
B_avg_PASS                      1265
B_avg_opp_CLINCH_landed         1265
B_avg_opp_SUB_ATT               1265
B_avg_opp_TD_pct                1265
B_avg_opp_SIG_STR_landed        1265
B

In [8]:
matches_df.shape

(5144, 145)

In [14]:
pd.unique(fighters_df['fighter_name'])

array(['AJ Fonseca', 'AJ Matthews', 'AJ McKee', ..., 'Zoila Frausto',
       'Zu Anyanwu', 'Zubaira Tukhugov'], dtype=object)

In [21]:
# Distinct names of the fighters whose Reach value is missing.
missing_reach_fighters = fighters_df.loc[fighters_df["Reach"].isna(), "fighter_name"].unique().tolist()
len(missing_reach_fighters)

1744

In [24]:
# Matches in which either R_fighter or B_fighter is one of the fighters with no recorded reach.
matches_df[matches_df[["R_fighter", "B_fighter"]].isin(missing_reach_fighters).any(axis=1)]

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_KD,B_avg_LEG_att,B_avg_LEG_landed,B_avg_PASS,B_avg_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_att,B_avg_TD_landed,B_avg_TD_pct,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_longest_win_streak,B_losses,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_opp_KD,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_opp_PASS,B_avg_opp_REV,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_opp_SIG_STR_pct,B_avg_opp_SUB_ATT,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_opp_TD_pct,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_total_rounds_fought,B_total_time_fought(seconds),B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_BODY_att,R_avg_BODY_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_KD,R_avg_LEG_att,R_avg_LEG_landed,R_avg_PASS,R_avg_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_att,R_avg_TD_landed,R_avg_TD_pct,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_longest_win_streak,R_losses,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_av
g_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_opp_KD,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_opp_PASS,R_avg_opp_REV,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_opp_SIG_STR_pct,R_avg_opp_SUB_ATT,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_opp_TD_pct,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_total_rounds_fought,R_total_time_fought(seconds),R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
114,John Makdessi,Jesus Pinedo,Herb Dean,2019-03-23,"Nashville, Tennessee, USA",Red,False,Lightweight,3,0.0,1.0,0.0,25.0,16.0,8.0,7.0,106.0,38.0,28.0,15.0,103.0,35.0,0.0,14.0,9.0,0.0,0.0,142.0,60.0,0.42,0.0,5.0,5.0,1.000,153.0,68.0,1.0,0.0,28.0,15.0,14.0,7.0,113.0,50.0,12.0,1.0,78.0,18.0,0.0,33.0,25.0,0.0,0.0,139.0,58.0,0.41,0.0,1.0,0.0,0.0,157.0,68.0,3.0,900.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,177.80,,155.0,0.0,2.0,0.0,25.933333,15.200000,6.133333,3.066667,111.133333,56.466667,3.666667,1.800000,79.133333,33.533333,0.400000,15.866667,12.6,0.000000,0.0,120.933333,61.333333,0.487333,0.000000,0.200000,0.000000,0.000000,127.066667,66.133333,3.0,6.0,16.6,8.200000,2.933333,1.466667,144.600000,40.333333,2.533333,1.666667,119.733333,24.400000,0.2,13.733333,10.866667,0.133333,0.000000,150.066667,43.466667,0.316667,0.066667,2.733333,0.333333,0.078000,152.333333,45.533333,36.0,670.866667,0.0,0.0,1.0,5.0,3.0,0.0,0.0,9.0,Orthodox,172.72,172.72,155.0,22.0,33.0
118,Bryce Mitchell,Bobby Moffett,Ricky Parker,2019-03-23,"Nashville, Tennessee, USA",Red,False,Featherweight,3,0.0,1.0,0.0,2.0,1.0,4.0,1.0,51.0,18.0,1.0,1.0,47.0,13.0,0.0,7.0,6.0,1.0,0.0,56.0,20.0,0.35,1.0,1.0,1.0,1.000,57.0,21.0,1.0,0.0,4.0,3.0,16.0,15.0,44.0,15.0,0.0,0.0,55.0,26.0,0.0,1.0,1.0,2.0,1.0,60.0,30.0,0.50,0.0,2.0,1.0,0.5,81.0,47.0,2.0,463.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,Orthodox,177.80,187.96,145.0,0.0,1.0,0.0,18.000000,9.000000,10.000000,7.000000,39.000000,13.000000,20.000000,18.000000,50.000000,29.000000,0.000000,1.000000,0.0,0.000000,0.0,69.000000,38.000000,0.550000,1.000000,10.000000,3.000000,0.300000,78.000000,45.000000,1.0,0.0,14.0,12.000000,5.000000,1.000000,62.000000,20.000000,40.000000,30.000000,88.000000,34.000000,0.0,5.000000,5.000000,5.000000,2.000000,107.000000,51.000000,0.470000,0.000000,4.000000,3.000000,0.750000,129.000000,72.000000,3.0,900.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,Southpaw,177.80,,145.0,28.0,24.0
152,Tecia Torres,Weili Zhang,Chris Tognoni,2019-03-02,"Las Vegas, Nevada, USA",Blue,False,Women's Strawweight,3,0.0,2.0,0.0,19.0,11.0,5.0,3.5,97.0,37.0,24.0,20.5,77.5,33.5,0.0,29.5,16.5,1.5,0.0,126.0,61.0,0.58,1.0,2.5,1.0,0.415,139.5,73.0,2.0,0.0,10.0,8.0,6.5,6.0,61.0,15.5,0.0,0.0,53.0,11.0,0.0,4.5,2.5,0.0,0.0,67.5,21.5,0.51,0.5,1.0,0.0,0.0,69.0,23.0,4.0,560.5,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,Switch,162.56,,115.0,2.0,0.0,0.0,17.222222,11.777778,12.444444,8.555556,109.777778,49.888889,4.777778,3.666667,95.222222,38.333333,0.111111,14.555556,12.0,1.222222,0.0,127.000000,62.111111,0.493333,0.111111,4.555556,0.777778,0.351111,162.888889,93.000000,3.0,3.0,25.0,17.222222,13.666667,9.222222,106.333333,28.777778,6.555556,5.555556,94.222222,20.333333,0.0,7.333333,6.000000,1.333333,0.222222,126.555556,43.555556,0.323333,0.000000,3.111111,1.777778,0.313333,146.666667,62.000000,26.0,839.222222,0.0,0.0,0.0,5.0,0.0,1.0,0.0,6.0,Orthodox,154.94,152.40,115.0,29.0,29.0
158,Edmen Shahbazyan,Charles Byrd,Mark Smith,2019-03-02,"Las Vegas, Nevada, USA",Red,False,Middleweight,3,1.0,0.0,0.0,2.0,1.5,9.5,6.5,22.5,12.5,6.5,5.5,34.5,21.5,0.0,2.0,1.5,1.5,0.0,38.5,24.5,0.69,0.5,2.0,0.5,0.500,63.0,46.5,1.0,1.0,3.0,3.0,9.0,7.0,24.0,5.0,0.0,0.0,29.5,8.5,0.5,0.5,0.5,0.0,0.0,33.0,12.0,0.18,0.0,0.0,0.0,0.0,35.5,14.5,3.0,337.5,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,Orthodox,177.80,185.42,185.0,0.0,1.0,0.0,8.000000,6.000000,15.000000,11.000000,21.000000,10.000000,1.000000,1.000000,25.000000,12.000000,0.000000,4.000000,4.0,3.000000,0.0,37.000000,22.000000,0.590000,0.000000,21.000000,8.000000,0.380000,43.000000,27.000000,1.0,0.0,6.0,4.000000,16.000000,12.000000,40.000000,11.000000,15.000000,11.000000,63.000000,29.000000,0.0,2.000000,1.000000,0.000000,0.000000,71.000000,34.000000,0.470000,0.000000,3.000000,1.000000,0.330000,84.000000,46.000000,3.0,900.000000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,Orthodox,187.96,,185.0,35.0,21.0
173,Damir Ismagulov,Joel Alvarez,Jakub Muller,2019-02-23,"Prague, Czech Republic",Red,False,Lightweight,3,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Orthodox,190.50,195.58,155.0,0.0,1.0,0.0,13.000000,9.000000,3.000000,3.000000,32.000000,20.000000,20.000000,11.000000,38.000000,22.000000,0.000000,4.000000,3.0,4.000000,0.0,55.000000,34.000000,0.610000,0.000000,11.000000,5.000000,0.450000,137.000000,103.000000,1.0,0.0,5.0,3.000000,5.000000,5.000000,26.000000,5.000000,0.000000,0.000000,21.000000,5.000000,0.0,5.000000,2.000000,0.000000,0.000000,31.000000,10.000000,0.320000,0.000000,0.000000,0.000000,0.000000,43.000000,22.000000,3.0,900.000000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,Orthodox,177.80,,155.0,24.0,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5139,Gerard Gordeau,Kevin Rosier,Joao Alberto Barreto,1993-11-12,"Denver, Colorado, USA",Red,False,Open Weight,1,0.0,1.0,0.0,4.0,3.0,9.0,4.0,10.0,4.0,8.0,7.0,23.0,12.0,2.0,0.0,0.0,0.0,0.0,27.0,15.0,0.55,0.0,0.0,0.0,0.000,53.0,38.0,1.0,0.0,6.0,3.0,19.0,10.0,7.0,0.0,2.0,2.0,19.0,7.0,0.0,3.0,2.0,0.0,0.0,28.0,12.0,0.42,0.0,0.0,0.0,0.0,29.0,13.0,1.0,260.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,Orthodox,193.04,,275.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0.000000,3.000000,1.000000,2.000000,2.000000,5.000000,3.000000,0.000000,0.000000,0.0,0.000000,0.0,5.000000,3.000000,0.600000,0.000000,0.000000,0.000000,0.000000,5.000000,3.000000,1.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.000000,1.0,26.000000,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,Orthodox,195.58,,216.0,,34.0
5140,Ken Shamrock,Patrick Smith,Joao Alberto Barreto,1993-11-12,"Denver, Colorado, USA",Red,False,Open Weight,1,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Orthodox,187.96,,225.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Orthodox,185.42,182.88,205.0,30.0,29.0
5141,Royce Gracie,Art Jimmerson,Joao Alberto Barreto,1993-11-12,"Denver, Colorado, USA",Red,False,Open Weight,1,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Orthodox,185.42,,196.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Southpaw,185.42,,175.0,30.0,26.0
5142,Kevin Rosier,Zane Frazier,Joao Alberto Barreto,1993-11-12,"Denver, Colorado, USA",Red,False,Open Weight,1,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Orthodox,195.58,,250.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Orthodox,193.04,,275.0,,
