In [76]:
import pandas as pd
import numpy as np
import re

# Data Load and Combine:

In [77]:
# Read the six batch CSVs (batch numbers 1 through 6) into a list of frames.
# index_col=0 makes each file's first column the row index.
batch_numbers = range(1, 7)
data = [
    pd.read_csv('Data/fight_details_batch{}.csv'.format(batch_no), index_col=0)
    for batch_no in batch_numbers
]

In [78]:
# Stack every batch frame in one call. Growing the frame with pd.concat
# inside a loop re-copies all previously accumulated rows on each pass,
# and seeding the loop with an empty DataFrame is unnecessary.
df = pd.concat(data)

# (rows, columns) of the stacked frame.
df.shape

(5564, 55)

In [79]:
# Renumber rows 0..n-1 after stacking; the labels carried over from the
# individual batch files are discarded rather than kept as a column.
df = df.reset_index(drop=True)

# Clean Data:

### Find and Drop Placeholder Rows (KD == 99999):

In [80]:
# Collect the row labels whose 'KD' value equals 99999.
# NOTE(review): 99999 looks like a placeholder written upstream for missing
# fight stats — confirm against the scraper.
kd_is_placeholder = df['KD'].eq(99999)
drop_index = df.loc[kd_is_placeholder].index

In [81]:
# Remove the rows whose labels were collected in drop_index.
df = df.drop(index=drop_index)

### Change Data Formats:

In [82]:
# Inspect each column's dtype before any format conversion.
df.dtypes

Fighter                  object
KD                        int64
Sig. str. %              object
Td %                     object
Sub. att                  int64
Pass                      int64
Rev.                      int64
Sig. str. Hits            int64
Sig. str. Attempts        int64
Total str. Hits           int64
Total str. Attempts       int64
Td Hits                   int64
Td Attempts               int64
Head Hits                 int64
Head Attempts             int64
Body Hits                 int64
Body Attempts             int64
Leg Hits                  int64
Leg Attempts              int64
Distance Hits             int64
Distance Attempts         int64
Clinch Hits               int64
Clinch Attempts           int64
Ground Hits               int64
Ground Attempts           int64
Fighter.1                object
KD.1                      int64
Sig. str. %.1            object
Td %.1                   object
Sub. att.1                int64
Pass.1                    int64
Rev..1  

In [83]:
# List all column labels; the second fighter's columns carry a '.1' suffix.
df.columns

Index(['Fighter', 'KD', 'Sig. str. %', 'Td %', 'Sub. att', 'Pass', 'Rev.',
       'Sig. str. Hits', 'Sig. str. Attempts', 'Total str. Hits',
       'Total str. Attempts', 'Td Hits', 'Td Attempts', 'Head Hits',
       'Head Attempts', 'Body Hits', 'Body Attempts', 'Leg Hits',
       'Leg Attempts', 'Distance Hits', 'Distance Attempts', 'Clinch Hits',
       'Clinch Attempts', 'Ground Hits', 'Ground Attempts', 'Fighter.1',
       'KD.1', 'Sig. str. %.1', 'Td %.1', 'Sub. att.1', 'Pass.1', 'Rev..1',
       'Sig. str. Hits.1', 'Sig. str. Attempts.1', 'Total str. Hits.1',
       'Total str. Attempts.1', 'Td Hits.1', 'Td Attempts.1', 'Head Hits.1',
       'Head Attempts.1', 'Body Hits.1', 'Body Attempts.1', 'Leg Hits.1',
       'Leg Attempts.1', 'Distance Hits.1', 'Distance Attempts.1',
       'Clinch Hits.1', 'Clinch Attempts.1', 'Ground Hits.1',
       'Ground Attempts.1', 'rounds', 'time', 'format', 'referee', 'url'],
      dtype='object')

In [84]:
# Remove the literal '%' character from the percentage columns of both
# fighters. The vectorised .str accessor replaces four copy-pasted per-row
# re.sub lambdas and passes missing values through instead of raising
# TypeError on them.
percent_text_columns = ['Sig. str. %', 'Td %', 'Sig. str. %.1', 'Td %.1']

for percent_col in percent_text_columns:
    # regex=False: '%' is matched as a plain substring, not a pattern.
    df[percent_col] = df[percent_col].str.replace('%', '', regex=False)

In [85]:
# Cast each percentage column to float and rescale it by 1/100.
# Re-running this cell divides by 100 again, so execute it once per load.
for pct_col in ('Sig. str. %', 'Td %', 'Sig. str. %.1', 'Td %.1'):
    df[pct_col] = df[pct_col].astype(float) / 100

In [86]:
# Preview the first rows after the percentage columns were converted.
df.head()

Unnamed: 0,Fighter,KD,Sig. str. %,Td %,Sub. att,Pass,Rev.,Sig. str. Hits,Sig. str. Attempts,Total str. Hits,...,Distance Attempts.1,Clinch Hits.1,Clinch Attempts.1,Ground Hits.1,Ground Attempts.1,rounds,time,format,referee,url
0,Kevin Lee,0,0.51,0.66,0,2,1,41,80,61,...,56,2,2,6,7,3,00:00:28,5 Rnd (5-5-5-5-5),Mike Beltran,http://ufcstats.com/fight-details/e0b323dae5bf...
1,Demian Maia,0,0.57,1.0,0,1,0,4,7,4,...,7,0,0,8,9,1,00:02:34,3 Rnd (5-5-5),Osiris Maia,http://ufcstats.com/fight-details/e0b323dae5bf...
2,Renato Moicano,0,0.5,1.0,1,1,0,1,2,4,...,5,0,0,0,0,1,00:00:44,3 Rnd (5-5-5),Eduardo Herdy,http://ufcstats.com/fight-details/e0b323dae5bf...
3,Johnny Walker,0,0.74,0.0,0,2,1,37,50,91,...,18,2,3,33,37,3,00:05:00,3 Rnd (5-5-5),Mike Beltran,http://ufcstats.com/fight-details/e0b323dae5bf...
4,Francisco Trinaldo,0,0.43,0.0,0,0,0,55,126,55,...,121,2,2,0,0,3,00:05:00,3 Rnd (5-5-5),Osiris Maia,http://ufcstats.com/fight-details/e0b323dae5bf...


In [87]:
# Re-check dtypes now that the percentage columns have been cast to float.
df.dtypes

Fighter                   object
KD                         int64
Sig. str. %              float64
Td %                     float64
Sub. att                   int64
Pass                       int64
Rev.                       int64
Sig. str. Hits             int64
Sig. str. Attempts         int64
Total str. Hits            int64
Total str. Attempts        int64
Td Hits                    int64
Td Attempts                int64
Head Hits                  int64
Head Attempts              int64
Body Hits                  int64
Body Attempts              int64
Leg Hits                   int64
Leg Attempts               int64
Distance Hits              int64
Distance Attempts          int64
Clinch Hits                int64
Clinch Attempts            int64
Ground Hits                int64
Ground Attempts            int64
Fighter.1                 object
KD.1                       int64
Sig. str. %.1            float64
Td %.1                   float64
Sub. att.1                 int64
Pass.1    

# Combine with Event-Level Data:

In [88]:
# Load the event-level table; the CSV's first column becomes the row index.
event_data = pd.read_csv('Data/event_level_data.csv', index_col = 0)

In [91]:
# Drop from the event-level table the same row labels removed from df.
# NOTE(review): relies on event_data's index (read from the CSV) using the
# same labels as df's reset index — confirm the two files are row-aligned.
event_data = event_data.drop(index=drop_index)

In [92]:
# (rows, columns) of the event-level table after the drop.
event_data.shape

(5538, 21)

In [93]:
# Put the event-level columns and the fight-detail columns side by side.
# Along the column axis, concat pairs rows up by index label.
frames_side_by_side = [event_data, df]
combined_df = pd.concat(frames_side_by_side, axis='columns')
combined_df.head()

Unnamed: 0,Winner,R_fighter,B_fighter,R_STR,B_STR,R_TD,B_TD,R_SUB,B_SUB,R_PASS,...,Distance Attempts.1,Clinch Hits.1,Clinch Attempts.1,Ground Hits.1,Ground Attempts.1,rounds,time,format,referee,url
0,win,Charles Oliveira,Kevin Lee,43,41,0,2,2,0,0,...,56,2,2,6,7,3,00:00:28,5 Rnd (5-5-5-5-5),Mike Beltran,http://ufcstats.com/fight-details/e0b323dae5bf...
1,win,Gilbert Burns,Demian Maia,13,4,0,2,0,0,0,...,7,0,0,8,9,1,00:02:34,3 Rnd (5-5-5),Osiris Maia,http://ufcstats.com/fight-details/e0b323dae5bf...
2,win,Renato Moicano,Damir Hadzovic,1,1,1,0,1,0,1,...,5,0,0,0,0,1,00:00:44,3 Rnd (5-5-5),Eduardo Herdy,http://ufcstats.com/fight-details/e0b323dae5bf...
3,win,Nikita Krylov,Johnny Walker,45,37,3,0,0,0,4,...,18,2,3,33,37,3,00:05:00,3 Rnd (5-5-5),Mike Beltran,http://ufcstats.com/fight-details/e0b323dae5bf...
4,win,Francisco Trinaldo,John Makdessi,55,67,0,0,0,0,0,...,121,2,2,0,0,3,00:05:00,3 Rnd (5-5-5),Osiris Maia,http://ufcstats.com/fight-details/e0b323dae5bf...


In [100]:
# List the combined frame's columns: event-level fields first, then fight details.
combined_df.columns

Index(['Winner', 'R_fighter', 'B_fighter', 'R_STR', 'B_STR', 'R_TD', 'B_TD',
       'R_SUB', 'B_SUB', 'R_PASS', 'B_PASS', 'WEIGHT_CLASS', 'METHOD',
       'DETAIL', 'ROUND', 'TIME', 'title_bout', 'link', 'date', 'location',
       'attendance', 'Fighter', 'KD', 'Sig. str. %', 'Td %', 'Sub. att',
       'Pass', 'Rev.', 'Sig. str. Hits', 'Sig. str. Attempts',
       'Total str. Hits', 'Total str. Attempts', 'Td Hits', 'Td Attempts',
       'Head Hits', 'Head Attempts', 'Body Hits', 'Body Attempts', 'Leg Hits',
       'Leg Attempts', 'Distance Hits', 'Distance Attempts', 'Clinch Hits',
       'Clinch Attempts', 'Ground Hits', 'Ground Attempts', 'Fighter.1',
       'KD.1', 'Sig. str. %.1', 'Td %.1', 'Sub. att.1', 'Pass.1', 'Rev..1',
       'Sig. str. Hits.1', 'Sig. str. Attempts.1', 'Total str. Hits.1',
       'Total str. Attempts.1', 'Td Hits.1', 'Td Attempts.1', 'Head Hits.1',
       'Head Attempts.1', 'Body Hits.1', 'Body Attempts.1', 'Leg Hits.1',
       'Leg Attempts.1', 'Distance Hit

In [105]:
# Event-level columns to remove from the combined frame.
# NOTE(review): presumably redundant with the fight-detail columns
# (e.g. 'rounds', 'time', 'url') — confirm before relying on that.
drop_columns = [
    'R_STR', 'B_STR',
    'R_TD', 'B_TD',
    'R_SUB', 'B_SUB',
    'R_PASS', 'B_PASS',
    'TIME',
    'ROUND',
    'link',
]

In [111]:
# Remove the columns named in drop_columns from the combined frame.
combined_df = combined_df.drop(columns=drop_columns)

In [112]:
# Display the combined frame; pandas elides the middle rows and columns of large frames.
combined_df

Unnamed: 0,Winner,R_fighter,B_fighter,WEIGHT_CLASS,METHOD,DETAIL,ROUND,TIME,title_bout,link,...,Distance Attempts.1,Clinch Hits.1,Clinch Attempts.1,Ground Hits.1,Ground Attempts.1,rounds,time,format,referee,url
0,win,Charles Oliveira,Kevin Lee,Lightweight,SUB,Guillotine Choke,3,00:00:28,0.0,http://ufcstats.com/fight-details/e0b323dae5bf...,...,56,2,2,6,7,3,00:00:28,5 Rnd (5-5-5-5-5),Mike Beltran,http://ufcstats.com/fight-details/e0b323dae5bf...
1,win,Gilbert Burns,Demian Maia,Welterweight,KO/TKO,Punch,1,00:02:34,0.0,http://ufcstats.com/fight-details/5cee1d8f1e43...,...,7,0,0,8,9,1,00:02:34,3 Rnd (5-5-5),Osiris Maia,http://ufcstats.com/fight-details/e0b323dae5bf...
2,win,Renato Moicano,Damir Hadzovic,Lightweight,SUB,Rear Naked Choke,1,00:00:44,0.0,http://ufcstats.com/fight-details/c26a3f4c0833...,...,5,0,0,0,0,1,00:00:44,3 Rnd (5-5-5),Eduardo Herdy,http://ufcstats.com/fight-details/e0b323dae5bf...
3,win,Nikita Krylov,Johnny Walker,Light Heavyweight,U-DEC,,3,00:05:00,0.0,http://ufcstats.com/fight-details/5bba49d88db7...,...,18,2,3,33,37,3,00:05:00,3 Rnd (5-5-5),Mike Beltran,http://ufcstats.com/fight-details/e0b323dae5bf...
4,win,Francisco Trinaldo,John Makdessi,Lightweight,U-DEC,,3,00:05:00,0.0,http://ufcstats.com/fight-details/dc45c8d70e25...,...,121,2,2,0,0,3,00:05:00,3 Rnd (5-5-5),Osiris Maia,http://ufcstats.com/fight-details/e0b323dae5bf...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5559,win,Gerard Gordeau,Kevin Rosier,Open Weight,KO/TKO,,1,00:00:59,0.0,http://ufcstats.com/fight-details/ac7ca2ec38b9...,...,3,0,0,0,0,1,00:00:59,No Time Limit,Joao Alberto Barreto,http://ufcstats.com/fight-details/ac7ca2ec38b9...
5560,win,Ken Shamrock,Patrick Smith,Open Weight,SUB,Heel Hook,1,00:01:49,0.0,http://ufcstats.com/fight-details/46acd54cc0c9...,...,1,1,1,2,6,1,00:01:49,No Time Limit,Joao Alberto Barreto,http://ufcstats.com/fight-details/46acd54cc0c9...
5561,win,Royce Gracie,Art Jimmerson,Open Weight,SUB,Other,1,00:02:18,0.0,http://ufcstats.com/fight-details/cecdc0da5842...,...,0,0,0,0,0,1,00:02:18,No Time Limit,Joao Alberto Barreto,http://ufcstats.com/fight-details/cecdc0da5842...
5562,win,Kevin Rosier,Zane Frazier,Open Weight,KO/TKO,,1,00:04:20,0.0,http://ufcstats.com/fight-details/2d2bbc86e941...,...,7,10,19,2,2,1,00:04:20,No Time Limit,Joao Alberto Barreto,http://ufcstats.com/fight-details/2d2bbc86e941...
