# Purpose
This notebook is designed to convert the existing databases into a new fighter_round_performance table.
This notebook only creates stats for significant strikes.

### Result

Fighter Round Performance:
 - TDA - Takedown Attempts
 - TDS - Takedown Successes
 - TD_AC - Takedown Accuracy
 - TD_DE - Takedown Defense
 - TDA_DI - Takedown Differential
 - TDS_DI - Takedown Differential
 - TDA_P15M - Takedown Attempts Per 1 Minute
 - TDS_P15M - Takedown Successes Per 1 Minute

In [45]:
%load_ext autoreload
%autoreload 2

import os
import sys
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
from sqlalchemy import create_engine
from src import local
from src import functions

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
# Credentials
USER = local.user 
PASS = local.password
HOST = local.host
PORT = local.port

#create engine
engine = create_engine(f'postgresql://{USER}:{PASS}@{HOST}:{PORT}/match_finder')

# Get data from postgres database
### Join tables

- get the date from the events table
- use the bouts table to join the dates to the general table
- use the general table to join the bouts with the fighters


#### Accuracy case statement

In [47]:
query = """
SELECT bout_link, fighter_link, td_a, 
td_s, "Date", round, "Time", 
"Round" as final_round, "Timeformat"
FROM general_cleaned
JOIN bouts ON bouts.link = general_cleaned.bout_link
JOIN events ON events.link = bouts.event_link
"""

data = pd.read_sql(query, engine)

In [48]:
data = functions.format_data(data, event=False)

In [49]:
data

Unnamed: 0,bout_link,fighter_link,td_a,td_s,Date,round,Time,final_round,Timeformat,date,fighter_id,bout_id
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
6,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
12,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
18,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
24,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,7,2,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
...,...,...,...,...,...,...,...,...,...,...,...,...
26447,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/a5c53b...,0,0,"November 12, 1993",1,2:18,1,No Time Limit,1993-11-12,a5c53b3ddb31cc7d,cecdc0da584274b9
26448,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,0,0,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,598a58db87b890ee,2d2bbc86e941e05c
26449,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/d3711d...,0,0,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,d3711d3784b76255,2d2bbc86e941e05c
26450,http://www.ufcstats.com/fight-details/567a09fd...,http://www.ufcstats.com/fighter-details/279093...,0,0,"November 12, 1993",1,0:26,1,No Time Limit,1993-11-12,279093302a6f44b3,567a09fd200cfa05


In [50]:
data_original = data

### Calculate accuracy

In [53]:
def get_accuracy(row, stat):
    if row[stat+'_a'] == 0:
        return pd.NA
    else:
        return row[stat+'_s']/row[stat+'_a']

In [54]:
data[stat+'_ac'] = data.apply(lambda x: get_accuracy(x, 'td'), axis=1)

In [55]:
data

Unnamed: 0,bout_link,fighter_link,td_a,td_s,Date,round,Time,final_round,Timeformat,date,fighter_id,bout_id,td_ac
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0
6,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,
12,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0
18,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0
24,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,7,2,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.285714
...,...,...,...,...,...,...,...,...,...,...,...,...,...
26447,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/a5c53b...,0,0,"November 12, 1993",1,2:18,1,No Time Limit,1993-11-12,a5c53b3ddb31cc7d,cecdc0da584274b9,
26448,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,0,0,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,598a58db87b890ee,2d2bbc86e941e05c,
26449,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/d3711d...,0,0,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,d3711d3784b76255,2d2bbc86e941e05c,
26450,http://www.ufcstats.com/fight-details/567a09fd...,http://www.ufcstats.com/fighter-details/279093...,0,0,"November 12, 1993",1,0:26,1,No Time Limit,1993-11-12,279093302a6f44b3,567a09fd200cfa05,


In order to get the striking defense, we need each column to include the fighters opponent. 

In [56]:
data_0 = functions.merge_fighter_instances(data, rounds=True)
data_1 = functions.merge_fighter_instances(data, rounds=True, flip=True)

data = pd.concat((data_0, data_1))

In [57]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26214 entries, 0 to 13106
Data columns (total 29 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   bout_link_0     26214 non-null  object        
 1   fighter_link_0  26214 non-null  object        
 2   td_a_0          26214 non-null  int64         
 3   td_s_0          26214 non-null  int64         
 4   Date_0          26214 non-null  object        
 5   round_0         26214 non-null  object        
 6   Time_0          26214 non-null  object        
 7   final_round_0   26214 non-null  int64         
 8   Timeformat_0    26214 non-null  object        
 9   date_0          26214 non-null  datetime64[ns]
 10  fighter_id_0    26214 non-null  object        
 11  bout_id_0       26214 non-null  object        
 12  td_ac_0         14290 non-null  object        
 13  round_id        26214 non-null  object        
 14  inst_id_0       26214 non-null  object        
 15  bo

In [58]:
stat = 'td'

In [59]:
data

Unnamed: 0,bout_link_0,fighter_link_0,td_a_0,td_s_0,Date_0,round_0,Time_0,final_round_0,Timeformat_0,date_0,...,Date_1,round_1,Time_1,final_round_1,Timeformat_1,date_1,fighter_id_1,bout_id_1,td_ac_1,inst_id_1
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7
1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7
2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7
3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7
4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,7,2,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13102,http://www.ufcstats.com/fight-details/ac7ca2ec...,http://www.ufcstats.com/fighter-details/279093...,0,0,"November 12, 1993",1,0:59,1,No Time Limit,1993-11-12,...,"November 12, 1993",1,0:59,1,No Time Limit,1993-11-12,598a58db87b890ee,ac7ca2ec38b96c1a,,ac7ca2ec38b96c1a598a58db87b890ee
13103,http://www.ufcstats.com/fight-details/46acd54c...,http://www.ufcstats.com/fighter-details/46c8ec...,0,0,"November 12, 1993",1,1:49,1,No Time Limit,1993-11-12,...,"November 12, 1993",1,1:49,1,No Time Limit,1993-11-12,63b65af1c5cb02cb,46acd54cc0c905fb,0.5,46acd54cc0c905fb63b65af1c5cb02cb
13104,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/429e7d...,1,1,"November 12, 1993",1,2:18,1,No Time Limit,1993-11-12,...,"November 12, 1993",1,2:18,1,No Time Limit,1993-11-12,a5c53b3ddb31cc7d,cecdc0da584274b9,,cecdc0da584274b9a5c53b3ddb31cc7d
13105,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,0,0,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,...,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,d3711d3784b76255,2d2bbc86e941e05c,,2d2bbc86e941e05cd3711d3784b76255


### Calculate defense by subtracting the opponents accuracy from 1

In [63]:
data[stat+'_de_0'] = 1 - data[stat+'_ac_1']

In [64]:
data

Unnamed: 0,bout_link_0,fighter_link_0,td_a_0,td_s_0,Date_0,round_0,Time_0,final_round_0,Timeformat_0,date_0,...,Time_1,final_round_1,Timeformat_1,date_1,fighter_id_1,bout_id_1,td_ac_1,inst_id_1,td_de,td_de_0
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,
1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,
2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,
3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,
4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,7,2,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,...,5:00,5,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13102,http://www.ufcstats.com/fight-details/ac7ca2ec...,http://www.ufcstats.com/fighter-details/279093...,0,0,"November 12, 1993",1,0:59,1,No Time Limit,1993-11-12,...,0:59,1,No Time Limit,1993-11-12,598a58db87b890ee,ac7ca2ec38b96c1a,,ac7ca2ec38b96c1a598a58db87b890ee,,
13103,http://www.ufcstats.com/fight-details/46acd54c...,http://www.ufcstats.com/fighter-details/46c8ec...,0,0,"November 12, 1993",1,1:49,1,No Time Limit,1993-11-12,...,1:49,1,No Time Limit,1993-11-12,63b65af1c5cb02cb,46acd54cc0c905fb,0.5,46acd54cc0c905fb63b65af1c5cb02cb,0.5,0.5
13104,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/429e7d...,1,1,"November 12, 1993",1,2:18,1,No Time Limit,1993-11-12,...,2:18,1,No Time Limit,1993-11-12,a5c53b3ddb31cc7d,cecdc0da584274b9,,cecdc0da584274b9a5c53b3ddb31cc7d,,
13105,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,0,0,"November 12, 1993",1,4:20,1,No Time Limit,1993-11-12,...,4:20,1,No Time Limit,1993-11-12,d3711d3784b76255,2d2bbc86e941e05c,,2d2bbc86e941e05cd3711d3784b76255,,


Clean the columns for continuity

### Calculating significant strike differential

In [65]:
data[stat+'_a_di_0'] = data[stat+'_a_0'] - data[stat+'_a_1']
data[stat+'_s_di_0'] = data[stat+'_s_0'] - data[stat+'_s_1']

In [66]:
data.reset_index(inplace = True)

In [67]:
data[data['bout_id_0'] == data['bout_id_0'][0]]

Unnamed: 0,index,bout_link_0,fighter_link_0,td_a_0,td_s_0,Date_0,round_0,Time_0,final_round_0,Timeformat_0,...,Timeformat_1,date_1,fighter_id_1,bout_id_1,td_ac_1,inst_id_1,td_de,td_de_0,td_a_di_0,td_s_di_0
0,0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,2,0
1,1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,0,0
2,2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,2,0
3,3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,2,0
4,4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,7,2,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,7,2
13107,0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.0,11f715fa5e825e51e1147d3d2dabe1ce,1.0,1.0,-2,0
13108,1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,,11f715fa5e825e51e1147d3d2dabe1ce,,,0,0
13109,2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.0,11f715fa5e825e51e1147d3d2dabe1ce,1.0,1.0,-2,0
13110,3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.0,11f715fa5e825e51e1147d3d2dabe1ce,1.0,1.0,-2,0
13111,4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),...,5 Rnd (5-5-5-5-5),2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.285714,11f715fa5e825e51e1147d3d2dabe1ce,0.714286,0.714286,-7,-2


### Calculate SSA_P1M
This is the Significant Strike Attempts per Minute. 

### Create round length column
First we need to have a time for each round. The current time column only measures the time on the clock at which the fight was stopped. This number can only be used for the last round. We'll group all the bout_ids and create a datafram that matches the highest round_id value to the time column.

We'll only look at fights with five minute rounds to make the calculation easier.

In [83]:
non_standard_rounds = ['No Time Limit', '1 Rnd + OT (31-5)', '1 Rnd (20)', '1 Rnd (30)',
                   '1 Rnd + OT (30-5)', '1 Rnd + OT (30-3)', '1 Rnd (15)', '1 Rnd (18)',
                   '1 Rnd + OT (27-3)', '1 Rnd (10)', '1 Rnd + 2OT (15-3-3)',
                   '1 Rnd + OT (12-3)', '1 Rnd + 2OT (24-3-3)', '1 Rnd + OT (15-3)',
                   '1 Rnd (12)']

mask = data.Timeformat_0.map(lambda x: functions.black_list_entry(x, non_standard_rounds))
data = data[mask]
data.Timeformat_0.value_counts()

3 Rnd (5-5-5)           22652
5 Rnd (5-5-5-5-5)        2964
3 Rnd + OT (5-5-5-5)      106
2 Rnd (5-5)                50
Name: Timeformat_0, dtype: int64

We still have most of our fights, so we'll use this as our timeframe from now on.

In [84]:
bout_groups = data.groupby('bout_id_0')
round_id = bout_groups.round_id.max()
round_length = bout_groups.Time_0.max()

final_round_lengths = pd.DataFrame(dict(round_id = round_id, round_length = round_length))

In [85]:
final_round_lengths.set_index('round_id', inplace=True)
final_round_lengths

Unnamed: 0_level_0,round_length
round_id,Unnamed: 1_level_1
000da3152b7b5ab13,5:00
0019ec81fd706ade3,5:00
0027e179b743c86c3,3:12
002921976d27b7da1,4:13
002c1562708ac3071,4:06
...,...
ffd3e3d37cba32da3,5:00
ffe4379d6bd1e82b2,1:43
ffe629a5232a878b1,1:59
ffea776913451b6d1,2:37


In [86]:
new_data = data.join(final_round_lengths, on='round_id', how='outer')
new_data.head(15)

Unnamed: 0,index,bout_link_0,fighter_link_0,td_a_0,td_s_0,Date_0,round_0,Time_0,final_round_0,Timeformat_0,...,date_1,fighter_id_1,bout_id_1,td_ac_1,inst_id_1,td_de,td_de_0,td_a_di_0,td_s_di_0,round_length
0,0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,2,0,
13107,0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.0,11f715fa5e825e51e1147d3d2dabe1ce,1.0,1.0,-2,0,
1,1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,0,0,
13108,1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,,11f715fa5e825e51e1147d3d2dabe1ce,,,0,0,
2,2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,2,0,
13109,2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.0,11f715fa5e825e51e1147d3d2dabe1ce,1.0,1.0,-2,0,
3,3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,2,0,
13110,3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",4,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.0,11f715fa5e825e51e1147d3d2dabe1ce,1.0,1.0,-2,0,
4,4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,7,2,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,9ce6d5a03af801b7,11f715fa5e825e51,,11f715fa5e825e519ce6d5a03af801b7,,,7,2,5:00
13111,4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",5,5:00,5,5 Rnd (5-5-5-5-5),...,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51,0.285714,11f715fa5e825e51e1147d3d2dabe1ce,0.714286,0.714286,-7,-2,5:00


 Now that we have the final rounds filled in, ever null value should be '5:00'.

In [87]:
new_data.round_length = new_data.round_length.fillna('5:00')

In [88]:
new_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25772 entries, 0 to 25992
Data columns (total 35 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   index           25772 non-null  int64         
 1   bout_link_0     25772 non-null  object        
 2   fighter_link_0  25772 non-null  object        
 3   td_a_0          25772 non-null  int64         
 4   td_s_0          25772 non-null  int64         
 5   Date_0          25772 non-null  object        
 6   round_0         25772 non-null  object        
 7   Time_0          25772 non-null  object        
 8   final_round_0   25772 non-null  int64         
 9   Timeformat_0    25772 non-null  object        
 10  date_0          25772 non-null  datetime64[ns]
 11  fighter_id_0    25772 non-null  object        
 12  bout_id_0       25772 non-null  object        
 13  td_ac_0         14055 non-null  object        
 14  round_id        25772 non-null  object        
 15  in

### Calculate assa-m

before calculating, we need to convert the round length column into a timedelta object.

In [93]:
new_data.round_length = '00:0' + new_data.round_length

new_data.round_length = pd.to_timedelta(new_data.round_length)

new_data.round_length.describe()

TypeError: can only concatenate str (not "TimedeltaArray") to str

In [91]:
new_data['minutes'] = new_data.round_length.map(lambda x: x.total_seconds()/60)

In [99]:
new_data[stat+'_a_p1m_0'] = new_data[stat+'_a_0'] / new_data.minutes
new_data[stat+'_s_p1m_0'] = new_data[stat+'_s_0'] / new_data.minutes
new_data

Unnamed: 0,index,bout_link_0,fighter_link_0,td_a_0,td_s_0,Date_0,round_0,Time_0,final_round_0,Timeformat_0,...,td_de,td_de_0,td_a_di_0,td_s_di_0,round_length,minutes,td_a_p1m,td_s_p1m,td_a_p1m_0,td_s_p1m_0
0,0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),...,,,2,0,00:05:00,5.00,0.4,0.0,0.4,0.0
13107,0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",1,5:00,5,5 Rnd (5-5-5-5-5),...,1,1,-2,0,00:05:00,5.00,0.0,0.0,0.0,0.0
1,1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),...,,,0,0,00:05:00,5.00,0.0,0.0,0.0,0.0
13108,1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/9ce6d5...,0,0,"July 25, 2020",2,5:00,5,5 Rnd (5-5-5-5-5),...,,,0,0,00:05:00,5.00,0.0,0.0,0.0,0.0
2,2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,2,0,"July 25, 2020",3,5:00,5,5 Rnd (5-5-5-5-5),...,,,2,0,00:05:00,5.00,0.4,0.0,0.4,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25990,12883,http://www.ufcstats.com/fight-details/c6f85419...,http://www.ufcstats.com/fighter-details/a54a35...,0,0,"July 16, 1999",1,2:51,1,2 Rnd (5-5),...,,,0,0,00:02:51,2.85,0.0,0.0,0.0,0.0
12884,12884,http://www.ufcstats.com/fight-details/1db8bed8...,http://www.ufcstats.com/fighter-details/c2a762...,4,0,"July 16, 1999",1,5:00,2,2 Rnd (5-5),...,0.5,0.5,2,-1,00:05:00,5.00,0.8,0.0,0.8,0.0
25991,12884,http://www.ufcstats.com/fight-details/1db8bed8...,http://www.ufcstats.com/fighter-details/911fb2...,2,1,"July 16, 1999",1,5:00,2,2 Rnd (5-5),...,1,1,-2,1,00:05:00,5.00,0.4,0.2,0.4,0.2
12885,12885,http://www.ufcstats.com/fight-details/1db8bed8...,http://www.ufcstats.com/fighter-details/c2a762...,1,0,"July 16, 1999",2,5:00,2,2 Rnd (5-5),...,,,1,0,00:05:00,5.00,0.2,0.0,0.2,0.0


In [98]:
new_data.td_a.describe()

AttributeError: 'DataFrame' object has no attribute 'td_a'

In [81]:
data=new_data

## Clean up
Clean the new dataframe so that it has all of the info in the correct places and then I'm going to merge it with the original strikes table.

In [82]:
data.columns

Index(['index', 'bout_link', 'fighter_link', 'ssa', 'sss', 'date', 'round',
       'ss_ac', 'fighter_id', 'bout_id', 'round_id', 'inst_id', 'ssa_1',
       'sss_1', 'ss_ac_1', 'time', 'timeformat', 'fighter_id_1', 'ss_de',
       'sss_di', 'ssa_di', 'round_length', 'minutes', 'ssa_p1m', 'sss_p1m'],
      dtype='object')

In [83]:
to_drop = ['index', 'round_length', 'timeformat', 'time']
data.drop(to_drop, axis=1, inplace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25772 entries, 0 to 25992
Data columns (total 21 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   bout_link     25772 non-null  object        
 1   fighter_link  25772 non-null  object        
 2   ssa           25772 non-null  int64         
 3   sss           25772 non-null  int64         
 4   date          25772 non-null  datetime64[ns]
 5   round         25772 non-null  object        
 6   ss_ac         25772 non-null  float64       
 7   fighter_id    25772 non-null  object        
 8   bout_id       25772 non-null  object        
 9   round_id      25772 non-null  object        
 10  inst_id       25772 non-null  object        
 11  ssa_1         25772 non-null  int64         
 12  sss_1         25772 non-null  int64         
 13  ss_ac_1       25772 non-null  float64       
 14  fighter_id_1  25772 non-null  object        
 15  ss_de         25772 non-null  float6

In [84]:
data.columns

Index(['bout_link', 'fighter_link', 'ssa', 'sss', 'date', 'round', 'ss_ac',
       'fighter_id', 'bout_id', 'round_id', 'inst_id', 'ssa_1', 'sss_1',
       'ss_ac_1', 'fighter_id_1', 'ss_de', 'sss_di', 'ssa_di', 'minutes',
       'ssa_p1m', 'sss_p1m'],
      dtype='object')

In [85]:
data = data.loc[:,['date', 'bout_link', 'fighter_link', 'round', 'minutes',
            'ssa', 'sss', 'ss_ac', 'ss_de', 'sss_di', 'ssa_di', 'ssa_p1m', 'sss_p1m',
            'fighter_id_1', 'ssa_1', 'sss_1', 'ss_ac_1']]

In [86]:
data.to_csv('../../data/ufcstats_data/fighter_round_performance.csv', index=False)