# Purpose
The next model iteration will incorporate significant strike accuracy (SSA).
SSA is the percentage of significant strikes that land out of the total number attempted.
This might give us good predictive power because it tells us how reliable a striker is.
Accuracy is one of the most commonly used MMA statistics.

### Result
This notebook will create a dataframe that will have a row for each bout and will include:
 - Features:
     - average successful significant strikes for each fighter (ASSS)
     - average significant strike accuracy (ASSA)
 - Target:
     - total successful significant strikes for a sing bout (TSSS_bout)
 

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
from sqlalchemy import create_engine
from src import local
from src import functions

In [2]:
# Credentials
USER = local.user 
PASS = local.password
HOST = local.host
PORT = local.port

#create engine
engine = create_engine(f'postgresql://{USER}:{PASS}@{HOST}:{PORT}/match_finder')

# Get data from postgres database
### Join tables

- get the date from the events table
- use the bouts table to join the dates to the general table
- use the general table to join the bouts with the fighters


#### Accuracy case statement

In [3]:
accuracy_column = """
CASE 
    WHEN (sig_str_attempted > 0) THEN (CAST(sig_str_successful AS FLOAT)/CAST(sig_str_attempted AS FLOAT))
    ELSE 0
END AS accuracy
"""

In [4]:
query = """
SELECT bout_link, fighter_link, sig_str_attempted, sig_str_successful, "Date", round,
"""+accuracy_column+"""
FROM strikes_cleaned
JOIN bouts ON bouts.link = strikes_cleaned.bout_link
JOIN events ON events.link = bouts.event_link
"""

data = pd.read_sql(query, engine)

In [5]:
data

Unnamed: 0,bout_link,fighter_link,sig_str_attempted,sig_str_successful,Date,round,accuracy
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
...,...,...,...,...,...,...,...
26447,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/a5c53b...,0,0,"November 12, 1993",1,0.000000
26448,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,27,15,"November 12, 1993",1,0.555556
26449,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/d3711d...,28,12,"November 12, 1993",1,0.428571
26450,http://www.ufcstats.com/fight-details/567a09fd...,http://www.ufcstats.com/fighter-details/279093...,5,3,"November 12, 1993",1,0.600000


In [6]:
data['round'] = data['round'].map(str)
data

Unnamed: 0,bout_link,fighter_link,sig_str_attempted,sig_str_successful,Date,round,accuracy
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667
...,...,...,...,...,...,...,...
26447,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/a5c53b...,0,0,"November 12, 1993",1,0.000000
26448,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,27,15,"November 12, 1993",1,0.555556
26449,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/d3711d...,28,12,"November 12, 1993",1,0.428571
26450,http://www.ufcstats.com/fight-details/567a09fd...,http://www.ufcstats.com/fighter-details/279093...,5,3,"November 12, 1993",1,0.600000


In [7]:
data = functions.get_all_ids(data, event=False)

In [8]:
data.drop_duplicates(inplace=True)

In [9]:
data

Unnamed: 0,bout_link,fighter_link,sig_str_attempted,sig_str_successful,Date,round,accuracy,fighter_id,bout_id
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667,e1147d3d2dabe1ce,11f715fa5e825e51
6,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,15,"July 25, 2020",2,0.500000,e1147d3d2dabe1ce,11f715fa5e825e51
12,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,32,13,"July 25, 2020",3,0.406250,e1147d3d2dabe1ce,11f715fa5e825e51
18,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,34,13,"July 25, 2020",4,0.382353,e1147d3d2dabe1ce,11f715fa5e825e51
24,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,31,17,"July 25, 2020",5,0.548387,e1147d3d2dabe1ce,11f715fa5e825e51
...,...,...,...,...,...,...,...,...,...
26447,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/a5c53b...,0,0,"November 12, 1993",1,0.000000,a5c53b3ddb31cc7d,cecdc0da584274b9
26448,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,27,15,"November 12, 1993",1,0.555556,598a58db87b890ee,2d2bbc86e941e05c
26449,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/d3711d...,28,12,"November 12, 1993",1,0.428571,d3711d3784b76255,2d2bbc86e941e05c
26450,http://www.ufcstats.com/fight-details/567a09fd...,http://www.ufcstats.com/fighter-details/279093...,5,3,"November 12, 1993",1,0.600000,279093302a6f44b3,567a09fd200cfa05


# Data cleaning

## Convert Date to datetime

In [10]:
data['Date'] = data.Date.map(pd.to_datetime)

## Create fighter-bout instance dataframe

A fighter-bout instance represents one fighter in one bout.
 - The same fighter has exactly one fighter-bout instance for every single bout he has been in. 
 - Every bout has exactly two fighter-bout instances, one for each fighter in the bout. 
  
In this case a fighter-bout instance is assigned a unique identifier comprised of the bout_id combined with the fighter_link.

In [57]:
data['fighter_bout_inst'] = data['bout_id'] + data['fighter_id']
fighter_bout_inst_group = data.groupby(['fighter_bout_inst'])

In this table, each row represents one match.

Features:
asss: Average Successful Significant Strikes. The average number of significant strikes that the fighter has landed in each round of his career before the date of the current bout.
assa: Average Significant Strike Accuracy. The average percentage of significant strikes that the fighter has lands in each round of his career before the date of the current bout.

Target
sss_bout: Successful Significant Strikes landed in the current bout.

In [58]:
sss_bout = fighter_bout_inst_group.sig_str_successful.sum()

date = fighter_bout_inst_group['Date'].max()

fighter_link = fighter_bout_inst_group['fighter_link'].max()

bout_id = fighter_bout_inst_group['bout_id'].max()

### Now we can create our fighter_bout_inst table

In [59]:
fighter_bout_inst = pd.DataFrame(dict(bout_id = bout_id, fighter_link = fighter_link, date = date, sss_bout = sss_bout))
fighter_bout_inst

Unnamed: 0_level_0,bout_id,fighter_link,date,sss_bout
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19
0019ec81fd706ade326f94d6cfb1bf25,0019ec81fd706ade,http://www.ufcstats.com/fighter-details/326f94...,2019-10-18,26
0019ec81fd706ade85073dbd1be65ed9,0019ec81fd706ade,http://www.ufcstats.com/fighter-details/85073d...,2019-10-18,54
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9
...,...,...,...,...
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2
fffa21388cdd78b75d7bdab5e03e3216,fffa21388cdd78b7,http://www.ufcstats.com/fighter-details/5d7bda...,2013-10-19,57


## Calculate ASSA

For this I will need a function that takes in a fighter_link and a date and calculates the ASSA up until that date.

In [60]:
def calculate_metric_average(metric, fighter_link, date, df):
    """
    input: fighter_link - str, a unique fighter identifier
           date - datetime64, cut off date, metric will be calculated using every fight up until this date
           df - dataframe, a fighter-instance table containing the metric
    output: float, the metric for the fighter up until the date
    """
    fighter_history = df[(df['fighter_link']==fighter_link)&
                         (df['Date']<date)]
    fighter_metric = fighter_history[metric].mean()
    return fighter_metric

In [61]:
assa = fighter_bout_inst.apply(lambda row: calculate_metric_average('accuracy', row['fighter_link'], row['date'], data), axis=1)

In [62]:
assa

fighter_bout_inst
000da3152b7b5ab16da99156486ed6c2    0.601620
000da3152b7b5ab1d1a1314976c50bef    0.281351
0019ec81fd706ade326f94d6cfb1bf25         NaN
0019ec81fd706ade85073dbd1be65ed9    0.568124
0027e179b743c86c3aa794cbe1e3484b    0.359259
                                      ...   
ffe629a5232a878bb361180739bed4b0    0.493956
ffea776913451b6d22a92d7f62195791    0.333091
ffea776913451b6d75e5fec9f72910ef    0.343625
fffa21388cdd78b75d7bdab5e03e3216    0.446893
fffa21388cdd78b7c80095f6092271a7    0.489039
Length: 11362, dtype: float64

In [63]:
fighter_bout_inst['assa'] = assa

In [65]:
asss = fighter_bout_inst.apply(lambda row: calculate_metric_average('sig_str_successful', row['fighter_link'], row['date'], data), axis=1)

In [66]:
fighter_bout_inst['asss'] = asss

In [67]:
fighter_bout_inst

Unnamed: 0_level_0,bout_id,fighter_link,date,sss_bout,assa,asss
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000
0019ec81fd706ade326f94d6cfb1bf25,0019ec81fd706ade,http://www.ufcstats.com/fighter-details/326f94...,2019-10-18,26,,
0019ec81fd706ade85073dbd1be65ed9,0019ec81fd706ade,http://www.ufcstats.com/fighter-details/85073d...,2019-10-18,54,0.568124,19.750000
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667
...,...,...,...,...,...,...
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689
fffa21388cdd78b75d7bdab5e03e3216,fffa21388cdd78b7,http://www.ufcstats.com/fighter-details/5d7bda...,2013-10-19,57,0.446893,9.500000


### Remove debut fights
There isn't any historical data for fighters with debut fights, so for now we will not use them in our analysis.

In [68]:
def black_list_entry(entry, black_list):
    return entry not in black_list

In [69]:
debut_bouts = list(fighter_bout_inst[fighter_bout_inst.asss.isna()].bout_id.unique())

mask = fighter_bout_inst['bout_id'].map(lambda x: black_list_entry(x, debut_bouts))

In [70]:
fighter_bout_inst = fighter_bout_inst[mask]

In [71]:
fighter_bout_inst

Unnamed: 0_level_0,bout_id,fighter_link,date,sss_bout,assa,asss
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667
0027e179b743c86c91ea901c458e95dd,0027e179b743c86c,http://www.ufcstats.com/fighter-details/91ea90...,2015-03-14,22,0.598198,12.900000
002921976d27b7dab4ad3a06ee4d660c,002921976d27b7da,http://www.ufcstats.com/fighter-details/b4ad3a...,2014-12-13,17,0.774253,21.444444
...,...,...,...,...,...,...
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689
fffa21388cdd78b75d7bdab5e03e3216,fffa21388cdd78b7,http://www.ufcstats.com/fighter-details/5d7bda...,2013-10-19,57,0.446893,9.500000


## Create the final dataframe

First I will get a list af all bout ids. Then I will create a dataframe with the first row and another dataframe with the second row. Then I will join those dataframes along the column axis.

In [72]:
fighter_bout_inst['inst_id'] = fighter_bout_inst.index

fighter_bout_inst

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fighter_bout_inst['inst_id'] = fighter_bout_inst.index


Unnamed: 0_level_0,bout_id,fighter_link,date,sss_bout,assa,asss,inst_id
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000,000da3152b7b5ab16da99156486ed6c2
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000,000da3152b7b5ab1d1a1314976c50bef
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667,0027e179b743c86c3aa794cbe1e3484b
0027e179b743c86c91ea901c458e95dd,0027e179b743c86c,http://www.ufcstats.com/fighter-details/91ea90...,2015-03-14,22,0.598198,12.900000,0027e179b743c86c91ea901c458e95dd
002921976d27b7dab4ad3a06ee4d660c,002921976d27b7da,http://www.ufcstats.com/fighter-details/b4ad3a...,2014-12-13,17,0.774253,21.444444,002921976d27b7dab4ad3a06ee4d660c
...,...,...,...,...,...,...,...
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556,ffe629a5232a878bb361180739bed4b0
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667,ffea776913451b6d22a92d7f62195791
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689,ffea776913451b6d75e5fec9f72910ef
fffa21388cdd78b75d7bdab5e03e3216,fffa21388cdd78b7,http://www.ufcstats.com/fighter-details/5d7bda...,2013-10-19,57,0.446893,9.500000,fffa21388cdd78b75d7bdab5e03e3216


In [73]:
fighter_0 = list(fighter_bout_inst.groupby('bout_id').inst_id.max())

In [74]:
fighter_1 = list(fighter_bout_inst.groupby('bout_id').inst_id.min())

In [75]:
mask = fighter_bout_inst['inst_id'].map(lambda x: black_list_entry(x, fighter_0))
fighter_bout_inst_1 = fighter_bout_inst[mask]
fighter_bout_inst_1

Unnamed: 0_level_0,bout_id,fighter_link,date,sss_bout,assa,asss,inst_id
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000,000da3152b7b5ab16da99156486ed6c2
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667,0027e179b743c86c3aa794cbe1e3484b
002921976d27b7dab4ad3a06ee4d660c,002921976d27b7da,http://www.ufcstats.com/fighter-details/b4ad3a...,2014-12-13,17,0.774253,21.444444,002921976d27b7dab4ad3a06ee4d660c
002c1562708ac30722a92d7f62195791,002c1562708ac307,http://www.ufcstats.com/fighter-details/22a92d...,2014-05-24,38,0.314773,18.111111,002c1562708ac30722a92d7f62195791
002cb1bb411c5f6022e47b53e4ceb27c,002cb1bb411c5f60,http://www.ufcstats.com/fighter-details/22e47b...,2006-03-04,21,0.484044,11.222222,002cb1bb411c5f6022e47b53e4ceb27c
...,...,...,...,...,...,...,...
ffbc12e4f821ec683591d0d5d382a381,ffbc12e4f821ec68,http://www.ufcstats.com/fighter-details/3591d0...,2014-02-15,19,0.519991,9.875000,ffbc12e4f821ec683591d0d5d382a381
ffd3e3d37cba32da7413b80dbb0f8f9f,ffd3e3d37cba32da,http://www.ufcstats.com/fighter-details/7413b8...,2014-10-25,26,0.424009,14.250000,ffd3e3d37cba32da7413b80dbb0f8f9f
ffe629a5232a878b08ae5cd9aef7ddd3,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/08ae5c...,2003-06-06,1,0.514747,6.400000,ffe629a5232a878b08ae5cd9aef7ddd3
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667,ffea776913451b6d22a92d7f62195791


In [76]:
mask = fighter_bout_inst['inst_id'].map(lambda x: black_list_entry(x, fighter_1))
fighter_bout_inst_0 = fighter_bout_inst[mask]
fighter_bout_inst_0

Unnamed: 0_level_0,bout_id,fighter_link,date,sss_bout,assa,asss,inst_id
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000,000da3152b7b5ab1d1a1314976c50bef
0027e179b743c86c91ea901c458e95dd,0027e179b743c86c,http://www.ufcstats.com/fighter-details/91ea90...,2015-03-14,22,0.598198,12.900000,0027e179b743c86c91ea901c458e95dd
002921976d27b7daebc1f40e00e0c481,002921976d27b7da,http://www.ufcstats.com/fighter-details/ebc1f4...,2014-12-13,2,0.437190,11.954545,002921976d27b7daebc1f40e00e0c481
002c1562708ac30744470bfd9483c7ad,002c1562708ac307,http://www.ufcstats.com/fighter-details/44470b...,2014-05-24,22,0.486772,7.666667,002c1562708ac30744470bfd9483c7ad
002cb1bb411c5f60d897897060f10a3a,002cb1bb411c5f60,http://www.ufcstats.com/fighter-details/d89789...,2006-03-04,127,0.471745,20.636364,002cb1bb411c5f60d897897060f10a3a
...,...,...,...,...,...,...,...
ffbc12e4f821ec687a703c565ccaa18f,ffbc12e4f821ec68,http://www.ufcstats.com/fighter-details/7a703c...,2014-02-15,16,0.541667,18.000000,ffbc12e4f821ec687a703c565ccaa18f
ffd3e3d37cba32da92a9aa9c93192871,ffd3e3d37cba32da,http://www.ufcstats.com/fighter-details/92a9aa...,2014-10-25,47,0.315385,12.272727,ffd3e3d37cba32da92a9aa9c93192871
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556,ffe629a5232a878bb361180739bed4b0
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689,ffea776913451b6d75e5fec9f72910ef


In [82]:
model_df = pd.merge(fighter_bout_inst_0, fighter_bout_inst_1, on='bout_id', suffixes=('_0', '_1'))
model_df

Unnamed: 0,bout_id,fighter_link_0,date_0,sss_bout_0,assa_0,asss_0,inst_id_0,fighter_link_1,date_1,sss_bout_1,assa_1,asss_1,inst_id_1
0,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000,000da3152b7b5ab16da99156486ed6c2,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000,000da3152b7b5ab1d1a1314976c50bef
1,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667,0027e179b743c86c3aa794cbe1e3484b,http://www.ufcstats.com/fighter-details/91ea90...,2015-03-14,22,0.598198,12.900000,0027e179b743c86c91ea901c458e95dd
2,002921976d27b7da,http://www.ufcstats.com/fighter-details/b4ad3a...,2014-12-13,17,0.774253,21.444444,002921976d27b7dab4ad3a06ee4d660c,http://www.ufcstats.com/fighter-details/ebc1f4...,2014-12-13,2,0.437190,11.954545,002921976d27b7daebc1f40e00e0c481
3,002c1562708ac307,http://www.ufcstats.com/fighter-details/22a92d...,2014-05-24,38,0.314773,18.111111,002c1562708ac30722a92d7f62195791,http://www.ufcstats.com/fighter-details/44470b...,2014-05-24,22,0.486772,7.666667,002c1562708ac30744470bfd9483c7ad
4,002cb1bb411c5f60,http://www.ufcstats.com/fighter-details/22e47b...,2006-03-04,21,0.484044,11.222222,002cb1bb411c5f6022e47b53e4ceb27c,http://www.ufcstats.com/fighter-details/d89789...,2006-03-04,127,0.471745,20.636364,002cb1bb411c5f60d897897060f10a3a
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4033,ffbc12e4f821ec68,http://www.ufcstats.com/fighter-details/3591d0...,2014-02-15,19,0.519991,9.875000,ffbc12e4f821ec683591d0d5d382a381,http://www.ufcstats.com/fighter-details/7a703c...,2014-02-15,16,0.541667,18.000000,ffbc12e4f821ec687a703c565ccaa18f
4034,ffd3e3d37cba32da,http://www.ufcstats.com/fighter-details/7413b8...,2014-10-25,26,0.424009,14.250000,ffd3e3d37cba32da7413b80dbb0f8f9f,http://www.ufcstats.com/fighter-details/92a9aa...,2014-10-25,47,0.315385,12.272727,ffd3e3d37cba32da92a9aa9c93192871
4035,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/08ae5c...,2003-06-06,1,0.514747,6.400000,ffe629a5232a878b08ae5cd9aef7ddd3,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556,ffe629a5232a878bb361180739bed4b0
4036,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667,ffea776913451b6d22a92d7f62195791,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689,ffea776913451b6d75e5fec9f72910ef


## Creating tsss_bout

tsss_bout: Total Successful Significant Strikes for the Bout. This metric measure the combined total number of significant strikes in a bout.

In [83]:
model_df['tsss_bout'] = model_df['sss_bout_0'] + model_df['sss_bout_1']
model_df

Unnamed: 0,bout_id,fighter_link_0,date_0,sss_bout_0,assa_0,asss_0,inst_id_0,fighter_link_1,date_1,sss_bout_1,assa_1,asss_1,inst_id_1,tsss_bout
0,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000,000da3152b7b5ab16da99156486ed6c2,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000,000da3152b7b5ab1d1a1314976c50bef,54
1,0027e179b743c86c,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667,0027e179b743c86c3aa794cbe1e3484b,http://www.ufcstats.com/fighter-details/91ea90...,2015-03-14,22,0.598198,12.900000,0027e179b743c86c91ea901c458e95dd,31
2,002921976d27b7da,http://www.ufcstats.com/fighter-details/b4ad3a...,2014-12-13,17,0.774253,21.444444,002921976d27b7dab4ad3a06ee4d660c,http://www.ufcstats.com/fighter-details/ebc1f4...,2014-12-13,2,0.437190,11.954545,002921976d27b7daebc1f40e00e0c481,19
3,002c1562708ac307,http://www.ufcstats.com/fighter-details/22a92d...,2014-05-24,38,0.314773,18.111111,002c1562708ac30722a92d7f62195791,http://www.ufcstats.com/fighter-details/44470b...,2014-05-24,22,0.486772,7.666667,002c1562708ac30744470bfd9483c7ad,60
4,002cb1bb411c5f60,http://www.ufcstats.com/fighter-details/22e47b...,2006-03-04,21,0.484044,11.222222,002cb1bb411c5f6022e47b53e4ceb27c,http://www.ufcstats.com/fighter-details/d89789...,2006-03-04,127,0.471745,20.636364,002cb1bb411c5f60d897897060f10a3a,148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4033,ffbc12e4f821ec68,http://www.ufcstats.com/fighter-details/3591d0...,2014-02-15,19,0.519991,9.875000,ffbc12e4f821ec683591d0d5d382a381,http://www.ufcstats.com/fighter-details/7a703c...,2014-02-15,16,0.541667,18.000000,ffbc12e4f821ec687a703c565ccaa18f,35
4034,ffd3e3d37cba32da,http://www.ufcstats.com/fighter-details/7413b8...,2014-10-25,26,0.424009,14.250000,ffd3e3d37cba32da7413b80dbb0f8f9f,http://www.ufcstats.com/fighter-details/92a9aa...,2014-10-25,47,0.315385,12.272727,ffd3e3d37cba32da92a9aa9c93192871,73
4035,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/08ae5c...,2003-06-06,1,0.514747,6.400000,ffe629a5232a878b08ae5cd9aef7ddd3,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556,ffe629a5232a878bb361180739bed4b0,1
4036,ffea776913451b6d,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667,ffea776913451b6d22a92d7f62195791,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689,ffea776913451b6d75e5fec9f72910ef,13


In [84]:
model_df = model_df.loc[:,['asss_0', 'assa_0', 'asss_1', 'assa_1', 'tsss_bout']]

In [85]:
model_df

Unnamed: 0,asss_0,assa_0,asss_1,assa_1,tsss_bout
0,7.750000,0.601620,9.800000,0.281351,54
1,6.666667,0.359259,12.900000,0.598198,31
2,21.444444,0.774253,11.954545,0.437190,19
3,18.111111,0.314773,7.666667,0.486772,60
4,11.222222,0.484044,20.636364,0.471745,148
...,...,...,...,...,...
4033,9.875000,0.519991,18.000000,0.541667,35
4034,14.250000,0.424009,12.272727,0.315385,73
4035,6.400000,0.514747,8.055556,0.493956,1
4036,16.266667,0.333091,9.278689,0.343625,13


In [87]:
fsm_dataframe.to_csv('../../data/model_2_data.csv')