# Purpose
This notebook details how to calculate striking defense (SD). SD is the percentage of 
strikes that the fighter's opponent attempted but did not successfully land. Striking
Defense allows us to measure how well a fighter can avoid hits. It can also indicate
how good a fighters striking ability is in general, as defense is considered as, if
not more, important than offense.

### Result
This notebook will create a dataframe that will have a row for each bout and will include:
 - Features:
     - average successful significant strikes for each fighter (ASSS)
     - average significant strike accuracy (ASSA)
     - average significant strike defense (ASSD)
 - Target:
     - total successful significant strikes for a sing bout (TSSS_bout)
 

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
from sqlalchemy import create_engine
from src import local
from src import functions

In [2]:
# Credentials
USER = local.user 
PASS = local.password
HOST = local.host
PORT = local.port

#create engine
engine = create_engine(f'postgresql://{USER}:{PASS}@{HOST}:{PORT}/match_finder')

# Get data from postgres database
### Join tables

- get the date from the events table
- use the bouts table to join the dates to the general table
- use the general table to join the bouts with the fighters


#### Accuracy case statement

In [25]:
accuracy_column = """
CASE 
    WHEN (sig_str_a > 0) THEN (CAST(sig_str_s AS FLOAT)/CAST(sig_str_a AS FLOAT))
    ELSE 0
END AS accuracy
"""

In [26]:
query = """
SELECT bout_link, fighter_link, sig_str_a, sig_str_s, "Date", round,
"""+accuracy_column+"""
FROM strikes_cleaned
JOIN bouts ON bouts.link = strikes_cleaned.bout_link
JOIN events ON events.link = bouts.event_link
"""

data = pd.read_sql(query, engine)

In [27]:
data = functions.format_data(data, event=False)

In [28]:
data

Unnamed: 0,bout_link,fighter_link,sig_str_a,sig_str_s,Date,round,accuracy,date,fighter_id,bout_id
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,"July 25, 2020",1,0.366667,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
6,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,15,"July 25, 2020",2,0.500000,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
12,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,32,13,"July 25, 2020",3,0.406250,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
18,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,34,13,"July 25, 2020",4,0.382353,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
24,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,31,17,"July 25, 2020",5,0.548387,2020-07-25,e1147d3d2dabe1ce,11f715fa5e825e51
...,...,...,...,...,...,...,...,...,...,...
26447,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/a5c53b...,0,0,"November 12, 1993",1,0.000000,1993-11-12,a5c53b3ddb31cc7d,cecdc0da584274b9
26448,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,27,15,"November 12, 1993",1,0.555556,1993-11-12,598a58db87b890ee,2d2bbc86e941e05c
26449,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/d3711d...,28,12,"November 12, 1993",1,0.428571,1993-11-12,d3711d3784b76255,2d2bbc86e941e05c
26450,http://www.ufcstats.com/fight-details/567a09fd...,http://www.ufcstats.com/fighter-details/279093...,5,3,"November 12, 1993",1,0.600000,1993-11-12,279093302a6f44b3,567a09fd200cfa05


In order to get the striking defense, we need each column to include the fighters opponent. 

In [7]:
data_0 = functions.merge_fighter_instances(data, rounds=True)
data_1 = functions.merge_fighter_instances(data, rounds=True, flip=True)

data = pd.concat((data_0, data_1))

In [8]:
data.columns

Index(['bout_link_0', 'fighter_link_0', 'sig_str_attempted_0',
       'sig_str_successful_0', 'Date_0', 'round_0', 'accuracy_0',
       'fighter_id_0', 'bout_id_0', 'round_id', 'inst_id_0', 'bout_link_1',
       'fighter_link_1', 'sig_str_attempted_1', 'sig_str_successful_1',
       'Date_1', 'round_1', 'accuracy_1', 'fighter_id_1', 'bout_id_1',
       'inst_id_1'],
      dtype='object')

In [9]:
data = data.loc[:, ['bout_link_0', 'fighter_link_0', 'sig_str_attempted_0',
                'sig_str_successful_0', 'Date_0', 'round_0', 'accuracy_0',
                'fighter_id_0', 'bout_id_0', 'round_id', 'inst_id_0', 
                'sig_str_attempted_1', 'sig_str_successful_1', 'accuracy_1']]

In [10]:
data

Unnamed: 0,bout_link_0,fighter_link_0,sig_str_attempted_0,sig_str_successful_0,Date_0,round_0,accuracy_0,fighter_id_0,bout_id_0,round_id,inst_id_0,sig_str_attempted_1,sig_str_successful_1,accuracy_1
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,2020-07-25,1,0.366667,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e511,11f715fa5e825e51e1147d3d2dabe1ce,24,12,0.500000
1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,15,2020-07-25,2,0.500000,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e512,11f715fa5e825e51e1147d3d2dabe1ce,12,3,0.250000
2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,32,13,2020-07-25,3,0.406250,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e513,11f715fa5e825e51e1147d3d2dabe1ce,21,6,0.285714
3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,34,13,2020-07-25,4,0.382353,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e514,11f715fa5e825e51e1147d3d2dabe1ce,19,8,0.421053
4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,31,17,2020-07-25,5,0.548387,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e515,11f715fa5e825e51e1147d3d2dabe1ce,23,12,0.521739
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13102,http://www.ufcstats.com/fight-details/ac7ca2ec...,http://www.ufcstats.com/fighter-details/279093...,17,11,1993-11-12,1,0.647059,279093302a6f44b3,ac7ca2ec38b96c1a,ac7ca2ec38b96c1a1,ac7ca2ec38b96c1a279093302a6f44b3,3,0,0.000000
13103,http://www.ufcstats.com/fight-details/46acd54c...,http://www.ufcstats.com/fighter-details/46c8ec...,8,4,1993-11-12,1,0.500000,46c8ec317aff28ac,46acd54cc0c905fb,46acd54cc0c905fb1,46acd54cc0c905fb46c8ec317aff28ac,1,1,1.000000
13104,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/429e7d...,3,0,1993-11-12,1,0.000000,429e7d3725852ce9,cecdc0da584274b9,cecdc0da584274b91,cecdc0da584274b9429e7d3725852ce9,0,0,0.000000
13105,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,27,15,1993-11-12,1,0.555556,598a58db87b890ee,2d2bbc86e941e05c,2d2bbc86e941e05c1,2d2bbc86e941e05c598a58db87b890ee,28,12,0.428571


### Calculating significant strike defense can be done by simply subtracting their opponents accuracy from 1

In [11]:
data['ssd_0'] = 1 - data['accuracy_1']

In [12]:
data

Unnamed: 0,bout_link_0,fighter_link_0,sig_str_attempted_0,sig_str_successful_0,Date_0,round_0,accuracy_0,fighter_id_0,bout_id_0,round_id,inst_id_0,sig_str_attempted_1,sig_str_successful_1,accuracy_1,ssd_0
0,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,11,2020-07-25,1,0.366667,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e511,11f715fa5e825e51e1147d3d2dabe1ce,24,12,0.500000,0.500000
1,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,30,15,2020-07-25,2,0.500000,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e512,11f715fa5e825e51e1147d3d2dabe1ce,12,3,0.250000,0.750000
2,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,32,13,2020-07-25,3,0.406250,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e513,11f715fa5e825e51e1147d3d2dabe1ce,21,6,0.285714,0.714286
3,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,34,13,2020-07-25,4,0.382353,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e514,11f715fa5e825e51e1147d3d2dabe1ce,19,8,0.421053,0.578947
4,http://www.ufcstats.com/fight-details/11f715fa...,http://www.ufcstats.com/fighter-details/e1147d...,31,17,2020-07-25,5,0.548387,e1147d3d2dabe1ce,11f715fa5e825e51,11f715fa5e825e515,11f715fa5e825e51e1147d3d2dabe1ce,23,12,0.521739,0.478261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13102,http://www.ufcstats.com/fight-details/ac7ca2ec...,http://www.ufcstats.com/fighter-details/279093...,17,11,1993-11-12,1,0.647059,279093302a6f44b3,ac7ca2ec38b96c1a,ac7ca2ec38b96c1a1,ac7ca2ec38b96c1a279093302a6f44b3,3,0,0.000000,1.000000
13103,http://www.ufcstats.com/fight-details/46acd54c...,http://www.ufcstats.com/fighter-details/46c8ec...,8,4,1993-11-12,1,0.500000,46c8ec317aff28ac,46acd54cc0c905fb,46acd54cc0c905fb1,46acd54cc0c905fb46c8ec317aff28ac,1,1,1.000000,0.000000
13104,http://www.ufcstats.com/fight-details/cecdc0da...,http://www.ufcstats.com/fighter-details/429e7d...,3,0,1993-11-12,1,0.000000,429e7d3725852ce9,cecdc0da584274b9,cecdc0da584274b91,cecdc0da584274b9429e7d3725852ce9,0,0,0.000000,1.000000
13105,http://www.ufcstats.com/fight-details/2d2bbc86...,http://www.ufcstats.com/fighter-details/598a58...,27,15,1993-11-12,1,0.555556,598a58db87b890ee,2d2bbc86e941e05c,2d2bbc86e941e05c1,2d2bbc86e941e05c598a58db87b890ee,28,12,0.428571,0.571429


Clean the columns for continuity

In [13]:
data.columns = ['bout_link', 'fighter_link', 'sig_str_attempted',
       'sig_str_successful', 'Date', 'round', 'accuracy',
       'fighter_id', 'bout_id', 'round_id', 'inst_id',
       'sig_str_attempted_1', 'sig_str_successful_1', 'accuracy_1', 'ssd']

## Create fighter-bout instance dataframe

A fighter-bout instance represents one fighter in one bout.
 - The same fighter has exactly one fighter-bout instance for every single bout he has been in. 
 - Every bout has exactly two fighter-bout instances, one for each fighter in the bout. 
  
In this case a fighter-bout instance is assigned a unique identifier comprised of the bout_id combined with the fighter_link.

In [14]:
fighter_bout_inst = functions.create_fighter_bout_instance_table(data)

In [15]:
fighter_bout_inst

Unnamed: 0_level_0,bout_id,fighter_id,date,sss_bout
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,6da99156486ed6c2,2006-07-08,35
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,d1a1314976c50bef,2006-07-08,19
0019ec81fd706ade326f94d6cfb1bf25,0019ec81fd706ade,326f94d6cfb1bf25,2019-10-18,26
0019ec81fd706ade85073dbd1be65ed9,0019ec81fd706ade,85073dbd1be65ed9,2019-10-18,54
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,3aa794cbe1e3484b,2015-03-14,9
...,...,...,...,...
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,b361180739bed4b0,2003-06-06,0
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,22a92d7f62195791,2015-02-28,11
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,75e5fec9f72910ef,2015-02-28,2
fffa21388cdd78b75d7bdab5e03e3216,fffa21388cdd78b7,5d7bdab5e03e3216,2013-10-19,57


## Calculate metrics

The metrics I will using in this notebook are:
 - average successful significant strikes for each fighter (ASSS)
 - average significant strike accuracy (ASSA)
 - average significant strike defense (ASSD)

In [16]:
assa = fighter_bout_inst.apply(lambda row: functions.calculate_metric_average('accuracy', row['fighter_id'], row['date'], data), axis=1)
fighter_bout_inst['assa'] = assa

In [18]:
asss = fighter_bout_inst.apply(lambda row: functions.calculate_metric_average('sig_str_successful', row['fighter_id'], row['date'], data), axis=1)
fighter_bout_inst['asss'] = asss

In [19]:
assd = fighter_bout_inst.apply(lambda row: functions.calculate_metric_average('ssd', row['fighter_id'], row['date'], data), axis=1)
fighter_bout_inst['assd'] = assd

In [20]:
fighter_bout_inst

Unnamed: 0_level_0,bout_id,fighter_id,date,sss_bout,assa,asss,assd
fighter_bout_inst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
000da3152b7b5ab16da99156486ed6c2,000da3152b7b5ab1,6da99156486ed6c2,2006-07-08,35,0.601620,7.750000,0.563305
000da3152b7b5ab1d1a1314976c50bef,000da3152b7b5ab1,d1a1314976c50bef,2006-07-08,19,0.281351,9.800000,0.543474
0019ec81fd706ade326f94d6cfb1bf25,0019ec81fd706ade,326f94d6cfb1bf25,2019-10-18,26,,,
0019ec81fd706ade85073dbd1be65ed9,0019ec81fd706ade,85073dbd1be65ed9,2019-10-18,54,0.568124,19.750000,0.610863
0027e179b743c86c3aa794cbe1e3484b,0027e179b743c86c,3aa794cbe1e3484b,2015-03-14,9,0.359259,6.666667,0.561818
...,...,...,...,...,...,...,...
ffe629a5232a878bb361180739bed4b0,ffe629a5232a878b,b361180739bed4b0,2003-06-06,0,0.493956,8.055556,0.487001
ffea776913451b6d22a92d7f62195791,ffea776913451b6d,22a92d7f62195791,2015-02-28,11,0.333091,16.266667,0.717127
ffea776913451b6d75e5fec9f72910ef,ffea776913451b6d,75e5fec9f72910ef,2015-02-28,2,0.343625,9.278689,0.646161
fffa21388cdd78b75d7bdab5e03e3216,fffa21388cdd78b7,5d7bdab5e03e3216,2013-10-19,57,0.446893,9.500000,0.572856


### Remove debut fights
There isn't any historical data for fighters with debut fights, so for now we will not use them in our analysis.

In [21]:
def black_list_entry(entry, black_list):
    return entry not in black_list

In [25]:
import pickle

In [27]:
debut_bouts = list(fighter_bout_inst[fighter_bout_inst.asss.isna()].bout_id.unique())
pickle.dump(debut_bouts, open('../../src/debut_bouts.pkl', 'wb')) # this will be used when I create a function for this step

In [32]:
mask = fighter_bout_inst['bout_id'].map(lambda x: black_list_entry(x, debut_bouts))

fighter_bout_inst = fighter_bout_inst[mask]

fighter_bout_inst

## Create the final dataframe

First I will get a list af all bout ids. Then I will create a dataframe with the first row and another dataframe with the second row. Then I will join those dataframes along the column axis.

In [35]:
fighter_bout_inst['fighter_id'] = fighter_bout_inst['fighter_link'].map(functions.get_id)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fighter_bout_inst['fighter_id'] = fighter_bout_inst['fighter_link'].map(functions.get_id)


In [37]:
model_df = functions.merge_fighter_instances(fighter_bout_inst)

## Creating tsss_bout

tsss_bout: Total Successful Significant Strikes for the Bout. This metric measure the combined total number of significant strikes in a bout.

In [38]:
model_df['tsss_bout'] = model_df['sss_bout_0'] + model_df['sss_bout_1']
model_df

Unnamed: 0,bout_id,fighter_link_0,date_0,sss_bout_0,assa_0,asss_0,assd_0,fighter_id_0,inst_id_0,fighter_link_1,date_1,sss_bout_1,assa_1,asss_1,assd_1,fighter_id_1,inst_id_1,tsss_bout
0,000da3152b7b5ab1,http://www.ufcstats.com/fighter-details/d1a131...,2006-07-08,19,0.281351,9.800000,0.543474,d1a1314976c50bef,000da3152b7b5ab1d1a1314976c50bef,http://www.ufcstats.com/fighter-details/6da991...,2006-07-08,35,0.601620,7.750000,0.563305,6da99156486ed6c2,000da3152b7b5ab16da99156486ed6c2,54
1,0027e179b743c86c,http://www.ufcstats.com/fighter-details/91ea90...,2015-03-14,22,0.598198,12.900000,0.546091,91ea901c458e95dd,0027e179b743c86c91ea901c458e95dd,http://www.ufcstats.com/fighter-details/3aa794...,2015-03-14,9,0.359259,6.666667,0.561818,3aa794cbe1e3484b,0027e179b743c86c3aa794cbe1e3484b,31
2,002921976d27b7da,http://www.ufcstats.com/fighter-details/ebc1f4...,2014-12-13,2,0.437190,11.954545,0.487594,ebc1f40e00e0c481,002921976d27b7daebc1f40e00e0c481,http://www.ufcstats.com/fighter-details/b4ad3a...,2014-12-13,17,0.774253,21.444444,0.667550,b4ad3a06ee4d660c,002921976d27b7dab4ad3a06ee4d660c,19
3,002c1562708ac307,http://www.ufcstats.com/fighter-details/44470b...,2014-05-24,22,0.486772,7.666667,0.775809,44470bfd9483c7ad,002c1562708ac30744470bfd9483c7ad,http://www.ufcstats.com/fighter-details/22a92d...,2014-05-24,38,0.314773,18.111111,0.673899,22a92d7f62195791,002c1562708ac30722a92d7f62195791,60
4,002cb1bb411c5f60,http://www.ufcstats.com/fighter-details/d89789...,2006-03-04,127,0.471745,20.636364,0.647542,d897897060f10a3a,002cb1bb411c5f60d897897060f10a3a,http://www.ufcstats.com/fighter-details/22e47b...,2006-03-04,21,0.484044,11.222222,0.498367,22e47b53e4ceb27c,002cb1bb411c5f6022e47b53e4ceb27c,148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4033,ffbc12e4f821ec68,http://www.ufcstats.com/fighter-details/7a703c...,2014-02-15,16,0.541667,18.000000,0.811688,7a703c565ccaa18f,ffbc12e4f821ec687a703c565ccaa18f,http://www.ufcstats.com/fighter-details/3591d0...,2014-02-15,19,0.519991,9.875000,0.617094,3591d0d5d382a381,ffbc12e4f821ec683591d0d5d382a381,35
4034,ffd3e3d37cba32da,http://www.ufcstats.com/fighter-details/92a9aa...,2014-10-25,47,0.315385,12.272727,0.634401,92a9aa9c93192871,ffd3e3d37cba32da92a9aa9c93192871,http://www.ufcstats.com/fighter-details/7413b8...,2014-10-25,26,0.424009,14.250000,0.543519,7413b80dbb0f8f9f,ffd3e3d37cba32da7413b80dbb0f8f9f,73
4035,ffe629a5232a878b,http://www.ufcstats.com/fighter-details/b36118...,2003-06-06,0,0.493956,8.055556,0.487001,b361180739bed4b0,ffe629a5232a878bb361180739bed4b0,http://www.ufcstats.com/fighter-details/08ae5c...,2003-06-06,1,0.514747,6.400000,0.547333,08ae5cd9aef7ddd3,ffe629a5232a878b08ae5cd9aef7ddd3,1
4036,ffea776913451b6d,http://www.ufcstats.com/fighter-details/75e5fe...,2015-02-28,2,0.343625,9.278689,0.646161,75e5fec9f72910ef,ffea776913451b6d75e5fec9f72910ef,http://www.ufcstats.com/fighter-details/22a92d...,2015-02-28,11,0.333091,16.266667,0.717127,22a92d7f62195791,ffea776913451b6d22a92d7f62195791,13


In [39]:
model_df = model_df.loc[:,['asss_0', 'assa_0', 'assd_0', 'asss_1', 'assa_1', 'assd_1', 'tsss_bout']]

In [40]:
model_df

Unnamed: 0,asss_0,assa_0,assd_0,asss_1,assa_1,assd_1,tsss_bout
0,9.800000,0.281351,0.543474,7.750000,0.601620,0.563305,54
1,12.900000,0.598198,0.546091,6.666667,0.359259,0.561818,31
2,11.954545,0.437190,0.487594,21.444444,0.774253,0.667550,19
3,7.666667,0.486772,0.775809,18.111111,0.314773,0.673899,60
4,20.636364,0.471745,0.647542,11.222222,0.484044,0.498367,148
...,...,...,...,...,...,...,...
4033,18.000000,0.541667,0.811688,9.875000,0.519991,0.617094,35
4034,12.272727,0.315385,0.634401,14.250000,0.424009,0.543519,73
4035,8.055556,0.493956,0.487001,6.400000,0.514747,0.547333,1
4036,9.278689,0.343625,0.646161,16.266667,0.333091,0.717127,13


In [42]:
model_df.to_csv('../../data/model_3_data.csv')