#### Imports

In [1]:
import os
import time
import datetime
import pandas as pd

from tqdm import tqdm

from nba_api.stats.static import players, teams

In [9]:
def _get_seconds_per_possession_data(filepath='data/seconds-per-possession/playoffs/'):
    '''
    returns DataFrame of Seconds Per Possession - Offense Data for each Player every Season & Team
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    spp_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename)
        spp_df = pd.concat([spp_df, temp_df]).reset_index(drop=True)
        
    return spp_df

def _get_seasonal_touches_data(filepath='data/touches/playoff-totals/'):
    '''
    returns DataFrame of Tracking Touches Data for each season
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    tracking_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename)
        tracking_df = pd.concat([tracking_df, temp_df]).reset_index(drop=True)
    
    return tracking_df

def _get_possessions_data(filepath='data/playoff-careerlogs/'):
    '''
    returns DataFrame of Player's Offensive Possession Totals by Season
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    possessions_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename).rename(columns={'OffPoss': 'OFF_POSS'})
        season_param = filename.split('_')[-1].replace('.csv','')
        temp_df['SEASON'] = season_param
        possessions_df = pd.concat([possessions_df, temp_df]).reset_index(drop=True)
        
    possessions_df = possessions_df.rename(columns={'Name': 'PLAYER_NAME', 'TeamAbbreviation': 'TEAM'})
    return possessions_df[['SEASON','TEAM','PLAYER_NAME','OFF_POSS']].copy()

def _get_ids_data():
    '''
    returns DataFrame of Player IDs
    '''
    ids_df = pd.read_csv('data/ids/players_modern_database.csv')
    return ids_df

In [39]:
# Load the four raw inputs used by the rest of the notebook.
# The three directory loaders each show a tqdm progress bar while reading.
ids_df = _get_ids_data()
spp_df, tracking_df, possessions_df = (
    _get_seconds_per_possession_data(),
    _get_seasonal_touches_data(),
    _get_possessions_data(),
)

100%|███████████████████████████████████████████████████████████████████████████████| 160/160 [00:00<00:00, 600.16it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 457.01it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 249.94it/s]


In [40]:
# Lookup: player name -> player ID, built from the IDs table.
# NOTE(review): if a name occurs more than once in ids_df, dict() keeps the
# last ID seen for it.
playerID_map = dict(zip(ids_df['PLAYER_NAME'], ids_df['PLAYER_ID']))

# Lookup: team abbreviation -> team ID. Despite the variable's name, the
# abbreviation is the key and the ID is the value.
id_to_abb_map = {team['abbreviation']: team['id'] for team in teams.get_teams()}

# Add a TEAM_ID column to the frames that only carry an abbreviation;
# abbreviations missing from the lookup become NaN.
tracking_df['TEAM_ID'] = tracking_df['TEAM_ABBREVIATION'].map(id_to_abb_map)
possessions_df['TEAM_ID'] = possessions_df['TEAM'].map(id_to_abb_map)

# Name spellings to rewrite before the ID lookup (key = spelling found in
# possessions_df / spp_df, value = spelling expected by playerID_map).
# presumably "pbp" = play-by-play source and "nba" = NBA stats naming — TODO confirm
pbp_to_nba_map = {
     'CJ Wilcox': 'C.J. Wilcox',
     'Cameron Reynolds': 'Cam Reynolds',
     'Charles Brown Jr.': 'Charlie Brown Jr.',
     'Danuel House': 'Danuel House Jr.',
     'Enes Kanter': 'Enes Freedom',
     'Frank Mason': 'Frank Mason III',
     'Jeff Dowtin': 'Jeff Dowtin Jr.',
     'Juan Hernangomez': 'Juancho Hernangomez',
     'Kevin Knox': 'Kevin Knox II',
     'Marcus Morris': 'Marcus Morris Sr.',
     # NOTE(review): different first names — confirm this pair is really the
     # same player and not a mismatch.
     'Michael Frazier': 'Melvin Frazier Jr.',
     'Nicolas Claxton': 'Nic Claxton',
     'OG Anunoby': 'O.G. Anunoby',
     'P.J. Dozier': 'PJ Dozier',
     'PJ Tucker': 'P.J. Tucker',
     'TJ Leaf': 'T.J. Leaf',
     'TJ Warren': 'T.J. Warren',
     'Walter Lemon Jr.': 'Walt Lemon Jr.'
}

# Normalise the names first, then look up PLAYER_ID from the normalised name.
# Names absent from the rename table are kept as they are; names absent from
# playerID_map get a NaN PLAYER_ID.
for frame in (possessions_df, spp_df):
    frame['PLAYER_NAME'] = frame['PLAYER_NAME'].map(lambda name: pbp_to_nba_map.get(name, name))
    frame['PLAYER_ID'] = frame['PLAYER_NAME'].map(playerID_map)

In [41]:
# Preview the first five touches rows, now including the TEAM_ID column.
tracking_df.head(n=5)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ABBREVIATION,TIME_OF_POSS,TOUCHES,FRONT_CT_TOUCHES,AVG_DRIB_PER_TOUCH,SEASON,TEAM_ID
0,1733,Al Harrington,WAS,2.1,77,36,0.73,2013-14,1610612764
1,2744,Al Jefferson,CHA,3.5,124,98,0.44,2013-14,1610612766
2,101187,Alan Anderson,BKN,10.4,294,185,1.31,2013-14,1610612751
3,101161,Amir Johnson,TOR,7.3,263,166,0.67,2013-14,1610612761
4,101154,Andray Blatche,BKN,8.4,266,180,0.74,2013-14,1610612751


In [42]:
# Spot-check the 'Marcus Morris' -> 'Marcus Morris Sr.' rename in spp_df.
# The substring 's Morr' is loose, so other names containing it also appear.
spp_df.loc[spp_df['PLAYER_NAME'].str.contains('s Morr')]

Unnamed: 0,PLAYER_NAME,MINUTES_ON,MINUTES_OFF,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,SECONDS_PER_POSS_OFFENSE_PLAYER_OFF,SECONDS_PER_POSS_OFFENSE_PLAYER_ON_OFF,SEASON,TEAM_ID,PLAYER_ID
142,Marcus Morris Sr.,562,360,15.831,16.458,-0.63,2017-18,1610612738,202694
150,Marcus Morris Sr.,254,178,14.245,14.448,-0.2,2018-19,1610612738,202694
809,Marcus Morris Sr.,388,241,14.429,14.505,-0.08,2019-20,1610612746,202694
818,Marcus Morris Sr.,604,308,15.323,15.351,-0.03,2020-21,1610612746,202694
839,Marcus Morris Sr.,68,172,13.868,14.694,-0.83,2022-23,1610612746,202694
1167,Darius Morris,5,288,13.778,16.054,-2.28,2014-15,1610612751,202721
2146,Marcus Morris Sr.,144,48,15.201,16.393,-1.19,2015-16,1610612765,202694


In [34]:
# Same spot-check on the touches table: rows whose name contains 'Morris S'.
tracking_df.loc[tracking_df['PLAYER_NAME'].str.contains('Morris S')]

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ABBREVIATION,TIME_OF_POSS,TOUCHES,FRONT_CT_TOUCHES,AVG_DRIB_PER_TOUCH,SEASON,TEAM_ID
557,202694,Marcus Morris Sr.,DET,6.6,199,150,0.76,2015-16,1610612765
978,202694,Marcus Morris Sr.,BOS,30.0,825,527,1.18,2017-18,1610612738
1187,202694,Marcus Morris Sr.,BOS,12.7,363,187,1.14,2018-19,1610612738
1393,202694,Marcus Morris Sr.,LAC,15.6,475,277,1.07,2019-20,1610612746
1631,202694,Marcus Morris Sr.,LAC,21.7,666,449,0.84,2020-21,1610612746
2075,202694,Marcus Morris Sr.,LAC,3.0,80,50,1.32,2022-23,1610612746


In [77]:
# Keys shared by all three tables.
merge_keys = ['SEASON','PLAYER_ID','TEAM_ID']

spp_subset = spp_df[['SEASON','MINUTES_ON','PLAYER_ID','TEAM_ID','SECONDS_PER_POSS_OFFENSE_PLAYER_ON']]
touches_subset = tracking_df[['SEASON','PLAYER_ID','TEAM_ID','TIME_OF_POSS']]

# Left joins keep every seconds-per-possession row; rows with no match in the
# touches or possessions tables get NaN in the columns those tables provide.
merged_df = (
    spp_subset
    .merge(touches_subset, on=merge_keys, how='left')
    .merge(possessions_df, on=merge_keys, how='left')
)
merged_df.head()

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS
0,2013-14,7,203471,1610612737,15.769,1.4,ATL,Dennis Schroder,13.0
1,2013-14,15,201858,1610612737,16.786,0.6,ATL,Cartier Martin,28.0
2,2013-14,5,203488,1610612737,14.667,0.1,ATL,Mike Muscala,9.0
3,2013-14,81,1882,1610612737,15.861,2.8,ATL,Elton Brand,151.0
4,2013-14,246,201960,1610612737,15.989,11.1,ATL,DeMarre Carroll,462.0


In [78]:
# OFF_POSS is NaN when the left merge found no possessions row for that
# season / player / team (or when the source value itself was missing).
missing_possessions = merged_df['OFF_POSS'].isna()

no_possessions_found_df = merged_df.loc[missing_possessions].reset_index(drop=True)
final_df = merged_df.loc[~missing_possessions].reset_index(drop=True)

# Note: both numbers are row counts of the merged table.
print("The Number of Players who don't have any possession data, but only seconds-per-poss data:", no_possessions_found_df.shape[0])
print("The Number of Players with the data:", final_df.shape[0])

The Number of Players who don't have any possession data, but only seconds-per-poss data: 35
The Number of Players with the data: 2153


In [79]:
# BALL_HOG% = player's time of possession as a percentage of
# OFF_POSS * SECONDS_PER_POSS_OFFENSE_PLAYER_ON, rounded to one decimal.
output_path = 'data/ball-hog-rate/ball-hog-rates_playoffs_2022_23.csv'

# presumably TIME_OF_POSS is recorded in minutes, hence * 60 — TODO confirm
player_seconds = final_df['TIME_OF_POSS'] * 60
on_court_seconds = final_df['OFF_POSS'] * final_df['SECONDS_PER_POSS_OFFENSE_PLAYER_ON']

# A zero denominator would put inf into the exported file; turn it into NaN
# so the rate is simply missing for such rows.
on_court_seconds = on_court_seconds.where(on_court_seconds > 0)

final_df['BALL_HOG%'] = (player_seconds / on_court_seconds * 100).round(1)
final_df = final_df.sort_values(by=['PLAYER_NAME','SEASON']).reset_index(drop=True)

# to_csv does not create missing folders, so make sure the target exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
final_df.to_csv(output_path, index=False)

final_df.head()

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
0,2014-15,132,201166,1610612741,17.679,27.6,CHI,Aaron Brooks,243.0,38.5
1,2016-17,7,201166,1610612754,15.933,0.6,IND,Aaron Brooks,15.0,15.1
2,2017-18,3,201166,1610612750,11.667,0.5,MIN,Aaron Brooks,6.0,42.9
3,2018-19,164,203932,1610612753,15.613,13.0,ORL,Aaron Gordon,320.0,15.6
4,2020-21,299,203932,1610612743,15.231,16.2,DEN,Aaron Gordon,603.0,10.6


In [80]:
# Spot-check: rows whose player name contains 'Stephen C'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('Stephen C')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
1878,2013-14,296,201939,1610612744,14.603,57.8,GSW,Stephen Curry,590.0,40.3
1879,2014-15,826,201939,1610612744,14.0,146.8,GSW,Stephen Curry,1656.0,38.0
1880,2015-16,614,201939,1610612744,14.055,107.5,GSW,Stephen Curry,1250.0,36.7
1881,2016-17,601,201939,1610612744,12.973,98.0,GSW,Stephen Curry,1272.0,35.6
1882,2017-18,555,201939,1610612744,14.07,95.1,GSW,Stephen Curry,1121.0,36.2
1883,2018-19,846,201939,1610612744,13.297,132.3,GSW,Stephen Curry,1737.0,34.4
1884,2021-22,764,201939,1610612744,14.107,143.9,GSW,Stephen Curry,1557.0,39.3
1885,2022-23,493,201939,1610612744,14.56,93.3,GSW,Stephen Curry,1051.0,36.6


In [81]:
# Spot-check: rows whose player name contains 'LeBr'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('LeBr')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
1292,2013-14,763,2544,1610612748,16.738,121.8,MIA,LeBron James,1376.0,31.7
1293,2014-15,845,2544,1610612739,17.319,181.1,CLE,LeBron James,1593.0,39.4
1294,2015-16,822,2544,1610612739,16.622,131.5,CLE,LeBron James,1541.0,30.8
1295,2016-17,744,2544,1610612739,15.362,135.4,CLE,LeBron James,1478.0,35.8
1296,2017-18,922,2544,1610612739,16.417,212.2,CLE,LeBron James,1747.0,44.4
1297,2019-20,762,2544,1610612747,14.838,168.6,LAL,LeBron James,1523.0,44.8
1298,2020-21,224,2544,1610612747,15.495,40.8,LAL,LeBron James,426.0,37.1
1299,2022-23,619,2544,1610612747,14.471,80.5,LAL,LeBron James,1259.0,26.5


In [82]:
# Spot-check: rows whose player name contains 'Jokic'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('Jokic')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
1560,2018-19,557,203999,1610612743,15.829,67.0,DEN,Nikola Jokic,1070.0,23.7
1561,2019-20,694,203999,1610612743,15.696,88.5,DEN,Nikola Jokic,1363.0,24.8
1562,2020-21,345,203999,1610612743,15.426,45.8,DEN,Nikola Jokic,692.0,25.7
1563,2021-22,171,203999,1610612743,15.448,22.9,DEN,Nikola Jokic,346.0,25.7
1564,2022-23,583,203999,1610612743,15.532,80.1,DEN,Nikola Jokic,1176.0,26.3


In [83]:
# Spot-check: rows whose player name contains 'Morant'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('Morant')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
800,2020-21,203,1629630,1610612763,14.258,46.4,MEM,Ja Morant,414.0,47.2
801,2021-22,338,1629630,1610612763,14.105,80.1,MEM,Ja Morant,711.0,47.9
802,2022-23,187,1629630,1610612763,14.396,36.9,MEM,Ja Morant,396.0,38.8


In [84]:
# Spot-check: rows whose player name contains 'Booker'
# (a substring match, so every player with that surname is returned).
final_df.loc[final_df['PLAYER_NAME'].str.contains('Booker')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
499,2020-21,889,1626164,1610612756,15.364,112.7,PHX,Devin Booker,1755.0,25.1
500,2021-22,366,1626164,1610612756,14.93,44.9,PHX,Devin Booker,698.0,25.9
501,2022-23,459,1626164,1610612756,14.451,76.1,PHX,Devin Booker,936.0,33.8
2023,2013-14,146,202344,1610612764,16.835,5.3,WAS,Trevor Booker,255.0,7.4
2024,2017-18,64,202344,1610612754,17.927,1.7,IND,Trevor Booker,110.0,5.2


In [85]:
# Spot-check: rows whose player name contains 'Donovan M'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('Donovan M')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
526,2017-18,411,1628378,1610612762,15.069,66.2,UTA,Donovan Mitchell,822.0,32.1
527,2018-19,193,1628378,1610612762,15.208,32.2,UTA,Donovan Mitchell,389.0,32.7
528,2019-20,264,1628378,1610612762,15.629,54.7,UTA,Donovan Mitchell,504.0,41.7
529,2020-21,346,1628378,1610612762,15.106,78.3,UTA,Donovan Mitchell,687.0,45.3
530,2021-22,229,1628378,1610612762,14.805,41.5,UTA,Donovan Mitchell,436.0,38.6
531,2022-23,207,1628378,1610612739,15.373,31.8,CLE,Donovan Mitchell,391.0,31.7


In [86]:
# Spot-check: rows whose player name contains 'Tatum'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('Tatum')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
929,2017-18,683,1628369,1610612738,16.068,51.0,BOS,Jayson Tatum,1314.0,14.5
930,2018-19,295,1628369,1610612738,14.623,20.1,BOS,Jayson Tatum,608.0,13.6
931,2019-20,690,1628369,1610612738,15.036,81.1,BOS,Jayson Tatum,1370.0,23.6
932,2020-21,185,1628369,1610612738,14.938,27.7,BOS,Jayson Tatum,368.0,30.2
933,2021-22,983,1628369,1610612738,15.543,132.6,BOS,Jayson Tatum,1952.0,26.2
934,2022-23,673,1628369,1610612738,14.521,95.1,BOS,Jayson Tatum,1417.0,27.7


In [87]:
# Spot-check: rows whose player name contains 'Smart'.
final_df.loc[final_df['PLAYER_NAME'].str.contains('Smart')]

Unnamed: 0,SEASON,MINUTES_ON,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,PLAYER_NAME,OFF_POSS,BALL_HOG%
1377,2014-15,90,203935,1610612738,14.855,7.8,BOS,Marcus Smart,172.0,18.3
1378,2015-16,193,203935,1610612738,15.275,20.3,BOS,Marcus Smart,386.0,20.7
1379,2016-17,538,203935,1610612738,15.399,70.6,BOS,Marcus Smart,1049.0,26.2
1380,2017-18,449,203935,1610612738,15.544,72.1,BOS,Marcus Smart,882.0,31.6
1381,2018-19,32,203935,1610612738,13.254,2.8,BOS,Marcus Smart,71.0,17.9
1382,2019-20,648,203935,1610612738,15.069,58.1,BOS,Marcus Smart,1272.0,18.2
1383,2020-21,183,203935,1610612738,14.41,21.8,BOS,Marcus Smart,373.0,24.3
1384,2021-22,760,203935,1610612738,15.457,114.8,BOS,Marcus Smart,1511.0,29.5
1385,2022-23,571,203935,1610612738,14.368,61.8,BOS,Marcus Smart,1194.0,21.6
