#### Imports

In [1]:
import os
import time
import datetime
import pandas as pd

from tqdm import tqdm

from nba_api.stats.static import players, teams

#### Fetch Data from Local Directory

In [92]:
def _get_seconds_per_possession_data(filepath='data/seconds-per-possession/regular-season/'):
    '''
    returns DataFrame of Seconds Per Possession - Offense Data for each Player every Season & Team
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    spp_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename)
        spp_df = pd.concat([spp_df, temp_df]).reset_index(drop=True)
        
    return spp_df

def _get_seasonal_touches_data(filepath='data/touches/season-totals/'):
    '''
    returns DataFrame of Tracking Touches Data for each season
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    tracking_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename)
        tracking_df = pd.concat([tracking_df, temp_df]).reset_index(drop=True)
    
    return tracking_df

def _get_traded_touches_data(filepath='data/touches/traded-totals/'):
    '''
    returns DataFrame of Traded Tracking Touches Data for each season
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    tracking_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename)
        tracking_df = pd.concat([tracking_df, temp_df]).reset_index(drop=True)
    
    return tracking_df

def _get_possessions_data(filepath='data/possessions/'):
    '''
    returns DataFrame of Player's Offensive Possession Totals by Season
    '''
    filenames = os.listdir(filepath)
    
    # initialize dataframe
    possessions_df = pd.DataFrame()
    
    # loop through the filenames and get data
    for filename in tqdm(filenames):
        temp_df = pd.read_csv(filepath + filename).rename(columns={'OffPoss': 'OFF_POSS'})
        possessions_df = pd.concat([possessions_df, temp_df]).reset_index(drop=True)
    
    return possessions_df

def _get_ids_data():
    '''
    returns DataFrame of Player IDs
    '''
    ids_df = pd.read_csv('data/ids/players_modern_database.csv')
    return ids_df

#### Call the Functions and Get the Data

In [131]:
ids_df = _get_ids_data()
spp_df = _get_seconds_per_possession_data()
tracking_df = _get_seasonal_touches_data()
traded_tracking_df = _get_traded_touches_data()
possessions_df = _get_possessions_data()

100%|███████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 578.96it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 379.39it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 199.88it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 199.97it/s]


In [132]:
mikeJames_spp_df = spp_df[spp_df['PLAYER_NAME'] == 'Mike James'].reset_index(drop=True)
spp_df = spp_df[~(spp_df['PLAYER_NAME'] == 'Mike James')].reset_index(drop=True)

In [133]:
# get DataFrame of all traded players by season
traded_names_df = traded_tracking_df[['SEASON','PLAYER_ID','PLAYER_NAME']].copy()
traded_names_df = traded_names_df.drop_duplicates()
print('The Number of Traded Players:', len(traded_names_df))

# create a list of tuples containing the traded players
traded_names = list(zip(traded_names_df.SEASON, traded_names_df.PLAYER_NAME))

traded_names[:5]

The Number of Traded Players: 681


[('2013-14', 'Andre Miller'),
 ('2013-14', 'Caron Butler'),
 ('2013-14', 'John Salmons'),
 ('2013-14', 'Reggie Evans'),
 ('2013-14', 'Luke Ridnour')]

In [134]:
# filter out the traded players season totals from the dataframe
singular_tracking_df = tracking_df[~tracking_df[["SEASON","PLAYER_NAME"]].apply(tuple, 1).isin(traded_names)].copy()
print('The Number of Untraded Player Totals: ', singular_tracking_df.shape[0])

# create a final dataframe with every player's total time of possession and touches by team and season
final_tracking_df = pd.concat([singular_tracking_df, traded_tracking_df]).reset_index(drop=True)
final_tracking_df = final_tracking_df.rename(columns={'TEAM_ABBREVIATION': 'TEAM'})
print('The Number of Unique Player Totals by team AND season:', final_tracking_df.shape[0])

final_tracking_df.head()

The Number of Untraded Player Totals:  4538
The Number of Unique Player Totals by team AND season: 5947


Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,FRONT_CT_TOUCHES,AVG_DRIB_PER_TOUCH,SEASON
0,201985,AJ Price,MIN,20.6,244,90,4.78,2013-14
1,1733,Al Harrington,WAS,20.2,823,499,0.58,2013-14
2,201143,Al Horford,ATL,53.3,1826,1176,0.49,2013-14
3,2744,Al Jefferson,CHA,139.2,3575,2776,0.59,2013-14
4,202329,Al-Farouq Aminu,NOP,88.4,2520,1709,1.03,2013-14


In [135]:
# create a team ID column
id_to_abb_map = dict()

# create team abbreviation to team ID mapping
for team in teams.get_teams():
    id_to_abb_map[team['abbreviation']] = team['id']
    
final_tracking_df['TEAM_ID'] = final_tracking_df['TEAM'].map(id_to_abb_map)
possessions_df['TEAM_ID'] = possessions_df['TEAM'].map(id_to_abb_map)

In [136]:
playerID_map = dict(zip(ids_df['PLAYER_NAME'], ids_df['PLAYER_ID']))

print('The number of  player ids to identify:', len(spp_df))

# map player IDs to player names
spp_df['PLAYER_ID'] = spp_df['PLAYER_NAME'].map(playerID_map)

# separate the rows that have been identified with player ids
identified_spp_df = spp_df[~spp_df['PLAYER_ID'].isna()].reset_index(drop=True)
unidentified_spp_df = spp_df[spp_df['PLAYER_ID'].isna()].reset_index(drop=True)

print('The Number of Identified Player IDs:', len(identified_spp_df))
print('The Number of Unidentified Player IDs:', len(unidentified_spp_df))

pbp_to_nba_map = {
     'CJ Wilcox': 'C.J. Wilcox',
     'Cameron Reynolds': 'Cam Reynolds',
     'Charles Brown Jr.': 'Charlie Brown Jr.',
     'Danuel House': 'Danuel House Jr.',
     'Enes Kanter': 'Enes Freedom',
     'Frank Mason': 'Frank Mason III',
     'Jeff Dowtin': 'Jeff Dowtin Jr.',
     'Juan Hernangomez': 'Juancho Hernangomez',
     'Kevin Knox': 'Kevin Knox II',
     'Marcus Morris': 'Marcus Morris Sr.',
     'Michael Frazier': 'Melvin Frazier Jr.',
     'Nicolas Claxton': 'Nic Claxton',
     'OG Anunoby': 'O.G. Anunoby',
     'P.J. Dozier': 'PJ Dozier',
     'PJ Tucker': 'P.J. Tucker',
     'TJ Leaf': 'T.J. Leaf',
     'TJ Warren': 'T.J. Warren',
     'Walter Lemon Jr.': 'Walt Lemon Jr.'
}

# change the pbp names to NBA names and now identify their player IDs
unidentified_spp_df['PLAYER_NAME'] = unidentified_spp_df['PLAYER_NAME'].map(pbp_to_nba_map)
unidentified_spp_df['PLAYER_ID'] = unidentified_spp_df['PLAYER_NAME'].map(playerID_map)

spp_df = pd.concat([identified_spp_df, unidentified_spp_df]).reset_index(drop=True)
print('The Final Number of Identified Player IDs:', len(spp_df))

The number of  player ids to identify: 6009
The Number of Identified Player IDs: 5951
The Number of Unidentified Player IDs: 58
The Final Number of Identified Player IDs: 6009


In [139]:
merged_df = pd.merge(spp_df[['SEASON','PLAYER_NAME','PLAYER_ID','TEAM_ID','SECONDS_PER_POSS_OFFENSE_PLAYER_ON']],
                     final_tracking_df[['SEASON','PLAYER_ID','TEAM_ID','TIME_OF_POSS']],
                     on=['SEASON','PLAYER_ID','TEAM_ID'],
                     how='left'
                    )
merged_df = pd.merge(merged_df, possessions_df, on=['SEASON','PLAYER_ID','TEAM_ID'], how='left')

In [156]:
no_possessions_found_df = merged_df[merged_df['OFF_POSS'].isna()].reset_index(drop=True)

final_df = merged_df[~merged_df['OFF_POSS'].isna()].reset_index(drop=True)

print("The Number of Players who don't have any possession data, but only seconds-per-poss data:", len(no_possessions_found_df))
print("The Number of Players with the data:", len(final_df))

The Number of Players who don't have any possession data, but only seconds-per-poss data: 207
The Number of Players with the data: 5802


In [157]:
final_df['BALL_HANDLE%'] = round(((final_df['TIME_OF_POSS'] * 60) / (final_df['OFF_POSS'] * final_df['SECONDS_PER_POSS_OFFENSE_PLAYER_ON'])) * 100, 1)
final_df = final_df.sort_values(by=['PLAYER_NAME','SEASON']).reset_index(drop=True)
final_df.head()

Unnamed: 0,SEASON,PLAYER_NAME,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,OFF_POSS,BALL_HANDLE%
0,2022-23,A.J. Lawson,1630639.0,1610612742,14.477,3.5,DAL,230.0,6.3
1,2022-23,A.J. Lawson,1630639.0,1610612750,12.0,0.0,MIN,5.0,0.0
2,2022-23,AJ Green,1631260.0,1610612749,14.634,17.8,MIL,717.0,10.2
3,2022-23,AJ Griffin,1631100.0,1610612737,14.134,52.8,ATL,2916.0,7.7
4,2016-17,AJ Hammons,1627773.0,1610612742,15.291,5.0,DAL,320.0,6.1


In [158]:
final_df[final_df['PLAYER_NAME'].str.contains('LeBron')]

Unnamed: 0,SEASON,PLAYER_NAME,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,OFF_POSS,BALL_HANDLE%
3556,2013-14,LeBron James,2544.0,1610612748,15.465,420.2,MIA,5550.0,29.4
3557,2014-15,LeBron James,2544.0,1610612739,15.699,440.8,CLE,4805.0,35.1
3558,2015-16,LeBron James,2544.0,1610612739,15.829,406.0,CLE,5253.0,29.3
3559,2016-17,LeBron James,2544.0,1610612739,15.164,474.3,CLE,5559.0,33.8
3560,2017-18,LeBron James,2544.0,1610612739,14.755,551.9,CLE,6157.0,36.5
3561,2018-19,LeBron James,2544.0,1610612747,13.494,352.2,LAL,4143.0,37.8
3562,2019-20,LeBron James,2544.0,1610612747,14.258,496.4,LAL,4790.0,43.6
3563,2020-21,LeBron James,2544.0,1610612747,14.625,289.2,LAL,3066.0,38.7
3564,2021-22,LeBron James,2544.0,1610612747,14.582,348.9,LAL,4280.0,33.5
3565,2022-23,LeBron James,2544.0,1610612747,14.142,311.6,LAL,4105.0,32.2


In [159]:
final_df[final_df['PLAYER_NAME'].str.contains('Westbrook')]

Unnamed: 0,SEASON,PLAYER_NAME,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,OFF_POSS,BALL_HANDLE%
4816,2013-14,Russell Westbrook,201566.0,1610612760,13.859,320.4,OKC,2894.0,47.9
4817,2014-15,Russell Westbrook,201566.0,1610612760,13.999,544.0,OKC,4709.0,49.5
4818,2015-16,Russell Westbrook,201566.0,1610612760,14.139,677.0,OKC,5578.0,51.5
4819,2016-17,Russell Westbrook,201566.0,1610612760,13.687,746.4,OKC,5784.0,56.6
4820,2017-18,Russell Westbrook,201566.0,1610612760,13.929,713.6,OKC,5945.0,51.7
4821,2018-19,Russell Westbrook,201566.0,1610612760,12.707,554.8,OKC,5669.0,46.2
4822,2019-20,Russell Westbrook,201566.0,1610612745,12.966,327.8,HOU,4490.0,33.8
4823,2020-21,Russell Westbrook,201566.0,1610612764,13.179,549.6,WAS,5160.0,48.5
4824,2021-22,Russell Westbrook,201566.0,1610612747,14.312,501.0,LAL,5577.0,37.7
4825,2022-23,Russell Westbrook,201566.0,1610612746,14.342,106.7,LAC,1331.0,33.5


In [160]:
final_df[final_df['PLAYER_NAME'].str.contains('Stephen C')]

Unnamed: 0,SEASON,PLAYER_NAME,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,OFF_POSS,BALL_HANDLE%
5056,2013-14,Stephen Curry,201939.0,1610612744,14.071,569.1,GSW,5748.0,42.2
5057,2014-15,Stephen Curry,201939.0,1610612744,13.171,449.8,GSW,5447.0,37.6
5058,2015-16,Stephen Curry,201939.0,1610612744,13.112,457.4,GSW,5701.0,36.7
5059,2016-17,Stephen Curry,201939.0,1610612744,13.05,418.5,GSW,5622.0,34.2
5060,2017-18,Stephen Curry,201939.0,1610612744,12.742,259.7,GSW,3507.0,34.9
5061,2018-19,Stephen Curry,201939.0,1610612744,12.908,329.0,GSW,4997.0,30.6
5062,2019-20,Stephen Curry,201939.0,1610612744,13.128,22.4,GSW,313.0,32.7
5063,2020-21,Stephen Curry,201939.0,1610612744,13.068,363.8,GSW,4672.0,35.8
5064,2021-22,Stephen Curry,201939.0,1610612744,13.944,357.7,GSW,4573.0,33.7
5065,2022-23,Stephen Curry,201939.0,1610612744,13.508,318.7,GSW,4174.0,33.9


In [161]:
final_df.sort_values(by=['BALL_HANDLE%'], ascending=False).head(10)

Unnamed: 0,SEASON,PLAYER_NAME,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,OFF_POSS,BALL_HANDLE%
3612,2022-23,Louis King,1629663.0,1610612755,0.0,2.1,PHI,62.0,inf
586,2015-16,Briante Weber,1627362.0,1610612748,14.833,1.1,MIA,6.0,74.2
3900,2021-22,Matt Mooney,1629760.0,1610612752,12.2,0.6,NYK,5.0,59.0
4819,2016-17,Russell Westbrook,201566.0,1610612760,13.687,746.4,OKC,5784.0,56.6
2825,2016-17,John Wall,202322.0,1610612764,14.221,741.0,WAS,5824.0,53.7
4645,2014-15,Reggie Jackson,202704.0,1610612765,15.481,231.0,DET,1671.0,53.6
2406,2016-17,James Harden,201935.0,1610612745,13.783,749.9,HOU,6147.0,53.1
867,2015-16,Chris Paul,101108.0,1610612746,14.7,630.8,LAC,4865.0,52.9
2817,2014-15,John Lucas III,101249.0,1610612765,16.06,72.7,DET,516.0,52.6
4679,2014-15,Ricky Rubio,201937.0,1610612750,14.749,177.6,MIN,1382.0,52.3


In [163]:
final_df[final_df['TEAM_ID'] == 1610612755].head(20)

Unnamed: 0,SEASON,PLAYER_NAME,PLAYER_ID,TEAM_ID,SECONDS_PER_POSS_OFFENSE_PLAYER_ON,TIME_OF_POSS,TEAM,OFF_POSS,BALL_HANDLE%
49,2013-14,Adonis Thomas,203519.0,1610612755,12.962,0.6,PHI,26.0,10.7
62,2019-20,Al Horford,201143.0,1610612755,14.915,111.4,PHI,4164.0,10.8
97,2019-20,Alec Burks,202692.0,1610612755,15.109,48.8,PHI,750.0,25.8
128,2016-17,Alex Poythress,1627816.0,1610612755,14.513,7.1,PHI,316.0,9.3
134,2014-15,Alexey Shved,203144.0,1610612755,14.547,46.3,PHI,574.0,33.3
183,2017-18,Amir Johnson,101161.0,1610612755,13.942,45.9,PHI,2418.0,8.2
184,2018-19,Amir Johnson,101161.0,1610612755,14.045,23.6,PHI,1120.0,9.0
203,2021-22,Andre Drummond,203083.0,1610612755,15.453,41.2,PHI,1783.0,9.0
319,2020-21,Anthony Tolliver,201229.0,1610612755,14.303,2.4,PHI,208.0,4.8
339,2013-14,Arnett Moultrie,203102.0,1610612755,13.341,5.6,PHI,396.0,6.4
