In [26]:
import os
import time
import datetime
import pandas as pd

from tqdm import tqdm
from selenium import webdriver

from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import leaguedashptstats

In [73]:
# Calendar boundaries for each season, keyed by the year the season starts in.
# Only month/day values are stored; the code that consumes the regular-season
# fields pairs the start with `season` and the end with `season + 1`.
# The playoff fields are not read by any cell visible in this notebook.
# NOTE(review): the playoff dates for 2016, 2020 and 2021 look off compared
# with the published NBA calendar — confirm before relying on them.
_SEASON_DATE_FIELDS = (
    "regular_start_month",
    "regular_start_day",
    "regular_end_month",
    "regular_end_day",
    "playoffs_start_month",
    "playoffs_start_day",
    "playoffs_end_month",
    "playoffs_end_day",
)

# One row per season, values in the same order as _SEASON_DATE_FIELDS.
_SEASON_DATE_ROWS = (
    (2013, (10, 29, 4, 16, 4, 19, 6, 15)),
    (2014, (10, 28, 4, 15, 4, 18, 6, 16)),
    (2015, (10, 27, 4, 13, 4, 16, 6, 19)),
    (2016, (10, 25, 4, 12, 4, 21, 6, 15)),
    (2017, (10, 17, 4, 11, 4, 14, 6, 8)),
    (2018, (10, 16, 4, 10, 4, 13, 6, 13)),
    (2019, (10, 22, 8, 14, 8, 17, 10, 11)),
    (2020, (12, 22, 5, 16, 5, 22, 7, 22)),
    (2021, (10, 19, 4, 10, 4, 16, 5, 29)),
)

season_dates = {
    start_year: dict(zip(_SEASON_DATE_FIELDS, values))
    for start_year, values in _SEASON_DATE_ROWS
}

In [76]:
# Directory holding the per-season tracking "touches" CSV exports.
filepath = "C:/Users/lukar/Desktop/Sports Analytics/NBA Datasets/tracking.pbp Touches by Year/"

# A later cell of this notebook writes its traded-player totals into this same
# directory. That derived file must not be read back in as a season export.
# NOTE(review): the original code skipped "the last directory entry"
# (`files[:-1]`), which only excludes this file when it already exists and
# happens to be listed last — confirm nothing else was meant to be skipped.
derived_filename = 'Possessions_Player_traded_totals_2013-22.csv'
files = [name for name in os.listdir(filepath) if name != derived_filename]

# Read every season file and stack them once (DataFrame.append was removed in
# pandas 2.0, and appending inside a loop copies the frame on every iteration).
touches_df = pd.concat(
    [pd.read_csv(filepath + filename) for filename in files],
    ignore_index=True,
)

print(touches_df.shape)

# Keep only the columns used downstream and align the season column name with
# the other tables in this notebook.
touches_df = touches_df[['PLAYER_ID','PLAYER_NAME','TEAM_ABBREVIATION','TIME_OF_POSS','TOUCHES','Season']]

touches_df = touches_df.rename(columns={'Season': 'SEASON'})

touches_df.head()

(4680, 22)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ABBREVIATION,TIME_OF_POSS,TOUCHES,SEASON
0,201985,AJ Price,MIN,20.6,244,2013-14
1,201166,Aaron Brooks,DEN,241.3,2859,2013-14
2,201189,Aaron Gray,SAC,13.8,504,2013-14
3,203519,Adonis Thomas,PHI,1.3,38,2013-14
4,1733,Al Harrington,WAS,20.2,823,2013-14


In [78]:
# Directory holding one game-log CSV per player.
poss_filepath = "C:/Users/lukar/Desktop/Sports Analytics/NBA Datasets/pbp Possessions by Week & Year/"
poss_files = os.listdir(poss_filepath)

# Collect the per-player frames in a list and concatenate once at the end.
# DataFrame.append no longer exists in pandas 2.0, and growing a frame inside
# the loop re-copies all previously loaded rows on every iteration.
player_frames = []

for filename in tqdm(poss_files):
    # The player id is the text after the last '-' in the file name, minus '.csv'.
    pid = filename.split('-')[-1].replace('.csv','')
    player_frames.append(
        pd.read_csv(poss_filepath + filename).assign(PLAYER_ID=int(pid))
    )

possessions_df = pd.concat(player_frames, ignore_index=True)

print(possessions_df.shape)

# Parse dates so later cells can compare them against datetime boundaries.
possessions_df['Date'] = pd.to_datetime(possessions_df['Date'])

possessions_df.head()

100%|██████████████████████████████████████████████████████████████████████████████| 1244/1244 [01:44<00:00, 11.89it/s]


(227226, 32)


Unnamed: 0,Date,Team,Opponent,Minutes,OffPoss,Points,FG2M,FG2A,Fg2Pct,FG3M,...,ShotQualityAvg,EfgPct,TsPct,PtsPutbacks,Fg2aBlocked,FG2APctBlocked,Fg3aBlocked,FG3APctBlocked,Usage,PLAYER_ID
0,2013-10-30,GSW,LAL,17:59,38,2,1,2,0.5,0,...,0.6982,0.5,0.5,0,0,0.0,0,0.0,4.651163,101106
1,2013-10-31,GSW,LAC,28:09,61,17,6,7,0.857143,0,...,0.556986,0.857143,0.85,2,0,0.0,0,0.0,16.666667,101106
2,2013-11-02,GSW,SAC,20:28,41,6,3,6,0.5,0,...,0.494983,0.5,0.428571,0,0,0.0,0,0.0,19.512195,101106
3,2013-11-04,GSW,PHI,22:57,52,4,1,1,1.0,0,...,0.8625,1.0,0.666667,0,0,0.0,0,0.0,6.557377,101106
4,2013-11-06,GSW,MIN,16:18,34,2,1,2,0.5,0,...,0.4265,0.5,0.5,0,0,0.0,0,0.0,13.513514,101106


In [79]:
# Keep only regular-season games and label each row with its season string
# (e.g. 2013 -> '2013-14'). Rows outside every regular-season window are dropped.
season_frames = []

for season in season_dates:
    season_param = str(season) + '-' + str(season+1)[2:]
    bounds = season_dates[season]
    # The regular season starts in `season` and ends in the following calendar year.
    start_date = datetime.datetime(season, bounds['regular_start_month'], bounds['regular_start_day'])
    end_date = datetime.datetime(season+1, bounds['regular_end_month'], bounds['regular_end_day'])

    # Both boundaries are inclusive.
    in_regular_season = (possessions_df['Date'] >= start_date) & (possessions_df['Date'] <= end_date)
    season_frames.append(possessions_df[in_regular_season].assign(SEASON=season_param))

# Single concatenation instead of DataFrame.append in a loop (removed in pandas 2.0).
player_possessions_df = pd.concat(season_frames, ignore_index=True)

print(player_possessions_df.shape)

player_possessions_df.head()

(227224, 33)


Unnamed: 0,Date,Team,Opponent,Minutes,OffPoss,Points,FG2M,FG2A,Fg2Pct,FG3M,...,EfgPct,TsPct,PtsPutbacks,Fg2aBlocked,FG2APctBlocked,Fg3aBlocked,FG3APctBlocked,Usage,PLAYER_ID,SEASON
0,2013-10-30,GSW,LAL,17:59,38,2,1,2,0.5,0,...,0.5,0.5,0,0,0.0,0,0.0,4.651163,101106,2013-14
1,2013-10-31,GSW,LAC,28:09,61,17,6,7,0.857143,0,...,0.857143,0.85,2,0,0.0,0,0.0,16.666667,101106,2013-14
2,2013-11-02,GSW,SAC,20:28,41,6,3,6,0.5,0,...,0.5,0.428571,0,0,0.0,0,0.0,19.512195,101106,2013-14
3,2013-11-04,GSW,PHI,22:57,52,4,1,1,1.0,0,...,1.0,0.666667,0,0,0.0,0,0.0,6.557377,101106,2013-14
4,2013-11-06,GSW,MIN,16:18,34,2,1,2,0.5,0,...,0.5,0.5,0,0,0.0,0,0.0,13.513514,101106,2013-14


In [80]:
# Total offensive possessions per (season, player, team), then saved to disk.
imprt_cols = ['SEASON','PLAYER_ID','Team','OffPoss']
group_keys = ['SEASON','PLAYER_ID','Team']

player_possessions_gb = (
    player_possessions_df[imprt_cols]
    .groupby(by=group_keys, as_index=False)
    .sum()
    .rename(columns={'Team': 'TEAM'})
)

player_possessions_gb.to_csv(
    'C:/Users/lukar/Desktop/Sports Analytics/NBA Processed Data/player_possession_totals_2013_22.csv',
    index=False,
)

In [81]:
# Map each season to the ids of players who logged possessions for more than
# one team that season. player_possessions_gb has one row per
# (SEASON, PLAYER_ID, TEAM), so a repeated (SEASON, PLAYER_ID) pair means the
# player appears under several teams.
traded_players = {}

season_player_pairs = player_possessions_gb[['SEASON','PLAYER_ID']]
# keep=False marks every occurrence of a repeated pair, not only the later ones.
multi_team_rows = season_player_pairs[season_player_pairs.duplicated(keep=False)]

for season in player_possessions_gb['SEASON'].unique():
    season_ids = multi_team_rows.loc[multi_team_rows['SEASON'] == season, 'PLAYER_ID']
    traded_players[season] = list(season_ids.unique())

In [82]:
# Show the season -> list of multi-team player ids mapping built above.
traded_players

{'2013-14': [1889,
  2406,
  2422,
  2501,
  2557,
  2581,
  2736,
  2757,
  101115,
  101122,
  101236,
  200752,
  200758,
  200769,
  200811,
  201150,
  201166,
  201175,
  201189,
  201196,
  201202,
  201571,
  201573,
  201584,
  201601,
  201785,
  201858,
  201947,
  201948,
  201953,
  201957,
  201962,
  201977,
  201986,
  202323,
  202335,
  202348,
  202349,
  202363,
  202390,
  202399,
  202620,
  202682,
  202686,
  202690,
  202705,
  202706,
  202721,
  202730,
  202952,
  203099,
  203104,
  203111,
  203112,
  203120,
  203129,
  203145,
  203156,
  203263,
  203473,
  203519,
  203552],
 '2014-15': [708,
  1889,
  2405,
  2419,
  2570,
  2590,
  2746,
  2747,
  2749,
  101126,
  101183,
  200761,
  200765,
  200779,
  200811,
  201145,
  201147,
  201148,
  201152,
  201167,
  201196,
  201229,
  201571,
  201580,
  201609,
  201948,
  201973,
  201977,
  201985,
  202087,
  202343,
  202347,
  202389,
  202390,
  202397,
  202498,
  202683,
  202688,
  202697,
  

In [83]:
def scrape_tracking_data(season_param, start_date, end_date, player_id, team):
    """Fetch one player's possession-tracking totals for a date window.

    Queries the league-wide ``LeagueDashPtStats`` endpoint (measure type
    'Possessions', per-mode 'Totals') restricted to ``start_date``..``end_date``
    and keeps only the row(s) belonging to ``player_id``.

    Parameters
    ----------
    season_param : str
        Season string passed to the endpoint and written to the SEASON column.
    start_date, end_date : str
        Passed through unchanged as ``date_from_nullable`` / ``date_to_nullable``.
    player_id : int
        Value matched against the endpoint's PLAYER_ID column.
    team : str
        Written to TEAM_ABBREVIATION, replacing whatever the endpoint returned.

    Returns
    -------
    pandas.DataFrame
        Columns PLAYER_ID, PLAYER_NAME, TEAM_ABBREVIATION, TIME_OF_POSS,
        TOUCHES, SEASON. Empty when the player is not in the response.
    """
    league_df = leaguedashptstats.LeagueDashPtStats(
        season=season_param,
        player_or_team='Player',
        pt_measure_type='Possessions',
        per_mode_simple='Totals',
        date_from_nullable=start_date,
        date_to_nullable=end_date,
    ).get_data_frames()[0]

    output_cols = ['PLAYER_ID','PLAYER_NAME','TEAM_ABBREVIATION','TIME_OF_POSS','TOUCHES','SEASON']

    player_rows = league_df[league_df['PLAYER_ID'] == player_id].reset_index(drop=True)
    # Select the output columns first so a response without TEAM_ABBREVIATION
    # still fails loudly, then use .assign so no column is ever set on a
    # sliced/subset frame (avoids SettingWithCopyWarning and its ambiguity).
    player_rows = player_rows.assign(SEASON=season_param)[output_cols]
    return player_rows.assign(TEAM_ABBREVIATION=team)
        
def get_team_dates(season_param, gamelog_df, player_id):
    """Return the first and last game date of a player with each team in a season.

    Parameters
    ----------
    season_param : value compared against ``gamelog_df['SEASON']``.
    gamelog_df : pandas.DataFrame
        Must contain the columns SEASON, PLAYER_ID, Team and Date.
    player_id : value compared against ``gamelog_df['PLAYER_ID']``.

    Returns
    -------
    pandas.DataFrame
        One row per team, in order of first appearance in ``gamelog_df``, with
        columns SEASON, PLAYER_ID, TEAM, START_DATE, END_DATE. Empty (same
        columns) when the player has no rows for that season.
    """
    columns = ['SEASON','PLAYER_ID','TEAM','START_DATE','END_DATE']

    is_player_season = (gamelog_df['SEASON'] == season_param) & (gamelog_df['PLAYER_ID'] == player_id)
    player_games = gamelog_df.loc[is_player_season, ['Team', 'Date']]
    if player_games.empty:
        return pd.DataFrame(columns=columns)

    # One aggregation instead of re-filtering per team and growing a frame
    # row by row. sort=False keeps teams in order of first appearance; rows
    # with a missing Team are skipped by groupby rather than raising.
    stints = (
        player_games
        .groupby('Team', sort=False)['Date']
        .agg(START_DATE='min', END_DATE='max')
        .reset_index()
        .rename(columns={'Team': 'TEAM'})
    )
    stints['SEASON'] = season_param
    stints['PLAYER_ID'] = player_id

    return stints[columns]

In [84]:
# Build one row per (season, traded player, team) with that stint's first and
# last game date.
stint_columns = ['SEASON','PLAYER_ID','TEAM','START_DATE','END_DATE']
stint_frames = []

for season_param in traded_players:
    for player_id in tqdm(traded_players[season_param]):
        temp_df = get_team_dates(season_param, player_possessions_df, player_id)
        # Skip empty results so they cannot influence the concatenated dtypes.
        if not temp_df.empty:
            stint_frames.append(temp_df)

# Single concatenation instead of DataFrame.append in a loop (removed in pandas 2.0).
if stint_frames:
    traded_df = pd.concat(stint_frames, ignore_index=True)
else:
    traded_df = pd.DataFrame(columns=stint_columns)

print(traded_df.shape)

# Render the dates as 'YYYY-MM-DD' text explicitly. A plain astype(str) gives
# that form only when the column is datetime64; on an object column of
# Timestamps it would produce 'YYYY-MM-DD 00:00:00' instead.
for date_col in ('START_DATE', 'END_DATE'):
    traded_df[date_col] = pd.to_datetime(traded_df[date_col]).dt.strftime('%Y-%m-%d')

traded_df.head()

100%|██████████████████████████████████████████████████████████████████████████████████| 62/62 [00:01<00:00, 54.31it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 76/76 [00:01<00:00, 54.49it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 51.21it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 53/53 [00:01<00:00, 52.95it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 59/59 [00:01<00:00, 55.07it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 86/86 [00:01<00:00, 55.48it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.46it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:01<00:00, 55.13it/s]
100%|███████████████████████████████████

(1293, 5)





Unnamed: 0,SEASON,PLAYER_ID,TEAM,START_DATE,END_DATE
0,2013-14,1889,DEN,2013-10-30,2013-12-30
1,2013-14,1889,WAS,2014-02-22,2014-04-16
2,2013-14,2406,MIL,2013-10-30,2014-02-20
3,2013-14,2406,OKC,2014-03-04,2014-04-16
4,2013-14,2422,SAC,2013-10-30,2013-12-07


In [85]:
# Scrape tracking totals for every traded-player stint (one request per row).
MAX_SCRAPE_ATTEMPTS = 5        # give up on a row after this many failed requests
RETRY_PAUSE_SECONDS = 2.0      # base pause between retries, scaled by attempt number
REQUEST_PAUSE_SECONDS = 0.5    # pause after each successful request

scraped_frames = []
failed_requests = []

for index, row in tqdm(traded_df.iterrows(), total=traded_df.shape[0]):
    for attempt in range(1, MAX_SCRAPE_ATTEMPTS + 1):
        try:
            temp_df = scrape_tracking_data(row['SEASON'], row['START_DATE'], row['END_DATE'], row['PLAYER_ID'], row['TEAM'])
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C still interrupts
            # the cell, and bound the retries so a request that can never
            # succeed does not spin forever.
            if attempt == MAX_SCRAPE_ATTEMPTS:
                failed_requests.append((row['SEASON'], row['PLAYER_ID'], row['TEAM'], repr(exc)))
            else:
                time.sleep(RETRY_PAUSE_SECONDS * attempt)
        else:
            scraped_frames.append(temp_df)
            time.sleep(REQUEST_PAUSE_SECONDS)
            break

# Single concatenation instead of DataFrame.append in a loop (removed in pandas 2.0).
traded_touches_df = pd.concat(scraped_frames, ignore_index=True) if scraped_frames else pd.DataFrame()

print(traded_touches_df.shape)

# Report rows that could not be scraped instead of hiding them.
if failed_requests:
    print('WARNING: %d of %d requests failed after %d attempts:' % (len(failed_requests), traded_df.shape[0], MAX_SCRAPE_ATTEMPTS))
    for failure in failed_requests:
        print('   ', failure)

traded_touches_df.to_csv('C:/Users/lukar/Desktop/Sports Analytics/NBA Datasets/tracking.pbp Touches by Year/Possessions_Player_traded_totals_2013-22.csv')

traded_touches_df.head()

100%|██████████████████████████████████████████████████████████████████████████████| 1293/1293 [19:04<00:00,  1.13it/s]

(1293, 6)





Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ABBREVIATION,TIME_OF_POSS,TOUCHES,SEASON
0,1889,Andre Miller,DEN,85.5,1088,2013-14
1,1889,Andre Miller,WAS,82.8,929,2013-14
2,2406,Caron Butler,MIL,53.0,1519,2013-14
3,2406,Caron Butler,OKC,19.8,646,2013-14
4,2422,John Salmons,SAC,21.7,512,2013-14


In [86]:
# Distinct (season, player) combinations among the scraped traded-player rows,
# exposed as a list of (SEASON, PLAYER_NAME) tuples.
name_cols = ['SEASON','PLAYER_ID','PLAYER_NAME']

traded_names_df = traded_touches_df[name_cols].copy()
print(traded_names_df.shape)

# A traded player has one scraped row per team; collapse those to one row.
traded_names_df = traded_names_df.drop_duplicates()
print(traded_names_df.shape)

traded_names = list(
    traded_names_df[['SEASON','PLAYER_NAME']].itertuples(index=False, name=None)
)

traded_names[:5]

(1293, 3)
(622, 3)


[('2013-14', 'Andre Miller'),
 ('2013-14', 'Caron Butler'),
 ('2013-14', 'John Salmons'),
 ('2013-14', 'Reggie Evans'),
 ('2013-14', 'Luke Ridnour')]

In [117]:
# Replace the season rows of traded players with their per-team rows.
print(touches_df.shape)

# Flag season rows whose (SEASON, PLAYER_NAME) pair belongs to a traded player.
season_name_pairs = touches_df[["SEASON","PLAYER_NAME"]].apply(tuple, axis=1)
is_traded = season_name_pairs.isin(traded_names)

sing_touches_df = touches_df.loc[~is_traded].copy()
print(sing_touches_df.shape)

# Add the per-team rows scraped for those traded players.
sing_touches_df = pd.concat([sing_touches_df, traded_touches_df], ignore_index=True)
print(sing_touches_df.shape)

sing_touches_df = sing_touches_df.rename(columns={'TEAM_ABBREVIATION': 'TEAM'})

sing_touches_df.head()

(4680, 6)
(4058, 6)
(5351, 6)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON
0,201985,AJ Price,MIN,20.6,244,2013-14
1,1733,Al Harrington,WAS,20.2,823,2013-14
2,201143,Al Horford,ATL,53.3,1826,2013-14
3,2744,Al Jefferson,CHA,139.2,3575,2013-14
4,202329,Al-Farouq Aminu,NOP,88.4,2520,2013-14


In [118]:
# Directory holding one "seconds per offensive possession" CSV per team-season.
otime_filepath = 'C:/Users/lukar/Desktop/Sports Analytics/NBA Datasets/pbp Seconds Per Possession - Offense/'

files = os.listdir(otime_filepath)

team_frames = []

for filename in files:
    # The first 7 characters of the file name are used as the season string;
    # the text after the last '-' (minus '.csv') is the numeric team id.
    season_param = filename[:7]
    team_id = int(filename.split('-')[-1].replace('.csv',''))
    team_info = teams.find_team_name_by_id(team_id)
    if team_info is None:
        # Fail with a clear message rather than a TypeError from subscripting None.
        raise ValueError('No team found for id %r (from file %r)' % (team_id, filename))
    team_abb = team_info['abbreviation']

    team_frames.append(
        pd.read_csv(otime_filepath + filename)
        .assign(TEAM=team_abb, SEASON=season_param)
        .rename(columns={'Player': 'PLAYER_NAME'})
    )

# Single concatenation instead of DataFrame.append in a loop (removed in pandas 2.0).
otimes_df = pd.concat(team_frames, ignore_index=True)

print(otimes_df.shape)

# Exact-match rename of one player so the name lines up with the mapping used later.
otimes_df['PLAYER_NAME'] = otimes_df['PLAYER_NAME'].replace({'Michael Frazier': 'Michael Frazier II'})

otimes_df.head()

(5396, 8)


Unnamed: 0,PLAYER_NAME,Minutes On,Minutes Off,SPP-OFF P-On,SPP-OFF P-OFF,Difference,TEAM,SEASON
0,Jared Cunningham,22,3944,14.667,14.98,-0.31,ATL,2013-14
1,Lou Williams,1445,2521,14.826,15.066,-0.24,ATL,2013-14
2,Pero Antic,925,3041,15.193,14.912,0.28,ATL,2013-14
3,Shelvin Mack,1490,2476,15.055,14.933,0.12,ATL,2013-14
4,Paul Millsap,2482,1484,15.027,14.897,0.13,ATL,2013-14


In [119]:
# Step 1: attach possession totals to the touches rows by season, player and team.
merged_df = sing_touches_df.merge(
    player_possessions_gb, on=['SEASON','PLAYER_ID','TEAM'], how='left'
)

has_possessions = merged_df['OffPoss'].notna()
noinfo_merged_df = merged_df[~has_possessions].reset_index(drop=True)
merged_df = merged_df[has_possessions].reset_index(drop=True)

print(merged_df.shape)

# Step 2: retry the rows that found no match, this time ignoring the team and
# taking TEAM and OffPoss from the possessions table instead.
# NOTE(review): a player with several teams in player_possessions_gb would
# yield one output row per team here — confirm that is acceptable.
find_df = noinfo_merged_df.drop(columns=['OffPoss','TEAM']).merge(
    player_possessions_gb, on=['SEASON','PLAYER_ID'], how='left'
)
find_df = find_df[merged_df.columns].copy()
reid_merged_df = find_df[find_df['TEAM'].notna()]
noinfo_merged_df = find_df[find_df['OffPoss'].isna()]

merged_df = pd.concat([merged_df, reid_merged_df], ignore_index=True)

print(noinfo_merged_df.shape)
print(merged_df.shape)

# Step 3: attach on-court minutes and seconds-per-possession by season, name and team.
otime_cols = ['SEASON','PLAYER_NAME','TEAM','Minutes On','SPP-OFF P-On']
merged_df = merged_df.merge(
    otimes_df[otime_cols], on=['SEASON','PLAYER_NAME','TEAM'], how='left'
)

has_poss_time = merged_df['SPP-OFF P-On'].notna()
unind_merged_df = merged_df[~has_poss_time].reset_index(drop=True)
merged_df = merged_df[has_poss_time].reset_index(drop=True)

print(unind_merged_df.shape)
print(merged_df.shape)

merged_df.head()

(5240, 7)
(76, 7)
(5275, 7)
(59, 9)
(5216, 9)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,SPP-OFF P-On
0,201985,AJ Price,MIN,20.6,244,2013-14,207.0,99.0,15.029
1,1733,Al Harrington,WAS,20.2,823,2013-14,986.0,511.0,15.645
2,201143,Al Horford,ATL,53.3,1826,2013-14,1900.0,958.0,14.438
3,2744,Al Jefferson,CHA,139.2,3575,2013-14,4893.0,2553.0,16.07
4,202329,Al-Farouq Aminu,NOP,88.4,2520,2013-14,3896.0,2045.0,15.728


In [120]:
# Spelling used in the touches table -> spelling used in otimes_df, for the
# players whose name-based merge failed in the previous cell.
name_map = {
    'Enes Freedom': 'Enes Kanter',
    'Marcus Morris Sr.': 'Marcus Morris',
    'P.J. Tucker': 'PJ Tucker',
    'C.J. Wilcox': 'CJ Wilcox',
    'T.J. Warren': 'TJ Warren',
    'Danuel House Jr.': 'Danuel House',
    'Juancho Hernangomez': 'Juan Hernangomez',
    'O.G. Anunoby': 'OG Anunoby',
    'P.J. Dozier': 'PJ Dozier',
    'Walt Lemon Jr.': 'Walter Lemon Jr.',
    'Frank Mason III': 'Frank Mason',
    'T.J. Leaf': 'TJ Leaf',
    'Cam Reynolds': 'Cameron Reynolds',
    'Kevin Knox II': 'Kevin Knox',
    'Nic Claxton': 'Nicolas Claxton',
    'Brandon Boston': 'Brandon Boston Jr.',
    'Xavier Tillman Sr.': 'Xavier Tillman',
    'Michael Frazier II': 'Melvin Frazier Jr.',
    'Charlie Brown Jr.': 'Charlie Brown Jr.',
}

# Translate mapped names but keep the original for anything not in the map.
# A bare .map(name_map) would turn every unmapped name into NaN, so the rows
# that remain unmatched below could no longer be identified.
original_names = unind_merged_df['PLAYER_NAME']
unind_merged_df['PLAYER_NAME'] = original_names.map(name_map).fillna(original_names)

# Retry the merge with the translated names.
new_id_merged_df = pd.merge(unind_merged_df.drop(columns=['Minutes On','SPP-OFF P-On']),
                            otimes_df[['SEASON','PLAYER_NAME','TEAM','Minutes On','SPP-OFF P-On']],
                            on=['SEASON','PLAYER_NAME','TEAM'], how='left')

print(new_id_merged_df.shape)
unind_merged_df = new_id_merged_df[new_id_merged_df['Minutes On'].isna()]
new_id_merged_df = new_id_merged_df[~new_id_merged_df['Minutes On'].isna()]
print(new_id_merged_df.shape)
print(unind_merged_df.shape)

merged_df = pd.concat([merged_df, new_id_merged_df]).reset_index(drop=True)
print(merged_df.shape)

merged_df = merged_df.rename(columns={'SPP-OFF P-On': 'AvgOffPossTime'})

merged_df.head()

(59, 9)
(58, 9)
(1, 9)
(5274, 9)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,AvgOffPossTime
0,201985,AJ Price,MIN,20.6,244,2013-14,207.0,99.0,15.029
1,1733,Al Harrington,WAS,20.2,823,2013-14,986.0,511.0,15.645
2,201143,Al Horford,ATL,53.3,1826,2013-14,1900.0,958.0,14.438
3,2744,Al Jefferson,CHA,139.2,3575,2013-14,4893.0,2553.0,16.07
4,202329,Al-Farouq Aminu,NOP,88.4,2520,2013-14,3896.0,2045.0,15.728


In [121]:
# TimeOnOff: offensive possessions multiplied by the average possession time.
merged_df['TimeOnOff'] = merged_df['OffPoss'] * merged_df['AvgOffPossTime']

# BallHog%: TIME_OF_POSS scaled by 60, as a percentage of TimeOnOff, to 1 decimal.
scaled_time_of_poss = merged_df['TIME_OF_POSS'] * 60
merged_df['BallHog%'] = (scaled_time_of_poss / merged_df['TimeOnOff'] * 100).round(1)

merged_df.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,AvgOffPossTime,TimeOnOff,BallHog%
0,201985,AJ Price,MIN,20.6,244,2013-14,207.0,99.0,15.029,3111.003,39.7
1,1733,Al Harrington,WAS,20.2,823,2013-14,986.0,511.0,15.645,15425.97,7.9
2,201143,Al Horford,ATL,53.3,1826,2013-14,1900.0,958.0,14.438,27432.2,11.7
3,2744,Al Jefferson,CHA,139.2,3575,2013-14,4893.0,2553.0,16.07,78630.51,10.6
4,202329,Al-Farouq Aminu,NOP,88.4,2520,2013-14,3896.0,2045.0,15.728,61276.288,8.7


In [122]:
# Save the final per-player table, without the index column.
ballhog_csv_path = 'C:/Users/lukar/Desktop/Sports Analytics/NBA Processed Data/player_ballhog_rates_by_season_2013_22.csv'
merged_df.to_csv(ballhog_csv_path, index=False)

In [123]:
# Save the per-team stint dates of traded players, without the index column.
trade_dates_csv_path = 'C:/Users/lukar/Desktop/Sports Analytics/NBA Processed Data/player_trades_dates_2013_22.csv'
traded_df.to_csv(trade_dates_csv_path, index=False)

In [128]:
# Season-by-season rows for one player, oldest season first (at most 50 rows).
is_garland = merged_df['PLAYER_NAME'].eq("Darius Garland")
merged_df.loc[is_garland].sort_values('SEASON').head(50)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,AvgOffPossTime,TimeOnOff,BallHog%
2609,1629636,Darius Garland,CLE,285.8,3523,2019-20,3734.0,1824.0,15.391,57469.994,29.8
3052,1629636,Darius Garland,CLE,334.0,4162,2020-21,3614.0,1790.0,15.353,55485.742,36.1
3509,1629636,Darius Garland,CLE,523.4,5824,2021-22,4856.0,2430.0,15.324,74413.344,42.2


In [131]:
# Season-by-season rows for every player whose name contains 'Fox'.
# na=False treats a missing name as "no match"; without it a single NaN in
# PLAYER_NAME makes the mask non-boolean and the row selection raises.
merged_df[merged_df['PLAYER_NAME'].str.contains('Fox', na=False)].sort_values(by=['SEASON'], ascending=True).head(50)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,AvgOffPossTime,TimeOnOff,BallHog%
1737,1628368,De'Aaron Fox,SAC,383.9,4951,2017-18,4065.0,2026.0,15.142,61552.23,37.4
2188,1628368,De'Aaron Fox,SAC,476.0,6307,2018-19,5520.0,2546.0,13.076,72179.52,39.6
2612,1628368,De'Aaron Fox,SAC,353.2,4028,2019-20,3429.0,1634.0,13.962,47875.698,44.3
3056,1628368,De'Aaron Fox,SAC,431.7,5015,2020-21,4251.0,2036.0,14.037,59671.287,43.4
3516,1628368,De'Aaron Fox,SAC,365.7,4801,2021-22,4362.0,2084.0,13.808,60230.496,36.4


In [129]:
# Season-by-season rows for one player, oldest season first (at most 50 rows).
is_mitchell = merged_df['PLAYER_NAME'].eq("Donovan Mitchell")
merged_df.loc[is_mitchell].sort_values('SEASON').head(50)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,AvgOffPossTime,TimeOnOff,BallHog%
1760,1628378,Donovan Mitchell,UTA,344.4,4835,2017-18,5268.0,2638.0,15.322,80716.296,25.6
2215,1628378,Donovan Mitchell,UTA,369.0,4698,2018-19,5403.0,2598.0,14.457,78111.171,28.3
2637,1628378,Donovan Mitchell,UTA,393.5,4700,2019-20,4837.0,2364.0,14.921,72172.877,32.7
3083,1628378,Donovan Mitchell,UTA,324.9,3987,2020-21,3633.0,1771.0,14.517,52740.261,37.0
3540,1628378,Donovan Mitchell,UTA,417.7,4724,2021-22,4589.0,2266.0,14.82,68008.98,36.9


In [127]:
# Top 50 BallHog% values in 2020-21 among rows with at least 1000 'Minutes On'.
enough_minutes = merged_df['Minutes On'].ge(1000)
in_2020_21 = merged_df['SEASON'].eq('2020-21')
merged_df.loc[enough_minutes & in_2020_21].sort_values('BallHog%', ascending=False).head(50)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM,TIME_OF_POSS,TOUCHES,SEASON,OffPoss,Minutes On,AvgOffPossTime,TimeOnOff,BallHog%
3385,1629027,Trae Young,ATL,551.3,5396,2020-21,4388.0,2125.0,14.465,63472.42,52.1
3254,1629029,Luka Doncic,DAL,588.6,5869,2020-21,4585.0,2259.0,15.024,68885.04,51.3
3344,201566,Russell Westbrook,WAS,549.6,6192,2020-21,5160.0,2369.0,13.179,68003.64,48.5
4832,201935,James Harden,BKN,317.8,3376,2020-21,2683.0,1319.0,14.728,39515.224,48.3
3184,202322,John Wall,HOU,307.5,3197,2020-21,2676.0,1288.0,14.503,38810.028,47.5
3031,101108,Chris Paul,PHX,502.5,5418,2020-21,4432.0,2199.0,14.746,65354.272,46.1
3043,203081,Damian Lillard,POR,559.2,5449,2020-21,4959.0,2398.0,14.907,73923.813,45.4
3354,1628983,Shai Gilgeous-Alexander,OKC,264.1,2687,2020-21,2432.0,1180.0,14.788,35964.416,44.1
3056,1628368,De'Aaron Fox,SAC,431.7,5015,2020-21,4251.0,2036.0,14.037,59671.287,43.4
3038,1630175,Cole Anthony,ORL,287.5,2876,2020-21,2601.0,1273.0,15.275,39730.275,43.4
