## Scrape Offensive Playtypes

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.ticker as mtick
import sqlite3
import seaborn as sns
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests   
import shutil      
import datetime
from scipy.stats import norm
import os
import winsound

home_folder = 'C:\\Users\\Travis\\OneDrive\\Data Science\\Personal_Projects\\Sports\\NBA_Prediction_V3_1'
os.chdir(home_folder)

In [5]:
def replace_name_values2(filename):
    """Turn a URL fragment into a short, filesystem-friendly name.

    Each of the characters ``%``, ``=``, ``?`` and ``&`` becomes an
    underscore, after which the boilerplate query tokens are deleted one
    after another in a fixed order.
    """
    # One pass for the four single-character substitutions.
    separators = str.maketrans({'%': '_', '=': '_', '?': '_', '&': '_'})
    cleaned = filename.translate(separators)

    # Removals run sequentially, so an earlier deletion can change what a
    # later token matches; keep this order.
    boilerplate = (
        '20Season_',
        '_20Season',
        'SeasonType_',
        'sort_gdate_dir_-1_',
        'SeasonYear_',
    )
    for token in boilerplate:
        cleaned = cleaned.replace(token, '')
    return cleaned

In [6]:
def trans_urls(url):
    """Derive the bare file stem (no folder, no extension) for a stats URL.

    The first 34 characters are dropped -- that is the length of
    'https://www.nba.com/stats/players/' -- remaining slashes become
    underscores, and the rest is cleaned by ``replace_name_values2``.
    """
    url_tail = str(url)[34:]
    flattened = url_tail.replace('/', '_')
    stem = replace_name_values2(flattened)
    # Strip the token once more on the cleaned name.
    return stem.replace('SeasonYear_', '')

In [7]:
def append_the_data(folder, data_prefix, filename_selector):
    """Read every matching CSV in *folder* and stack them into one DataFrame.

    Args:
        folder: Directory holding the per-season CSV files.
        data_prefix: Text prepended to every column name; the resulting
            names are then lower-cased.
        filename_selector: Literal substring a file name must contain to be
            included (it is not interpreted as a regular expression).

    Returns:
        A DataFrame with the rows of all matching files. Two columns are
        added before prefixing: ``season`` (the first "20xx" found in the
        file name, kept as a string) and ``season_type`` ('Regular' when
        the file name contains 'Regular', otherwise 'Playoffs').

    Raises:
        ValueError: If no file name in *folder* contains *filename_selector*.
    """
    import re  # local import so the function does not rely on a file-level one

    # Plain substring test: selectors such as 'spot-up' are literal text.
    matching_files = [name for name in os.listdir(folder) if filename_selector in name]
    if not matching_files:
        raise ValueError(
            f'No file in {folder!r} contains {filename_selector!r}; nothing to append.'
        )

    frames = []
    for file in matching_files:
        data = pd.read_csv(os.path.join(folder, file))

        # A scraped "Season" column would clash with the one derived below.
        if 'Season' in data.columns:
            data = data.drop(columns=['Season'])

        # Take the first four-digit year starting with "20" rather than the
        # first bare "20", which could sit elsewhere in the file name.
        year_match = re.search(r'20\d{2}', file)
        data['season'] = year_match.group(0) if year_match else ''
        data['season_type'] = 'Regular' if 'Regular' in file else 'Playoffs'

        data = data.add_prefix(data_prefix)
        data.columns = data.columns.str.lower()
        frames.append(data)

    return pd.concat(frames)

In [8]:
def grab_playtype(url_list, file_folder):
    """Scrape season-level player play-type tables and save each one as a CSV.

    Relies on a module-level Selenium ``driver`` that the caller creates
    before calling this function. A URL whose page does not load, or whose
    stats table is missing or empty, is reported and skipped so one bad page
    does not abort the whole run. A beep is played once the list is done.

    Args:
        url_list: Stats page URLs to scrape.
        file_folder: Folder prefix (including its trailing separator) that
            the CSV files are written to.
    """
    # First entry of the page-size dropdown; per the original note, clicking
    # it shows all pages of the table at once.
    xpath_all = '//*[@id="__next"]/div[2]/div[2]/div[3]/section[2]/div/div[2]/div[2]/div[1]/div[3]/div/label/div/select/option[1]'
    lu = len(url_list)
    i = 0

    for u in url_list:
        driver.get(u)
        time.sleep(2)

        # If the page does not load, go to the next URL. Catch Exception
        # rather than everything so Ctrl+C can still stop the scraper.
        try:
            all_option = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.XPATH, xpath_all))
            )
        except Exception:
            print(f'{u} did not load. Moving to next url.')
            continue

        all_option.click()
        # NOTE(review): the page source is read right after the click, with
        # no wait for the table to re-render -- confirm this is reliable.
        parser = BeautifulSoup(driver.page_source, "lxml")
        table = parser.find("table", attrs={"class": "Crom_table__p1iZz"})
        if table is None:
            print(f'{u} has no stats table. Moving to next url.')
            continue

        headerlist = [h.text.strip() for h in table.find_all('th')]
        # The header <tr> has no <td> cells, so row 0 is an empty list and
        # becomes a blank row in the CSV. It is kept on purpose so new files
        # have the same layout as the ones already on disk.
        player_stats = [
            [td.getText().strip() for td in row.find_all('td')]
            for row in table.find_all('tr')
        ]
        if len(player_stats) < 2:
            print(f'{u} returned no data rows. Moving to next url.')
            continue

        # Use the first data row's width to ignore hidden header columns.
        tot_cols = len(player_stats[1])
        stats = pd.DataFrame(player_stats, columns=headerlist[:tot_cols])

        # Sanitise only the URL part; the folder path is used as given.
        filename = file_folder + trans_urls(u) + '.csv'
        stats.to_csv(filename)

        i += 1
        print(f'{filename} Completed Successfully! {i} / {lu} Complete!')

    winsound.Beep(523, 500)

In [9]:
# Seasons, play types and season types that make up the scrape list.
years = ['2021-22', '2020-21', '2019-20', '2018-19', '2017-18', '2016-17', '2015-16']
playtypes = ['isolation', 'transition', 'ball-handler', 'roll-man',
            'playtype-post-up','spot-up', 'hand-off', 'cut',
            'off-screen', 'putbacks', 'misc']
season_types = ['Playoffs', 'Regular%20Season']

# Build one URL per (year, play type, season type). The misc playoff pages
# do not work, so they are filtered out here instead of being removed one by
# one afterwards; that way the list stays correct when `years` changes.
playtype_urlz = [
    'https://www.nba.com/stats/players/' + play + '?SeasonType=' + s_types + '&SeasonYear=' + year
    for year in years
    for play in playtypes
    for s_types in season_types
    if not (play == 'misc' and s_types == 'Playoffs')
]

len(playtype_urlz)

147

In [10]:
# One row per candidate URL, in the order the URLs were generated.
to_download = pd.DataFrame({'url': playtype_urlz})

In [11]:
# Work out the file stem each URL is saved under, so it can be compared
# with the files already on disk.
to_download['filename'] = to_download['url'].map(trans_urls)
to_download

Unnamed: 0,url,filename
0,https://www.nba.com/stats/players/isolation?Se...,isolation_Playoffs_2021-22
1,https://www.nba.com/stats/players/isolation?Se...,isolation_Regular_2021-22
2,https://www.nba.com/stats/players/transition?S...,transition_Playoffs_2021-22
3,https://www.nba.com/stats/players/transition?S...,transition_Regular_2021-22
4,https://www.nba.com/stats/players/ball-handler...,ball-handler_Playoffs_2021-22
...,...,...
142,https://www.nba.com/stats/players/off-screen?S...,off-screen_Playoffs_2015-16
143,https://www.nba.com/stats/players/off-screen?S...,off-screen_Regular_2015-16
144,https://www.nba.com/stats/players/putbacks?Sea...,putbacks_Playoffs_2015-16
145,https://www.nba.com/stats/players/putbacks?Sea...,putbacks_Regular_2015-16


In [12]:
# Collect the stems (file names with '.csv' stripped) of everything already
# saved, across the regular-season and playoff folders.
downloaded_files_reg = os.listdir('data/player/playtype/regular_season/')
downloaded_files_play = os.listdir('data/player/playtype/playoffs/')
downloaded_files = [
    name.replace('.csv', '')
    for listing in (downloaded_files_reg, downloaded_files_play)
    for name in listing
]

In [13]:
# Keep only the URLs whose file stem is not on disk yet.
already_saved = to_download['filename'].isin(downloaded_files)
to_download = to_download.loc[~already_saved]
to_download_list = to_download['url'].tolist()

In [14]:
to_download_list

[]

In [15]:
len(to_download_list)

0

In [16]:
# Only start a browser when something is actually missing.
if to_download_list:
    driver = webdriver.Chrome()
    try:
        grab_playtype(to_download_list, 'data/player/playtype/')
    finally:
        # Shut the browser session down even if the scrape raises, so no
        # Chrome/chromedriver process is left behind.
        driver.quit()
else:
    print('All files are downloaded')

All files are downloaded


### Update This Year

In [21]:
# Regular-season URLs for the current (2022-23) season, one per play type.
playtypes = ['isolation', 'transition', 'ball-handler', 'roll-man',
            'playtype-post-up','spot-up', 'hand-off', 'cut',
            'off-screen', 'putbacks', 'misc']

current_urls = [
    'https://www.nba.com/stats/players/' + play + '?SeasonType=Regular%20Season' + '&SeasonYear=2022-23'
    for play in playtypes
]

In [22]:
# Re-scrape the current season. The browser is always shut down afterwards,
# including when the scrape raises, so no Chrome session is left running.
driver = webdriver.Chrome()
try:
    grab_playtype(current_urls, 'data/player/playtype/')
finally:
    driver.quit()

data/player/playtype/isolation_Regular_2022-23.csv Completed Successfully! 1 / 11 Complete!
data/player/playtype/transition_Regular_2022-23.csv Completed Successfully! 2 / 11 Complete!
data/player/playtype/ball-handler_Regular_2022-23.csv Completed Successfully! 3 / 11 Complete!
data/player/playtype/roll-man_Regular_2022-23.csv Completed Successfully! 4 / 11 Complete!
data/player/playtype/playtype-post-up_Regular_2022-23.csv Completed Successfully! 5 / 11 Complete!
data/player/playtype/spot-up_Regular_2022-23.csv Completed Successfully! 6 / 11 Complete!
data/player/playtype/hand-off_Regular_2022-23.csv Completed Successfully! 7 / 11 Complete!
data/player/playtype/cut_Regular_2022-23.csv Completed Successfully! 8 / 11 Complete!
data/player/playtype/off-screen_Regular_2022-23.csv Completed Successfully! 9 / 11 Complete!
data/player/playtype/putbacks_Regular_2022-23.csv Completed Successfully! 10 / 11 Complete!
data/player/playtype/misc_Regular_2022-23.csv Completed Successfully! 11 / 11 

In [23]:
# Move freshly scraped CSVs into their season-type folders. The destination
# is given as a full file path and os.replace overwrites it, so re-running
# the current-season update replaces the older copy instead of raising
# "Destination path already exists".
playtype_dir = 'data/player/playtype/'
season_folders = (('Playoffs', 'playoffs'), ('Regular', 'regular_season'))

for _marker, subfolder in season_folders:
    os.makedirs(os.path.join(playtype_dir, subfolder), exist_ok=True)

for file in os.listdir(playtype_dir):
    if not file.endswith('.csv'):
        continue
    # 'Playoffs' is checked first, as in the original if/elif.
    for marker, subfolder in season_folders:
        if marker in file:
            os.replace(
                os.path.join(playtype_dir, file),
                os.path.join(playtype_dir, subfolder, file),
            )
            break


In [24]:
# agg each sub-category
def _aggregate_playtype(prefix, selector, label, include_playoffs=True):
    """Stack one play type's per-season CSVs and write its aggregate files.

    Returns ``(regular, playoffs, combined)``. When *include_playoffs* is
    False no playoff data is read or written, ``playoffs`` is None and
    ``combined`` is the very same object as ``regular``.
    """
    reg = append_the_data('data/player/playtype/regular_season/', prefix, selector)
    if include_playoffs:
        playoffs = append_the_data('data/player/playtype/playoffs/', prefix, selector)
        combined = pd.concat([reg, playoffs], axis = 0)
    else:
        playoffs = None
        combined = reg

    reg.to_csv(f'data/player/aggregates/Playtype_Offense_{label}_Regular_Season.csv')
    if include_playoffs:
        playoffs.to_csv(f'data/player/aggregates/Playtype_Offense_{label}_Playoffs.csv')
    combined.to_csv(f'data/player/aggregates/Playtype_Offense_{label}_ALL.csv')
    return reg, playoffs, combined


ball_handler_reg, ball_handler_playoffs, ball_handler = _aggregate_playtype(
    'playtype_ball_handler__', 'ball-handler', 'Ball_Handler')

cutter_reg, cutter_playoffs, cutter = _aggregate_playtype(
    'playtype_cut__', 'cut', 'Cutter')

hand_off_reg, hand_off_playoffs, hand_off = _aggregate_playtype(
    'playtype_hand_off__', 'hand-off', 'Hand_Off')

iso_reg, iso_playoffs, iso = _aggregate_playtype(
    'playtype_iso__', 'isolation', 'Isolation')

# Note: NO PLAYOFF MISC -- `misc` is the same object as `misc_reg`.
misc_reg, _misc_playoffs, misc = _aggregate_playtype(
    'playtype_misc__', 'misc', 'Misc', include_playoffs=False)

off_screen_reg, off_screen_playoffs, off_screen = _aggregate_playtype(
    'playtype_off_screen__', 'off-screen', 'Off_Screen')

postup_reg, postup_playoffs, postup = _aggregate_playtype(
    'playtype_postup__', 'post-up', 'Post_Up')

putback_reg, putback_playoffs, putback = _aggregate_playtype(
    'playtype_putback__', 'putback', 'Putback')

rollman_reg, rollman_playoffs, rollman = _aggregate_playtype(
    'playtype_rollman__', 'roll-man', 'Roll_Man')

spotups_reg, spotups_playoffs, spotups = _aggregate_playtype(
    'playtype_spot_up__', 'spot-up', 'Spot_Up')

transition_reg, transition_playoffs, transition = _aggregate_playtype(
    'playtype_transition__', 'transition', 'Transition')

In [25]:
# get df sizes
# Print the (rows, columns) shape of every combined play-type frame as a
# quick sanity check on the aggregation above.
print(f' ball_handler: {ball_handler.shape}, cutter: {cutter.shape}, hand_off: {hand_off.shape}, iso: {iso.shape}, misc: {misc.shape}, off_screen: {off_screen.shape}, postup: {postup.shape}, putbacks: {putback.shape}, rollman: {rollman.shape}, spotups: {spotups.shape}, transition: {transition.shape}')

 ball_handler: (3243, 20), cutter: (3700, 20), hand_off: (2840, 20), iso: (2977, 20), misc: (3920, 23), off_screen: (2548, 20), postup: (2016, 20), putbacks: (3264, 20), rollman: (2523, 20), spotups: (4757, 20), transition: (4684, 20)


In [26]:
# turn season into int for both dfs
# append_the_data slices the season out of the file name as text; casting
# both frames to int gives the season merge keys one common dtype.
spotups['playtype_spot_up__season'] = spotups['playtype_spot_up__season'].astype(int)
transition['playtype_transition__season'] = transition['playtype_transition__season'].astype(int)

In [27]:
# Look for rows repeated on (season, player, season_type, team). Team is
# part of the key, so a player listed once per team in a season is not
# flagged here; per the original note, those per-team rows are meant to stay.

dup_check_cols = ['playtype_spot_up__season', 'playtype_spot_up__player', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
is_repeated = spotups.duplicated(subset = dup_check_cols, keep = False)
dups = spotups[is_repeated].sort_values(by = ['playtype_spot_up__player', 'playtype_spot_up__season'])
dups

Unnamed: 0,playtype_spot_up__unnamed: 0,playtype_spot_up__player,playtype_spot_up__team,playtype_spot_up__gp,playtype_spot_up__poss,playtype_spot_up__freq%,playtype_spot_up__ppp,playtype_spot_up__pts,playtype_spot_up__fgm,playtype_spot_up__fga,playtype_spot_up__fg%,playtype_spot_up__efg%,playtype_spot_up__ftfreq%,playtype_spot_up__tovfreq%,playtype_spot_up__sffreq%,playtype_spot_up__and onefreq%,playtype_spot_up__scorefreq%,playtype_spot_up__percentile,playtype_spot_up__season,playtype_spot_up__season_type


In [28]:
# Keep the first row for each (season, player, season_type, team) combination.
dedupe_cols = ['playtype_spot_up__season', 'playtype_spot_up__player', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
spotups_dd = spotups.drop_duplicates(subset = dedupe_cols)
spotups_dd

Unnamed: 0,playtype_spot_up__unnamed: 0,playtype_spot_up__player,playtype_spot_up__team,playtype_spot_up__gp,playtype_spot_up__poss,playtype_spot_up__freq%,playtype_spot_up__ppp,playtype_spot_up__pts,playtype_spot_up__fgm,playtype_spot_up__fga,playtype_spot_up__fg%,playtype_spot_up__efg%,playtype_spot_up__ftfreq%,playtype_spot_up__tovfreq%,playtype_spot_up__sffreq%,playtype_spot_up__and onefreq%,playtype_spot_up__scorefreq%,playtype_spot_up__percentile,playtype_spot_up__season,playtype_spot_up__season_type
0,0,,,,,,,,,,,,,,,,,,2015,Regular
1,1,Wesley Matthews,DAL,78.0,4.6,36.2,1.11,5.1,1.7,4.3,39.3,56.1,2.8,3.4,2.2,0.6,39.4,86.0,2015,Regular
2,2,Kawhi Leonard,SAS,72.0,4.3,23.5,1.25,5.4,2.0,4.0,49.3,63.5,5.1,3.9,4.8,1.0,49.5,96.3,2015,Regular
3,3,Marvin Williams,CHA,81.0,4.3,39.5,1.12,4.8,1.7,4.0,41.5,56.7,2.9,2.9,2.6,0.6,41.6,86.9,2015,Regular
4,4,JR Smith,CLE,77.0,4.1,33.7,1.18,4.9,1.7,4.0,42.7,60.0,0.6,1.9,0.6,0.0,42.3,93.2,2015,Regular
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,183,Vlatko Cancar,DEN,2.0,1.0,40.0,0.00,0.0,0.0,0.5,0.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,2021,Playoffs
184,184,George Hill,MIL,5.0,0.4,28.6,0.00,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Playoffs
185,185,Jarrett Culver,MEM,3.0,1.0,23.1,0.00,0.0,0.0,0.7,0.0,0.0,0.0,33.3,0.0,0.0,0.0,0.0,2021,Playoffs
186,186,Malachi Flynn,TOR,6.0,0.8,71.4,0.00,0.0,0.0,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Playoffs


In [29]:
# Confirm that no repeated key combinations are left after de-duplication.
recheck_cols = ['playtype_spot_up__season', 'playtype_spot_up__player', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
still_repeated = spotups_dd.duplicated(subset = recheck_cols, keep = False)
spotups_dd[still_repeated]

Unnamed: 0,playtype_spot_up__unnamed: 0,playtype_spot_up__player,playtype_spot_up__team,playtype_spot_up__gp,playtype_spot_up__poss,playtype_spot_up__freq%,playtype_spot_up__ppp,playtype_spot_up__pts,playtype_spot_up__fgm,playtype_spot_up__fga,playtype_spot_up__fg%,playtype_spot_up__efg%,playtype_spot_up__ftfreq%,playtype_spot_up__tovfreq%,playtype_spot_up__sffreq%,playtype_spot_up__and onefreq%,playtype_spot_up__scorefreq%,playtype_spot_up__percentile,playtype_spot_up__season,playtype_spot_up__season_type


In [30]:
# Left-join transition stats onto the spot-up rows, matching on player,
# season, season type and team.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_transition__player', 'playtype_transition__season', 'playtype_transition__season_type', 'playtype_transition__team']
all_playtype_data = spotups.merge(transition, how = 'left', left_on = left_keys, right_on = right_keys)

In [31]:
all_playtype_data

Unnamed: 0,playtype_spot_up__unnamed: 0,playtype_spot_up__player,playtype_spot_up__team,playtype_spot_up__gp,playtype_spot_up__poss,playtype_spot_up__freq%,playtype_spot_up__ppp,playtype_spot_up__pts,playtype_spot_up__fgm,playtype_spot_up__fga,...,playtype_transition__fg%,playtype_transition__efg%,playtype_transition__ftfreq%,playtype_transition__tovfreq%,playtype_transition__sffreq%,playtype_transition__and onefreq%,playtype_transition__scorefreq%,playtype_transition__percentile,playtype_transition__season,playtype_transition__season_type
0,0,,,,,,,,,,...,,,,,,,,,2015.0,Regular
1,1,Wesley Matthews,DAL,78.0,4.6,36.2,1.11,5.1,1.7,4.3,...,41.9,53.8,13.0,3.7,11.1,2.8,46.3,51.3,2015.0,Regular
2,2,Kawhi Leonard,SAS,72.0,4.3,23.5,1.25,5.4,2.0,4.0,...,57.5,62.3,15.9,10.0,11.2,4.7,55.9,73.2,2015.0,Regular
3,3,Marvin Williams,CHA,81.0,4.3,39.5,1.12,4.8,1.7,4.0,...,45.9,55.7,9.9,4.2,7.0,0.0,49.3,53.9,2015.0,Regular
4,4,JR Smith,CLE,77.0,4.1,33.7,1.18,4.9,1.7,4.0,...,46.2,59.8,4.2,3.5,4.2,0.0,45.1,56.1,2015.0,Regular
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4752,183,Vlatko Cancar,DEN,2.0,1.0,40.0,0.00,0.0,0.0,0.5,...,,,,,,,,,,
4753,184,George Hill,MIL,5.0,0.4,28.6,0.00,0.0,0.0,0.4,...,50.0,75.0,0.0,0.0,0.0,0.0,50.0,0.0,2021.0,Playoffs
4754,185,Jarrett Culver,MEM,3.0,1.0,23.1,0.00,0.0,0.0,0.7,...,50.0,50.0,0.0,0.0,0.0,0.0,50.0,0.0,2021.0,Playoffs
4755,186,Malachi Flynn,TOR,6.0,0.8,71.4,0.00,0.0,0.0,0.8,...,,,,,,,,,,


In [32]:
# Compare shapes before and after the left join: the merged frame should
# keep the spot-up row count unless a transition key combination repeats.
print(f' Spotups shape: {spotups.shape}, Transition shape: {transition.shape}, All Playtype shape: {all_playtype_data.shape}')

 Spotups shape: (4757, 20), Transition shape: (4684, 20), All Playtype shape: (4757, 40)


#### Add Cutter

In [33]:
cutter.head(2)

Unnamed: 0,playtype_cut__unnamed: 0,playtype_cut__player,playtype_cut__team,playtype_cut__gp,playtype_cut__poss,playtype_cut__freq%,playtype_cut__ppp,playtype_cut__pts,playtype_cut__fgm,playtype_cut__fga,playtype_cut__fg%,playtype_cut__efg%,playtype_cut__ftfreq%,playtype_cut__tovfreq%,playtype_cut__sffreq%,playtype_cut__and onefreq%,playtype_cut__scorefreq%,playtype_cut__percentile,playtype_cut__season,playtype_cut__season_type
0,0,,,,,,,,,,,,,,,,,,2015,Regular
1,1,Marcin Gortat,WAS,75.0,4.1,31.4,1.13,4.6,2.0,3.5,57.7,57.7,11.4,5.2,10.4,1.3,58.3,35.3,2015,Regular


In [34]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
cutter['playtype_cut__season'] = cutter['playtype_cut__season'].astype(int)

In [35]:
# Left-join the cutter stats onto the running table, keyed by the spot-up
# player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_cut__player', 'playtype_cut__season', 'playtype_cut__season_type', 'playtype_cut__team']
all_playtype_data2 = all_playtype_data.merge(cutter, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data.shape}, cutter shape: {cutter.shape}, new shape: {all_playtype_data2.shape}')

 previous shape: (4757, 40), cutter shape: (3700, 20), new shape: (4757, 60)


#### Add Ball Handler

In [36]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
ball_handler['playtype_ball_handler__season'] = ball_handler['playtype_ball_handler__season'].astype(int)

In [37]:
# Left-join the ball-handler stats onto the running table, keyed by the
# spot-up player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_ball_handler__player', 'playtype_ball_handler__season', 'playtype_ball_handler__season_type', 'playtype_ball_handler__team']
all_playtype_data3 = all_playtype_data2.merge(ball_handler, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data2.shape}, ball_handler shape: {ball_handler.shape}, new shape: {all_playtype_data3.shape}')

 previous shape: (4757, 60), ball_handler shape: (3243, 20), new shape: (4757, 80)


#### Add Hand Off

In [38]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
hand_off['playtype_hand_off__season'] = hand_off['playtype_hand_off__season'].astype(int)

In [39]:
# Left-join the hand-off stats onto the running table, keyed by the spot-up
# player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_hand_off__player', 'playtype_hand_off__season', 'playtype_hand_off__season_type', 'playtype_hand_off__team']
all_playtype_data4 = all_playtype_data3.merge(hand_off, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data3.shape}, hand_off shape: {hand_off.shape}, new shape: {all_playtype_data4.shape}')

 previous shape: (4757, 80), hand_off shape: (2840, 20), new shape: (4757, 100)


#### Add Isolation

In [40]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
iso['playtype_iso__season'] = iso['playtype_iso__season'].astype(int)

In [41]:
# Left-join the isolation stats onto the running table, keyed by the spot-up
# player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_iso__player', 'playtype_iso__season', 'playtype_iso__season_type', 'playtype_iso__team']
all_playtype_data5 = all_playtype_data4.merge(iso, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data4.shape}, iso shape: {iso.shape}, new shape: {all_playtype_data5.shape}')

 previous shape: (4757, 100), iso shape: (2977, 20), new shape: (4757, 120)


#### Add Off Screen

In [42]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
off_screen['playtype_off_screen__season'] = off_screen['playtype_off_screen__season'].astype(int)

In [43]:
# Left-join the off-screen stats onto the running table, keyed by the
# spot-up player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_off_screen__player', 'playtype_off_screen__season', 'playtype_off_screen__season_type', 'playtype_off_screen__team']
all_playtype_data6 = all_playtype_data5.merge(off_screen, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data5.shape}, off_screen shape: {off_screen.shape}, new shape: {all_playtype_data6.shape}')

 previous shape: (4757, 120), off_screen shape: (2548, 20), new shape: (4757, 140)


#### Add Postup

In [44]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
postup['playtype_postup__season'] = postup['playtype_postup__season'].astype(int)

In [45]:
# Left-join the post-up stats onto the running table, keyed by the spot-up
# player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_postup__player', 'playtype_postup__season', 'playtype_postup__season_type', 'playtype_postup__team']
all_playtype_data7 = all_playtype_data6.merge(postup, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data6.shape}, postup shape: {postup.shape}, new shape: {all_playtype_data7.shape}')

 previous shape: (4757, 140), postup shape: (2016, 20), new shape: (4757, 160)


#### Add Putbacks

In [46]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
putback['playtype_putback__season'] = putback['playtype_putback__season'].astype(int)

In [47]:
# Left-join the putback stats onto the running table, keyed by the spot-up
# player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_putback__player', 'playtype_putback__season', 'playtype_putback__season_type', 'playtype_putback__team']
all_playtype_data8 = all_playtype_data7.merge(putback, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data7.shape}, putback shape: {putback.shape}, new shape: {all_playtype_data8.shape}')

 previous shape: (4757, 160), putback shape: (3264, 20), new shape: (4757, 180)


#### Add Roll Man

In [48]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
rollman['playtype_rollman__season'] = rollman['playtype_rollman__season'].astype(int)

In [49]:
# Left-join the roll-man stats onto the running table, keyed by the spot-up
# player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_rollman__player', 'playtype_rollman__season', 'playtype_rollman__season_type', 'playtype_rollman__team']
all_playtype_data9 = all_playtype_data8.merge(rollman, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data8.shape}, rollman shape: {rollman.shape}, new shape: {all_playtype_data9.shape}')

 previous shape: (4757, 180), rollman shape: (2523, 20), new shape: (4757, 200)


#### Add Misc

In [50]:
# The season is sliced out of the file name as text; cast it to int so this
# merge key has the same dtype as the spot-up season key it is joined on.
# `misc` was bound to the same object as `misc_reg`, so this column
# assignment is visible through both names.
misc['playtype_misc__season'] = misc['playtype_misc__season'].astype(int)

In [51]:
# Left-join the misc stats (regular season only) onto the running table,
# keyed by the spot-up player / season / season type / team columns.
left_keys = ['playtype_spot_up__player', 'playtype_spot_up__season', 'playtype_spot_up__season_type', 'playtype_spot_up__team']
right_keys = ['playtype_misc__player', 'playtype_misc__season', 'playtype_misc__season_type', 'playtype_misc__team']
all_playtype_data10 = all_playtype_data9.merge(misc, how = 'left', left_on = left_keys, right_on = right_keys)

print(f' previous shape: {all_playtype_data9.shape}, misc shape: {misc.shape}, new shape: {all_playtype_data10.shape}')

 previous shape: (4757, 200), misc shape: (3920, 23), new shape: (4757, 223)


In [52]:
# Write the fully merged table (spot-up rows with every other play type
# left-joined on) to the aggregates folder. The DataFrame index is written
# as the first, unnamed column because to_csv is called with its defaults.
all_playtype_data10.to_csv('data/player/aggregates/ALL_Playtypes_Offense.csv')