## Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import requests
import time

In [None]:
!pip install pandas numpy matplotlib seaborn requests time

### Scraping Play Type Data

Trying to scrape one iteration of the playtype page to get an understanding

In [3]:
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.9',
    'Connection': 'keep-alive',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

params = {
    'LeagueID': '00',
    'PerMode': 'PerGame',
    'PlayType': 'Transition',
    'PlayerOrTeam': 'P',
    'SeasonType': 'Regular Season',
    'SeasonYear': '2024-25',
    'TypeGrouping': 'offensive',
}

response = requests.get('https://stats.nba.com/stats/synergyplaytypes', params=params, headers=headers)

if response.status_code == 200:
    data = response.json()
    headers = data['resultSets'][0]['headers']
    rows = data['resultSets'][0]['rowSet']
    df = pd.DataFrame(rows, columns=headers)
    print(df.head())  # Display first few rows
else:
    print(f"Failed to retrieve data: {response.status_code}")

  SEASON_ID  PLAYER_ID              PLAYER_NAME     TEAM_ID TEAM_ABBREVIATION  \
0     22024     203507    Giannis Antetokounmpo  1610612749               MIL   
1     22024    1628983  Shai Gilgeous-Alexander  1610612760               OKC   
2     22024    1629632               Coby White  1610612741               CHI   
3     22024    1631128          Christian Braun  1610612743               DEN   
4     22024       2544             LeBron James  1610612747               LAL   

               TEAM_NAME   PLAY_TYPE TYPE_GROUPING  PERCENTILE  GP  ...  \
0        Milwaukee Bucks  Transition     Offensive       0.779  58  ...   
1  Oklahoma City Thunder  Transition     Offensive       0.784  68  ...   
2          Chicago Bulls  Transition     Offensive       0.751  64  ...   
3         Denver Nuggets  Transition     Offensive       0.863  70  ...   
4     Los Angeles Lakers  Transition     Offensive       0.648  59  ...   

   TOV_POSS_PCT  SF_POSS_PCT  PLUSONE_POSS_PCT  SCORE_POSS_PCT

In [4]:
df

Unnamed: 0,SEASON_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,PLAY_TYPE,TYPE_GROUPING,PERCENTILE,GP,...,TOV_POSS_PCT,SF_POSS_PCT,PLUSONE_POSS_PCT,SCORE_POSS_PCT,EFG_PCT,POSS,PTS,FGM,FGA,FGMX
0,22024,203507,Giannis Antetokounmpo,1610612749,MIL,Milwaukee Bucks,Transition,Offensive,0.779,58,...,0.115,0.281,0.073,0.652,0.717,6.1,7.7,2.9,4.1,1.2
1,22024,1628983,Shai Gilgeous-Alexander,1610612760,OKC,Oklahoma City Thunder,Transition,Offensive,0.784,68,...,0.079,0.133,0.028,0.586,0.633,5.2,6.6,2.4,4.1,1.7
2,22024,1629632,Coby White,1610612741,CHI,Chicago Bulls,Transition,Offensive,0.751,64,...,0.093,0.140,0.049,0.517,0.643,5.4,6.7,2.3,4.4,2.1
3,22024,1631128,Christian Braun,1610612743,DEN,Denver Nuggets,Transition,Offensive,0.863,70,...,0.066,0.165,0.041,0.642,0.671,4.5,5.9,2.4,3.6,1.3
4,22024,2544,LeBron James,1610612747,LAL,Los Angeles Lakers,Transition,Offensive,0.648,59,...,0.173,0.140,0.029,0.577,0.714,5.2,6.2,2.5,3.7,1.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,22024,203994,Jusuf Nurkić,1610612756,PHX,Phoenix Suns,Transition,Offensive,0.022,25,...,0.500,0.167,0.167,0.250,0.500,0.5,0.3,0.1,0.2,0.1
363,22024,1630544,Tre Mann,1610612766,CHA,Charlotte Hornets,Transition,Offensive,0.011,13,...,0.154,0.000,0.000,0.231,0.364,1.0,0.6,0.2,0.8,0.6
364,22024,1630205,Lamar Stevens,1610612763,MEM,Memphis Grizzlies,Transition,Offensive,0.016,9,...,0.091,0.091,0.000,0.364,0.333,1.2,0.8,0.3,1.0,0.7
365,22024,203994,Jusuf Nurkić,1610612766,CHA,Charlotte Hornets,Transition,Offensive,0.003,16,...,0.250,0.000,0.000,0.167,0.222,0.8,0.3,0.1,0.6,0.4


Creating a function to loop through all the different versions of the page to scrape.

In [3]:
def fetch_nba_data(season_years, play_types, type_groupings):
    url = 'https://stats.nba.com/stats/synergyplaytypes'

    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Origin': 'https://www.nba.com',
        'Referer': 'https://www.nba.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    base_params = {
        'LeagueID': '00',
        'PerMode': 'PerGame',
        'PlayerOrTeam': 'P',
        'SeasonType': 'Regular Season',
    }

    dataframes = {}

    for season in season_years:
        for play_type in play_types:
            for type_grouping in type_groupings:
                params = base_params.copy()
                params['SeasonYear'] = season
                params['PlayType'] = play_type
                params['TypeGrouping'] = type_grouping

                print(f"Fetching data for SeasonYear: {season}, PlayType: {play_type}, TypeGrouping: {type_grouping}...")

                try:
                    response = requests.get(url, params=params, headers=headers)
                    response.raise_for_status()  # Raise an error for HTTP failures

                    data = response.json()

                    # Extract relevant data (modify based on response structure)
                    if "resultSets" in data and data["resultSets"]:
                        results = data["resultSets"][0]  # Assuming first result set
                        df = pd.DataFrame(results["rowSet"], columns=results["headers"])
                        dataframes[(season, play_type, type_grouping)] = df
                    else:
                        print(f"No data found for {season}, {play_type}, {type_grouping}")
                        dataframes[(season, play_type, type_grouping)] = pd.DataFrame()

                except requests.exceptions.RequestException as e:
                    print(f"Request failed for {season}, {play_type}, {type_grouping} - {e}")
                    dataframes[(season, play_type, type_grouping)] = pd.DataFrame()

                time.sleep(1)  # Small delay to avoid getting blocked

    return dataframes

In [4]:
# Define parameter values
season_years = [f"{year}-{str(year+1)[-2:]}" for year in range(2015, 2025)]  # Generates '2015-16' to '2024-25'
play_types = ['isolation', 'transition', 'PRBallHandler', 'PRRollman', 'Postup', 'Spotup', 
              'Handoff', 'Cut', 'OffScreen', 'Putbacks', 'Misc']
type_groupings = ['offensive', 'defensive']

In [5]:
# Fetch data
playtypes_df = fetch_nba_data(season_years, play_types, type_groupings)

Fetching data for SeasonYear: 2015-16, PlayType: isolation, TypeGrouping: offensive...
Request failed for 2015-16, isolation, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2015-16&PlayType=isolation&TypeGrouping=offensive
Fetching data for SeasonYear: 2015-16, PlayType: isolation, TypeGrouping: defensive...
Request failed for 2015-16, isolation, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2015-16&PlayType=isolation&TypeGrouping=defensive
Fetching data for SeasonYear: 2015-16, PlayType: transition, TypeGrouping: offensive...
Request failed for 2015-16, transition, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&Sea

Fetching data for SeasonYear: 2017-18, PlayType: PRBallHandler, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: PRBallHandler, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: PRRollman, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: PRRollman, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Postup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Postup, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Spotup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Spotup, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Handoff, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Handoff, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Cut, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Cut, Typ

Fetching data for SeasonYear: 2019-20, PlayType: Putbacks, TypeGrouping: defensive...
Request failed for 2019-20, Putbacks, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2019-20&PlayType=Putbacks&TypeGrouping=defensive
Fetching data for SeasonYear: 2019-20, PlayType: Misc, TypeGrouping: offensive...
Fetching data for SeasonYear: 2019-20, PlayType: Misc, TypeGrouping: defensive...
Fetching data for SeasonYear: 2020-21, PlayType: isolation, TypeGrouping: offensive...
Request failed for 2020-21, isolation, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2020-21&PlayType=isolation&TypeGrouping=offensive
Fetching data for SeasonYear: 2020-21, PlayType: isolation, TypeGrouping: defensive...
Request failed for 2020-21, isolation, defensive

Fetching data for SeasonYear: 2022-23, PlayType: transition, TypeGrouping: defensive...
Request failed for 2022-23, transition, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2022-23&PlayType=transition&TypeGrouping=defensive
Fetching data for SeasonYear: 2022-23, PlayType: PRBallHandler, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23, PlayType: PRBallHandler, TypeGrouping: defensive...
Fetching data for SeasonYear: 2022-23, PlayType: PRRollman, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23, PlayType: PRRollman, TypeGrouping: defensive...
Fetching data for SeasonYear: 2022-23, PlayType: Postup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23, PlayType: Postup, TypeGrouping: defensive...
Fetching data for SeasonYear: 2022-23, PlayType: Spotup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23

Fetching data for SeasonYear: 2024-25, PlayType: Cut, TypeGrouping: defensive...
Fetching data for SeasonYear: 2024-25, PlayType: OffScreen, TypeGrouping: offensive...
Fetching data for SeasonYear: 2024-25, PlayType: OffScreen, TypeGrouping: defensive...
Fetching data for SeasonYear: 2024-25, PlayType: Putbacks, TypeGrouping: offensive...
Request failed for 2024-25, Putbacks, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2024-25&PlayType=Putbacks&TypeGrouping=offensive
Fetching data for SeasonYear: 2024-25, PlayType: Putbacks, TypeGrouping: defensive...
Request failed for 2024-25, Putbacks, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2024-25&PlayType=Putbacks&TypeGrouping=defensive
Fetching data for SeasonYear: 2024-25, PlayType

In [6]:
# Create a list to store modified DataFrames with a SeasonYear column
df_list = []

for (season, play_type, type_grouping), df in playtypes_df.items():
    df = df.copy()  # Avoid modifying the original DataFrame
    df['SeasonYear'] = season  # Add SeasonYear column
    df_list.append(df)

# Concatenate all DataFrames into one
playtypes_df_combined = pd.concat(df_list, ignore_index=True)

Creating a new column that abbreviates each player's name so that it matches the lineup dataset later on. This will allow us to join the data.

In [7]:
# Function to abbreviate player names
def abbreviate_name(full_name):
    parts = full_name.split()
    if len(parts) > 1:
        return f"{parts[0][0]}. {parts[-1]}"  # First initial + last name
    return full_name  # If there's only one name, return as is

In [8]:
# Apply the function to create the new column
playtypes_df_combined['Player_Abbrev'] = playtypes_df_combined['PLAYER_NAME'].apply(abbreviate_name)

In [9]:
playtypes_df_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43917 entries, 0 to 43916
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   SeasonYear         43917 non-null  object 
 1   SEASON_ID          43917 non-null  object 
 2   PLAYER_ID          43917 non-null  float64
 3   PLAYER_NAME        43917 non-null  object 
 4   TEAM_ID            43917 non-null  float64
 5   TEAM_ABBREVIATION  43917 non-null  object 
 6   TEAM_NAME          43917 non-null  object 
 7   PLAY_TYPE          43917 non-null  object 
 8   TYPE_GROUPING      43917 non-null  object 
 9   PERCENTILE         43917 non-null  float64
 10  GP                 43917 non-null  float64
 11  POSS_PCT           43917 non-null  float64
 12  PPP                43917 non-null  float64
 13  FG_PCT             43917 non-null  float64
 14  FT_POSS_PCT        43917 non-null  float64
 15  TOV_POSS_PCT       43917 non-null  float64
 16  SF_POSS_PCT        439

In [10]:
playtypes_df_combined.head()

Unnamed: 0,SeasonYear,SEASON_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,PLAY_TYPE,TYPE_GROUPING,PERCENTILE,...,SF_POSS_PCT,PLUSONE_POSS_PCT,SCORE_POSS_PCT,EFG_PCT,POSS,PTS,FGM,FGA,FGMX,Player_Abbrev
0,2015-16,22015,202704.0,Reggie Jackson,1610613000.0,DET,Detroit Pistons,PRBallHandler,Offensive,0.772,...,0.073,0.02,0.413,0.476,11.3,9.9,4.0,9.1,5.0,R. Jackson
1,2015-16,22015,203081.0,Damian Lillard,1610613000.0,POR,Portland Trail Blazers,PRBallHandler,Offensive,0.848,...,0.096,0.023,0.409,0.478,10.9,10.0,3.5,8.4,4.9,D. Lillard
2,2015-16,22015,101108.0,Chris Paul,1610613000.0,LAC,LA Clippers,PRBallHandler,Offensive,0.89,...,0.039,0.009,0.445,0.503,10.0,9.4,3.8,8.1,4.3,C. Paul
3,2015-16,22015,202689.0,Kemba Walker,1610613000.0,CHA,Charlotte Hornets,PRBallHandler,Offensive,0.807,...,0.084,0.009,0.418,0.462,9.7,8.7,3.2,7.8,4.5,K. Walker
4,2015-16,22015,201566.0,Russell Westbrook,1610613000.0,OKC,Oklahoma City Thunder,PRBallHandler,Offensive,0.707,...,0.107,0.022,0.406,0.477,9.0,7.7,2.7,6.3,3.5,R. Westbrook


In [11]:
playtypes_df_combined['SeasonYear'].value_counts()

2021-22    4802
2022-23    4661
2018-19    4626
2020-21    4462
2017-18    4426
2019-20    4371
2015-16    4228
2016-17    4213
2023-24    4112
2024-25    4016
Name: SeasonYear, dtype: int64

In [12]:
playtypes_df_combined['PLAY_TYPE'].value_counts()

Spotup           8726
PRBallHandler    6832
Handoff          5762
OffScreen        5419
Postup           5215
PRRollMan        4993
Misc             3626
Cut              3344
Name: PLAY_TYPE, dtype: int64

In [13]:
playtypes_df_combined['TYPE_GROUPING'].value_counts()

Offensive    23080
Defensive    20837
Name: TYPE_GROUPING, dtype: int64

In [15]:
playtypes_df_combined.to_csv('../outputs/playtype_data_2015_2024.csv', index=False)

### Scraping Lineup Data