## NBA Web Scraping - Synergy Sports API

### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import requests
import time

In [None]:
!pip install pandas numpy matplotlib seaborn requests time

### Scraping Play Type Data

Creating a function to loop through all the different versions of the page to scrape.

In [3]:
def fetch_nba_data(season_years, play_types, type_groupings):
    url = 'https://stats.nba.com/stats/synergyplaytypes'

    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Origin': 'https://www.nba.com',
        'Referer': 'https://www.nba.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    base_params = {
        'LeagueID': '00',
        'PerMode': 'PerGame',
        'PlayerOrTeam': 'P',
        'SeasonType': 'Regular Season',
    }

    dataframes = {}

    for season in season_years:
        for play_type in play_types:
            for type_grouping in type_groupings:
                params = base_params.copy()
                params['SeasonYear'] = season
                params['PlayType'] = play_type
                params['TypeGrouping'] = type_grouping

                print(f"Fetching data for SeasonYear: {season}, PlayType: {play_type}, TypeGrouping: {type_grouping}...")

                try:
                    response = requests.get(url, params=params, headers=headers)
                    response.raise_for_status()  # Raise an error for HTTP failures

                    data = response.json()

                    # Extract relevant data (modify based on response structure)
                    if "resultSets" in data and data["resultSets"]:
                        results = data["resultSets"][0]  # Assuming first result set
                        df = pd.DataFrame(results["rowSet"], columns=results["headers"])
                        dataframes[(season, play_type, type_grouping)] = df
                    else:
                        print(f"No data found for {season}, {play_type}, {type_grouping}")
                        dataframes[(season, play_type, type_grouping)] = pd.DataFrame()

                except requests.exceptions.RequestException as e:
                    print(f"Request failed for {season}, {play_type}, {type_grouping} - {e}")
                    dataframes[(season, play_type, type_grouping)] = pd.DataFrame()

                time.sleep(1)  # Small delay to avoid getting blocked

    return dataframes

In [4]:
# Define parameter values
season_years = [f"{year}-{str(year+1)[-2:]}" for year in range(2015, 2025)]  # Generates '2015-16' to '2024-25'
play_types = ['isolation', 'transition', 'PRBallHandler', 'PRRollman', 'Postup', 'Spotup', 
              'Handoff', 'Cut', 'OffScreen', 'Putbacks', 'Misc']
type_groupings = ['offensive', 'defensive']

In [5]:
# Fetch data
playtypes_df = fetch_nba_data(season_years, play_types, type_groupings)

Fetching data for SeasonYear: 2015-16, PlayType: isolation, TypeGrouping: offensive...
Request failed for 2015-16, isolation, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2015-16&PlayType=isolation&TypeGrouping=offensive
Fetching data for SeasonYear: 2015-16, PlayType: isolation, TypeGrouping: defensive...
Request failed for 2015-16, isolation, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2015-16&PlayType=isolation&TypeGrouping=defensive
Fetching data for SeasonYear: 2015-16, PlayType: transition, TypeGrouping: offensive...
Request failed for 2015-16, transition, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&Sea

Fetching data for SeasonYear: 2017-18, PlayType: PRBallHandler, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: PRBallHandler, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: PRRollman, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: PRRollman, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Postup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Postup, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Spotup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Spotup, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Handoff, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Handoff, TypeGrouping: defensive...
Fetching data for SeasonYear: 2017-18, PlayType: Cut, TypeGrouping: offensive...
Fetching data for SeasonYear: 2017-18, PlayType: Cut, Typ

Fetching data for SeasonYear: 2019-20, PlayType: Putbacks, TypeGrouping: defensive...
Request failed for 2019-20, Putbacks, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2019-20&PlayType=Putbacks&TypeGrouping=defensive
Fetching data for SeasonYear: 2019-20, PlayType: Misc, TypeGrouping: offensive...
Fetching data for SeasonYear: 2019-20, PlayType: Misc, TypeGrouping: defensive...
Fetching data for SeasonYear: 2020-21, PlayType: isolation, TypeGrouping: offensive...
Request failed for 2020-21, isolation, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2020-21&PlayType=isolation&TypeGrouping=offensive
Fetching data for SeasonYear: 2020-21, PlayType: isolation, TypeGrouping: defensive...
Request failed for 2020-21, isolation, defensive

Fetching data for SeasonYear: 2022-23, PlayType: transition, TypeGrouping: defensive...
Request failed for 2022-23, transition, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2022-23&PlayType=transition&TypeGrouping=defensive
Fetching data for SeasonYear: 2022-23, PlayType: PRBallHandler, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23, PlayType: PRBallHandler, TypeGrouping: defensive...
Fetching data for SeasonYear: 2022-23, PlayType: PRRollman, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23, PlayType: PRRollman, TypeGrouping: defensive...
Fetching data for SeasonYear: 2022-23, PlayType: Postup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23, PlayType: Postup, TypeGrouping: defensive...
Fetching data for SeasonYear: 2022-23, PlayType: Spotup, TypeGrouping: offensive...
Fetching data for SeasonYear: 2022-23

Fetching data for SeasonYear: 2024-25, PlayType: Cut, TypeGrouping: defensive...
Fetching data for SeasonYear: 2024-25, PlayType: OffScreen, TypeGrouping: offensive...
Fetching data for SeasonYear: 2024-25, PlayType: OffScreen, TypeGrouping: defensive...
Fetching data for SeasonYear: 2024-25, PlayType: Putbacks, TypeGrouping: offensive...
Request failed for 2024-25, Putbacks, offensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2024-25&PlayType=Putbacks&TypeGrouping=offensive
Fetching data for SeasonYear: 2024-25, PlayType: Putbacks, TypeGrouping: defensive...
Request failed for 2024-25, Putbacks, defensive - 400 Client Error: Bad Request for url: https://stats.nba.com/stats/synergyplaytypes?LeagueID=00&PerMode=PerGame&PlayerOrTeam=P&SeasonType=Regular+Season&SeasonYear=2024-25&PlayType=Putbacks&TypeGrouping=defensive
Fetching data for SeasonYear: 2024-25, PlayType

In [6]:
# Create a list to store modified DataFrames with a SeasonYear column
df_list = []

for (season, play_type, type_grouping), df in playtypes_df.items():
    df = df.copy()  # Avoid modifying the original DataFrame
    df['SeasonYear'] = season  # Add SeasonYear column
    df_list.append(df)

# Concatenate all DataFrames into one
playtypes_df_combined = pd.concat(df_list, ignore_index=True)

Creating a new column that abbreviates each player's name so that it matches the lineup dataset later on. This will allow us to join the data.

In [3]:
# Function to abbreviate player names
def abbreviate_name(full_name):
    parts = full_name.split(' ', 1)  # Split only on the first space
    if len(parts) == 2:
        first_initial = parts[0][0]
        rest_of_name = parts[1]
        return f"{first_initial}. {rest_of_name}"
    return full_name

In [4]:
# Apply the function to create the new column
playtypes_df_combined['Player_Abbrev'] = playtypes_df_combined['PLAYER_NAME'].apply(abbreviate_name)

In [5]:
playtypes_df_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43917 entries, 0 to 43916
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   SeasonYear         43917 non-null  object 
 1   SEASON_ID          43917 non-null  int64  
 2   PLAYER_ID          43917 non-null  float64
 3   PLAYER_NAME        43917 non-null  object 
 4   TEAM_ID            43917 non-null  float64
 5   TEAM_ABBREVIATION  43917 non-null  object 
 6   TEAM_NAME          43917 non-null  object 
 7   PLAY_TYPE          43917 non-null  object 
 8   TYPE_GROUPING      43917 non-null  object 
 9   PERCENTILE         43917 non-null  float64
 10  GP                 43917 non-null  float64
 11  POSS_PCT           43917 non-null  float64
 12  PPP                43917 non-null  float64
 13  FG_PCT             43917 non-null  float64
 14  FT_POSS_PCT        43917 non-null  float64
 15  TOV_POSS_PCT       43917 non-null  float64
 16  SF_POSS_PCT        439

In [6]:
playtypes_df_combined.head()

Unnamed: 0,SeasonYear,SEASON_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,PLAY_TYPE,TYPE_GROUPING,PERCENTILE,...,SF_POSS_PCT,PLUSONE_POSS_PCT,SCORE_POSS_PCT,EFG_PCT,POSS,PTS,FGM,FGA,FGMX,Player_Abbrev
0,2015-16,22015,202704.0,Reggie Jackson,1610613000.0,DET,Detroit Pistons,PRBallHandler,Offensive,0.772,...,0.073,0.02,0.413,0.476,11.3,9.9,4.0,9.1,5.0,R. Jackson
1,2015-16,22015,203081.0,Damian Lillard,1610613000.0,POR,Portland Trail Blazers,PRBallHandler,Offensive,0.848,...,0.096,0.023,0.409,0.478,10.9,10.0,3.5,8.4,4.9,D. Lillard
2,2015-16,22015,101108.0,Chris Paul,1610613000.0,LAC,LA Clippers,PRBallHandler,Offensive,0.89,...,0.039,0.009,0.445,0.503,10.0,9.4,3.8,8.1,4.3,C. Paul
3,2015-16,22015,202689.0,Kemba Walker,1610613000.0,CHA,Charlotte Hornets,PRBallHandler,Offensive,0.807,...,0.084,0.009,0.418,0.462,9.7,8.7,3.2,7.8,4.5,K. Walker
4,2015-16,22015,201566.0,Russell Westbrook,1610613000.0,OKC,Oklahoma City Thunder,PRBallHandler,Offensive,0.707,...,0.107,0.022,0.406,0.477,9.0,7.7,2.7,6.3,3.5,R. Westbrook


In [11]:
playtypes_df_combined['SeasonYear'].value_counts()

2021-22    4802
2022-23    4661
2018-19    4626
2020-21    4462
2017-18    4426
2019-20    4371
2015-16    4228
2016-17    4213
2023-24    4112
2024-25    4016
Name: SeasonYear, dtype: int64

In [12]:
playtypes_df_combined['PLAY_TYPE'].value_counts()

Spotup           8726
PRBallHandler    6832
Handoff          5762
OffScreen        5419
Postup           5215
PRRollMan        4993
Misc             3626
Cut              3344
Name: PLAY_TYPE, dtype: int64

In [13]:
playtypes_df_combined['TYPE_GROUPING'].value_counts()

Offensive    23080
Defensive    20837
Name: TYPE_GROUPING, dtype: int64

In [9]:
playtypes_df_combined.to_csv('../../outputs/playtype_data_2015_2024.csv', index=False)

In [2]:
playtypes_df_combined = pd.read_csv('../../outputs/playtype_data_2015_2024.csv')

### Scraping Lineup Data

In [2]:
def fetch_nba_lineups(season_years):
    url = 'https://stats.nba.com/stats/leaguedashlineups'

    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Origin': 'https://www.nba.com',
        'Referer': 'https://www.nba.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    params = {
        'Conference': '',
        'DateFrom': '',
        'DateTo': '',
        'Division': '',
        'GameSegment': '',
        'GroupQuantity': '5',
        'ISTRound': '',
        'LastNGames': '0',
        'LeagueID': '00',
        'Location': '',
        'MeasureType': 'Advanced',
        'Month': '0',
        'OpponentTeamID': '0',
        'Outcome': '',
        'PORound': '0',
        'PaceAdjust': 'N',
        'PerMode': 'PerGame',
        'Period': '0',
        'PlusMinus': 'N',
        'Rank': 'N',
        'SeasonSegment': '',
        'SeasonType': 'Regular Season',
        'ShotClockRange': '',
        'TeamID': '0',
        'VsConference': '',
        'VsDivision': '',
    }

    dataframes = {}

    for season in season_years:
        params['Season'] = season
        print(f"Fetching data for SeasonYear: {season}...")

        try:
            response = requests.get(url, params=params, headers=headers)
            response.raise_for_status()

            data = response.json()

            if "resultSets" in data and data["resultSets"]:
                results = data["resultSets"][0]
                df = pd.DataFrame(results["rowSet"], columns=results["headers"])
                dataframes[season] = df
            else:
                print(f"No data found for {season}")
                dataframes[season] = pd.DataFrame()

        except requests.exceptions.RequestException as e:
            print(f"Request failed for {season} - {e}")
            dataframes[season] = pd.DataFrame()

        time.sleep(1)

    return dataframes

In [3]:
season_years = [f"{year}-{str(year+1)[-2:]}" for year in range(2015, 2025)] 

In [4]:
lineups_df = fetch_nba_lineups(season_years)

Fetching data for SeasonYear: 2015-16...
Fetching data for SeasonYear: 2016-17...
Fetching data for SeasonYear: 2017-18...
Fetching data for SeasonYear: 2018-19...
Fetching data for SeasonYear: 2019-20...
Fetching data for SeasonYear: 2020-21...
Fetching data for SeasonYear: 2021-22...
Fetching data for SeasonYear: 2022-23...
Fetching data for SeasonYear: 2023-24...
Fetching data for SeasonYear: 2024-25...


In [16]:
# Create a list to store modified DataFrames with a SeasonYear column
df_list = []

for season, df in lineups_df.items():
    df = df.copy()  # Avoid modifying the original DataFrame
    df['SeasonYear'] = season  # Add SeasonYear column
    df_list.append(df)

In [17]:
# Concatenate all DataFrames into one
lineups_df_combined = pd.concat(df_list, ignore_index=True)

In [18]:
lineups_df_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 50 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GROUP_SET          20000 non-null  object 
 1   GROUP_ID           20000 non-null  object 
 2   GROUP_NAME         20000 non-null  object 
 3   TEAM_ID            20000 non-null  int64  
 4   TEAM_ABBREVIATION  20000 non-null  object 
 5   GP                 20000 non-null  int64  
 6   W                  20000 non-null  int64  
 7   L                  20000 non-null  int64  
 8   W_PCT              20000 non-null  float64
 9   MIN                20000 non-null  float64
 10  E_OFF_RATING       20000 non-null  float64
 11  OFF_RATING         20000 non-null  float64
 12  E_DEF_RATING       20000 non-null  float64
 13  DEF_RATING         20000 non-null  float64
 14  E_NET_RATING       20000 non-null  float64
 15  NET_RATING         20000 non-null  float64
 16  AST_PCT            200

In [34]:
lineups_df_combined.head()

Unnamed: 0,GROUP_SET,GROUP_ID,GROUP_NAME,TEAM_ID,TEAM_ABBREVIATION,GP,W,L,W_PCT,MIN,...,EFG_PCT_RANK,TS_PCT_RANK,PACE_RANK,PIE_RANK,SeasonYear,Player1,Player2,Player3,Player4,Player5
0,Lineups,-101141-202694-202704-203083-203484-,E. Ilyasova - M. Morris - R. Jackson - A. Drum...,1610612765,DET,48,25,23,0.521,915.0,...,6273,6257,9164,6118,2015-16,E. Ilyasova,M. Morris,R. Jackson,A. Drummond,K. Caldwell-Pope
1,Lineups,-2594-200794-201143-201952-203145-,K. Korver - P. Millsap - A. Horford - J. Teagu...,1610612737,ATL,70,41,29,0.586,892.0,...,4587,5127,7537,5395,2015-16,K. Korver,P. Millsap,A. Horford,J. Teague,K. Bazemore
2,Lineups,-201142-201566-201586-203460-203500-,K. Durant - R. Westbrook - S. Ibaka - A. Rober...,1610612760,OKC,59,43,16,0.729,816.0,...,4260,4583,8850,4113,2015-16,K. Durant,R. Westbrook,S. Ibaka,A. Roberson,S. Adams
3,Lineups,-2546-101181-201167-201577-204001-,C. Anthony - J. Calderon - A. Afflalo - R. Lop...,1610612752,NYK,50,22,28,0.44,735.0,...,4803,5671,9840,6178,2015-16,C. Anthony,J. Calderon,A. Afflalo,R. Lopez,K. Porzingis
4,Lineups,-101161-202340-202738-203096-203109-,A. Johnson - A. Bradley - I. Thomas - J. Sulli...,1610612738,BOS,60,35,25,0.583,723.0,...,4794,5660,8556,5703,2015-16,A. Johnson,A. Bradley,I. Thomas,J. Sullinger,J. Crowder


In [33]:
# Split the 'GROUP_NAME' column by the ' - ' delimiter
player_names = lineups_df_combined['GROUP_NAME'].str.split(' - ', expand=True)

# Assign the split columns to new column names: Player1, Player2, ...
lineups_df_combined[['Player1', 'Player2', 'Player3', 'Player4', 'Player5']] = player_names

In [35]:
lineups_df_combined.to_csv('../../outputs/lineup_data_2015_2024.csv', index=False)

In [2]:
lineups_df_combined = pd.read_csv('../../outputs/lineup_data_2015_2024.csv')

In [6]:
lebron_lineups = lineups_df_combined[
    lineups_df_combined['GROUP_NAME'].str.contains(r'L\. James', regex=True, na=False)
]

In [7]:
lebron_lineups.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 426 entries, 11 to 19833
Data columns (total 55 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GROUP_SET          426 non-null    object 
 1   GROUP_ID           426 non-null    object 
 2   GROUP_NAME         426 non-null    object 
 3   TEAM_ID            426 non-null    int64  
 4   TEAM_ABBREVIATION  426 non-null    object 
 5   GP                 426 non-null    int64  
 6   W                  426 non-null    int64  
 7   L                  426 non-null    int64  
 8   W_PCT              426 non-null    float64
 9   MIN                426 non-null    float64
 10  E_OFF_RATING       426 non-null    float64
 11  OFF_RATING         426 non-null    float64
 12  E_DEF_RATING       426 non-null    float64
 13  DEF_RATING         426 non-null    float64
 14  E_NET_RATING       426 non-null    float64
 15  NET_RATING         426 non-null    float64
 16  AST_PCT            426 

In [8]:
lebron_lineups.to_csv('../../outputs/lebron_lineup_data_2015_2024.csv', index=False)

In [11]:
lebron_lineups = pd.read_csv('../../outputs/lebron_lineup_data_2015_2024.csv')

In [12]:
luka_lineups = lineups_df_combined[
    lineups_df_combined['GROUP_NAME'].str.contains(r'L\. Doncic|L\. Dončić', regex=True, na=False)
]

In [13]:
luka_lineups.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 282 entries, 6015 to 19765
Data columns (total 55 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GROUP_SET          282 non-null    object 
 1   GROUP_ID           282 non-null    object 
 2   GROUP_NAME         282 non-null    object 
 3   TEAM_ID            282 non-null    int64  
 4   TEAM_ABBREVIATION  282 non-null    object 
 5   GP                 282 non-null    int64  
 6   W                  282 non-null    int64  
 7   L                  282 non-null    int64  
 8   W_PCT              282 non-null    float64
 9   MIN                282 non-null    float64
 10  E_OFF_RATING       282 non-null    float64
 11  OFF_RATING         282 non-null    float64
 12  E_DEF_RATING       282 non-null    float64
 13  DEF_RATING         282 non-null    float64
 14  E_NET_RATING       282 non-null    float64
 15  NET_RATING         282 non-null    float64
 16  AST_PCT            28

In [14]:
luka_lineups.to_csv('../../outputs/luka_lineup_data_2015_2024.csv', index=False)

In [15]:
luka_lineups = pd.read_csv('../../outputs/luka_lineup_data_2015_2024.csv')