In [57]:
# Import Libraries
import requests
import pandas as pd
import numpy as np
from pathlib import Path

# Define Global Variables
# Input/output folders live next to the notebook and are created on first run.
CWD = Path('.')
IN = CWD / "in"
OUT = CWD / "out"
IN.mkdir(exist_ok=True)
OUT.mkdir(exist_ok=True)

# Read Dataset
dataframe = pd.read_csv(OUT / "full_data.csv")

# Dataset Variables
# DATE is stored as MM/DD/YY text; parse it so later cells can use the `.dt` accessor.
dataframe["DATE"] = pd.to_datetime(dataframe["DATE"], format="%m/%d/%y")

# Distinct game days as YYYYMMDD strings, in order of first appearance.
# Rows with a missing DATE are skipped so they cannot produce an invalid date string,
# and the strings are derived directly from the column instead of copying the whole frame.
dates = dataframe["DATE"].dropna().dt.strftime('%Y%m%d').unique().tolist()
dates

['20240414',
 '20240412',
 '20240411',
 '20240410',
 '20240409',
 '20240407',
 '20240406',
 '20240405',
 '20240404',
 '20240403',
 '20240402',
 '20240401',
 '20240331',
 '20240330',
 '20240329',
 '20240328',
 '20240327',
 '20240326',
 '20240325',
 '20240324',
 '20240323',
 '20240322',
 '20240321',
 '20240320',
 '20240319',
 '20240318',
 '20240317',
 '20240316',
 '20240315',
 '20240314',
 '20240313',
 '20240312',
 '20240311',
 '20240310',
 '20240309',
 '20240308',
 '20240307',
 '20240306',
 '20240305',
 '20240304',
 '20240303',
 '20240302',
 '20240301',
 '20240229',
 '20240228',
 '20240227',
 '20240226',
 '20240225',
 '20240224',
 '20240223',
 '20240222',
 '20240215',
 '20240214',
 '20240213',
 '20240212',
 '20240211',
 '20240210',
 '20240209',
 '20240208',
 '20240207',
 '20240206',
 '20240205',
 '20240204',
 '20240203',
 '20240202',
 '20240201',
 '20240131',
 '20240130',
 '20240129',
 '20240128',
 '20240127',
 '20240126',
 '20240125',
 '20240124',
 '20240123',
 '20240122',
 '20240121',

In [51]:
# NOTE: this cell calls `format_team` and `format_name`, which the notebook defines
# in a later cell; on a fresh kernel that cell has to be executed before this one.
merged_df = dataframe.copy()
# Same YYYYMMDD key that the lineup records carry, used to join the two tables later.
merged_df["datestring"] = merged_df["DATE"].dt.strftime('%Y%m%d')

# Extracting data
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks the cell forever
players_data = []
# One session for all dates so the HTTP connection to the host is reused.
with requests.Session() as session:
    for date in dates:
        url = f'https://stats.nba.com/js/data/leaders/00_daily_lineups_{date}.json'
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
        response.raise_for_status()
        data = response.json()

        # Loop through each game in the data
        for game in data['games']:
            game_id = game['gameId']
            for team_type in ['homeTeam', 'awayTeam']:
                team = game[team_type]
                team_id = team['teamId']
                team_abbreviation = team['teamAbbreviation']

                # Loop through each player in the team
                for player in team['players']:
                    player_info = {
                        'Game ID': game_id,
                        'Team ID': team_id,
                        'Team Abbreviation': format_team(team_abbreviation),
                        'Player ID': player['personId'],
                        'Player Name': format_name(player['playerName']),
                        'Position': player['position'],
                        'Status': player['rosterStatus'],
                        'Timestamp': player['timestamp'],
                        'datestring': date
                    }
                    players_data.append(player_info)

In [62]:
# Turn the scraped lineup records into a lookup table that uses the same column
# names as the box-score frame; blank positions become NaN.
lookup_columns = ["datestring", "TEAM", "NAME", "POSITION"]
positions = (
    pd.DataFrame(players_data)
    .rename(columns={'Player Name': 'NAME', 'Team Abbreviation': 'TEAM', 'Position': "POSITION"})
    .loc[:, lookup_columns]
    .replace('', np.nan)
)
merged_df

Unnamed: 0,DATE,TEAM,OPPONENT,NAME,POSITION,MINUTES,POINTS,REBOUNDS,ASSISTS,PTS+REB+AST,...,FTM,FTA,OREB,DREB,FANTASY,STARTER?,HOME?,WIN?,game_id,datestring
0,2024-04-14,BOS,WAS,Payton Pritchard,PG,43,38,9,12,59,...,4,4,1,8,64.8,False,True,True,bos20240414,20240414
1,2024-04-14,BOS,WAS,Svi Mykhailiuk,SF,37,26,5,5,36,...,0,0,1,4,41.5,False,True,True,bos20240414,20240414
2,2024-04-14,BOS,WAS,Sam Hauser,SF,35,16,5,2,23,...,0,0,1,4,34.0,False,True,True,bos20240414,20240414
3,2024-04-14,BOS,WAS,Luke Kornet,C,27,11,8,5,24,...,3,3,3,5,37.1,False,True,True,bos20240414,20240414
4,2024-04-14,BOS,WAS,Jaden Springer,SG,20,7,2,0,9,...,2,2,0,2,20.4,False,True,True,bos20240414,20240414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26600,2023-10-24,PHO,GSW,Grayson Allen,SG,20,0,4,2,6,...,0,0,3,1,6.8,False,False,True,gsw20231024,20231024
26601,2023-10-24,PHO,GSW,Drew Eubanks,C,19,4,7,2,13,...,2,4,2,5,21.4,False,False,True,gsw20231024,20231024
26602,2023-10-24,PHO,GSW,Yuta Watanabe,SF,17,8,4,0,12,...,0,0,0,4,14.8,False,False,True,gsw20231024,20231024
26603,2023-10-24,PHO,GSW,Jordan Goodwin,PG,14,2,4,3,9,...,0,0,1,3,12.3,False,False,True,gsw20231024,20231024


In [69]:
# Columns that identify one player's row on one game day in both tables.
merge_keys = ['NAME', 'TEAM', 'datestring']

# Lineup rows without a position never change the result, and repeated keys would
# multiply box-score rows in the left merge, so keep one usable row per key.
position_lookup = (
    positions[merge_keys + ['POSITION']]
    .dropna(subset=['POSITION'])
    .drop_duplicates(subset=merge_keys)
)

updated_df = pd.merge(merged_df, position_lookup,
                      on=merge_keys,
                      how='left',
                      suffixes=('', '_new'),
                      validate='many_to_one')  # raises if the lookup keys are not unique

# Update 'POSITION' only where 'POSITION_new' is not NaN
updated_df['POSITION'] = updated_df['POSITION_new'].fillna(updated_df['POSITION'])

# Drop the temporary merge columns and restore DATE to its original MM/DD/YY text form
updated_df = updated_df.drop(columns=['POSITION_new', 'datestring'])
updated_df["DATE"] = updated_df["DATE"].dt.strftime('%m/%d/%y')
updated_df

Unnamed: 0,DATE,TEAM,OPPONENT,NAME,POSITION,MINUTES,POINTS,REBOUNDS,ASSISTS,PTS+REB+AST,...,3PA,FTM,FTA,OREB,DREB,FANTASY,STARTER?,HOME?,WIN?,game_id
0,04/14/24,BOS,WAS,Payton Pritchard,PG,43,38,9,12,59,...,6,4,4,1,8,64.8,False,True,True,bos20240414
1,04/14/24,BOS,WAS,Svi Mykhailiuk,SG,37,26,5,5,36,...,8,0,0,1,4,41.5,False,True,True,bos20240414
2,04/14/24,BOS,WAS,Sam Hauser,PF,35,16,5,2,23,...,11,0,0,1,4,34.0,False,True,True,bos20240414
3,04/14/24,BOS,WAS,Luke Kornet,C,27,11,8,5,24,...,0,3,3,3,5,37.1,False,True,True,bos20240414
4,04/14/24,BOS,WAS,Jaden Springer,SG,20,7,2,0,9,...,2,2,2,0,2,20.4,False,True,True,bos20240414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26600,10/24/23,PHO,GSW,Grayson Allen,SG,20,0,4,2,6,...,5,0,0,3,1,6.8,False,False,True,gsw20231024
26601,10/24/23,PHO,GSW,Drew Eubanks,C,19,4,7,2,13,...,0,2,4,2,5,21.4,False,False,True,gsw20231024
26602,10/24/23,PHO,GSW,Yuta Watanabe,SF,17,8,4,0,12,...,4,0,0,0,4,14.8,False,False,True,gsw20231024
26603,10/24/23,PHO,GSW,Jordan Goodwin,PG,14,2,4,3,9,...,1,0,0,1,3,12.3,False,False,True,gsw20231024


In [68]:
def format_date(date):
    """Return `date` rendered as a compact YYYYMMDD string.

    `date` may be any object exposing `strftime` (e.g. `datetime.date`).
    """
    compact_pattern = '%Y%m%d'
    return date.strftime(compact_pattern)

def format_name(name):
    """Reduce a player name to its first two whitespace-separated words.

    Anything after the second word (for example a trailing "Jr.") is dropped and
    runs of whitespace collapse to single spaces. A name with fewer than two
    words is returned as-is (whitespace-normalised) instead of raising
    IndexError; an empty or blank name yields an empty string.
    """
    return " ".join(name.split()[:2])

def format_team(team):
    """Map a team abbreviation onto the spelling used by the dataset.

    Only CHA, BKN and PHX are rewritten (to CHO, BRK and PHO respectively);
    every other abbreviation is returned unchanged.
    """
    renamed = {"CHA": "CHO", "BKN": "BRK", "PHX": "PHO"}
    return renamed.get(team, team)

In [70]:
# Write the position-corrected table to the output folder, replacing any
# previous export and leaving out the DataFrame index.
destination = OUT / "2024data_new.csv"
updated_df.to_csv(destination, mode="w", index=False)