In [1]:
import pandas as pd

from datetime import datetime, timedelta
from nba_api.stats.static.teams import get_teams
from nba_api.stats.endpoints import playbyplayv2, leaguegamefinder

In [2]:
# Load the team list from nba_api's static team data and tabulate it,
# one row per entry returned by get_teams().
teams = get_teams()
teams_df = pd.DataFrame(teams)

In [17]:
# Display the teams table (a bare last expression renders the DataFrame).
teams_df

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966
5,1610612742,Dallas Mavericks,DAL,Mavericks,Dallas,Texas,1980
6,1610612743,Denver Nuggets,DEN,Nuggets,Denver,Colorado,1976
7,1610612744,Golden State Warriors,GSW,Warriors,Golden State,California,1946
8,1610612745,Houston Rockets,HOU,Rockets,Houston,Texas,1967
9,1610612746,Los Angeles Clippers,LAC,Clippers,Los Angeles,California,1970


In [18]:
def check_date_format(date: str) -> None:
    """Validate that `date` is a string in MM/DD/YYYY format.

    Args:
        date: Date string to validate.

    Returns:
        None. The function is called only for its validation side effect.

    Raises:
        ValueError: If `date` cannot be parsed with the '%m/%d/%Y' format.
    """
    try:
        datetime.strptime(date, '%m/%d/%Y')
    except ValueError as err:
        # Chain explicitly so the traceback presents the parsing failure as
        # the cause, not as a second error raised while handling the first.
        raise ValueError("Incorrect date format, should be MM/DD/YYYY") from err

def scrape_games_between(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch NBA game records for a date window via LeagueGameFinder.

    Args:
        start_date: Start of the query window, formatted MM/DD/YYYY.
        end_date: End of the query window, formatted MM/DD/YYYY.

    Returns:
        A DataFrame built from the 'LeagueGameFinderResults' entry of the
        endpoint's normalized response.

    Raises:
        ValueError: If either date is not in MM/DD/YYYY format.
    """
    check_date_format(start_date)
    check_date_format(end_date)
    # Query with the validated arguments themselves rather than any
    # notebook-level variables, so the function honours whatever range the
    # caller asks for and works on a fresh kernel.
    gamefinder = leaguegamefinder.LeagueGameFinder(
        date_from_nullable=start_date,
        date_to_nullable=end_date,
        league_id_nullable='00'  # NBA games only
    )
    games_dict = gamefinder.get_normalized_dict()
    games_df = pd.DataFrame(games_dict['LeagueGameFinderResults'])
    return games_df


# Query window: the 365 days ending at the moment this cell is run.
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

# The API takes MM/DD/YYYY strings, not datetime objects.
start_date_str = format(start_date, '%m/%d/%Y')
end_date_str = format(end_date, '%m/%d/%Y')

# Fetch every game record in the window and display the result.
games_df = scrape_games_between(start_date_str, end_date_str)
games_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22024,1610612747,LAL,Los Angeles Lakers,0022400454,2024-12-31,LAL vs. CLE,L,238,110,...,0.895,8,35,43,26,7,3,8,12,-12.0
1,22024,1610612746,LAC,LA Clippers,0022400451,2024-12-31,LAC @ SAS,L,238,86,...,0.833,10,31,41,17,7,3,13,15,-36.0
2,22024,1610612760,OKC,Oklahoma City Thunder,0022400452,2024-12-31,OKC vs. MIN,W,241,113,...,0.739,9,28,37,25,16,2,9,13,8.0
3,22024,1610612754,IND,Indiana Pacers,0022400450,2024-12-31,IND vs. MIL,L,240,112,...,0.633,9,41,50,26,8,7,16,20,-8.0
4,22024,1610612761,TOR,Toronto Raptors,0022400449,2024-12-31,TOR @ BOS,L,240,71,...,0.636,14,30,44,18,8,4,21,16,-54.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2781,22023,1610612738,BOS,Boston Celtics,0022300462,2024-01-02,BOS @ OKC,L,241,123,...,0.828,17,30,47,33,7,10,14,15,-4.0
2782,22023,1610612759,SAS,San Antonio Spurs,0022300460,2024-01-02,SAS @ MEM,L,238,98,...,0.696,6,34,40,21,6,8,14,24,-8.0
2783,22023,1610612744,GSW,Golden State Warriors,0022300463,2024-01-02,GSW vs. ORL,W,239,121,...,0.649,9,27,36,23,9,6,12,19,6.0
2784,22023,1610612766,CHA,Charlotte Hornets,0022300464,2024-01-02,CHA @ SAC,W,239,111,...,0.789,4,31,35,23,8,10,14,15,7.0


In [19]:
def scrape_gameflow(
    game_id: str,
    *,
    period_length: int = 12 * 60,
    overtime_length: int = 5 * 60,
    regulation_periods: int = 4,
) -> pd.DataFrame:
    """Build the score timeline of one game from its play-by-play feed.

    Only plays that carry a SCORE value are kept. Each kept play yields one
    row with the two parsed scores and the game-clock seconds left until the
    end of the last period that appears among the scored plays.

    Args:
        game_id: Game identifier passed to the PlayByPlayV2 endpoint.
        period_length: Seconds in a regulation period (default 12 minutes).
        overtime_length: Seconds in an overtime period (default 5 minutes).
        regulation_periods: Number of regulation periods (default 4); any
            later period is treated as overtime.

    Returns:
        A DataFrame with columns GAME_ID, HOME_SCORE, AWAY_SCORE and
        TIME_REMAINING, indexed like the scored rows of the play-by-play
        table. It is empty (same columns) when no play carries a score.
    """
    columns = ['GAME_ID', 'HOME_SCORE', 'AWAY_SCORE', 'TIME_REMAINING']

    play_by_play = playbyplayv2.PlayByPlayV2(game_id=game_id)
    df_plays = pd.DataFrame(play_by_play.get_normalized_dict()['PlayByPlay'])

    # An empty feed has no SCORE column at all; a feed without any scored
    # play would make the two-column split below fail. Both mean "no rows".
    if 'SCORE' not in df_plays.columns:
        return pd.DataFrame(columns=columns)
    df_plays = df_plays[df_plays['SCORE'].notna()]
    if df_plays.empty:
        return pd.DataFrame(columns=columns)

    # NOTE(review): the first number of SCORE is labelled HOME_SCORE here;
    # confirm the feed's ordering (it may be "visitor - home") before
    # relying on the column names.
    scores = df_plays['SCORE'].str.split(' - ', expand=True)
    scores.columns = ['HOME_SCORE', 'AWAY_SCORE']
    scores = scores.astype(int)
    scores.insert(0, 'GAME_ID', game_id)

    # Seconds shown on the period clock ("MM:SS") for each scored play.
    clock = df_plays['PCTIMESTRING'].str.split(':', expand=True).astype(int)
    clock_seconds = clock[0] * 60 + clock[1]

    # Cumulative game seconds at the END of each play's period. Regulation
    # and overtime periods have different lengths, so they are counted
    # separately instead of multiplying every period by period_length.
    periods = df_plays['PERIOD']
    elapsed_at_period_end = (
        periods.clip(upper=regulation_periods) * period_length
        + (periods - regulation_periods).clip(lower=0) * overtime_length
    )
    # The largest value corresponds to the last period seen, i.e. the full
    # game length covered by the scored plays.
    game_length = elapsed_at_period_end.max()

    scores['TIME_REMAINING'] = game_length - elapsed_at_period_end + clock_seconds
    return scores

# Example run: build and display the score timeline for a single game.
game_id = '0022300442'
gameflow_df = scrape_gameflow(game_id)
gameflow_df

Unnamed: 0,GAME_ID,HOME_SCORE,AWAY_SCORE,TIME_REMAINING
4,0022300442,0,2,2839
17,0022300442,0,4,2758
18,0022300442,2,4,2747
19,0022300442,2,6,2735
22,0022300442,2,9,2713
...,...,...,...,...
456,0022300442,88,104,68
458,0022300442,88,105,68
459,0022300442,90,105,53
462,0022300442,92,105,15
