In [45]:
import os

from sqlalchemy import create_engine

from nba_betting_ai.data.processing import prepare_game_data, merge_game_data
from nba_betting_ai.data.storage import load_games, load_teams, load_gameflow, delete_games


In [20]:
postgres_user = os.environ.get('POSTGRES_USER')
postgres_password = os.environ.get('POSTGRES_PASSWORD')
postgres_host = os.environ.get('POSTGRES_HOST')
postgres_port = os.environ.get('POSTGRES_PORT')
postgres_db = os.environ.get('POSTGRES_DB')


In [21]:
postgres_conn = f'postgresql://{postgres_user}:{postgres_password}@{postgres_host}:{postgres_port}/{postgres_db}'
engine = create_engine(postgres_conn)

In [22]:
df_games_raw = load_games(engine)

In [23]:
df_teams = load_teams(engine)
df_games = load_games(engine)
df_gameflow = load_gameflow(engine, game_id = list(df_games['game_id'].unique()))

In [24]:
suspicious_team_id = [50009, 15019, 12325, 15020, 15022, 12315, 15025]
suspicious_teams = df_games[df_games['team_id'].isin(suspicious_team_id)]
suspicious_teams.team_name.unique()

array(["Ra'anana Maccabi Ra'anana", 'Adelaide 36ers', 'Flamengo Flamengo',
       'New Zealand Breakers', 'Cairns Taipans', 'Madrid Baloncesto',
       'Ratiopharm Ulm'], dtype=object)

In [25]:
results = df_gameflow.groupby('game_id').tail(1)

In [26]:
results[results.home_score == results.away_score]

Unnamed: 0,game_id,home_score,away_score,time_remaining


In [27]:
df_games.wl.unique()

array(['W', 'L'], dtype=object)

In [28]:
df_gameflow[df_gameflow['game_id'] == '0022200840']

Unnamed: 0,game_id,home_score,away_score,time_remaining
223222,0022200840,3,0,2823
223223,0022200840,3,3,2797
223224,0022200840,6,3,2780
223225,0022200840,6,6,2766
223226,0022200840,9,6,2756
...,...,...,...,...
223326,0022200840,93,93,7
223327,0022200840,94,93,7
223328,0022200840,95,93,7
223329,0022200840,95,95,0


In [29]:
# df_games[df_games['game_id'] == '0012400001']
df_games[df_games['game_id'] == '0022200888']

Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl
860,2022-23,1610612753,ORL,Orlando Magic,22200888,2023-02-23,ORL vs. DET,W
867,2022-23,1610612765,DET,Detroit Pistons,22200888,2023-02-23,DET @ ORL,L


In [30]:
mask_away = df_games['matchup'].str.contains(' vs. ')
weird_games = df_games['game_id'][~mask_away][~df_games['game_id'][~mask_away].isin(df_games['game_id'][mask_away])].values
df_gameflow[df_gameflow['game_id'].isin(weird_games)].groupby('game_id').tail(1).sort_values('game_id')

Unnamed: 0,game_id,home_score,away_score,time_remaining
388981,12400001,103,107,0
407527,12400005,130,104,0
352405,12400007,125,98,0
397802,22400147,98,118,0
353119,22401229,110,102,0
349069,22401230,111,96,0
408798,62400001,81,97,0


In [31]:
df_games[df_games['game_id'].isin(weird_games)].groupby('game_id').tail(1)[[ 'game_id', 'team_name','matchup', 'wl']].sort_values('game_id')

Unnamed: 0,game_id,team_name,matchup,wl
6605,12400001,Denver Nuggets,BOS @ DEN,L
6594,12400005,Denver Nuggets,DEN @ BOS,L
6589,12400007,Toronto Raptors,WAS @ TOR,W
6312,22400147,Miami Heat,MIA @ WAS,W
5754,22401229,Atlanta Hawks,ATL @ MIL,L
5755,22401230,Houston Rockets,HOU @ OKC,L
5727,62400001,Milwaukee Bucks,MIL @ OKC,W


In [54]:
df_games = load_games(engine)
nonteams = df_games[~df_games['team_id'].isin(df_teams['team_id'].unique())]['game_id'].unique()
nonteams

array([], dtype=object)

In [51]:
nonteams = df_games[~df_games['team_id'].isin(df_teams['team_id'].unique())]['game_id'].unique()
nonteams

array(['0032100003'], dtype=object)

In [52]:
df_games[df_games['game_id'].isin(nonteams)]

Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl
7664,2021-22,1610616839,NIQ,Team Nique,32100003,2022-02-18,NIQ vs. WAL,L
7665,2021-22,1610616840,WAL,Team Walton,32100003,2022-02-18,WAL @ NIQ,W


In [43]:
df_games[df_games['game_id'].isin(nonteams)]

Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl,points_for,points_against,...,season_pts_against,season_games,last_5_wins,last_5_pts_for_avg,last_5_pts_for_total,last_5_pts_against_avg,last_5_pts_against_total,season_wins_avg,season_pts_for_avg,season_pts_against_avg
1898,2021-22,1610616849,IAH,Team Isiah,0032100006,2022-02-18,IAH vs. BAR,L,20,25,...,0.0,0,0.0,0.00,0.0,0.00,0.0,0.000000,0.000000,0.00000
1899,2021-22,1610616848,PAY,Team Payton,0032100005,2022-02-18,PAY @ BAR,L,48,50,...,0.0,0,0.0,0.00,0.0,0.00,0.0,0.000000,0.000000,0.00000
1900,2021-22,1610616849,IAH,Team Isiah,0032100004,2022-02-18,IAH vs. WOR,W,50,49,...,25.0,1,0.0,20.00,20.0,25.00,25.0,0.000000,20.000000,25.00000
1901,2021-22,1610616850,WOR,Team Worthy,0032100004,2022-02-18,WOR @ IAH,L,49,50,...,0.0,0,0.0,0.00,0.0,0.00,0.0,0.000000,0.000000,0.00000
1902,2021-22,1610616847,BAR,Team Barry,0032100005,2022-02-18,BAR vs. PAY,W,50,48,...,0.0,0,0.0,0.00,0.0,0.00,0.0,0.000000,0.000000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8397,2024-25,1610612755,PHI,Philadelphia 76ers,0012400011,2024-10-07,PHI vs. NZB,W,139,84,...,0.0,0,2.0,109.00,545.0,107.80,539.0,0.565217,114.021739,111.23913
8421,2024-25,1610612760,OKC,Oklahoma City Thunder,0012400029,2024-10-10,OKC vs. NZB,W,117,89,...,229.0,2,2.0,106.60,533.0,109.20,546.0,0.500000,112.500000,114.50000
8425,2024-25,15020,NZB,New Zealand Breakers,0012400029,2024-10-10,NZB @ OKC,L,89,117,...,255.0,2,0.0,82.75,331.0,118.75,475.0,0.000000,85.500000,127.50000
8481,2024-25,15025,ULM,Ratiopharm Ulm,0012400060,2024-10-16,ULM @ POR,L,100,111,...,0.0,0,0.0,0.00,0.0,0.00,0.0,0.000000,0.000000,0.00000


In [53]:
# delete_games(engine, list(nonteams))

In [32]:
df_games[df_games['game_id'].isin(df_games[df_games.wl.isna()]['game_id'])].game_id

Series([], Name: game_id, dtype: object)

In [33]:
len(df_games['team_id'].unique())

45

In [12]:
df_games

Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl
0,2022-23,1610612743,DEN,Denver Nuggets,0042200405,2023-06-12,DEN vs. MIA,W
1,2022-23,1610612748,MIA,Miami Heat,0042200405,2023-06-12,MIA @ DEN,L
2,2022-23,1610612748,MIA,Miami Heat,0042200404,2023-06-09,MIA vs. DEN,L
3,2022-23,1610612743,DEN,Denver Nuggets,0042200404,2023-06-09,DEN @ MIA,W
4,2022-23,1610612743,DEN,Denver Nuggets,0042200403,2023-06-07,DEN @ MIA,W
...,...,...,...,...,...,...,...,...
6799,2024-25,1610612737,ATL,Atlanta Hawks,0022400506,2025-01-07,ATL @ UTA,W
6800,2024-25,1610612745,HOU,Houston Rockets,0022400503,2025-01-07,HOU @ WAS,W
6801,2024-25,1610612764,WAS,Washington Wizards,0022400503,2025-01-07,WAS vs. HOU,L
6802,2024-25,1610612747,LAL,Los Angeles Lakers,0022400504,2025-01-07,LAL @ DAL,L


In [37]:
weird_games_2 = ['0032300004', '0032300005']
df_games[df_games['game_id'].isin(weird_games_2)]

Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl,points_for,points_against,...,season_pts_against,season_games,last_5_wins,last_5_pts_for_avg,last_5_pts_for_total,last_5_pts_against_avg,last_5_pts_against_total,season_wins_avg,season_pts_for_avg,season_pts_against_avg
7366,2023-24,1610616847,PAU,Team Pau,32300005,2024-02-16,PAU vs. DLF,L,36,41,...,0.0,0,4.0,35.0,140.0,28.25,113.0,1.0,40.0,25.0
7368,2023-24,1610616848,DLF,Team Detlef,32300005,2024-02-16,DLF @ PAU,W,41,36,...,26.0,1,0.0,28.666667,86.0,38.666667,116.0,0.0,13.0,26.0
7370,2023-24,1610616849,JAL,Team Jalen,32300004,2024-02-16,JAL @ TAM,W,40,35,...,13.0,1,3.0,31.2,156.0,28.8,144.0,1.0,26.0,13.0
7371,2023-24,1610616850,TAM,Team Tamika,32300004,2024-02-16,TAM vs. JAL,L,35,40,...,0.0,0,0.0,40.5,81.0,45.0,90.0,0.0,0.0,0.0


In [38]:
df_games[df_games_raw['game_id'].isin(weird_games)].sort_values(by='game_id')


  df_games[df_games_raw['game_id'].isin(weird_games)].sort_values(by='game_id')


Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl,points_for,points_against,...,season_pts_against,season_games,last_5_wins,last_5_pts_for_avg,last_5_pts_for_total,last_5_pts_against_avg,last_5_pts_against_total,season_wins_avg,season_pts_for_avg,season_pts_against_avg
5727,2023-24,1610612739,CLE,Cleveland Cavaliers,22300067,2023-10-25,CLE @ BKN,W,114,113,...,414.0,4,1.0,106.2,531.0,104.0,520.0,0.25,109.0,103.5
5726,2023-24,1610612758,SAC,Sacramento Kings,22300072,2023-10-25,SAC @ UTA,W,130,114,...,571.0,5,1.0,109.2,546.0,114.2,571.0,0.2,109.2,114.2
5753,2023-24,1610612749,MIL,Milwaukee Bucks,22300075,2023-10-26,MIL vs. PHI,W,118,117,...,547.0,5,3.0,108.0,540.0,109.4,547.0,0.6,108.0,109.4
5752,2023-24,1610612747,LAL,Los Angeles Lakers,22300076,2023-10-26,LAL vs. PHX,W,100,95,...,831.0,7,1.0,107.6,538.0,116.0,580.0,0.285714,110.714286,118.714286
5754,2023-24,1610612745,HOU,Houston Rockets,22300083,2023-10-27,HOU @ SAS,L,122,126,...,616.0,6,3.0,103.6,518.0,102.6,513.0,0.666667,106.666667,102.666667
5755,2023-24,1610612759,SAS,San Antonio Spurs,22300083,2023-10-27,SAS vs. HOU,W,126,122,...,671.0,6,3.0,113.4,567.0,109.8,549.0,0.5,114.666667,111.833333
6589,2023-24,1610612752,NYK,New York Knicks,22300401,2023-12-25,NYK vs. MIL,W,129,122,...,3619.0,32,3.0,121.4,607.0,121.4,607.0,0.53125,114.0625,113.09375
6591,2023-24,1610612749,MIL,Milwaukee Bucks,22300401,2023-12-25,MIL @ NYK,L,122,129,...,3991.0,34,5.0,130.8,654.0,115.4,577.0,0.735294,122.176471,117.382353
6586,2023-24,1610612756,PHX,Phoenix Suns,22300405,2023-12-25,PHX vs. DAL,L,114,128,...,3751.0,33,1.0,111.0,555.0,118.4,592.0,0.545455,115.30303,113.666667
6600,2023-24,1610612753,ORL,Orlando Magic,22300407,2023-12-26,ORL @ WAS,W,127,119,...,3480.0,32,1.0,109.0,545.0,117.0,585.0,0.625,112.46875,108.75


In [17]:
df_games[df_games['game_id'].isin(weird_games)]

Unnamed: 0,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl
5726,2024-25,1610612760,OKC,Oklahoma City Thunder,62400001,2024-12-17,MIL @ OKC,L
5727,2024-25,1610612749,MIL,Milwaukee Bucks,62400001,2024-12-17,MIL @ OKC,W
5752,2024-25,1610612749,MIL,Milwaukee Bucks,22401229,2024-12-14,ATL @ MIL,W
5753,2024-25,1610612760,OKC,Oklahoma City Thunder,22401230,2024-12-14,HOU @ OKC,W
5754,2024-25,1610612737,ATL,Atlanta Hawks,22401229,2024-12-14,ATL @ MIL,L
5755,2024-25,1610612745,HOU,Houston Rockets,22401230,2024-12-14,HOU @ OKC,L
6311,2024-25,1610612764,WAS,Washington Wizards,22400147,2024-11-02,MIA @ WAS,L
6312,2024-25,1610612748,MIA,Miami Heat,22400147,2024-11-02,MIA @ WAS,W
6586,2024-25,1610612764,WAS,Washington Wizards,12400007,2024-10-06,WAS @ TOR,L
6589,2024-25,1610612761,TOR,Toronto Raptors,12400007,2024-10-06,WAS @ TOR,W


In [18]:
df_gametime_agg = (
    df_gameflow.groupby('game_id')['time_remaining']
        .agg(['min', 'max'])
        .rename(columns={'min': 'min_tr', 'max': 'max_tr'})
        .assign(game_time_span=lambda x: x['max_tr'] - x['min_tr'])
        .reset_index().sort_values('min_tr')
) 
df_gametime_agg

Unnamed: 0,game_id,min_tr,max_tr,game_time_span
0,0012100001,0,2839,2839
2640,0022300676,0,2863,2863
2641,0022300677,0,2829,2829
2642,0022300678,0,2856,2856
2643,0022300679,0,2819,2819
...,...,...,...,...
1329,0022200595,0,2857,2857
1330,0022200596,0,2860,2860
1318,0022200584,0,2838,2838
3757,0032300005,5100,5923,823
