In [1]:
import sys
import os

sys.path.append(os.path.dirname(os.getcwd())) 

import pandas as pd

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [2]:
from features.load_games_from_db import load_games_from_db

In [3]:
df = load_games_from_db()

2025-06-27 18:17:00,905 [INFO] Successully loaded 10641 games from database


There 10641 for  modeling...


In [4]:
df.head()

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type
0,634642,2021-04-01,2021-04-01 17:05:00+00:00,147,141,New York Yankees,Toronto Blue Jays,2,3,Final,Yankee Stadium,R
1,634645,2021-04-01,2021-04-01 17:10:00+00:00,116,114,Detroit Tigers,Cleveland Indians,3,2,Final,Comerica Park,R
2,634638,2021-04-01,2021-04-01 18:10:00+00:00,158,142,Milwaukee Brewers,Minnesota Twins,6,5,Final,American Family Field,R
3,634634,2021-04-01,2021-04-01 18:20:00+00:00,112,134,Chicago Cubs,Pittsburgh Pirates,3,5,Final,Wrigley Field,R
4,634622,2021-04-01,2021-04-01 19:05:00+00:00,143,144,Philadelphia Phillies,Atlanta Braves,3,2,Final,Citizens Bank Park,R


In [5]:
df.shape

(10641, 12)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10641 entries, 0 to 10640
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   game_id         10641 non-null  int64              
 1   game_date       10641 non-null  object             
 2   game_date_time  10641 non-null  datetime64[ns, UTC]
 3   home_team_id    10641 non-null  int64              
 4   away_team_id    10641 non-null  int64              
 5   home_team       10641 non-null  object             
 6   away_team       10641 non-null  object             
 7   home_score      10641 non-null  int64              
 8   away_score      10641 non-null  int64              
 9   state           10641 non-null  object             
 10  venue           10641 non-null  object             
 11  game_type       10641 non-null  object             
dtypes: datetime64[ns, UTC](1), int64(5), object(6)
memory usage: 997.7+ KB


In [7]:
df['game_date'] = pd.to_datetime(df['game_date'])

In [8]:
df.loc[df['home_team']=='Cleveland Indians', 'home_team'] = 'Cleveland Guardians'
df.loc[df['away_team']=='Cleveland Indians', 'away_team'] = 'Cleveland Guardians'

df.loc[df['home_team']=='Athletics', 'home_team'] = 'Oakland Athletics'
df.loc[df['away_team']=='Athletics', 'away_team'] = 'Oakland Athletics'

In [169]:
def team_schedule(
    df: pd.DataFrame, 
    date_col: str = 'game_date', 
    date_time_col: str = 'game_date_time'
) -> pd.DataFrame:
    # Extract home and away games for every team
    home_schedule = df[['home_team',date_col, date_time_col, 'home_score','away_score']].rename(columns={
        'home_team': 'team',
        'home_score': 'team_score',
        'away_score': 'opp_score'
    }).assign(home_ind=1)
    away_schedule = df[['away_team', date_col, date_time_col, 'home_score','away_score']].rename(columns={
        'away_team': 'team',
        'home_score': 'opp_score',
        'away_score': 'team_score'
    }).assign(home_ind=0)

    # Join them into one 'team' column
    team_schedule = (
        pd.concat([home_schedule, away_schedule])
        .sort_values(['team',date_time_col])
        .reset_index(drop=True)
    )
    team_schedule['team_win'] = (team_schedule['team_score'] > team_schedule['opp_score']).astype(int)

    return team_schedule
def team_rest_days(
    df: pd.DataFrame,
    date_col: str = 'game_date',
    date_time_col: str = 'game_date_time'
):
    df_team_sched = team_schedule(df, date_time_col=date_time_col, date_col=date_col)
    return (
        df_team_sched
        .groupby('team')[date_col]
        .diff()
        .dt.days
        .fillna(value=0)
        -1
    ).clip(lower=0)

def team_games_previous_7days(
    df: pd.DataFrame,
    date_col: str = 'game_date',
    date_time_col: str = 'game_date_time'
):
    df_team_sched = team_schedule(df, date_time_col=date_time_col, date_col=date_col)
    df_team_sched['yesterday_date'] = df_team_sched['game_date']-timedelta(days=1)
    
    return (
        df_team_sched
        .groupby('team')
        .rolling(window='7D', on='yesterday_date')
        .count()['game_date']
        .reset_index(drop=True)
    )
    
def team_win_rate_last_10(
    df: pd.DataFrame,
    date_col: str = 'game_date',
    date_time_col: str = 'game_date_time'
):
    df_team_sched = team_schedule(df,date_time_col=date_time_col, date_col=date_col)
    return (
        df_team_sched
        .groupby('team')['team_win']
        .rolling(window=10, min_periods=1)
        .mean()
        .reset_index(drop=True)
    )

In [163]:
dts = team_schedule(df)

In [158]:
df.query("away_team=='Arizona Diamondbacks'").head(3)

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type
6,634618,2021-04-01,2021-04-01 20:10:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,8,7,Final,Petco Park,R
17,634576,2021-04-02,2021-04-03 02:10:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,4,2,Final,Petco Park,R
30,634623,2021-04-03,2021-04-04 00:40:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,7,0,Final,Petco Park,R


In [164]:
dts.head()

Unnamed: 0,team,game_date,game_date_time,team_score,opp_score,home_ind,team_win
0,Arizona Diamondbacks,2021-04-01,2021-04-01 20:10:00+00:00,7,8,0,0
1,Arizona Diamondbacks,2021-04-02,2021-04-03 02:10:00+00:00,2,4,0,0
2,Arizona Diamondbacks,2021-04-03,2021-04-04 00:40:00+00:00,0,7,0,0
3,Arizona Diamondbacks,2021-04-04,2021-04-04 20:10:00+00:00,3,1,0,1
4,Arizona Diamondbacks,2021-04-06,2021-04-07 00:40:00+00:00,10,8,0,1


In [170]:
dts['win_rate_last_10'] = team_win_rate_last_10(df)

In [180]:
dts.head(730).tail(20)

Unnamed: 0,team,game_date,game_date_time,team_score,opp_score,home_ind,team_win,win_rate_last_10
710,Arizona Diamondbacks,2025-06-13,2025-06-14 01:40:00+00:00,5,1,1,1,0.7
711,Arizona Diamondbacks,2025-06-14,2025-06-14 23:15:00+00:00,8,7,1,1,0.7
712,Arizona Diamondbacks,2025-06-15,2025-06-15 20:10:00+00:00,2,8,1,0,0.6
713,Arizona Diamondbacks,2025-06-17,2025-06-17 23:07:00+00:00,4,5,0,0,0.5
714,Arizona Diamondbacks,2025-06-18,2025-06-18 23:07:00+00:00,1,8,0,0,0.5
715,Arizona Diamondbacks,2025-06-19,2025-06-19 19:07:00+00:00,9,5,0,1,0.6
716,Arizona Diamondbacks,2025-06-20,2025-06-21 00:40:00+00:00,14,8,0,1,0.7
717,Atlanta Braves,2021-04-01,2021-04-01 19:05:00+00:00,2,3,0,0,0.0
718,Atlanta Braves,2021-04-03,2021-04-03 20:05:00+00:00,0,4,0,0,0.0
719,Atlanta Braves,2021-04-04,2021-04-04 17:05:00+00:00,1,2,0,0,0.0


In [152]:
dts.isna().sum()

team                    0
game_date               0
game_date_time          0
home_ind                0
num_games_prev_7days    0
dtype: int64

In [135]:
dts['game_date_lag1'] = dts['game_date']-timedelta(days=1)
dts['game_date_lag8'] = dts['game_date']-timedelta(days=8)
dts['num_games_1wk'] = dts.groupby('team').rolling(window='7D', on='game_date_lag1').count()['game_date'].reset_index(drop=True)
dts['num_games_1wk_alt'] = dts.groupby('team').rolling(window='7D', on='game_date').count()['home_ind'].reset_index(drop=True)-1
#df_nyy = dts.query("team=='New York Yankees'").copy()

In [107]:
#df_nyy['num_games_7days'] = df_nyy.rolling(window='7D', on='game_date').count()['team']
#df_nyy['num_games_2days'] = df_nyy.rolling(window='2D', on='game_date').count()['team']
#df_nyy['7_days_prior'] = df_nyy['game_date']-timedelta(days=7)
df_nyy['1_days_prior'] = df_nyy['game_date']-timedelta(days=1)
df_nyy['num_games_7days'] = df_nyy.rolling(window='7D', on='1_days_prior').count()['team']

In [137]:
dts.tail(20)

Unnamed: 0,team,game_date,game_date_time,home_ind,game_date_lag1,num_games_7_days,num_games_1wk,num_games_1wk_alt,game_date_lag8
21262,Washington Nationals,2025-05-30,2025-05-31 01:40:00+00:00,0,2025-05-29,2025-05-29,6.0,5.0,2025-05-22
21263,Washington Nationals,2025-05-31,2025-06-01 02:10:00+00:00,0,2025-05-30,2025-05-30,6.0,5.0,2025-05-23
21264,Washington Nationals,2025-06-01,2025-06-01 20:10:00+00:00,0,2025-05-31,2025-05-31,6.0,5.0,2025-05-24
21265,Washington Nationals,2025-06-03,2025-06-03 22:45:00+00:00,1,2025-06-02,2025-06-02,6.0,5.0,2025-05-26
21266,Washington Nationals,2025-06-04,2025-06-04 22:45:00+00:00,1,2025-06-03,2025-06-03,6.0,5.0,2025-05-27
21267,Washington Nationals,2025-06-05,2025-06-05 22:45:00+00:00,1,2025-06-04,2025-06-04,6.0,5.0,2025-05-28
21268,Washington Nationals,2025-06-06,2025-06-06 22:45:00+00:00,1,2025-06-05,2025-06-05,6.0,5.0,2025-05-29
21269,Washington Nationals,2025-06-07,2025-06-07 20:05:00+00:00,1,2025-06-06,2025-06-06,6.0,5.0,2025-05-30
21270,Washington Nationals,2025-06-08,2025-06-08 17:35:00+00:00,1,2025-06-07,2025-06-07,6.0,5.0,2025-05-31
21271,Washington Nationals,2025-06-10,2025-06-10 23:10:00+00:00,0,2025-06-09,2025-06-09,6.0,5.0,2025-06-02


In [42]:

def get_sched_features(df: pd.DataFrame, date_col: str = 'game_date', date_time_col: str = 'game_date_time'):
    # create team schedule data
    df_team_sched = team_schedule(df, date_col=date_col, date_time_col=date_time_col)
    # add team_rest_days
    df_team_sched['team_rest_days'] = team_rest_days(df)
    # add rest_days_7day_avg
    df_team_sched['team_games_prev_7days'] = team_games_previous_7days(df)

    # Merge
    team_sched_cols = ['team','game_date','team_rest_days','team_games_prev_7days']
    df_merge = df.merge(
        df_team_sched[team_sched_cols], 
        how='left', 
        left_on=['home_team','game_date'], 
        right_on=['team','game_date']
    )
    df_merge.rename(columns={
        'team_rest_days':'home_team_rest_days',
        'team_games_prev_7days': 'home_team_games_prev_7days'
    }, 
                    inplace=True)
    df_merge.drop('team', axis=1, inplace=True)

    df_merge = df_merge.merge(
        df_team_sched[team_sched_cols],
        how='left',
        left_on=['away_team', 'game_date'],
        right_on=['team','game_date']
    )
    df_merge.rename(columns={
        'team_rest_days': 'away_team_rest_days',
        'team_games_prev_7days': 'away_team_games_prev_7days'
    },
    inplace=True)
    df_merge.drop('team', axis=1, inplace=True)

    return df_merge

In [73]:
d1 = team_schedule(df)

In [11]:
df.head()

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type
0,634642,2021-04-01,2021-04-01 17:05:00+00:00,147,141,New York Yankees,Toronto Blue Jays,2,3,Final,Yankee Stadium,R
1,634645,2021-04-01,2021-04-01 17:10:00+00:00,116,114,Detroit Tigers,Cleveland Guardians,3,2,Final,Comerica Park,R
2,634638,2021-04-01,2021-04-01 18:10:00+00:00,158,142,Milwaukee Brewers,Minnesota Twins,6,5,Final,American Family Field,R
3,634634,2021-04-01,2021-04-01 18:20:00+00:00,112,134,Chicago Cubs,Pittsburgh Pirates,3,5,Final,Wrigley Field,R
4,634622,2021-04-01,2021-04-01 19:05:00+00:00,143,144,Philadelphia Phillies,Atlanta Braves,3,2,Final,Citizens Bank Park,R


In [62]:
d1.head()

Unnamed: 0,team,game_date,game_date_time,home_ind
6,Arizona Diamondbacks,2021-04-01,2021-04-01 20:10:00+00:00,0
17,Arizona Diamondbacks,2021-04-02,2021-04-03 02:10:00+00:00,0
30,Arizona Diamondbacks,2021-04-03,2021-04-04 00:40:00+00:00,0
43,Arizona Diamondbacks,2021-04-04,2021-04-04 20:10:00+00:00,0
68,Arizona Diamondbacks,2021-04-06,2021-04-07 00:40:00+00:00,0


In [74]:
d1['team_rest_days_7day_sum'] = team_rest_days_7day_sum(df)

In [70]:
from datetime import timedelta
d1['game_date'][0:5]-timedelta(days=7)

6    2021-03-25
17   2021-03-26
30   2021-03-27
43   2021-03-28
68   2021-03-30
Name: game_date, dtype: datetime64[ns]

In [75]:
d1.head(20)

Unnamed: 0,team,game_date,game_date_time,home_ind,team_rest_days_7day_sum
6,Arizona Diamondbacks,2021-04-01,2021-04-01 20:10:00+00:00,0,0.0
17,Arizona Diamondbacks,2021-04-02,2021-04-03 02:10:00+00:00,0,0.0
30,Arizona Diamondbacks,2021-04-03,2021-04-04 00:40:00+00:00,0,0.0
43,Arizona Diamondbacks,2021-04-04,2021-04-04 20:10:00+00:00,0,0.0
68,Arizona Diamondbacks,2021-04-06,2021-04-07 00:40:00+00:00,0,0.0
85,Arizona Diamondbacks,2021-04-07,2021-04-08 00:40:00+00:00,0,1.0
89,Arizona Diamondbacks,2021-04-08,2021-04-08 19:10:00+00:00,0,1.0
103,Arizona Diamondbacks,2021-04-09,2021-04-10 01:40:00+00:00,1,1.0
116,Arizona Diamondbacks,2021-04-10,2021-04-11 00:10:00+00:00,1,1.0
128,Arizona Diamondbacks,2021-04-11,2021-04-11 20:10:00+00:00,1,1.0


In [16]:
d2 = df.merge(d1[['team','game_date','team_rest_days']], how='left', left_on=['home_team','game_date'], right_on=['team','game_date'])
d2.rename(columns={'team_rest_days': 'home_team_rest_days'}, inplace=True)
d2.drop('team', axis=1, inplace=True)
d3 = d2.merge(d1[['team','game_date','team_rest_days']], how='left', left_on=['away_team','game_date'], right_on=['team','game_date'])
d3.rename(columns={'team_rest_days': 'away_team_rest_days'},inplace=True)
d3.drop('team', axis=1, inplace=True)

In [17]:
d3.head()

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type,home_team_rest_days,away_team_rest_days
0,634642,2021-04-01,2021-04-01 17:05:00+00:00,147,141,New York Yankees,Toronto Blue Jays,2,3,Final,Yankee Stadium,R,0.0,0.0
1,634645,2021-04-01,2021-04-01 17:10:00+00:00,116,114,Detroit Tigers,Cleveland Guardians,3,2,Final,Comerica Park,R,0.0,0.0
2,634638,2021-04-01,2021-04-01 18:10:00+00:00,158,142,Milwaukee Brewers,Minnesota Twins,6,5,Final,American Family Field,R,0.0,0.0
3,634634,2021-04-01,2021-04-01 18:20:00+00:00,112,134,Chicago Cubs,Pittsburgh Pirates,3,5,Final,Wrigley Field,R,0.0,0.0
4,634622,2021-04-01,2021-04-01 19:05:00+00:00,143,144,Philadelphia Phillies,Atlanta Braves,3,2,Final,Citizens Bank Park,R,0.0,0.0


In [18]:
d3['home_team_rest_days'].value_counts(dropna=False)

home_team_rest_days
0.0      9242
1.0      1438
2.0        58
4.0        50
178.0      23
175.0      15
3.0        11
5.0         8
185.0       7
186.0       6
187.0       2
6.0         2
180.0       2
179.0       2
170.0       1
169.0       1
7.0         1
Name: count, dtype: int64

In [27]:
d1['team_rest_days_7day_avg'] = d1['team_rest_days'].shift(1).rolling(window=7, min_periods=1).sum().fillna(0)

In [28]:
d1.head(20)

Unnamed: 0,team,game_date,game_date_time,home_ind,team_rest_days,team_rest_days_7day_avg
6,Arizona Diamondbacks,2021-04-01,2021-04-01 20:10:00+00:00,0,0.0,0.0
17,Arizona Diamondbacks,2021-04-02,2021-04-03 02:10:00+00:00,0,0.0,0.0
30,Arizona Diamondbacks,2021-04-03,2021-04-04 00:40:00+00:00,0,0.0,0.0
43,Arizona Diamondbacks,2021-04-04,2021-04-04 20:10:00+00:00,0,0.0,0.0
68,Arizona Diamondbacks,2021-04-06,2021-04-07 00:40:00+00:00,0,1.0,0.0
85,Arizona Diamondbacks,2021-04-07,2021-04-08 00:40:00+00:00,0,0.0,1.0
89,Arizona Diamondbacks,2021-04-08,2021-04-08 19:10:00+00:00,0,0.0,1.0
103,Arizona Diamondbacks,2021-04-09,2021-04-10 01:40:00+00:00,1,0.0,1.0
116,Arizona Diamondbacks,2021-04-10,2021-04-11 00:10:00+00:00,1,0.0,1.0
128,Arizona Diamondbacks,2021-04-11,2021-04-11 20:10:00+00:00,1,0.0,1.0


In [48]:
df_sched_feat = get_sched_features(df=df)

In [49]:
df_sched_feat.head()

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type,home_team_rest_days,home_team_rest_days_7day_sum,away_team_rest_days,away_team_rest_days_7day_sum
0,634642,2021-04-01,2021-04-01 17:05:00+00:00,147,141,New York Yankees,Toronto Blue Jays,2,3,Final,Yankee Stadium,R,0.0,1.0,0.0,1.0
1,634645,2021-04-01,2021-04-01 17:10:00+00:00,116,114,Detroit Tigers,Cleveland Guardians,3,2,Final,Comerica Park,R,0.0,0.0,0.0,2.0
2,634638,2021-04-01,2021-04-01 18:10:00+00:00,158,142,Milwaukee Brewers,Minnesota Twins,6,5,Final,American Family Field,R,0.0,0.0,0.0,2.0
3,634634,2021-04-01,2021-04-01 18:20:00+00:00,112,134,Chicago Cubs,Pittsburgh Pirates,3,5,Final,Wrigley Field,R,0.0,1.0,0.0,0.0
4,634622,2021-04-01,2021-04-01 19:05:00+00:00,143,144,Philadelphia Phillies,Atlanta Braves,3,2,Final,Citizens Bank Park,R,0.0,0.0,0.0,2.0


In [58]:
cols = ['home_team','away_team', 'game_date','home_team_rest_days','home_team_rest_days_7day_sum', 'away_team_rest_days','away_team_rest_days_7day_sum']
df_sched_feat[cols].head(10)

Unnamed: 0,home_team,away_team,game_date,home_team_rest_days,home_team_rest_days_7day_sum,away_team_rest_days,away_team_rest_days_7day_sum
0,New York Yankees,Toronto Blue Jays,2021-04-01,0.0,1.0,0.0,1.0
1,Detroit Tigers,Cleveland Guardians,2021-04-01,0.0,0.0,0.0,2.0
2,Milwaukee Brewers,Minnesota Twins,2021-04-01,0.0,0.0,0.0,2.0
3,Chicago Cubs,Pittsburgh Pirates,2021-04-01,0.0,1.0,0.0,0.0
4,Philadelphia Phillies,Atlanta Braves,2021-04-01,0.0,0.0,0.0,2.0
5,Colorado Rockies,Los Angeles Dodgers,2021-04-01,0.0,2.0,0.0,0.0
6,San Diego Padres,Arizona Diamondbacks,2021-04-01,0.0,2.0,0.0,0.0
7,Cincinnati Reds,St. Louis Cardinals,2021-04-01,0.0,2.0,0.0,1.0
8,Kansas City Royals,Texas Rangers,2021-04-01,0.0,0.0,0.0,0.0
9,Miami Marlins,Tampa Bay Rays,2021-04-01,0.0,0.0,0.0,2.0


In [60]:
df_sched_feat.query("home_team == 'New York Yankees' | away_team == 'New York Yankees'")[cols]

Unnamed: 0,home_team,away_team,game_date,home_team_rest_days,home_team_rest_days_7day_sum,away_team_rest_days,away_team_rest_days_7day_sum
0,New York Yankees,Toronto Blue Jays,2021-04-01,0.0,1.0,0.0,1.0
19,New York Yankees,Toronto Blue Jays,2021-04-03,1.0,1.0,1.0,1.0
34,New York Yankees,Toronto Blue Jays,2021-04-04,0.0,2.0,0.0,2.0
48,New York Yankees,Baltimore Orioles,2021-04-05,0.0,2.0,0.0,1.0
61,New York Yankees,Baltimore Orioles,2021-04-06,0.0,1.0,0.0,1.0
84,New York Yankees,Baltimore Orioles,2021-04-07,0.0,1.0,0.0,0.0
95,Tampa Bay Rays,New York Yankees,2021-04-09,1.0,1.0,1.0,1.0
105,Tampa Bay Rays,New York Yankees,2021-04-10,0.0,2.0,0.0,2.0
122,Tampa Bay Rays,New York Yankees,2021-04-11,0.0,2.0,0.0,2.0
132,Toronto Blue Jays,New York Yankees,2021-04-12,1.0,0.0,0.0,1.0


In [52]:
d1[d1['team']=='New York Yankees'].head(5)

Unnamed: 0,team,game_date,game_date_time,home_ind,team_rest_days,team_rest_days_7day_avg
0,New York Yankees,2021-04-01,2021-04-01 17:05:00+00:00,1,0.0,1.0
19,New York Yankees,2021-04-03,2021-04-03 17:05:00+00:00,1,1.0,1.0
34,New York Yankees,2021-04-04,2021-04-04 17:05:00+00:00,1,0.0,2.0
48,New York Yankees,2021-04-05,2021-04-05 22:35:00+00:00,1,0.0,2.0
61,New York Yankees,2021-04-06,2021-04-06 22:35:00+00:00,1,0.0,1.0


In [55]:
d1.game_date[:5]

6    2021-04-01
17   2021-04-02
30   2021-04-03
43   2021-04-04
68   2021-04-06
Name: game_date, dtype: datetime64[ns]

In [56]:
d1.game_date.shift(1)[:5]

6           NaT
17   2021-04-01
30   2021-04-02
43   2021-04-03
68   2021-04-04
Name: game_date, dtype: datetime64[ns]

In [57]:
d1.game_date.shift(-1)[:5]

6    2021-04-02
17   2021-04-03
30   2021-04-04
43   2021-04-06
68   2021-04-07
Name: game_date, dtype: datetime64[ns]

In [27]:
d1['rest_days'] = (
    d1
    .sort_values(['team','game_date_time'])
    .groupby('team')['game_date_time'].diff().dt.days.fillna(value=0)
    -1
).clip(lower=0)

In [28]:
d1.head(30)

Unnamed: 0,team,game_date,game_date_time,home_ind,rest_days
6,Arizona Diamondbacks,2021-04-01,2021-04-01 20:10:00+00:00,0,0.0
17,Arizona Diamondbacks,2021-04-02,2021-04-03 02:10:00+00:00,0,0.0
30,Arizona Diamondbacks,2021-04-03,2021-04-04 00:40:00+00:00,0,0.0
43,Arizona Diamondbacks,2021-04-04,2021-04-04 20:10:00+00:00,0,0.0
68,Arizona Diamondbacks,2021-04-06,2021-04-07 00:40:00+00:00,0,1.0
85,Arizona Diamondbacks,2021-04-07,2021-04-08 00:40:00+00:00,0,0.0
89,Arizona Diamondbacks,2021-04-08,2021-04-08 19:10:00+00:00,0,0.0
103,Arizona Diamondbacks,2021-04-09,2021-04-10 01:40:00+00:00,1,0.0
116,Arizona Diamondbacks,2021-04-10,2021-04-11 00:10:00+00:00,1,0.0
128,Arizona Diamondbacks,2021-04-11,2021-04-11 20:10:00+00:00,1,0.0


In [30]:
d1[d1['rest_days']>1].head(20)

Unnamed: 0,team,game_date,game_date_time,home_ind,rest_days
1319,Arizona Diamondbacks,2021-07-16,2021-07-17 01:40:00+00:00,1,4.0
2362,Arizona Diamondbacks,2022-04-07,2022-04-08 01:40:00+00:00,1,185.0
3722,Arizona Diamondbacks,2022-07-22,2022-07-23 01:40:00+00:00,1,4.0
4752,Arizona Diamondbacks,2023-03-30,2023-03-31 02:10:00+00:00,0,175.0
6079,Arizona Diamondbacks,2023-07-14,2023-07-14 23:07:00+00:00,0,4.0
7142,Arizona Diamondbacks,2024-03-28,2024-03-29 02:10:00+00:00,1,178.0
8559,Arizona Diamondbacks,2024-07-19,2024-07-19 18:20:00+00:00,0,3.0
9538,Arizona Diamondbacks,2025-03-27,2025-03-28 02:10:00+00:00,1,178.0
744,Atlanta Braves,2021-05-29,2021-05-29 23:15:00+00:00,0,2.0
1313,Atlanta Braves,2021-07-16,2021-07-16 23:20:00+00:00,1,4.0


In [32]:
d1[(d1['game_date']>='2021-07-09') & (d1['game_date']<='2021-07-19')]

Unnamed: 0,team,game_date,game_date_time,home_ind,rest_days
1276,Arizona Diamondbacks,2021-07-09,2021-07-10 02:10:00+00:00,0,0.0
1291,Arizona Diamondbacks,2021-07-10,2021-07-11 02:10:00+00:00,0,0.0
1304,Arizona Diamondbacks,2021-07-11,2021-07-11 20:10:00+00:00,0,0.0
1319,Arizona Diamondbacks,2021-07-16,2021-07-17 01:40:00+00:00,1,4.0
1323,Arizona Diamondbacks,2021-07-17,2021-07-17 20:10:00+00:00,1,0.0
1348,Arizona Diamondbacks,2021-07-18,2021-07-18 20:10:00+00:00,1,0.0
1359,Arizona Diamondbacks,2021-07-19,2021-07-20 01:40:00+00:00,1,0.0
1270,Atlanta Braves,2021-07-09,2021-07-09 23:10:00+00:00,0,1.0
1286,Atlanta Braves,2021-07-10,2021-07-10 20:10:00+00:00,0,0.0
1298,Atlanta Braves,2021-07-11,2021-07-11 17:10:00+00:00,0,0.0


In [18]:
df[df['away_team']=='Arizona Diamondbacks'].head(10)

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type
6,634618,2021-04-01,2021-04-01 20:10:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,8,7,Final,Petco Park,R
17,634576,2021-04-02,2021-04-03 02:10:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,4,2,Final,Petco Park,R
30,634623,2021-04-03,2021-04-04 00:40:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,7,0,Final,Petco Park,R
43,634572,2021-04-04,2021-04-04 20:10:00+00:00,135,109,San Diego Padres,Arizona Diamondbacks,1,3,Final,Petco Park,R
68,634619,2021-04-06,2021-04-07 00:40:00+00:00,115,109,Colorado Rockies,Arizona Diamondbacks,8,10,Final,Coors Field,R
85,634542,2021-04-07,2021-04-08 00:40:00+00:00,115,109,Colorado Rockies,Arizona Diamondbacks,8,0,Final,Coors Field,R
89,634564,2021-04-08,2021-04-08 19:10:00+00:00,115,109,Colorado Rockies,Arizona Diamondbacks,7,3,Final,Coors Field,R
175,634481,2021-04-15,2021-04-15 23:05:00+00:00,120,109,Washington Nationals,Arizona Diamondbacks,6,11,Final,Nationals Park,R
181,634469,2021-04-16,2021-04-16 23:05:00+00:00,120,109,Washington Nationals,Arizona Diamondbacks,1,0,Final,Nationals Park,R
192,634507,2021-04-17,2021-04-17 17:05:00+00:00,120,109,Washington Nationals,Arizona Diamondbacks,6,2,Final,Nationals Park,R


In [16]:
df[df['home_team']=='Arizona Diamondbacks'].head()

Unnamed: 0,game_id,game_date,game_date_time,home_team_id,away_team_id,home_team,away_team,home_score,away_score,state,venue,game_type
103,634540,2021-04-09,2021-04-10 01:40:00+00:00,109,113,Arizona Diamondbacks,Cincinnati Reds,5,6,Final,Chase Field,R
116,634484,2021-04-10,2021-04-11 00:10:00+00:00,109,113,Arizona Diamondbacks,Cincinnati Reds,8,3,Final,Chase Field,R
128,632209,2021-04-11,2021-04-11 20:10:00+00:00,109,113,Arizona Diamondbacks,Cincinnati Reds,7,0,Final,Chase Field,R
140,632231,2021-04-12,2021-04-13 01:40:00+00:00,109,133,Arizona Diamondbacks,Oakland Athletics,5,9,Final,Chase Field,R
143,632206,2021-04-13,2021-04-13 19:40:00+00:00,109,133,Arizona Diamondbacks,Oakland Athletics,5,7,Final,Chase Field,R


In [22]:
df[['away_team','game_date','game_date_time']].head(20)

Unnamed: 0,away_team,game_date,game_date_time
0,Toronto Blue Jays,2021-04-01,2021-04-01 17:05:00+00:00
1,Cleveland Indians,2021-04-01,2021-04-01 17:10:00+00:00
2,Minnesota Twins,2021-04-01,2021-04-01 18:10:00+00:00
3,Pittsburgh Pirates,2021-04-01,2021-04-01 18:20:00+00:00
4,Atlanta Braves,2021-04-01,2021-04-01 19:05:00+00:00
5,Los Angeles Dodgers,2021-04-01,2021-04-01 20:10:00+00:00
6,Arizona Diamondbacks,2021-04-01,2021-04-01 20:10:00+00:00
7,St. Louis Cardinals,2021-04-01,2021-04-01 20:10:00+00:00
8,Texas Rangers,2021-04-01,2021-04-01 20:10:00+00:00
9,Tampa Bay Rays,2021-04-01,2021-04-01 20:10:00+00:00
