In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
base_nba_per_game = pd.read_csv("bases_nba_stats/base_nba_per_game_17_18_arrumada.csv", sep=";")

In [31]:
base_nba_per_game["fl_home"] = np.where(base_nba_per_game["GAME"].str[6:9] == base_nba_per_game["TEAM_ABBREVIATION"], 1, 0)

In [32]:
home_games = base_nba_per_game[base_nba_per_game["fl_home"] == 1].set_index("GAME")
away_games = base_nba_per_game[base_nba_per_game["fl_home"] == 0].set_index("GAME")

In [33]:
print("Home", home_games.shape)
print("Away", away_games.shape)

Home (1230, 112)
Away (1230, 112)


In [34]:
all_games = home_games.join(away_games, how="inner", lsuffix="_home", rsuffix="_away")
all_games.drop(["GAME_ID_away", "GAME_DATE_away", "GAME_PLACE_away",
                "MIN_home", "MIN_away", 'PTS_hustle_home', 'PTS_hustle_away',
                "fl_home_away", "fl_home_home"], axis=1, inplace=True)

In [35]:
all_games[["TEAM_ABBREVIATION_away", "PTS_away", "TEAM_ABBREVIATION_home", "PTS_home"]].head()

Unnamed: 0_level_0,TEAM_ABBREVIATION_away,PTS_away,TEAM_ABBREVIATION_home,PTS_home
GAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BOS @ CLE 2017-10-17,BOS,99,CLE,102
HOU @ GSW 2017-10-17,HOU,122,GSW,121
CHA @ DET 2017-10-18,CHA,90,DET,102
BKN @ IND 2017-10-18,BKN,131,IND,140
MIA @ ORL 2017-10-18,MIA,109,ORL,116


In [12]:
def get_season(date):
    ano = date.year
    if(date.month >= 10):
        return ano + 1
    return ano

def is_playoff(date):
    # Playoffs 2016
    if date >= datetime(2016, 4, 16) and date < datetime(2016, 6, 30):
        return 1
    
    # Playoffs 2017
    elif date >= datetime(2017, 4, 15) and date < datetime(2017, 6, 30):
        return 1
    
     # Playoffs 2018
    elif date >= datetime(2018, 4, 14) and date < datetime(2018, 6, 30):
        return 1
    
    return 0

In [13]:
all_games["DATE"] = [datetime.strptime(str(x), '%d/%m/%Y') for x in all_games.GAME_DATE_home]
all_games["SEASON"] = [get_season(x) for x in all_games.DATE]
all_games["fl_playoff"] = [is_playoff(x) for x in all_games.DATE]
all_games['fl_home_win'] = np.where(all_games['PTS_home'] > all_games['PTS_away'], 1, 0)
all_games = all_games.sort_values('DATE')

In [39]:
all_games = all_games.rename(columns={'TEAM_ABBREVIATION_home': "team_home",
                                     'TEAM_ABBREVIATION_away': "team_away"})

In [54]:
all_games["team_home_game_num"] = all_games.groupby(['team_home']).cumcount() + 1
all_games["team_away_game_num"] = all_games.groupby(['team_away']).cumcount() + 1

In [14]:
all_games.to_csv("./bases_nba_stats/all_games_nba_17_18.csv")

### Cria features de Número de Jogos

In [5]:
all_games = pd.read_csv("./bases_nba_stats/all_games_nba_17_18.csv").drop("Unnamed: 0", axis=1)

In [17]:
all_games["DATE"] = [datetime.strptime(str(x), '%d/%m/%Y') for x in all_games.GAME_DATE_home]

In [None]:
all_games["NUM_GAMES_LAST_2_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=2))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_home"] == x.team_home) |
                                                     (all_games["team_away"] == x.team_home)), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

all_games["NUM_GAMES_LAST_4_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=4))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_home"] == x.team_home) |
                                                     (all_games["team_away"] == x.team_home)), 1, 0).sum() 
                                           
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_LAST_6_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=6))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_home"] == x.team_home) |
                                                     (all_games["team_away"] == x.team_home)), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

all_games["NUM_GAMES_LAST_10_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=10))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_home"] == x.team_home) |
                                                     (all_games["team_away"] == x.team_home)), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

all_games["NUM_GAMES_AWAY_LAST_2_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=2))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_home), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_AWAY_LAST_4_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=4))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_home), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_AWAY_LAST_6_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=6))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_home), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_AWAY_LAST_10_DAYS_home"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=10))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_home), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

In [None]:
all_games["NUM_GAMES_LAST_2_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=2))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_away"] == x.team_away) |
                                                     (all_games["team_home"] == x.team_away)), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

all_games["NUM_GAMES_LAST_4_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=4))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_away"] == x.team_away) |
                                                     (all_games["team_home"] == x.team_away)), 1, 0).sum() 
                                           
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_LAST_6_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=6))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_away"] == x.team_away) |
                                                     (all_games["team_home"] == x.team_away)), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

all_games["NUM_GAMES_LAST_10_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=10))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    ((all_games["team_away"] == x.team_away) |
                                                     (all_games["team_home"] == x.team_away)), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

all_games["NUM_GAMES_AWAY_LAST_2_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=2))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_away), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_AWAY_LAST_4_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=4))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_away), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_AWAY_LAST_6_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=6))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_away), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]
all_games["NUM_GAMES_AWAY_LAST_10_DAYS_away"] = [np.where((all_games["DATE"] >= (x.DATE - timedelta(days=10))) & 
                                                    (all_games["DATE"] < x.DATE) &
                                                    (all_games["team_away"] == x.team_away), 1, 0).sum() 
                                           for _, x in all_games.iterrows()]

### Últimos 5 Jogos

In [7]:
home_columns = [x for x in all_games.columns if x.endswith("_home") and x not in ['GAME_ID_home', 'TEAM_CITY_home', 'GAME_DATE_home', 'GAME_PLACE_home', 'TEAM_NICKNAME_home']]
away_columns = [x for x in all_games.columns if x.endswith("_away") and x not in ['TEAM_CITY_away', 'TEAM_NICKNAME_away']]

In [None]:
# "['WIN_HOME_away' 'WIN_AWAY_away' 'WIN_away' 'WIN_HOME_PCT_away'\n 'WIN_AWAY_PCT_away'
# 'WIN_PCT_away' 'NUM_GAMES_HOME_away'\n 'NUM_GAMES_AWAY_away' 'DAYS_DIFF_TOTAL_away' 
# 'DAYS_DIFF_NEXT_GAME_away'\n 'DAYS_DIFF_NEXT_GAMES_STD_away']

In [21]:
# extra_columns = []

# home_columns += [x + "_home" for x in extra_columns]
# away_columns += [x + "_away" for x in extra_columns]

In [84]:
def cria_variaveis_sumarizacao(last_games, team_name, n = 5, data_ref = None, verbose = False):
    resp = {}
    
    # Cria variávies de Win %
    # Visão home
    resp["num_wins_home"] = [np.where((last_games["team_home"] == team_name) &
                             (last_games["fl_home_win"] == 1) , 1, 0).sum()]
    resp["num_games_home"] = [np.where(last_games["team_home"] == team_name, 1, 0).sum()]
    resp["win_home_pct"] = [(resp["num_wins_home"][0] / resp["num_games_home"][0])]
    
    # Visão Away
    resp["num_wins_away"] = [np.where((last_games["team_away"] == team_name) & 
                                      (last_games["fl_home_win"] == 0), 1, 0).sum()]
    resp["num_games_away"] = [np.where(last_games["team_away"] == team_name, 1, 0).sum()]
    resp["win_away_pct"] = [(resp["num_wins_away"][0] / resp["num_games_away"][0])]
    
    # Visão geral
    resp["num_wins_total"] = [resp["num_wins_away"][0] + resp["num_wins_home"][0]]
    resp["win_pct"] = [resp["num_wins_total"][0]/n]
    
    if verbose:
        print("Win_PCT", resp["win_pct"][0], resp["num_wins_away"][0], resp["num_wins_home"][0])
    
    # Cria variáveis de data
    if(data_ref is None):
        data_ref = np.max(last_games["DATE"]) + timedelta(days=1)
    
    # Visão da série
    resp["total_days_diff"] = [(np.max(last_games["DATE"]) - np.min(last_games["DATE"])).days]
    days_diff_last_games = [-x.days if not np.isnan(x.days) else 0 
                            for x in last_games["DATE"].sub(last_games["DATE"].shift(-1).fillna(data_ref))]
    resp["days_diff_last_games_std"] = [np.std(days_diff_last_games)]
    resp["days_diff_last_games_mean"] = [np.mean(days_diff_last_games)]
    
    if verbose:
        print("Days_Diff", days_diff_last_games, resp["days_diff_last_games_mean"][0], resp["total_days_diff"][0]) 
    
    # All
    resp["num_games_last_2_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=2))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    resp["num_games_last_4_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=4))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
                             
    resp["num_games_last_6_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=6))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    resp["num_games_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=8))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    resp["num_games_last_10_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=10))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    # Away
    resp["num_games_away_last_2_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=2))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    resp["num_games_away_last_4_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=4))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    resp["num_games_away_last_6_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=6))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()] 
    
    resp["num_games_away_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=8))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    resp["num_games_away_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=10))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    # Home    
    resp["num_games_home_last_2_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=2))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_4_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=4))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_6_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=6))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=8))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_10_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=10))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    if verbose:
        print("Num Games Last X Days", resp["num_games_last_6_days"][0], resp["num_games_away_last_6_days"][0])
    
    return(pd.DataFrame(resp))

In [115]:
def get_last_games(df, data, team_name, n = 5, filter="all", verbose=False):
    if(filter == "all"):
        last_games = df[(df["DATE"] < data) & 
                        ((df["team_home"] == team_name) | (df["team_away"] == team_name))].tail(n)
        
    elif(filter == "home"):
        last_games = df[(df["DATE"] < data) & (df["team_home"] == team_name)].tail(n)
    elif(filter == "away"):
        last_games = df[(df["DATE"] < data) & (df["team_away"] == team_name)].tail(n)

    if(verbose):
        print(last_games[["team_home", "team_away", "PTS_home", "PTS_away", "DATE"]])
    
    return(last_games)

def get_avg_last_games(last_games, team_name, n = 5, data_ref = None, rivals = False, 
                       to_drop=['fl_home_win', 'fl_playoff', 'fl_win','index', 'team_away_game_num',
                                'team_game_num', 'team_home_game_num', "TEAM_CITY_away", "TEAM_CITY_home",
                                "TEAM_CITY_opponent", "TEAM_ID_opponent", "TEAM_NAME_advanced_opponent",
                                "TEAM_NAME_fourfactors_opponent", "TEAM_NAME_hustle_opponent",
                                "TEAM_NAME_misc_opponent", "TEAM_NAME_opponent", "TEAM_NAME_playertrack_opponent",
                                "TEAM_NAME_scoring_opponent", "TEAM_NAME_advanced_opponent"]):
    if(rivals == False):
        last_games_home = last_games[last_games["team_home"] == team_name].drop(away_columns + to_drop, axis=1, errors="ignore").groupby(["team_home"]).sum()
        last_games_away = last_games[last_games["team_away"] == team_name].drop(home_columns + to_drop, axis=1, errors="ignore").groupby(["team_away"]).sum()
        last_games_home.columns = [x.replace("_home","") for x in last_games_home.columns]
        last_games_away.columns = [x.replace("_away","") for x in last_games_away.columns]
    else:
        last_games_home = last_games[last_games["team_home"] != team_name].drop(away_columns + to_drop, axis=1, errors="ignore").drop("team_home", axis = 1).sum()
        last_games_away = last_games[last_games["team_away"] != team_name].drop(home_columns + to_drop, axis=1, errors="ignore").drop("team_away", axis = 1).sum()
        last_games_home.index = [x.replace("_home","_opponent") for x in last_games_home.index]
        last_games_away.index = [x.replace("_away","_opponent") for x in last_games_away.index]
    
    if(len(last_games_home) == 0):
        if(rivals):
            resp = (last_games_away/n).to_frame().transpose()
        else:
            resp = last_games_away/n
    
    if(len(last_games_away) == 0):
        if(rivals):
            resp = (last_games_home/n).to_frame().transpose()
        else:
            resp = last_games_home/n
            
    if(len(last_games_away) > 0 and len(last_games_home) > 0):
        print("HOME", list(last_games_home.index), len(last_games_home.index))
        print("AWAY", list(last_games_away.index), len(last_games_away.index))
        if(rivals):
            resp = ((last_games_home + last_games_away) / n).to_frame().transpose()
        else:
            resp = ((last_games_home + last_games_away) / n)    
    
    if not rivals:
        var_criadas = cria_variaveis_sumarizacao(last_games, team_name, n = 5, data_ref = None)
        resp = pd.concat([resp, var_criadas], axis=1)
    
    return(resp)

In [111]:
teste = all_games[all_games["team_home"] == "ATL"]
teste = get_last_games(all_games, teste.iloc[10]["DATE"], "ATL", verbose=False).reset_index()

In [112]:
teste

Unnamed: 0,index,GAME,GAME_ID_home,TEAM_ID_home,TEAM_NAME_home,team_home,TEAM_CITY_home,FGM_home,FGA_home,FG_PCT_home,...,LOOSE_BALLS_RECOVERED_away,CHARGES_DRAWN_away,SCREEN_ASSISTS_away,BOX_OUTS_away,DATE,SEASON,fl_playoff,team_home_game_num,team_away_game_num,fl_home_win
0,259,LAC @ ATL 2017-11-22,21700256,1610612737,Hawks,ATL,Atlanta,40,83,0.482,...,8,2,4,19,2017-11-22,2018,0,7,9,0
1,270,NYK @ ATL 2017-11-24,21700268,1610612737,Hawks,ATL,Atlanta,47,99,0.475,...,6,0,7,21,2017-11-24,2018,0,8,6,1
2,278,TOR @ ATL 2017-11-25,21700280,1610612737,Hawks,ATL,Atlanta,26,75,0.347,...,6,2,8,17,2017-11-25,2018,0,9,12,0
3,311,CLE @ ATL 2017-11-30,21700313,1610612737,Hawks,ATL,Atlanta,41,79,0.519,...,11,1,9,47,2017-11-30,2018,0,10,11,0
4,330,ATL @ BKN 2017-12-02,21700328,1610612751,Nets,BKN,Brooklyn,34,82,0.415,...,13,3,18,42,2017-12-02,2018,0,10,12,0


In [113]:
cria_variaveis_sumarizacao(teste, "ATL", verbose = True)

Win_PCT 0.4 1 1
Days_Diff [2, 1, 5, 2, 1] 2.2 10
Num Games Last X Days 2 1


Unnamed: 0,num_wins_home,num_games_home,win_home_pct,num_wins_away,num_games_away,win_away_pct,num_wins_total,win_pct,total_days_diff,days_diff_last_games_std,...,num_games_last_10_days,num_games_away_last_2_days,num_games_away_last_4_days,num_games_away_last_6_days,num_games_away_last_8_days,num_games_home_last_2_days,num_games_home_last_4_days,num_games_home_last_6_days,num_games_home_last_8_days,num_games_home_last_10_days
0,1,4,0.25,1,1,1.0,2,0.4,10,1.469694,...,4,1,1,1,1,0,1,1,2,3


In [116]:
resp = get_avg_last_games(teste, "ATL", rivals=True)

HOME ['GAME', 'GAME_ID_opponent', 'TEAM_ID_opponent', 'TEAM_NAME_opponent', 'FGM_opponent', 'FGA_opponent', 'FG_PCT_opponent', 'FG3M_opponent', 'FG3A_opponent', 'FG3_PCT_opponent', 'FTM_opponent', 'FTA_opponent', 'FT_PCT_opponent', 'OREB_opponent', 'DREB_opponent', 'REB_opponent', 'AST_opponent', 'STL_opponent', 'BLK_opponent', 'TO_opponent', 'PF_opponent', 'PTS_opponent', 'PLUS_MINUS_opponent', 'TEAM_NAME_advanced_opponent', 'OFF_RATING_opponent', 'DEF_RATING_opponent', 'NET_RATING_opponent', 'AST_PCT_opponent', 'AST_TOV_opponent', 'AST_RATIO_opponent', 'OREB_PCT_opponent', 'DREB_PCT_opponent', 'REB_PCT_opponent', 'TM_TOV_PCT_opponent', 'EFG_PCT_opponent', 'TS_PCT_opponent', 'USG_PCT_opponent', 'PACE_opponent', 'PIE_opponent', 'TEAM_NAME_scoring_opponent', 'PCT_FGA_2PT_opponent', 'PCT_FGA_3PT_opponent', 'PCT_PTS_2PT_opponent', 'PCT_PTS_2PT_MR_opponent', 'PCT_PTS_3PT_opponent', 'PCT_PTS_FB_opponent', 'PCT_PTS_FT_opponent', 'PCT_PTS_OFF_TOV_opponent', 'PCT_PTS_PAINT_opponent', 'PCT_AST_

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [45]:
resp

Unnamed: 0_level_0,AST,AST_PCT,AST_RATIO,AST_TOV,AST_playertrack,BLK,BLKA,BLK_misc,BOX_OUTS,CFGA,...,UFGA,UFGM,UFG_PCT,USG_PCT,WIN,WIN_AWAY,WIN_AWAY_PCT,WIN_HOME,WIN_HOME_PCT,WIN_PCT
team_home,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ATL,23.2,0.5896,17.42,1.762,23.2,2.8,5.8,2.8,33.4,34.4,...,52.6,24.6,0.4656,0.1982,2.0,1.0,1.0,1.0,0.25,0.4


In [278]:
resp = []
season = all_games.reset_index()

for index, row in season.iterrows():
#for index, row in regular_season.groupby("season").first().iterrows():
        # Home team
        home_last_games = get_last_games(season, row["DATE"], row["team_home"])
        home_avg_last_games = get_avg_last_games(home_last_games, row["team_home"])
        home_rivals_last_games = get_avg_last_games(home_last_games, row["team_home"], rivals=True)

        home_avg_last_games["game_ref"] = [row.GAME]
        home_avg_last_games.set_index("game_ref", inplace=True)
        home_avg_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        home_rivals_last_games["game_ref"] = [row.GAME]
        home_rivals_last_games.set_index("game_ref", inplace=True)
        home_rivals_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        #print(home_rivals_last_games.index, home_avg_last_games.index)

        # Away team
        away_last_games = get_last_games(season, row["DATE"], row["team_away"]).reset_index()
        away_avg_last_games = get_avg_last_games(away_last_games, row["team_away"])
        away_rivals_last_games = get_avg_last_games(away_last_games, row["team_away"], rivals=True)

        away_avg_last_games["game_ref"] = [row.GAME]
        away_avg_last_games.set_index("game_ref", inplace=True)
        away_avg_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        away_rivals_last_games["game_ref"] = [row.GAME]
        away_rivals_last_games.set_index("game_ref", inplace=True)
        away_rivals_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        #print(away_rivals_last_games.index, away_avg_last_games.index)

        # Junta bases 
        rivals_last_games = home_rivals_last_games.join(away_rivals_last_games, how="inner",
                                            lsuffix='_home_last_5_games', rsuffix='_away_last_5_games')
    
        avg_last_games = home_avg_last_games.join(away_avg_last_games, how="inner", 
                             lsuffix='_home_last_5_games', rsuffix='_away_last_5_games')

        
        print(rivals_last_games.columns)
        
        game_line = avg_last_games.join(rivals_last_games,
                                        how="inner")

        game_line = pd.concat([row.to_frame().transpose().set_index("GAME"), game_line], axis=1)

        print(str(row.GAME), end="\r")
        
        resp.append(game_line)

KeyError: "['WIN_HOME_home' 'WIN_AWAY_home' 'WIN_home' 'WIN_HOME_PCT_home'\n 'WIN_AWAY_PCT_home' 'WIN_PCT_home' 'NUM_GAMES_HOME_home'\n 'NUM_GAMES_AWAY_home' 'NUM_GAMES_LAST_2_DAYS_home'\n 'NUM_GAMES_LAST_4_DAYS_home' 'NUM_GAMES_LAST_6_DAYS_home'\n 'NUM_GAMES_LAST_10_DAYS_home' 'NUM_GAMES_LAST_2_DAYS_HOME_home'\n 'NUM_GAMES_LAST_4_DAYS_HOME_home' 'NUM_GAMES_LAST_6_DAYS_HOME_home'\n 'NUM_GAMES_LAST_10_DAYS_HOME_home' 'NUM_GAMES_LAST_2_DAYS_AWAY_home'\n 'NUM_GAMES_LAST_4_DAYS_AWAY_home' 'NUM_GAMES_LAST_6_DAYS_AWAY_home'\n 'NUM_GAMES_LAST_10_DAYS_AWAY_home' 'DAYS_DIFF_TOTAL_home'\n 'DAYS_DIFF_NEXT_GAME_home' 'DAYS_DIFF_NEXT_GAMES_STD_home'] not in index"