In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import geopy.distance

In [2]:
base_nba_per_game = pd.read_csv("bases_nba_stats/base_nba_per_game_17_18_arrumada.csv", sep=";")

In [31]:
base_nba_per_game["fl_home"] = np.where(base_nba_per_game["GAME"].str[6:9] == base_nba_per_game["TEAM_ABBREVIATION"], 1, 0)

In [32]:
home_games = base_nba_per_game[base_nba_per_game["fl_home"] == 1].set_index("GAME")
away_games = base_nba_per_game[base_nba_per_game["fl_home"] == 0].set_index("GAME")

In [33]:
print("Home", home_games.shape)
print("Away", away_games.shape)

Home (1230, 112)
Away (1230, 112)


In [34]:
all_games = home_games.join(away_games, how="inner", lsuffix="_home", rsuffix="_away")
all_games.drop(["GAME_ID_away", "GAME_DATE_away", "GAME_PLACE_away",
                "MIN_home", "MIN_away", 'PTS_hustle_home', 'PTS_hustle_away',
                "fl_home_away", "fl_home_home"], axis=1, inplace=True)

In [35]:
all_games[["TEAM_ABBREVIATION_away", "PTS_away", "TEAM_ABBREVIATION_home", "PTS_home"]].head()

Unnamed: 0_level_0,TEAM_ABBREVIATION_away,PTS_away,TEAM_ABBREVIATION_home,PTS_home
GAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BOS @ CLE 2017-10-17,BOS,99,CLE,102
HOU @ GSW 2017-10-17,HOU,122,GSW,121
CHA @ DET 2017-10-18,CHA,90,DET,102
BKN @ IND 2017-10-18,BKN,131,IND,140
MIA @ ORL 2017-10-18,MIA,109,ORL,116


In [12]:
def get_season(date):
    ano = date.year
    if(date.month >= 10):
        return ano + 1
    return ano

def is_playoff(date):
    # Playoffs 2016
    if date >= datetime(2016, 4, 16) and date < datetime(2016, 6, 30):
        return 1
    
    # Playoffs 2017
    elif date >= datetime(2017, 4, 15) and date < datetime(2017, 6, 30):
        return 1
    
     # Playoffs 2018
    elif date >= datetime(2018, 4, 14) and date < datetime(2018, 6, 30):
        return 1
    
    return 0

In [13]:
all_games["DATE"] = [datetime.strptime(str(x), '%d/%m/%Y') for x in all_games.GAME_DATE_home]
all_games["SEASON"] = [get_season(x) for x in all_games.DATE]
all_games["fl_playoff"] = [is_playoff(x) for x in all_games.DATE]
all_games['fl_home_win'] = np.where(all_games['PTS_home'] > all_games['PTS_away'], 1, 0)
all_games = all_games.sort_values('DATE')

In [39]:
all_games = all_games.rename(columns={'TEAM_ABBREVIATION_home': "team_home",
                                     'TEAM_ABBREVIATION_away': "team_away"})

In [54]:
all_games["team_home_game_num"] = all_games.groupby(['team_home']).cumcount() + 1
all_games["team_away_game_num"] = all_games.groupby(['team_away']).cumcount() + 1

In [14]:
all_games.to_csv("./bases_nba_stats/all_games_nba_17_18.csv")

### Matriz de Distância entre os Times

In [12]:
base_lat_long = pd.read_excel("./lat_long_teams.xlsx").drop("team_id", axis=1)

In [38]:
base_lat_long.head()

Unnamed: 0,abreviation,team_name,lat,lon
0,MIA,Miami Heat,25.7814,-80.1878
1,ORL,Orlando Magic,28.5392,-81.3836
2,SAS,San Antonio Spurs,29.4269,-98.4375
3,HOU,Houston Rockets,29.7508,-95.3622
4,NOP,New Orleans Pelicans,29.9489,-90.0822


In [27]:
teams = base_lat_long.abreviation.unique()

In [35]:
teams = base_lat_long.abreviation.unique()
df_dist = pd.DataFrame(columns=teams, index=teams)

for team in teams:
    for team_2 in teams:
        team1_location = (base_lat_long[base_lat_long.abreviation == team].lat.iloc[0], 
                          base_lat_long[base_lat_long.abreviation == team].lon.iloc[0])
        
        team2_location = (base_lat_long[base_lat_long.abreviation == team_2].lat.iloc[0], 
                          base_lat_long[base_lat_long.abreviation == team_2].lon.iloc[0])
        
        df_dist.loc[team, team_2] = geopy.distance.vincenty(team1_location, team2_location).km        

In [37]:
df_dist.to_csv("dist_matrix_km.csv")

### Cria features de Número de Jogos

In [65]:
all_games = pd.read_csv("./bases_nba_stats/all_games_nba_17_18.csv").drop("Unnamed: 0", axis=1)

In [66]:
all_games["DATE"] = [datetime.strptime(str(x), '%d/%m/%Y') for x in all_games.GAME_DATE_home]

In [67]:
def get_dist_last_game(df, data, team_home, team_away, is_home=True):    
    """
        Retorna a distância em KM percorrida pelo time específico para chegar a um jogo
    """
    
    if(is_home):
        last_game = get_last_games(df, data, team_home, n = 1)
        
        if(len(last_game) == 0):
            return(0)
        
        if (last_game.team_home.iloc[0] == team_home):
            return(0)
        else:
            return(df_dist.loc[team_home, last_game.team_home.iloc[0]])        
    else:
        last_game = get_last_games(df, data, team_away, n = 1)
        
        if(len(last_game) == 0):
            return(df_dist.loc[team_home, team_away])
        
        if (last_game.team_away.iloc[0] == team_away):
            return(df_dist.loc[team_home, last_game.team_away.iloc[0]])
        else:
            return(df_dist.loc[team_home, team_away])    

In [68]:
all_games["DISTANCE_KM_home"] = [get_dist_last_game(all_games, x.DATE, x.team_home, x.team_away, is_home=True) for x in all_games.itertuples()]
all_games["DISTANCE_KM_away"] = [get_dist_last_game(all_games, x.DATE, x.team_home, x.team_away, is_home=False) for x in all_games.itertuples()]

In [69]:
all_games.head()

Unnamed: 0,GAME,GAME_ID_home,TEAM_ID_home,TEAM_NAME_home,team_home,TEAM_CITY_home,FGM_home,FGA_home,FG_PCT_home,FG3M_home,...,SCREEN_ASSISTS_away,BOX_OUTS_away,DATE,SEASON,fl_playoff,team_home_game_num,team_away_game_num,fl_home_win,DISTANCE_KM_home,DISTANCE_KM_away
0,BOS @ CLE 2017-10-17,21700001,1610612739,Cavaliers,CLE,Cleveland,38,83,0.458,5,...,9,38,2017-10-17,2018,0,1,1,1,0.0,885.985871
1,HOU @ GSW 2017-10-17,21700002,1610612744,Warriors,GSW,Golden State,43,80,0.537,16,...,7,21,2017-10-17,2018,0,1,1,0,0.0,2629.261668
2,NOP @ MEM 2017-10-18,21700008,1610612763,Grizzlies,MEM,Memphis,39,92,0.424,9,...,4,17,2017-10-18,2018,0,1,1,1,0.0,575.494941
3,HOU @ SAC 2017-10-18,21700013,1610612758,Kings,SAC,Sacramento,42,88,0.477,8,...,5,28,2017-10-18,2018,0,1,2,0,0.0,2593.376343
4,DEN @ UTA 2017-10-18,21700010,1610612762,Jazz,UTA,Utah,41,81,0.506,9,...,7,21,2017-10-18,2018,0,1,1,1,0.0,597.057075


### Últimos 5 Jogos

In [5]:
home_columns = [x for x in all_games.columns if x.endswith("_home") and x not in ['GAME_ID_home', 'TEAM_CITY_home', 'GAME_DATE_home', 'GAME_PLACE_home', 'TEAM_NICKNAME_home']]
away_columns = [x for x in all_games.columns if x.endswith("_away") and x not in ['TEAM_CITY_away', 'TEAM_NICKNAME_away']]

In [6]:
# "['WIN_HOME_away' 'WIN_AWAY_away' 'WIN_away' 'WIN_HOME_PCT_away'\n 'WIN_AWAY_PCT_away'
# 'WIN_PCT_away' 'NUM_GAMES_HOME_away'\n 'NUM_GAMES_AWAY_away' 'DAYS_DIFF_TOTAL_away' 
# 'DAYS_DIFF_NEXT_GAME_away'\n 'DAYS_DIFF_NEXT_GAMES_STD_away']

In [7]:
# extra_columns = []

# home_columns += [x + "_home" for x in extra_columns]
# away_columns += [x + "_away" for x in extra_columns]

In [8]:
def cria_variaveis_sumarizacao(last_games, team_name, n = 5, data_ref = None, verbose = False):
    resp = {}
    
    # Cria variávies de Win %
    # Visão home
    resp["num_wins_home"] = [np.where((last_games["team_home"] == team_name) &
                             (last_games["fl_home_win"] == 1) , 1, 0).sum()]
    resp["num_games_home"] = [np.where(last_games["team_home"] == team_name, 1, 0).sum()]
    resp["win_home_pct"] = [(resp["num_wins_home"][0] / resp["num_games_home"][0])]
    
    # Visão Away
    resp["num_wins_away"] = [np.where((last_games["team_away"] == team_name) & 
                                      (last_games["fl_home_win"] == 0), 1, 0).sum()]
    resp["num_games_away"] = [np.where(last_games["team_away"] == team_name, 1, 0).sum()]
    resp["win_away_pct"] = [(resp["num_wins_away"][0] / resp["num_games_away"][0])]
    
    # Visão geral
    resp["num_wins_total"] = [resp["num_wins_away"][0] + resp["num_wins_home"][0]]
    resp["win_pct"] = [resp["num_wins_total"][0]/n]
    
    if verbose:
        print("Win_PCT", resp["win_pct"][0], resp["num_wins_away"][0], resp["num_wins_home"][0])
    
    # Cria variáveis de data
    if(data_ref is None):
        data_ref = np.max(last_games["DATE"]) + timedelta(days=1)
    
    # Visão da série
    resp["total_days_diff"] = [(np.max(last_games["DATE"]) - np.min(last_games["DATE"])).days]
    days_diff_last_games = [-x.days if not np.isnan(x.days) else 0 
                            for x in last_games["DATE"].sub(last_games["DATE"].shift(-1).fillna(data_ref))]
    resp["days_diff_last_games_std"] = [np.std(days_diff_last_games)]
    resp["days_diff_last_games_mean"] = [np.mean(days_diff_last_games)]
    
    if verbose:
        print("Days_Diff", days_diff_last_games, resp["days_diff_last_games_mean"][0], resp["total_days_diff"][0]) 
    
    # All
    resp["num_games_last_2_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=2))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    resp["num_games_last_4_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=4))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
                             
    resp["num_games_last_6_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=6))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    resp["num_games_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=8))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    resp["num_games_last_10_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=10))) & 
                            (last_games["DATE"] < data_ref) &
                            ((last_games["team_home"] == team_name) |
                             (last_games["team_away"] == team_name)), 1, 0).sum()]
    
    # Away
    resp["num_games_away_last_2_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=2))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    resp["num_games_away_last_4_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=4))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    resp["num_games_away_last_6_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=6))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()] 
    
    resp["num_games_away_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=8))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    resp["num_games_away_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=10))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_away"] == team_name), 1, 0).sum()]
    
    # Home    
    resp["num_games_home_last_2_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=2))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_4_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=4))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_6_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=6))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_8_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=8))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    resp["num_games_home_last_10_days"] = [np.where((last_games["DATE"] >= (data_ref - timedelta(days=10))) & 
                                    (last_games["DATE"] < data_ref) &
                                    (last_games["team_home"] == team_name), 1, 0).sum()]
    
    if verbose:
        print("Num Games Last X Days", resp["num_games_last_6_days"][0], resp["num_games_away_last_6_days"][0])
    
    return(pd.DataFrame(resp))

In [49]:
def get_last_games(df, data, team_name, n = 5, filter="all", verbose=False):
    if(filter == "all"):
        last_games = df[(df["DATE"] < data) & 
                        ((df["team_home"] == team_name) | (df["team_away"] == team_name))].tail(n)
        
    elif(filter == "home"):
        last_games = df[(df["DATE"] < data) & (df["team_home"] == team_name)].tail(n)
    elif(filter == "away"):
        last_games = df[(df["DATE"] < data) & (df["team_away"] == team_name)].tail(n)

    if(verbose):
        print(last_games[["team_home", "team_away", "PTS_home", "PTS_away", "DATE"]])
    
    return(last_games)

def get_avg_last_games(last_games, team_name, n = 5, data_ref = None, rivals = False, 
                       to_drop=['fl_home_win', 'fl_playoff', 'fl_win','index', 'team_away_game_num',
                                'team_game_num', 'team_home_game_num', "TEAM_CITY_away", "TEAM_CITY_home",
                                "TEAM_CITY_home", "TEAM_ID_home", "TEAM_NAME_advanced_home",
                                "TEAM_NAME_fourfactors_home", "TEAM_NAME_hustle_home",
                                "TEAM_NAME_misc_home", "TEAM_NAME_home", "TEAM_NAME_playertrack_home",
                                "TEAM_NAME_scoring_home", 'TEAM_ID_home', 'TEAM_ID_away', "TEAM_NAME_advanced_away",
                                "TEAM_NAME_fourfactors_away", "TEAM_NAME_hustle_away",
                                "TEAM_NAME_misc_away", "TEAM_NAME_away", "TEAM_NAME_playertrack_away",
                                "TEAM_NAME_scoring_away", "DATE", "SEASON", 'GAME', 'GAME_ID_home', 'GAME_ID_away']):
    
    if(rivals == False):
        last_games_home = last_games[last_games["team_home"] == team_name].drop(away_columns + to_drop, axis=1, errors="ignore").groupby(["team_home"]).sum()
        last_games_away = last_games[last_games["team_away"] == team_name].drop(home_columns + to_drop, axis=1, errors="ignore").groupby(["team_away"]).sum()
        last_games_home.columns = [x.replace("_home","") for x in last_games_home.columns]
        last_games_away.columns = [x.replace("_away","") for x in last_games_away.columns]
    else:
        last_games_home = last_games[last_games["team_home"] != team_name].drop(away_columns + to_drop, axis=1, errors="ignore").drop("team_home", axis = 1).sum()
        last_games_away = last_games[last_games["team_away"] != team_name].drop(home_columns + to_drop, axis=1, errors="ignore").drop("team_away", axis = 1).sum()
          
        last_games_home.index = [x.replace("_home","_opponent") for x in last_games_home.index]
        last_games_away.index = [x.replace("_away","_opponent") for x in last_games_away.index]
    
    if(len(last_games_home) == 0):
        if(rivals):
            resp = (last_games_away/n).to_frame().transpose()
        else:
            resp = last_games_away/n
    
    if(len(last_games_away) == 0):
        if(rivals):
            resp = (last_games_home/n).to_frame().transpose()
        else:
            resp = last_games_home/n
            
    if(len(last_games_away) > 0 and len(last_games_home) > 0):
        
        if(rivals):
            resp = ((last_games_home + last_games_away) / n).to_frame().transpose()
        else:
            resp = ((last_games_home + last_games_away) / n)    
    
    if not rivals:
        var_criadas = cria_variaveis_sumarizacao(last_games, team_name, n = 5, data_ref = None)
        resp = pd.concat([resp, var_criadas], axis=1).sum().to_frame().transpose()
    
    return(resp)

In [10]:
teste = all_games[all_games["team_home"] == "ATL"]
teste = get_last_games(all_games, teste.iloc[10]["DATE"], "ATL", verbose=False).reset_index()

In [11]:
teste

Unnamed: 0,index,GAME,GAME_ID_home,TEAM_ID_home,TEAM_NAME_home,team_home,TEAM_CITY_home,FGM_home,FGA_home,FG_PCT_home,...,LOOSE_BALLS_RECOVERED_away,CHARGES_DRAWN_away,SCREEN_ASSISTS_away,BOX_OUTS_away,DATE,SEASON,fl_playoff,team_home_game_num,team_away_game_num,fl_home_win
0,259,LAC @ ATL 2017-11-22,21700256,1610612737,Hawks,ATL,Atlanta,40,83,0.482,...,8,2,4,19,2017-11-22,2018,0,7,9,0
1,270,NYK @ ATL 2017-11-24,21700268,1610612737,Hawks,ATL,Atlanta,47,99,0.475,...,6,0,7,21,2017-11-24,2018,0,8,6,1
2,278,TOR @ ATL 2017-11-25,21700280,1610612737,Hawks,ATL,Atlanta,26,75,0.347,...,6,2,8,17,2017-11-25,2018,0,9,12,0
3,311,CLE @ ATL 2017-11-30,21700313,1610612737,Hawks,ATL,Atlanta,41,79,0.519,...,11,1,9,47,2017-11-30,2018,0,10,11,0
4,330,ATL @ BKN 2017-12-02,21700328,1610612751,Nets,BKN,Brooklyn,34,82,0.415,...,13,3,18,42,2017-12-02,2018,0,10,12,0


In [12]:
cria_variaveis_sumarizacao(teste, "ATL", verbose = True)

Win_PCT 0.4 1 1
Days_Diff [2, 1, 5, 2, 1] 2.2 10
Num Games Last X Days 2 1


Unnamed: 0,num_wins_home,num_games_home,win_home_pct,num_wins_away,num_games_away,win_away_pct,num_wins_total,win_pct,total_days_diff,days_diff_last_games_std,...,num_games_last_10_days,num_games_away_last_2_days,num_games_away_last_4_days,num_games_away_last_6_days,num_games_away_last_8_days,num_games_home_last_2_days,num_games_home_last_4_days,num_games_home_last_6_days,num_games_home_last_8_days,num_games_home_last_10_days
0,1,4,0.25,1,1,1.0,2,0.4,10,1.469694,...,4,1,1,1,1,0,1,1,2,3


In [13]:
resp = get_avg_last_games(teste, "ATL", rivals=True)

In [14]:
resp

Unnamed: 0,AST_PCT_opponent,AST_RATIO_opponent,AST_TOV_opponent,AST_opponent,AST_playertrack_opponent,BLKA_opponent,BLK_misc_opponent,BLK_opponent,BOX_OUTS_opponent,CFGA_opponent,...,STL_opponent,TCHS_opponent,TM_TOV_PCT_fourfactors_opponent,TM_TOV_PCT_opponent,TO_opponent,TS_PCT_opponent,UFGA_opponent,UFGM_opponent,UFG_PCT_opponent,USG_PCT_opponent
0,0.7206,21.72,1.614,29.4,29.4,2.8,5.8,5.8,28,32,...,5.6,442,0.1894,18.955,17.8,0.6336,48,22.4,0.4672,0.1988


In [15]:
resp = []
season = all_games.reset_index()

for index, row in season.iterrows():
#for index, row in regular_season.groupby("season").first().iterrows():
        # Home team
        home_last_games = get_last_games(season, row["DATE"], row["team_home"])
        home_avg_last_games = get_avg_last_games(home_last_games, row["team_home"])
        home_rivals_last_games = get_avg_last_games(home_last_games, row["team_home"], rivals=True)

        home_avg_last_games["game_ref"] = [row.GAME]
        home_avg_last_games.set_index("game_ref", inplace=True)
        home_avg_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        home_rivals_last_games["game_ref"] = [row.GAME]
        home_rivals_last_games.set_index("game_ref", inplace=True)
        home_rivals_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        #print(home_rivals_last_games.index, home_avg_last_games.index)

        # Away team
        away_last_games = get_last_games(season, row["DATE"], row["team_away"]).reset_index()
        away_avg_last_games = get_avg_last_games(away_last_games, row["team_away"])
        away_rivals_last_games = get_avg_last_games(away_last_games, row["team_away"], rivals=True)

        away_avg_last_games["game_ref"] = [row.GAME]
        away_avg_last_games.set_index("game_ref", inplace=True)
        away_avg_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        away_rivals_last_games["game_ref"] = [row.GAME]
        away_rivals_last_games.set_index("game_ref", inplace=True)
        away_rivals_last_games.drop(["team_home", "team_away"],axis=1 ,errors="ignore", inplace=True)

        #print(away_rivals_last_games.index, away_avg_last_games.index)

        # Junta bases 
        rivals_last_games = home_rivals_last_games.join(away_rivals_last_games, how="inner",
                                            lsuffix='_home_last_5_games', rsuffix='_away_last_5_games').drop('level_0',
                                                                                                             axis=1)
    
        avg_last_games = home_avg_last_games.join(away_avg_last_games, how="inner", 
                             lsuffix='_home_last_5_games', rsuffix='_away_last_5_games')

        
        #print(rivals_last_games.columns)
        
        game_line = avg_last_games.join(rivals_last_games,
                                        how="inner")

        game_line = pd.concat([row.to_frame().transpose().set_index("GAME"), game_line], axis=1)

        print(str(row.GAME), end="\r")
        
        resp.append(game_line)

  if __name__ == '__main__':
  from ipykernel import kernelapp as app
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


HOU @ SAC 2018-04-11

In [16]:
df_resp = pd.concat(resp)

In [24]:
df_resp.drop('level_0', axis=1, inplace=True)

In [29]:
df_resp["week"] = [x.isocalendar()[1] for x in df_resp.DATE]

In [31]:
df_resp.to_csv("bases_nba_stats/base_last_5_+_jogos.csv")

### Cria base de Modelagem

In [34]:
to_drop_modelagem = ['FGM_home', 'FGA_home', 'FG_PCT_home', 'FG3M_home', 'FG3A_home', 
                     'FG3_PCT_home', 'FTM_home', 'FTA_home', 'FT_PCT_home', 'OREB_home', 
                     'DREB_home', 'REB_home', 'AST_home', 'STL_home', 'BLK_home', 'TO_home',
                     'PF_home', 'TEAM_NAME_advanced_home', 'AST_PCT_home',
                     'AST_TOV_home', 'AST_RATIO_home', 'OREB_PCT_home', 'DREB_PCT_home',
                     'REB_PCT_home', 'TM_TOV_PCT_home', 'EFG_PCT_home', 'TS_PCT_home', 
                     'USG_PCT_home', 'PACE_home', 'PIE_home', 'TEAM_NAME_scoring_home', 
                     'PCT_FGA_2PT_home', 'PCT_FGA_3PT_home', 'PCT_PTS_2PT_home', 'PCT_PTS_2PT_MR_home', 
                     'PCT_PTS_3PT_home', 'PCT_PTS_FB_home', 'PCT_PTS_FT_home', 'PCT_PTS_OFF_TOV_home', 
                     'PCT_PTS_PAINT_home', 'PCT_AST_2PM_home', 'PCT_UAST_2PM_home', 'PCT_AST_3PM_home', 
                     'PCT_UAST_3PM_home', 'PCT_AST_FGM_home', 'PCT_UAST_FGM_home', 'TEAM_NAME_misc_home',
                     'PTS_OFF_TOV_home', 'PTS_2ND_CHANCE_home', 'PTS_FB_home', 'PTS_PAINT_home', 
                     'OPP_PTS_OFF_TOV_home', 'OPP_PTS_2ND_CHANCE_home', 'OPP_PTS_FB_home', 
                     'OPP_PTS_PAINT_home', 'BLK_misc_home', 'BLKA_home', 'PF_misc_home', 'PFD_home', 
                     'TEAM_NAME_fourfactors_home', 'EFG_PCT_fourfactors_home', 'FTA_RATE_home', 
                     'TM_TOV_PCT_fourfactors_home', 'OREB_PCT_fourfactors_home', 'OPP_EFG_PCT_home',
                     'OPP_FTA_RATE_home', 'OPP_TOV_PCT_home', 'OPP_OREB_PCT_home', 'TEAM_NAME_playertrack_home',
                     'DIST_home', 'ORBC_home', 'DRBC_home', 'RBC_home', 'TCHS_home', 'SAST_home', 'FTAST_home', 
                     'PASS_home', 'AST_playertrack_home', 'CFGM_home', 'CFGA_home', 'CFG_PCT_home', 'UFGM_home',
                     'UFGA_home', 'UFG_PCT_home', 'FG_PCT_playertrack_home', 'DFGM_home', 'DFGA_home', 
                     'DFG_PCT_home', 'TEAM_NAME_hustle_home', 'CONTESTED_SHOTS_home', 'CONTESTED_SHOTS_2PT_home', 
                     'CONTESTED_SHOTS_3PT_home', 'DEFLECTIONS_home', 'LOOSE_BALLS_RECOVERED_home', 'CHARGES_DRAWN_home',
                     'SCREEN_ASSISTS_home', 'BOX_OUTS_home', 'FGM_away', 'FGA_away', 'FG_PCT_away', 
                     'FG3M_away', 'FG3A_away', 'FG3_PCT_away', 'FTM_away', 'FTA_away', 'FT_PCT_away', 'OREB_away',
                     'DREB_away', 'REB_away', 'AST_away', 'STL_away', 'BLK_away', 'TO_away', 'PF_away',
                     'PLUS_MINUS_away', 'TEAM_NAME_advanced_away', 'AST_PCT_away', 'AST_TOV_away',
                     'AST_RATIO_away', 'OREB_PCT_away',
                     'DREB_PCT_away', 'REB_PCT_away', 'TM_TOV_PCT_away', 'EFG_PCT_away', 'TS_PCT_away', 
                     'USG_PCT_away', 'PACE_away', 'PIE_away', 'TEAM_NAME_scoring_away', 'PCT_FGA_2PT_away',
                     'PCT_FGA_3PT_away', 'PCT_PTS_2PT_away', 'PCT_PTS_2PT_MR_away', 'PCT_PTS_3PT_away', 
                     'PCT_PTS_FB_away', 'PCT_PTS_FT_away', 'PCT_PTS_OFF_TOV_away', 'PCT_PTS_PAINT_away',
                     'PCT_AST_2PM_away', 'PCT_UAST_2PM_away', 'PCT_AST_3PM_away', 'PCT_UAST_3PM_away', 
                     'PCT_AST_FGM_away', 'PCT_UAST_FGM_away', 'TEAM_NAME_misc_away', 'PTS_OFF_TOV_away', 
                     'PTS_2ND_CHANCE_away', 'PTS_FB_away', 'PTS_PAINT_away', 'OPP_PTS_OFF_TOV_away', 
                     'OPP_PTS_2ND_CHANCE_away', 'OPP_PTS_FB_away', 'OPP_PTS_PAINT_away', 'BLK_misc_away',
                     'BLKA_away', 'PF_misc_away', 'PFD_away', 'TEAM_NAME_fourfactors_away', 'EFG_PCT_fourfactors_away', 
                     'FTA_RATE_away', 'TM_TOV_PCT_fourfactors_away', 'OREB_PCT_fourfactors_away', 'OPP_EFG_PCT_away', 
                     'OPP_FTA_RATE_away', 'OPP_TOV_PCT_away', 'OPP_OREB_PCT_away', 'TEAM_NAME_playertrack_away',
                     'DIST_away', 'ORBC_away', 'DRBC_away', 'RBC_away', 'TCHS_away', 'SAST_away', 'FTAST_away', 
                     'PASS_away', 'AST_playertrack_away', 'CFGM_away', 'CFGA_away', 'CFG_PCT_away', 'UFGM_away',
                     'UFGA_away', 'UFG_PCT_away', 'FG_PCT_playertrack_away', 'DFGM_away', 'DFGA_away', 'DFG_PCT_away', 
                     'TEAM_NAME_hustle_away', 'CONTESTED_SHOTS_away', 'CONTESTED_SHOTS_2PT_away', 
                     'CONTESTED_SHOTS_3PT_away', 'DEFLECTIONS_away', 'LOOSE_BALLS_RECOVERED_away', 
                     'CHARGES_DRAWN_away', 'SCREEN_ASSISTS_away', 'BOX_OUTS_away']

In [40]:
df_modelagem = df_resp.drop(to_drop_modelagem, axis=1).rename(columns={"week":"year_week"})

In [41]:
df_modelagem.to_csv("bases_nba_stats/base_modelagem_nba_17_18.csv")

In [42]:
df_truncada = df_modelagem.copy()
df_truncada[df_truncada.select_dtypes(include=['float64']).columns] = df_truncada[df_truncada.select_dtypes(include=['float64']).columns].applymap('{:,.4f}'.format)
df_truncada.to_csv("bases_nba_stats/base_modelagem_nba_17-18_4_digitos.csv")