In [24]:
import pandas as pd
import numpy as np
from datetime import datetime

In [30]:
# Load the per-game team stats for the 17/18 season (semicolon-separated file).
base_nba_per_game = pd.read_csv(
    "bases_nba_stats/base_nba_per_game_17_18_arrumada.csv",
    sep=";",
)

In [31]:
# GAME looks like "BOS @ CLE 2017-10-17": characters 6-8 hold the team listed
# after the "@". A row is flagged as a home row (1) when that matches its own
# TEAM_ABBREVIATION, otherwise 0.
base_nba_per_game["fl_home"] = np.where(
    base_nba_per_game["GAME"].str.slice(6, 9).eq(base_nba_per_game["TEAM_ABBREVIATION"]),
    1,
    0,
)

In [32]:
# Split the rows by the home flag and key both halves by GAME so they can be
# joined side by side.
home_games = base_nba_per_game.loc[base_nba_per_game["fl_home"].eq(1)].set_index("GAME")
away_games = base_nba_per_game.loc[base_nba_per_game["fl_home"].eq(0)].set_index("GAME")

In [33]:
# Show the size of each half of the split.
for label, games in (("Home", home_games), ("Away", away_games)):
    print(label, games.shape)

Home (1230, 112)
Away (1230, 112)


In [34]:
# One row per game: home-team columns get the "_home" suffix, away-team columns
# get "_away". Columns duplicated on the away side, plus minutes, hustle points
# and the split flags, are dropped.
all_games = (
    home_games
    .join(away_games, how="inner", lsuffix="_home", rsuffix="_away")
    .drop(
        columns=[
            "GAME_ID_away", "GAME_DATE_away", "GAME_PLACE_away",
            "MIN_home", "MIN_away",
            'PTS_hustle_home', 'PTS_hustle_away',
            "fl_home_away", "fl_home_home",
        ]
    )
)

In [35]:
# Quick look at the joined frame: away team and score next to home team and score.
all_games.loc[:, ["TEAM_ABBREVIATION_away", "PTS_away", "TEAM_ABBREVIATION_home", "PTS_home"]].head()

Unnamed: 0_level_0,TEAM_ABBREVIATION_away,PTS_away,TEAM_ABBREVIATION_home,PTS_home
GAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BOS @ CLE 2017-10-17,BOS,99,CLE,102
HOU @ GSW 2017-10-17,HOU,122,GSW,121
CHA @ DET 2017-10-18,CHA,90,DET,102
BKN @ IND 2017-10-18,BKN,131,IND,140
MIA @ ORL 2017-10-18,MIA,109,ORL,116


In [36]:
def get_season(date):
    """Return the season label for a game date.

    Dates from October onwards are labelled with the following calendar year;
    dates from January to September keep their own year.

    Parameters
    ----------
    date : object exposing ``.year`` and ``.month`` (e.g. ``datetime``)

    Returns
    -------
    int
        ``date.year + 1`` when ``date.month >= 10``, otherwise ``date.year``.
    """
    rolls_into_next_year = date.month >= 10
    return date.year + 1 if rolls_into_next_year else date.year

def is_playoff(date):
    """Flag whether a date falls inside one of the hard-coded playoff windows.

    Only the 2016, 2017 and 2018 playoffs are covered; any other date yields 0.
    Each window includes its start date and excludes its end date (June 30).

    Parameters
    ----------
    date : datetime-like value comparable with ``datetime``

    Returns
    -------
    int
        1 when ``date`` is inside a window, otherwise 0.
    """
    playoff_windows = (
        (datetime(2016, 4, 16), datetime(2016, 6, 30)),  # Playoffs 2016
        (datetime(2017, 4, 15), datetime(2017, 6, 30)),  # Playoffs 2017
        (datetime(2018, 4, 14), datetime(2018, 6, 30)),  # Playoffs 2018
    )
    inside_any_window = any(
        date >= start and date < end for start, end in playoff_windows
    )
    return 1 if inside_any_window else 0

In [52]:
# Derive the date-based columns and the home-win flag, then order the games by
# date. The later tail()/cumcount() steps rely on this ordering.
all_games = (
    all_games
    .assign(
        # GAME_DATE_home is parsed as day/month/year.
        DATE=lambda games: [
            datetime.strptime(str(raw_date), '%d/%m/%Y')
            for raw_date in games["GAME_DATE_home"]
        ],
        SEASON=lambda games: [get_season(game_day) for game_day in games["DATE"]],
        fl_playoff=lambda games: [is_playoff(game_day) for game_day in games["DATE"]],
        # 1 when the home team outscored the away team, otherwise 0.
        fl_home_win=lambda games: np.where(games["PTS_home"] > games["PTS_away"], 1, 0),
    )
    .sort_values("DATE")
)

In [39]:
# Shorter names for the two team-abbreviation columns.
all_games = all_games.rename(
    columns={
        "TEAM_ABBREVIATION_home": "team_home",
        "TEAM_ABBREVIATION_away": "team_away",
    }
)

In [54]:
# 1-based running counters in current row order:
#   team_home_game_num counts the home team's home games only,
#   team_away_game_num counts the away team's away games only.
# NOTE(review): neither is the team's overall game number - confirm that is intended.
all_games["team_home_game_num"] = all_games.groupby("team_home").cumcount().add(1)
all_games["team_away_game_num"] = all_games.groupby("team_away").cumcount().add(1)

In [55]:
# Persist the one-row-per-game table (the GAME index is written as the first column).
all_games.to_csv(path_or_buf="./bases_nba_stats/all_games_nba_17_18.csv")

### Últimos 5 Jogos

In [98]:
# Columns to aggregate for each side: everything carrying the side's suffix
# except identifier/descriptive fields. The away list excludes fewer names
# because GAME_ID/GAME_DATE/GAME_PLACE were already dropped on the away side.
home_columns = [
    column
    for column in all_games.columns
    if column.endswith("_home")
    and column not in {'GAME_ID_home', 'TEAM_CITY_home', 'GAME_DATE_home',
                       'GAME_PLACE_home', 'TEAM_NICKNAME_home'}
]
away_columns = [
    column
    for column in all_games.columns
    if column.endswith("_away")
    and column not in {'TEAM_CITY_away', 'TEAM_NICKNAME_away'}
]

In [99]:
# Also aggregate the win counters that get_last_games adds to its result.
# Only names that are still missing are appended, so re-running this cell does
# not put duplicate column names into the lists (duplicates would make
# `last_games[home_columns]` select the same column more than once).
win_total_columns = ["WIN_HOME_TOTAL", "WIN_AWAY_TOTAL", "WIN_TOTAL"]
home_columns += [column for column in win_total_columns if column not in home_columns]
away_columns += [column for column in win_total_columns if column not in away_columns]

In [100]:
def get_last_games(df, data, team_name, n = 5, filter="all", verbose=False):
    """Return the last ``n`` games a team played strictly before a cutoff date.

    Rows are selected with ``DATE < data`` and then ``tail(n)`` is taken, so
    ``df`` must already be sorted by ``DATE`` ascending for the result to be the
    most recent games.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``DATE``, ``team_home``, ``team_away`` and
        ``fl_home_win`` (plus ``PTS_home`` / ``PTS_away`` when ``verbose``).
    data : datetime-like
        Cutoff date; games on this date are excluded.
    team_name : str
        Team abbreviation to look for in ``team_home`` / ``team_away``.
    n : int, default 5
        Maximum number of games to return.
    filter : {"all", "home", "away"}, default "all"
        Keep every game of the team, only its home games, or only its away games.
    verbose : bool, default False
        When true, print the teams, scores and win flags of the selected games.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected rows with three extra 0/1 columns:
        ``WIN_HOME_TOTAL`` (team won at home), ``WIN_AWAY_TOTAL`` (team won
        away) and ``WIN_TOTAL`` (team won either way). ``df`` is not modified.

    Raises
    ------
    ValueError
        If ``filter`` is not one of "all", "home" or "away".
    """
    if filter not in ("all", "home", "away"):
        raise ValueError(
            "filter must be 'all', 'home' or 'away', got {!r}".format(filter)
        )

    before_cutoff = df["DATE"] < data
    if filter == "all":
        selected = before_cutoff & ((df["team_home"] == team_name) | (df["team_away"] == team_name))
    elif filter == "home":
        selected = before_cutoff & (df["team_home"] == team_name)
    else:
        selected = before_cutoff & (df["team_away"] == team_name)

    # Work on an explicit copy: the columns added below must not be written
    # onto a slice of the caller's frame (SettingWithCopyWarning).
    last_games = df[selected].tail(n).copy()

    won_at_home = (last_games["team_home"] == team_name) & (last_games["fl_home_win"] == 1)
    won_away = (last_games["team_away"] == team_name) & (last_games["fl_home_win"] == 0)

    last_games["WIN_HOME_TOTAL"] = np.where(won_at_home, 1, 0)
    last_games["WIN_AWAY_TOTAL"] = np.where(won_away, 1, 0)
    last_games["WIN_TOTAL"] = np.where(won_at_home | won_away, 1, 0)

    if verbose:
        print(last_games[["team_home", "team_away", "PTS_home", "PTS_away", "WIN_TOTAL", "WIN_AWAY_TOTAL", "WIN_HOME_TOTAL"]])

    return last_games

def get_avg_last_games(last_games, team_name, n = 5, rivals = False):
    """Sum a team's (or its opponents') stats over ``last_games`` and divide by ``n``.

    Relies on the module-level ``home_columns`` and ``away_columns`` lists to
    decide which columns are aggregated.

    Parameters
    ----------
    last_games : pandas.DataFrame
        Games to aggregate; must contain ``team_home``, ``team_away`` and every
        column named in ``home_columns`` / ``away_columns``.
    team_name : str
        Team whose own stats (``rivals`` false) or whose opponents' stats
        (``rivals`` true) are aggregated.
    n : int, default 5
        Divisor applied to the summed stats.
        NOTE(review): the sum is always divided by ``n``, even when
        ``last_games`` holds fewer than ``n`` rows - confirm that is intended.
    rivals : bool, default False
        False: use the rows where the team is home (home columns) and where it
        is away (away columns), with the ``_home`` / ``_away`` suffixes removed.
        True: use the other side's columns, with the suffix replaced by
        ``_opponent``.

    Returns
    -------
    pandas.DataFrame
        With ``rivals`` false, a frame indexed by the team name; with ``rivals``
        true, a single-row frame built from the summed Series.
    """
    if rivals == False:
        # The team's own numbers: home columns from its home games, away
        # columns from its away games, each summed per team.
        home_totals = (
            last_games.loc[last_games["team_home"] == team_name, home_columns]
            .groupby(["team_home"])
            .sum()
        )
        away_totals = (
            last_games.loc[last_games["team_away"] == team_name, away_columns]
            .groupby(["team_away"])
            .sum()
        )
        home_totals.columns = [name.replace("_home", "") for name in home_totals.columns]
        away_totals.columns = [name.replace("_away", "") for name in away_totals.columns]
    else:
        # The opponents' numbers: rows where the *other* team sits on that
        # side, summed into one Series per side.
        home_totals = (
            last_games.loc[last_games["team_home"] != team_name, home_columns]
            .drop("team_home", axis=1)
            .sum()
        )
        away_totals = (
            last_games.loc[last_games["team_away"] != team_name, away_columns]
            .drop("team_away", axis=1)
            .sum()
        )
        home_totals.index = [name.replace("_home", "_opponent") for name in home_totals.index]
        away_totals.index = [name.replace("_away", "_opponent") for name in away_totals.index]

    # When one side is empty, use the other side alone; otherwise add them.
    if len(home_totals) == 0:
        totals = away_totals
    elif len(away_totals) == 0:
        totals = home_totals
    else:
        totals = home_totals + away_totals

    averaged = totals / n
    if rivals:
        return averaged.to_frame().transpose()
    return averaged

In [101]:
# Sample for a manual check: every game where ATL is the home team.
teste = all_games.loc[all_games["team_home"].eq("ATL")]

In [102]:
# Manual check: the last 5 ATL games before the date of ATL's 11th home game.
# The cutoff is taken from all_games directly, not from `teste`, because this
# cell overwrites `teste` with a 5-row frame; reading `teste.iloc[10]` would
# raise IndexError the second time the cell is run.
atl_home_games = all_games[all_games["team_home"] == "ATL"]
teste = get_last_games(all_games, atl_home_games.iloc[10]["DATE"], "ATL", verbose=True).reset_index()

                     team_home team_away  PTS_home  PTS_away  WIN_TOTAL  \
GAME                                                                      
LAC @ ATL 2017-11-22       ATL       LAC       103       116          0   
NYK @ ATL 2017-11-24       ATL       NYK       116       104          1   
TOR @ ATL 2017-11-25       ATL       TOR        78       112          0   
CLE @ ATL 2017-11-30       ATL       CLE       114       121          0   
ATL @ BKN 2017-12-02       BKN       ATL       102       114          1   

                      WIN_AWAY_TOTAL  WIN_HOME_TOTAL  
GAME                                                  
LAC @ ATL 2017-11-22               0               0  
NYK @ ATL 2017-11-24               0               1  
TOR @ ATL 2017-11-25               0               0  
CLE @ ATL 2017-11-30               0               0  
ATL @ BKN 2017-12-02               1               0  


In [103]:
# ATL's own stats summed over the sampled games and divided by the default n.
get_avg_last_games(last_games=teste, team_name="ATL", rivals=False)

Unnamed: 0_level_0,TEAM_ID,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,...,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,DEFLECTIONS,LOOSE_BALLS_RECOVERED,CHARGES_DRAWN,SCREEN_ASSISTS,BOX_OUTS,WIN_HOME_TOTAL,WIN_AWAY_TOTAL,WIN_TOTAL
team_home,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ATL,1610613000.0,40.2,87.0,0.4596,10.2,29.0,0.3486,14.4,17.6,0.8068,...,31.4,27.4,14.8,9.2,1.4,11.8,33.4,0.2,0.2,0.4


In [87]:
# 1 when ATL won the game (home win while hosting, or home loss while visiting), else 0.
teste["ATL_win"] = np.where(
    (teste["team_home"].eq("ATL") & teste["fl_home_win"].eq(1))
    | (teste["team_away"].eq("ATL") & teste["fl_home_win"].eq(0)),
    1,
    0,
)