In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import geopy.distance
import gc
import warnings
warnings.filterwarnings("ignore")

import data_prep_functions

In [57]:
import pandas as pd
import numpy as np
import os
import time
from datetime import datetime
import json
import requests
import pickle

# HTTP headers sent with every request issued by the functions in this notebook.
# NOTE(review): the browser-like User-Agent is presumably needed because the NBA
# endpoints reject the default `requests` agent — confirm before changing it.
NBA_STATS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
}

def get_date_place(game_id, year = "2017"):
    """Fetch the date and place of one game from the data.nba.com game-detail feed.

    Parameters
    ----------
    game_id : str or int
        Game identifier inserted into the URL, e.g. "0021700784".
    year : str or int, default "2017"
        Season segment of the URL.

    Returns
    -------
    tuple
        ``(date, place)`` taken from the ``["g"]["gdte"]`` and ``["g"]["an"]``
        entries of the JSON payload. Both values are ``np.nan`` when the
        request, the JSON decoding or the key lookup fails; the exception is
        printed instead of being raised.
    """
    try:
        url = ("https://data.nba.com/data/10s/v2015/json/mobile_teams/nba/"
               + str(year) + "/scores/gamedetail/" + str(game_id) + "_gamedetail.json")
        # Without a timeout a stalled connection would block the notebook forever;
        # a timeout surfaces as an exception and falls into the NaN path below.
        resp = requests.get(url=url, headers=NBA_STATS_HEADERS, timeout=30)
        # Decode the body once and read both fields from the same object.
        game = resp.json()["g"]
        data = game["gdte"]
        place = game["an"]
    except Exception as e:
        print(e)
        data = np.nan
        place = np.nan

    return(data, place)

def get_df_nba_json(resp_json, date_place = False, rs=0):
    """Turn one result set of a stats.nba.com JSON response into a DataFrame.

    Parameters
    ----------
    resp_json : dict
        Decoded response; must contain ``resultSets``, a sequence of dicts
        with ``headers`` (column names) and ``rowSet`` (rows).
    date_place : bool, default False
        When true, look up the game's date and place with ``get_date_place``
        (one extra HTTP request) and add ``GAME_DATE``, ``GAME_PLACE`` and
        ``GAME`` columns. Requires ``GAME_ID`` and ``TEAM_ABBREVIATION``
        columns and at least one row.
    rs : int, default 0
        Index of the result set to convert.

    Returns
    -------
    pandas.DataFrame
        One row per ``rowSet`` entry, columns named after ``headers``. An
        empty ``rowSet`` yields an empty frame that still has the columns.
    """
    dict_resp = resp_json['resultSets'][rs]
    # Passing the headers to the constructor (instead of assigning `.columns`
    # afterwards) keeps an empty rowSet from raising a length-mismatch error.
    df_resp = pd.DataFrame(dict_resp["rowSet"], columns=dict_resp["headers"])

    if(date_place):
        game_id = str(df_resp.GAME_ID.iloc[0])
        # Characters 3-4 of the game id carry the two-digit season, the same
        # convention the other functions in this notebook use to build the year.
        # Without it the lookup would always use get_date_place's default season.
        game_date, game_place = get_date_place(game_id, year="20" + game_id[3:5])

        df_resp["GAME_DATE"] = np.repeat(game_date, len(df_resp))
        df_resp["GAME_PLACE"] = np.repeat(game_place, len(df_resp))

        teams = df_resp.TEAM_ABBREVIATION.unique()

        df_resp["GAME"] = np.repeat(teams[0] + " @ " + teams[1] + " " + game_date, len(df_resp))

    return(df_resp)

def junta_df_tipos(df_tipos, cols_drop = ['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 
                                          'PLAYER_ID', 'PLAYER_NAME', 'START_POSITION', 
                                          'COMMENT', 'MIN', 'MINUTES'],
                   lista_sites = None):
    """Merge a list of DataFrames side by side on their index.

    The first frame is kept as is. From every following frame the columns in
    ``cols_drop`` are removed, and any remaining column whose name already
    exists in the accumulated result gets a ``_<suffix>`` appended before the
    frame is left-merged on the index.

    Parameters
    ----------
    df_tipos : list of pandas.DataFrame
        Frames to combine; must contain at least one element.
    cols_drop : list of str
        Columns removed from every frame except the first (missing ones are
        ignored).
    lista_sites : sequence of str, optional
        Suffix source: frame ``i`` uses ``lista_sites[i]``. When omitted, a
        module-level ``lista_sites`` is used if one exists (the name this
        function originally read implicitly). When no name is available for
        position ``i``, the position itself is used as suffix.

    Returns
    -------
    pandas.DataFrame
        The merged frame.
    """
    if lista_sites is None:
        # Backward compatibility: the function used to depend on a global of
        # this name and raised NameError when it was not defined.
        lista_sites = globals().get("lista_sites")

    resp = df_tipos[0]
    for i in range(1, len(df_tipos)):
        junta = df_tipos[i]
        # Computed before dropping so it reflects the frame as received.
        colunas_repetidas = set(junta.columns).intersection(resp.columns)

        if lista_sites is not None and i < len(lista_sites):
            sufixo = str(lista_sites[i])
        else:
            sufixo = str(i)

        junta = junta.drop(cols_drop, axis=1, errors="ignore")
        junta.columns = [str(col) + '_' + sufixo
                         if col in colunas_repetidas else str(col)
                         for col in junta.columns]

        resp = resp.merge(junta, how="left",
                          left_index=True, right_index=True)
    return(resp)

In [19]:
# Sanity check: fetch the date of one 2018 game (live HTTP request) and parse it as YYYY-MM-DD.
datetime.strptime(str(get_date_place("0021800270", year = "2018")[0]), '%Y-%m-%d')

datetime.datetime(2018, 11, 23, 0, 0)

In [45]:
# NOTE: get_list_gameids_until is defined in a later cell (In [44]); on a fresh
# kernel that cell has to run before this one.
games_ids = get_list_gameids_until() 

('0021800049', '2018-10-23')

In [44]:
def get_list_gameids_until(date_ref = None, year = '2018', max_games = 1230):
    """List the game ids of a season whose date is not after ``date_ref``.

    Ids are generated as ``"002" + <last two digits of year> + "0" + <4-digit
    counter>`` for counters 1..``max_games`` and each one is looked up with
    ``get_date_place`` (one HTTP request per id). Iteration stops at the first
    game dated after ``date_ref``.

    Parameters
    ----------
    date_ref : datetime, optional
        Cut-off date. Defaults to "yesterday", computed when the function is
        called (a default evaluated at definition time would go stale in a
        long-lived kernel).
    year : str, default '2018'
        Season used both to build the ids and to query the feed.
    max_games : int, default 1230
        Highest counter tried.

    Returns
    -------
    list of str
        Game ids, in counter order, whose date is on or before ``date_ref``.
        Ids whose date could not be retrieved are skipped.
    """
    if date_ref is None:
        date_ref = datetime.today() - timedelta(1)

    game_ids = []
    for i in range(1, max_games + 1):
        gameid = "002" + year[-2:] + "0" + ('{0:0>4}'.format(i))

        # Query the season that was requested, not a hard-coded one.
        game_date = get_date_place(gameid, year = year)[0]
        print((gameid, game_date), end="\r")

        # get_date_place returns NaN on failure; strptime would raise on it.
        if not isinstance(game_date, str):
            continue

        if(datetime.strptime(game_date, '%Y-%m-%d') > date_ref):
            break

        game_ids.append(gameid)
    return(game_ids)

In [None]:
# NOTE: get_nba_stats_data is defined in a later cell (In [58]); on a fresh
# kernel that cell has to run before this one. Issues several HTTP requests per game id.
df_full, df_full_jogo, erros = get_nba_stats_data(games_ids)

In [58]:
def get_nba_stats_data(game_ids, lista_sites = ["traditional", "advanced", "scoring", 
                                                "misc", "usage", "fourfactors", "playertrack", 
                                                "hustle", "defensive"]):    
    """Download several stats.nba.com box-score variants for each game and merge them.

    For every game the date and place are fetched with ``get_date_place``,
    then one request per entry of ``lista_sites`` is sent. Two families of
    frames are built from the responses with ``get_df_nba_json`` and combined
    with ``junta_df_tipos``:

    * "hustle": result set 1 goes to the first family, result set 2 to the second;
    * "defensive": result set 0 goes to the first family only;
    * anything else: result set 0 goes to the first family, result set 1 to the second.

    NOTE(review): the first family presumably holds player rows and the
    second team rows — confirm against the API payloads.

    Parameters
    ----------
    game_ids : iterable of str
        Game ids; characters 3-4 of each id are read as the two-digit season.
    lista_sites : list of str
        Box-score variants to request.

    Returns
    -------
    tuple of (list, list, list)
        ``(df_full, df_full_jogo, erros)``: one merged frame per successful
        game for each family, both indexed by a ``GAME`` label
        ("<team0> @ <team1> <date>"), and the ids of the games that raised.
        Failures are printed and followed by a 3 second pause.
    """
    df_full = []
    df_full_jogo = []
    erros = []

    for game_id in game_ids:

        df_tipos = []
        df_tipos_jogo = []

        try:
            # Derived from the parameter (the original read a global named
            # `games_ids`) and per game, so an empty list is handled too.
            year = "20" + game_id[3:5]
            game_date, game_place = get_date_place(game_id, year)

            for site in lista_sites:
                # rs_jogo is None when the endpoint contributes no second-family frame.
                if(site == "hustle"):
                    url = "https://stats.nba.com/stats/hustlestatsboxscore"
                    rs_tipo, rs_jogo = 1, 2

                elif(site == "defensive"):
                    url = "https://stats.nba.com/stats/boxscore" + site
                    rs_tipo, rs_jogo = 0, None

                else:
                    url = "https://stats.nba.com/stats/boxscore" + site + "v2"
                    rs_tipo, rs_jogo = 0, 1

                print(game_id + " " + str(game_date) + " - " + site + "              ", end="\r")

                resp = requests.get(url=url, params={"GameID": game_id},
                                    headers=NBA_STATS_HEADERS)

                # Decode the body once and reuse it for both result sets.
                resp_json = resp.json()

                df_tipos.append(get_df_nba_json(resp_json, rs=rs_tipo))
                if rs_jogo is not None:
                    df_tipos_jogo.append(get_df_nba_json(resp_json, rs=rs_jogo))

            df_resp = junta_df_tipos(df_tipos)
            df_resp_jogo = junta_df_tipos(df_tipos_jogo)

            df_resp["GAME_DATE"] = np.repeat(game_date, len(df_resp))
            df_resp["GAME_PLACE"] = np.repeat(game_place, len(df_resp))

            df_resp_jogo["GAME_DATE"] = np.repeat(game_date, len(df_resp_jogo))
            df_resp_jogo["GAME_PLACE"] = np.repeat(game_place, len(df_resp_jogo))

            teams = df_resp.TEAM_ABBREVIATION.unique()
            game_str = teams[0] + " @ " + teams[1] + " " + game_date

            df_resp["GAME"] = np.repeat(game_str, len(df_resp))
            df_resp_jogo["GAME"] = np.repeat(game_str, len(df_resp_jogo))

            df_full.append(df_resp.set_index("GAME"))
            df_full_jogo.append(df_resp_jogo.set_index("GAME"))

        except Exception as e:
            # Best effort: remember the failing game and keep going.
            erros.append(game_id)
            print(e)
            time.sleep(3)

    return(df_full, df_full_jogo, erros)

In [52]:
# Load the per-game base from a CSV located relative to the notebook's working directory.
base_nba_per_game = pd.read_csv("../bases_nba_stats/base_nba_per_game_15_16.csv")

In [53]:
# NOTE(review): prepara_base is not defined or imported by name in the visible cells
# (only `import data_prep_functions`); presumably it lives in that module — confirm.
all_games = prepara_base(base_nba_per_game)

In [54]:
# NOTE(review): the return value is discarded, so cria_features presumably modifies
# all_games in place — confirm. The function is not imported by name in the visible cells.
cria_features(all_games)

In [55]:
# NOTE(review): gera_last_N_games is not imported by name in the visible cells;
# N is passed as a one-element list, [5].
resp = gera_last_N_games(all_games, N=[5])

DEN @ POR 2016-04-13 5

In [56]:
# NOTE(review): variaveis_delta is not imported by name in the visible cells — confirm its origin.
delta_L5 = variaveis_delta(resp)

5 N_GAMES_AWAY                                  

In [58]:
# Persist the result as a semicolon-separated CSV next to the input base.
delta_L5.to_csv("../bases_nba_stats/delta_cross_L5_nba_15_16.csv", sep=";")