In [109]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import geopy.distance
import gc
import warnings
warnings.filterwarnings("ignore")

%load_ext autoreload
%autoreload 2

import data_prep_functions

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
import pandas as pd
import numpy as np
import os
import time
from datetime import datetime
import json
import requests
import pickle

NBA_STATS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
}

def get_date_place(game_id, year = "2017"):
    #0021700784
    try:
        resp = requests.get(url="https://data.nba.com/data/10s/v2015/json/mobile_teams/nba/" + str(year) + "/scores/gamedetail/"+ str(game_id) +"_gamedetail.json",
                            headers=NBA_STATS_HEADERS)
        data = resp.json()["g"]["gdte"]
        place = resp.json()["g"]["an"]
    except Exception as e:
        print(e)
        data = np.nan
        place = np.nan
    
    return(data, place)

def get_df_nba_json(resp_json, date_place = False, rs=0):
    dict_resp = resp_json['resultSets'][rs]
    df_resp = pd.DataFrame(dict_resp["rowSet"])
    df_resp.columns = dict_resp["headers"]
    
    if(date_place):
        game_date, game_place = get_date_place(df_resp.GAME_ID.iloc[0])

        df_resp["GAME_DATE"] = np.repeat(game_date, len(df_resp))
        df_resp["GAME_PLACE"] = np.repeat(game_place, len(df_resp))

        teams = df_resp.TEAM_ABBREVIATION.unique()

        df_resp["GAME"] = np.repeat(teams[0] + " @ " + teams[1] + " " + game_date, len(df_resp))
    
    return(df_resp)

def junta_df_tipos(df_tipos, cols_drop = ['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 
                                          'PLAYER_ID', 'PLAYER_NAME', 'START_POSITION', 
                                          'COMMENT', 'MIN', 'MINUTES'],
                              lista_sites = ["traditional", "advanced", "scoring", 
                                                "misc", "usage", "fourfactors", "playertrack", 
                                                "hustle", "defensive"]):
    resp = df_tipos[0]
    for i in range(1, len(df_tipos)):
        junta = df_tipos[i]
        colunas_repetidas = list(set(junta.columns).intersection(resp.columns))
        
        junta = junta.drop(cols_drop, axis=1, errors="ignore")
        junta.columns = [str(col) + '_' + lista_sites[i]
                         if col in colunas_repetidas else str(col) 
                         for col in junta.columns]
        
        resp = resp.merge(junta, how="left", 
                          left_index=True, right_index=True)
    return(resp)

In [5]:
datetime.strptime(str(get_date_place("0021800270", year = "2018")[0]), '%Y-%m-%d')

datetime.datetime(2018, 11, 23, 0, 0)

In [50]:
def get_list_gameids(max_date = datetime.today() - timedelta(1), year = '2018', start_at=1):
    game_ids = []
    for i in range(start_at, 1231):
        gameid = "002" + year[-2:] + "0" + ('{0:0>4}'.format(i))
        
        game_date = get_date_place(gameid, year = "2018")[0]
        print((gameid, game_date), end="\r")
        if(datetime.strptime(game_date, '%Y-%m-%d') > max_date):
            break
        
        game_ids.append(gameid)
    return(game_ids)

In [8]:
games_ids = get_list_gameids_until() 

('0021800063', '2018-10-25')

In [22]:
def get_nba_stats_data(game_ids, lista_sites = ["traditional", "advanced", "scoring", 
                                                "misc", "usage", "fourfactors", "playertrack", 
                                                "hustle", "defensive"]):    
    params = {
        'EndPeriod':10,
        'EndRange':28800,
        'GameID':'',
        'RangeType':0,
        'Season':2018-19,
        'SeasonType':'Regular+Season',
        'StartPeriod':1,
        'StartRange':0
    }
    
    df_full = []
    df_full_jogo = []
    erros = []
    
    year = "20" + games_ids[0][3:5]

    for game_id in game_ids:

        df_tipos = []
        df_tipos_jogo = []

        try:
            game_date, game_place = get_date_place(game_id, year)
    
            for site in lista_sites:
                if(site == "hustle"):
                    url = "https://stats.nba.com/stats/hustlestatsboxscore"
                    rs = 1

                elif(site == "defensive"):
                    url = "https://stats.nba.com/stats/boxscore" + site
                    rs = -1

                else:
                    url = "https://stats.nba.com/stats/boxscore" + site + "v2"
                    rs = 0

                print(game_id + " " + str(game_date) + " - " + site + "              ", end="\r")
                #print(game_id + " " + str(game_date) + " - " + site)
                
                params["GameID"] = game_id

                resp = requests.get(url=url, params=params, headers=NBA_STATS_HEADERS)
                #time.sleep(0.5)

                if(rs == 0):
                    df_tipos.append(get_df_nba_json(resp.json(), rs=0))
                    df_tipos_jogo.append(get_df_nba_json(resp.json(), rs=1))
                elif(rs == 1):
                    df_tipos.append(get_df_nba_json(resp.json(), rs=1))
                    df_tipos_jogo.append(get_df_nba_json(resp.json(), rs=2))
                elif(rs == -1):
                    df_tipos.append(get_df_nba_json(resp.json(), rs=0))

            df_resp = junta_df_tipos(df_tipos, lista_sites=lista_sites)
            df_resp_jogo = junta_df_tipos(df_tipos_jogo, lista_sites=lista_sites)

            df_resp["GAME_DATE"] = np.repeat(game_date, len(df_resp))
            df_resp["GAME_PLACE"] = np.repeat(game_place, len(df_resp))

            df_resp_jogo["GAME_DATE"] = np.repeat(game_date, len(df_resp_jogo))
            df_resp_jogo["GAME_PLACE"] = np.repeat(game_place, len(df_resp_jogo))

            teams = df_resp.TEAM_ABBREVIATION.unique()
            game_str = teams[0] + " @ " + teams[1] + " " + game_date

            df_resp["GAME"] = np.repeat(game_str, len(df_resp))
            df_resp_jogo["GAME"] = np.repeat(game_str, len(df_resp_jogo))

            df_full.append(df_resp.set_index("GAME"))
            df_full_jogo.append(df_resp_jogo.set_index("GAME"))

            #pickle.dump(df_full, open("df_full.p", "wb"))
            #pickle.dump(df_full_jogo, open("df_full_jogo.p", "wb"))
        except Exception as e:
            erros.append(game_id)
            print(e)
            time.sleep(3)
    
    return(df_full, df_full_jogo, erros)

In [None]:
df_full, df_full_jogo, erros = get_nba_stats_data(games_ids)

In [36]:
base_nba_2018 = pd.concat(df_full_jogo).reset_index()

In [37]:
base_nba_2018.to_csv("base_nba_2018_raw.csv")

In [38]:
all_games = data_prep_functions.prepara_base(base_nba_2018)

In [41]:
data_prep_functions.cria_features(all_games)

In [43]:
resp = data_prep_functions.gera_last_N_games(all_games, N=[5])

WAS @ GSW 2018-10-24 5

In [46]:
delta_L5_2018 = data_prep_functions.variaveis_delta(resp)

5 CFGM                                          

In [62]:
delta_L5_2018.to_csv("delta_cross_L5_nba_18_19.csv")

### Fluxo Predição no Dia

In [53]:
new_games = get_list_gameids(max_date = datetime.today(), year = '2018', start_at=len(all_games))

('0021800067', '2018-10-26')

In [54]:
df_full, df_full_jogo, erros = get_nba_stats_data(new_games)

2018
Length mismatch: Expected axis has 0 elements, new values have 28 elements
Length mismatch: Expected axis has 0 elements, new values have 28 elements


In [96]:
new_games = pd.concat(df_full_jogo).reset_index()

In [97]:
new_games = data_prep_functions.prepara_base(new_games).reset_index()

In [108]:
new_games

Unnamed: 0,GAME,GAME_ID_home,TEAM_ID_home,TEAM_NAME_home,team_home,TEAM_CITY_home,FGM_home,FGA_home,FG_PCT_home,FG3M_home,...,DATE,SEASON,fl_playoff,fl_home_win,team_home_game_num,team_away_game_num,DISTANCE_KM_home,DISTANCE_KM_away,DAYS_FROM_LAST_GAME_home,DAYS_FROM_LAST_GAME_away
0,WAS @ GSW 2018-10-24,21800062,1610612744,Warriors,GSW,Golden State,53.0,92.0,0.576,15.0,...,2018-10-24,2019,0,1,1,1,0.0,3912.001793,2,2
1,CLE @ DET 2018-10-25,21800063,1610612765,Pistons,DET,Detroit,,,,,...,2018-10-25,2019,0,0,1,1,0.0,185.392825,2,1
2,POR @ ORL 2018-10-25,21800064,1610612753,Magic,ORL,Orlando,,,,,...,2018-10-25,2019,0,0,1,1,1794.410551,4069.325464,3,3


In [107]:
new_games = data_prep_functions.cria_features(new_games, all_games)

In [110]:
to_predict = data_prep_functions.gera_last_N_games(new_games, all_games)

POR @ ORL 2018-10-25 5

In [111]:
to_predict = data_prep_functions.variaveis_delta(to_predict)

5 CFGM                                          

In [112]:
to_predict[['D2_PFD_L5', 'D2_NET_RATING_L5', 'D2_EFG_PCT_L5', 'C1_TO_L5']]

Unnamed: 0,D2_PFD_L5,D2_NET_RATING_L5,D2_EFG_PCT_L5,C1_TO_L5
WAS @ GSW 2018-10-24,4.0,5.26,0.1168,4.8
CLE @ DET 2018-10-25,-0.6,4.88,0.0034,-4.8
POR @ ORL 2018-10-25,9.0,-5.36,0.1734,1.2
