In [None]:
import requests
import time
import pickle
import pandas as pd
import numpy as np

In [None]:
def get_all_stats(season="2017-18", timeout=30):
    """
    Download the league-wide per-game player stats table from stats.nba.com.

    season: season string spliced into the query (e.g. "2017-18", the default).
    timeout: value passed to requests.get as `timeout`, so a stalled server
             raises an exception instead of blocking the notebook forever.

    Returns a tuple (player_list, attr_header): the 'rowSet' and 'headers'
    entries of the first result set in the JSON response.
    Raises requests' HTTPError (via raise_for_status) on an error status.
    """
    # The query string is kept verbatim; only the Season value is spliced in.
    url = ("https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season="
           + str(season)
           + "&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=")
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status() # Raise exception if invalid response
    result_set = response.json()['resultSets'][0]
    player_list = result_set['rowSet']
    attr_header = result_set['headers']

    return (player_list, attr_header)

def get_game_log(player_id, season="2017-18", timeout=30):
    """
    Get list of games for a given player (use NBA API player_id).

    player_id: NBA API player id; converted with str() and spliced into the query.
    season: season string spliced into the query (e.g. "2017-18", the default).
    timeout: value passed to requests.get as `timeout`, so a stalled server
             raises an exception instead of blocking the notebook forever.

    Returns a tuple (game_list, attr_header): the 'rowSet' and 'headers'
    entries of the first result set in the JSON response.
    Raises requests' HTTPError (via raise_for_status) on an error status.
    """
    # The query string is kept verbatim; only PlayerID and Season are spliced in.
    url = ("https://stats.nba.com/stats/leaguegamefinder?Conference=&DateFrom=&DateTo=&Division=&DraftNumber=&DraftRound=&DraftYear=&GB=N&LeagueID=00&Location=&Outcome=&PlayerID="
           + str(player_id)
           + "&PlayerOrTeam=P&Season="
           + str(season)
           + "&SeasonType=Regular+Season&StatCategory=PTS&TeamID=&VsConference=&VsDivision=&VsTeamID=")
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status() # Raise exception if invalid response
    result_set = response.json()['resultSets'][0]
    game_list = result_set['rowSet']
    attr_header = result_set['headers']

    return (game_list, attr_header)

In [None]:
# Fetch the league-wide player rows; the stat header returned alongside them is not needed here.
player_list, _ = get_all_stats()

In [None]:
def make_game_log(player_list, save_as=None, delay=.5):
    """
    Download the game log of every player in player_list.

    player_list: input list from the output of get_all_stats(); the first
                 element of each row is used as the player id.
    save_as: optional file path; when given, the resulting dictionary is
             pickled to that path.
    delay: seconds to sleep after each request (default .5, the pause that
           used to be hard-coded); pass 0 to disable the pause.

    Returns a dictionary mapping player id to list of game logs.
    The dictionary also contains the header, which describes the stats,
    under the key 'attr_header' (taken from the first player's response).
    """
    game_log = {}
    for player in player_list:
        player_id = player[0]
        game_list, attr_header = get_game_log(player_id)
        game_log[player_id] = game_list
        # Every response carries the header; keep only the first one seen.
        if 'attr_header' not in game_log:
            game_log['attr_header'] = attr_header
        if delay:
            time.sleep(delay)

    if save_as is not None:
        with open(save_as, 'wb') as handle:
            pickle.dump(game_log, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return game_log

In [None]:
# Download the game log of every player in player_list (one HTTP request per player,
# so this cell is slow); no save_as path is given, so the result is kept in memory only.
game_log = make_game_log(player_list)

In [None]:
def create_mapping(game_log):
    """
    Use game_log's 'attr_header' from make_game_log
    to create a mapping from description to index.

    Returns a dict {header name: position in game_log['attr_header']}.
    If a name occurs more than once, its last position is kept.
    """
    # enumerate replaces the xrange index loop, which fails on Python 3.
    return {name: index for index, name in enumerate(game_log['attr_header'])}

def add_mean(means, player_id, important_stats, logs=None, stat_index=None):
    """
    Compute one player's per-game average of every category in
    important_stats and store it in means[player_id] as a 1-D numpy array
    (one entry per category, in the order of important_stats).

    means: dictionary that is updated in place.
    player_id: key into the game logs and into means.
    important_stats: list of header names; the special name 'DD' is a derived
                     0/1 value that is 1 when at least two of PTS, REB, AST,
                     STL, BLK are >= 10 in that game.
    logs: dictionary mapping player id to a list of game rows; defaults to
          the module-level `game_log`.
    stat_index: dictionary mapping header name to column index in a game row;
                defaults to the module-level `mapping`.

    A player with no games gets an array of NaN.
    """
    if logs is None:
        logs = game_log
    if stat_index is None:
        stat_index = mapping

    dd_stats = ('PTS', 'REB', 'AST', 'STL', 'BLK')
    n_stats = len(important_stats)
    # Sized from the category list instead of a fixed 12.
    mean_sum = np.zeros(n_stats)
    count = 0
    for game_data in logs[player_id]:
        new_list = []
        for stats in important_stats:
            if stats == 'DD':
                dd_num = sum(1 for dd_stat in dd_stats
                             if game_data[stat_index[dd_stat]] >= 10)
                new_list.append(1. if dd_num >= 2 else 0.)
            else:
                new_list.append(float(game_data[stat_index[stats]]))
        mean_sum += np.array(new_list)
        count += 1

    if count == 0:
        # Same value 0/0 would produce, without the divide-by-zero warning.
        means[player_id] = np.full(n_stats, np.nan)
    else:
        means[player_id] = mean_sum / count
    
def create_means(player_list, important_stats, save_as=None):
    """
    Build the dictionary of per-player stat means.

    player_list: rows whose first element is the player id.
    important_stats: list of stat categories; stored under the key 'cats'
                     and forwarded to add_mean for every player.
    save_as: optional file path; when given, the dictionary is pickled there.

    Returns the dictionary filled in by add_mean (plus the 'cats' entry).
    """
    means = {'cats': important_stats}
    for row in player_list:
        add_mean(means, row[0], important_stats)

    if save_as is None:
        return means

    with open(save_as, 'wb') as out_file:
        pickle.dump(means, out_file, protocol=pickle.HIGHEST_PROTOCOL)
    return means

# Header name -> column index lookup for a game row. add_mean and add_cov read
# `mapping` and `game_log` as module-level names by default, so both must exist
# before create_means / create_covs are called.
mapping = create_mapping(game_log)
# Stat categories to summarise. 'DD' is not a column of the game log: it is a
# derived 0/1 value, 1 when at least two of PTS/REB/AST/STL/BLK are >= 10 in a game.
important_stats = ['PTS', 'FGM', 'FGA', 'FG3M', 'FTM', 'FTA', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'DD']
# Per-player mean of each category, keyed by player id (plus a 'cats' entry).
means = create_means(player_list, important_stats)

In [None]:
def add_cov(covs, means, player_id, important_stats, logs=None, stat_index=None):
    """
    Compute one player's covariance matrix of the categories in
    important_stats and store it in covs[player_id] as a square numpy array
    (rows/columns in the order of important_stats).

    covs: dictionary that is updated in place.
    means: dictionary holding the player's mean vector under means[player_id]
           (same category order); deviations are taken from it.
    player_id: key into the game logs, means and covs.
    important_stats: list of header names; the special name 'DD' is a derived
                     0/1 value that is 1 when at least two of PTS, REB, AST,
                     STL, BLK are >= 10 in that game.
    logs: dictionary mapping player id to a list of game rows; defaults to
          the module-level `game_log`.
    stat_index: dictionary mapping header name to column index in a game row;
                defaults to the module-level `mapping`.

    The summed outer products are divided by (games - 1). With one game the
    sum is stored undivided, and with no games the result is an all-zero matrix.
    """
    if logs is None:
        logs = game_log
    if stat_index is None:
        stat_index = mapping

    dd_stats = ('PTS', 'REB', 'AST', 'STL', 'BLK')
    n_stats = len(important_stats)
    # Sized from the category list instead of a fixed 12x12.
    mean_cov = np.zeros((n_stats, n_stats))
    count = 0
    for game_data in logs[player_id]:
        new_list = []
        for stats in important_stats:
            if stats == 'DD':
                dd_num = sum(1 for dd_stat in dd_stats
                             if game_data[stat_index[dd_stat]] >= 10)
                new_list.append(1. if dd_num >= 2 else 0.)
            else:
                new_list.append(float(game_data[stat_index[stats]]))
        # Deviation computed once and reused for both sides of the outer product.
        deviation = np.array(new_list) - means[player_id]
        mean_cov += np.outer(deviation, deviation)
        count += 1

    # Never divide by 0 or -1: zero or one game leaves the sum as it is.
    covs[player_id] = mean_cov / max(count - 1, 1)

def create_covs(means, player_list, important_stats, save_as=None):
    """
    Build the dictionary of per-player stat covariance matrices.

    means: dictionary of per-player means, forwarded to add_cov.
    player_list: rows whose first element is the player id.
    important_stats: list of stat categories; stored under the key 'cats'
                     and forwarded to add_cov for every player.
    save_as: optional file path; when given, the dictionary is pickled there.

    Returns the dictionary filled in by add_cov (plus the 'cats' entry).
    """
    covs = {'cats': important_stats}
    player_ids = (row[0] for row in player_list)
    for pid in player_ids:
        add_cov(covs, means, pid, important_stats)

    wants_file = save_as is not None
    if wants_file:
        with open(save_as, 'wb') as out_file:
            pickle.dump(covs, out_file, protocol=pickle.HIGHEST_PROTOCOL)
    return covs
        
# Per-player covariance matrix of the same stat categories, with deviations taken
# from the means computed above; keyed by player id (plus a 'cats' entry).
covs = create_covs(means, player_list, important_stats)