# Mine Player Information
We need additional player info for our expected goals model. Things like shooter/goalie handness and shooter position (forward/defense) might be useful to us. Let us get this data from the NHL API.

In [1]:
import requests
import json
import pandas as pd

##### Get list of team id's

In [2]:
# get all team data
r = requests.get(url='https://statsapi.web.nhl.com/api/v1/teams')
d = r.json()
teams = d['teams']

# extract team_ids
team_ids = []
for team in teams:
    team_ids.append(str(team['id']))
    
team_ids = sorted(team_ids)

##### Get list of all player id's between 2015-2021

In [3]:
def list_to_string(a_list):
    return ",".join(a_list)

In [4]:
player_ids = set()
seasons = ['20152016', '20162017', '20172018', '20182019', '20192020', '20202021', '20212022']
# seasons = ['20152016']

# there is a cap on how many teams we can pass to this api query string parameter
## to work around, make to idential api calls to get rosters for all teams in each season
for season in seasons:
    # get list of player ids that are on rosters
    r = requests.get(url=f'https://statsapi.web.nhl.com/api/v1/teams?teamId={list_to_string(team_ids)}\
                            &expand=team.roster&season={season}')
    d = r.json()
    
    # extract player_ids
    for team in d['teams']:
        for player in team['roster']['roster']:
            player_ids.add(str(player['person']['id']))

In [5]:
len(player_ids)

1786

##### Get player information for all players that were collected in player_ids list

In [6]:
def height_to_inches(h):
    return int(h.split("'")[0].strip()) * 12 + int(h.split("'")[1].strip().strip("\""))

In [7]:
attributes = []
for player_id in player_ids:
    try:
        r = requests.get(url=f'https://statsapi.web.nhl.com/api/v1/people/{player_id}')
        d = r.json()
        player_data = d['people'][0]

        attributes.append({
            'id' : player_data['id'],
            'fullName' : player_data['fullName'],
            'birthDate' : player_data['birthDate'],
            'nationality' : player_data['nationality'],
            'height' : height_to_inches(player_data['height']),
            'weight' : player_data['weight'],
            'shootsCatches' : player_data['shootsCatches'],
            'primaryPosition' : player_data['primaryPosition']['abbreviation'],
            'type' : player_data['primaryPosition']['type']
        })
    except:
        print(player_id)

In [8]:
len(attributes)

1786

##### Read in attributes as df

In [9]:
df = pd.DataFrame(attributes)
df.head(10)

Unnamed: 0,id,fullName,birthDate,nationality,height,weight,shootsCatches,primaryPosition,type
0,8476316,Laurent Brossoit,1993-03-23,CAN,75,215,L,G,Goalie
1,8480373,Josef Korenar,1998-01-31,CZE,73,185,L,G,Goalie
2,8476897,Oskar Sundqvist,1994-03-23,SWE,75,208,R,C,Forward
3,8477461,Remi Elie,1995-04-16,CAN,73,215,L,LW,Forward
4,8479516,Garrett Pilon,1998-04-13,CAN,72,191,R,C,Forward
5,8475842,Sam Carrick,1992-02-04,CAN,72,200,R,C,Forward
6,8479984,Cal Foote,1998-12-13,USA,76,227,R,D,Defenseman
7,8475718,Justin Holl,1992-01-30,USA,76,194,R,D,Defenseman
8,8476971,Kevin Roy,1993-05-20,CAN,69,172,L,C,Forward
9,8470828,Toby Enstrom,1984-11-05,SWE,70,180,L,D,Defenseman


In [10]:
df[df['fullName'] == 'Brady Tkachuk']

Unnamed: 0,id,fullName,birthDate,nationality,height,weight,shootsCatches,primaryPosition,type
412,8480801,Brady Tkachuk,1999-09-16,USA,76,211,L,LW,Forward


##### Save attributes as json

In [11]:
path = '../data/playerAttributes.json'
with open(path, 'w') as f:
    json.dump(attributes, f, indent=2)

In [12]:
# r = requests.get(url=f'https://statsapi.web.nhl.com/api/v1/teams?teamId={list_to_string(team_ids)}\
#                             &expand=team.roster&season={season}')
# d = r.json()

In [13]:
# '8480801' in player_ids