# Mine Player Information
We need additional player info for our expected goals model. Things like shooter/goalie handness and shooter position (forward/defense) might be useful to us. Let us get this data from the NHL API.

In [116]:
import requests
import json
import pandas as pd

##### Get list of team id's

In [63]:
# get all team data
r = requests.get(url='https://statsapi.web.nhl.com/api/v1/teams')
d = r.json()
teams = d['teams']

# extract team_ids
team_ids = []
for team in teams:
    team_ids.append(str(team['franchiseId']))
    
team_ids = sorted(team_ids)

##### Get list of all player id's between 2015-2021

In [64]:
def list_to_string(a_list):
    return ",".join(a_list)

In [111]:
player_ids = set()
seasons = ['20152016', '20162017', '20172018', '20182019', '20192020', '20202021', '20212022']
# seasons = ['20152016']

for season in seasons:
    # get all players on this roster
    r = requests.get(url=f'https://statsapi.web.nhl.com/api/v1/teams?teamId={list_to_string(team_ids)}\
                            &expand=team.roster&season={season}')
    d = r.json()
    
    # extract player_ids
    for team in d['teams']:
        for player in team['roster']['roster']:
            player_ids.add(str(player['person']['id']))

In [112]:
len(player_ids)

1441

##### Get player information for all players that were collected in player_ids list

In [113]:
def height_to_inches(h):
    return int(h.split("'")[0].strip()) * 12 + int(h.split("'")[1].strip().strip("\""))

In [114]:
attributes = []
for player_id in player_ids:
    try:
        r = requests.get(url=f'https://statsapi.web.nhl.com/api/v1/people/{player_id}')
        d = r.json()
        player_data = d['people'][0]

        attributes.append({
            'id' : player_data['id'],
            'fullName' : player_data['fullName'],
            'birthDate' : player_data['birthDate'],
            'nationality' : player_data['nationality'],
            'height' : height_to_inches(player_data['height']),
            'weight' : player_data['weight'],
            'shootsCatches' : player_data['shootsCatches'],
            'primaryPosition' : player_data['primaryPosition']['abbreviation'],
            'type' : player_data['primaryPosition']['type']
        })
    except:
        print(player_id)

In [115]:
len(attributes)

1441

##### Read in attributes as df

In [120]:
df = pd.DataFrame(attributes)
df.head(10)

Unnamed: 0,id,fullName,birthDate,nationality,height,weight,shootsCatches,primaryPosition,type
0,8474685,Matt Calvert,1989-12-24,CAN,71,186,L,LW,Forward
1,8476414,Alexandre Grenier,1991-09-05,CAN,77,200,R,RW,Forward
2,8479314,Matthew Tkachuk,1997-12-11,USA,74,202,L,LW,Forward
3,8478431,Evgeny Svechnikov,1996-10-31,RUS,75,208,L,LW,Forward
4,8473465,Jamie McGinn,1988-08-05,CAN,73,205,L,LW,Forward
5,8477149,Scott Sabourin,1992-07-30,CAN,76,207,R,RW,Forward
6,8475791,Taylor Hall,1991-11-14,CAN,73,207,L,LW,Forward
7,8477831,Pheonix Copley,1992-01-18,USA,76,200,L,G,Goalie
8,8477369,Carson Soucy,1994-07-27,CAN,77,211,L,D,Defenseman
9,8477498,Darnell Nurse,1995-02-04,CAN,76,221,L,D,Defenseman


##### Save attributes as json

In [121]:
path = '../data/playerAttributes.json'
with open(path, 'w') as f:
    json.dump(attributes, f, indent=2)