# Scrape All Teams

In [1]:
import json
import requests
import pprint
import pandas as pd

## Team Data
Scrape: download the list of teams from the NHL stats API.

In [2]:
# Download the list of teams from the NHL stats API.
url = 'https://statsapi.web.nhl.com/api/v1/teams'
# Bound the wait so a stalled connection cannot hang the notebook.
r = requests.get(url=url, timeout=30)
# Fail here on an HTTP error rather than later on a missing 'teams' key.
r.raise_for_status()
d = r.json()
# Preview the first team to see which fields the payload offers.
d['teams'][0]

{'id': 1,
 'name': 'New Jersey Devils',
 'link': '/api/v1/teams/1',
 'venue': {'name': 'Prudential Center',
  'link': '/api/v1/venues/null',
  'city': 'Newark',
  'timeZone': {'id': 'America/New_York', 'offset': -4, 'tz': 'EDT'}},
 'abbreviation': 'NJD',
 'teamName': 'Devils',
 'locationName': 'New Jersey',
 'firstYearOfPlay': '1982',
 'division': {'id': 18,
  'name': 'Metropolitan',
  'nameShort': 'Metro',
  'link': '/api/v1/divisions/18',
  'abbreviation': 'M'},
 'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
 'franchise': {'franchiseId': 23,
  'teamName': 'Devils',
  'link': '/api/v1/franchises/23'},
 'shortName': 'New Jersey',
 'officialSiteUrl': 'http://www.newjerseydevils.com/',
 'franchiseId': 23,
 'active': True}

Extract: keep only the fields we need from each team record.

In [3]:
def _team_record(team):
    """Flatten one raw team payload into the flat record kept for each team."""
    venue = team['venue']
    return {
        'teamId': team['id'],
        'name': team['name'],
        'locationName': team['locationName'],
        'teamName': team['teamName'],
        'abbreviation': team['abbreviation'],
        # Nested objects are reduced to their display name.
        'division': team['division']['name'],
        'conference': team['conference']['name'],
        'firstYearOfPlay': team['firstYearOfPlay'],
        'venue': venue['name'],
        'timezone': venue['timeZone']['tz'],
    }


# One flat record per team in the downloaded payload, in payload order.
teams = list(map(_team_record, d['teams']))

Save the team table to JSON.

In [4]:
# Tabulate the flattened team records, then write them out as a JSON array
# of row objects.
df_teams = pd.DataFrame(data=teams)
df_teams.to_json(
    path_or_buf='../../backend/data/teamsCurrent.json',
    orient='records',
)

## Roster Data
Scrape: download each team's roster, then the details of every player on it.

In [5]:
# list of teamIds
teamIds = df_teams['teamId'].tolist()
new_roster = []

# Seconds to wait on any single request before giving up on it.
REQUEST_TIMEOUT = 30

# One session reuses the connection across the many per-player requests.
with requests.Session() as session:
    for teamId in teamIds:
        # download roster; held in its own names so the team list stored in
        # `d` by the teams cell is not overwritten
        roster_resp = session.get(
            url=f'https://statsapi.web.nhl.com/api/v1/teams/{teamId}?expand=team.roster',
            timeout=REQUEST_TIMEOUT,
        )
        roster_resp.raise_for_status()
        roster = roster_resp.json()['teams'][0]['roster']['roster']

        # download info about each player in roster
        for entry in roster:
            # Reset so a failure never reports the previous player's id.
            pid = None
            try:
                pid = entry['person']['id']
                person_resp = session.get(
                    url=f'https://statsapi.web.nhl.com/api/v1/people/{pid}',
                    timeout=REQUEST_TIMEOUT,
                )
                person_resp.raise_for_status()
                player = person_resp.json()['people'][0]

                new_roster.append({
                    'playerId': pid,
                    'teamId': player['currentTeam']['id'],
                    'birthDate': player['birthDate'],
                    'firstName': player['firstName'],
                    'lastName': player['lastName'],
                    # some rookies don't have jersey numbers assigned; use 0
                    'primaryNumber': player.get('primaryNumber', 0),
                    'birthCity': player['birthCity'],
                    'birthCountry': player['birthCountry'],
                    'nationality': player['nationality'],
                    'height': player['height'],
                    'weight': player['weight'],
                    'shootsCatches': player['shootsCatches'],
                    'position': player['primaryPosition']['abbreviation'],
                })
            except (requests.RequestException, KeyError, IndexError, ValueError) as err:
                # Best effort: skip a player whose record is unavailable or
                # incomplete, but report the cause instead of hiding it.
                # Narrow exception types keep Ctrl-C able to stop the scrape.
                print(f'error on playerId {pid}: {err!r}')
        

In [6]:
# now read in roster and parse height
def parse_ht(ht):
    """Convert a feet-and-inches height string to total inches.

    Args:
        ht: Height written as feet, an apostrophe and a space, then inches
            with an optional trailing inch mark, e.g. ``7' 0.0"``.

    Returns:
        The height in inches as a float (12 inches per foot).
    """
    pieces = ht.split("' ")
    feet = float(pieces[0])
    # Drop the inch mark so the remainder parses as a number.
    inches = float(pieces[1].replace('"', ''))
    return feet * 12 + inches

# Tabulate the scraped players, converting each height string to inches
# with parse_ht, then preview the first rows.
df_roster = pd.DataFrame(new_roster).assign(
    height=lambda frame: frame["height"].apply(parse_ht)
)
df_roster.head()

Unnamed: 0,playerId,teamId,birthDate,firstName,lastName,primaryNumber,birthCity,birthCountry,nationality,height,weight,shootsCatches,position
0,8473541,1,1988-08-07,Jonathan,Bernier,45,Laval,CAN,CAN,72.0,185,L,G
1,8480054,1,1999-04-21,Reilly,Walsh,8,Framingham,USA,USA,72.0,185,R,D
2,8474056,1,1989-05-13,P.K.,Subban,76,Toronto,CAN,CAN,72.0,210,R,D
3,8475193,1,1990-12-01,Tomas,Tatar,90,Ilava,SVK,SVK,70.0,173,L,LW
4,8476462,1,1993-06-17,Dougie,Hamilton,7,Toronto,CAN,CAN,78.0,230,R,D


In [7]:
# Write the roster table out as a JSON array of row objects.
df_roster.to_json(
    path_or_buf='../../backend/data/rosterCurrent.json',
    orient='records',
)

## Headshot Images

In [8]:
# Download one 168x168 headshot per player into the frontend assets folder.
for playerId in df_roster['playerId']:
    link = f'https://cms.nhl.bamgrid.com/images/headshots/current/168x168/{playerId}.jpg'
    try:
        # Bound the wait so one stalled download cannot hang the loop.
        resp = requests.get(link, timeout=30)
        resp.raise_for_status()
    except requests.RequestException as err:
        # Skip this player: an error response body must not be saved as a
        # .jpg, and one missing image should not abort the remaining downloads.
        print(f'no headshot for playerId {playerId}: {err!r}')
        continue
    with open(f'../../frontend/src/assets/headshots/{playerId}.jpg', 'wb') as handler:
        handler.write(resp.content)