In [110]:
from itertools import combinations
import requests
import json
import datetime
import os

# Player id (as a string) -> "First Last" for every player with 1000+ games;
# this is the file written by write_1000_gamers().
# Context managers close the handles promptly, and the explicit encoding keeps
# the read independent of the platform's default text encoding.
with open('data/1000-game-players.json', encoding='utf-8') as f:
    players_with_1000 = json.load(f)

with open('data/teams.json', encoding='utf-8') as f:
    teams_json = json.load(f)

# Team id (as a string) -> team name. team_id_to_str() adds entries to this
# dict when it meets an id that is not listed in data/teams.json.
teams_dict = {str(team['id']): team['name'] for team in teams_json['teams']}

def write_record_books(timeout: float = 30.0) -> None:
    '''
    Download the NHL career-scoring record books and save each one as JSON.

    Every URL below is fetched and its JSON body is written to
    data/<endpoint>.json, where <endpoint> is the part of the URL between
    'api/' and the query string.

    Args:
        timeout: seconds to wait for each HTTP response before giving up.

    Raises:
        requests.HTTPError: if an endpoint answers with an error status.
    '''
    urls = [
        'https://records.nhl.com/site/api/skater-career-scoring-regular-plus-playoffs?cayenneExp=gamesPlayed',
        'https://records.nhl.com/site/api/skater-career-scoring-regular-season?cayenneExp=gamesPlayed'
    ]

    for url in urls:
        r = requests.get(url=url, timeout=timeout)
        # Fail loudly instead of saving an error payload as a record book.
        r.raise_for_status()
        record_book = r.json()
        fp = 'data/' + url.split('api/')[1].split('?')[0] + '.json'
        with open(fp, 'w') as f:
            json.dump(record_book, f, indent=2)

def write_1000_gamers() -> None:
    '''
    Extract the players with at least 1000 games into their own JSON file.

    Reads data/skater-career-scoring-regular-season.json and writes
    data/1000-game-players.json, a mapping of playerId to
    "firstName lastName" for every record whose gamesPlayed is >= 1000.
    '''
    with open('data/skater-career-scoring-regular-season.json', 'r') as source:
        record_book = json.load(source)

    veterans = {
        record['playerId']: record['firstName'] + ' ' + record['lastName']
        for record in record_book['data']
        if record['gamesPlayed'] >= 1000
    }

    with open('data/1000-game-players.json', 'w') as target:
        json.dump(veterans, target, indent=2)

def write_gamelog(timeout: float = 30.0) -> None:
    '''
    Write one CSV row per scheduled game to data/games.csv.

    The schedule endpoint is queried in consecutive two-calendar-year windows,
    starting at 1916-01-01 and continuing while the window start is earlier
    than the current time. Each row holds pk, date_str, season, gameType, link.

    ### schedule
    https://statsapi.web.nhl.com/api/v1/schedule?startDate=2020-01-01&endDate=2022-12-23

    Args:
        timeout: seconds to wait for each HTTP response before giving up.

    Raises:
        requests.HTTPError: if the schedule endpoint answers with an error status.
    '''
    start = datetime.datetime(1916, 1, 1)
    end = datetime.datetime.now()

    # The context manager guarantees the CSV is flushed and closed, even when a
    # request fails part-way through the crawl.
    with open('data/games.csv', 'w') as gamelog:
        gamelog.write('pk,date_str,season,gameType,link\n')
        while start < end:
            # Window covers Jan 1 of `start.year` through Dec 31 of the next year.
            this_end = datetime.datetime(start.year + 1, 12, 31)
            start_str = start.strftime("%Y-%m-%d")
            end_str = this_end.strftime("%Y-%m-%d")
            start = datetime.datetime(start.year + 2, 1, 1)

            url = f'https://statsapi.web.nhl.com/api/v1/schedule?startDate={start_str}&endDate={end_str}'
            print(url)
            r = requests.get(url=url, timeout=timeout)
            r.raise_for_status()
            for date in r.json()['dates']:
                date_str = date['date']
                for game in date['games']:
                    fields = [game['gamePk'], date_str, game['season'], game['gameType'], game['link']]
                    gamelog.write(','.join(str(elem) for elem in fields) + '\n')

def write_games(timeout: float = 30.0) -> None:
    '''
    Download the live feed of every game listed in data/games.csv.

    Each feed is saved as data/<season>_<type>/<pk>.json, where season, type
    and pk are read from the feed's own gameData.game section. Example feed:
    https://statsapi.web.nhl.com/api/v1/game/1990020374/feed/live?site=en_nhl

    Args:
        timeout: seconds to wait for each HTTP response before giving up.

    Raises:
        requests.HTTPError: if a feed request answers with an error status.
    '''
    # Read the whole log up front so the CSV handle is closed before the
    # downloads start.
    with open('data/games.csv', 'r') as gamelog:
        lines = gamelog.readlines()

    for line in lines:
        fields = line.rstrip('\n').split(',')
        if len(fields) < 5:
            # Blank or truncated row: there is no link column to follow.
            continue
        link = fields[4]
        if link == 'link':
            # Header row.
            continue

        url = 'https://statsapi.web.nhl.com' + link
        print(url)
        r = requests.get(url=url, timeout=timeout)
        r.raise_for_status()
        r_json = r.json()

        game = r_json['gameData']['game']
        season = str(game['season'])
        game_type = str(game['type'])
        pk = str(game['pk'])

        # exist_ok avoids the check-then-create race of a separate exists() test.
        out_dir = 'data/' + season + '_' + game_type
        os.makedirs(out_dir, exist_ok=True)

        out_path = out_dir + '/' + pk + '.json'
        with open(out_path, 'w') as out_file:
            # indent=True is kept as-is so the files on disk keep their format.
            json.dump(r_json, out_file, indent=True)

# write_record_books()
# write_1000_gamers()
# print(players)
# write_gamelog()
# write_games()

def key_to_team(k) -> str:
    '''Return the team name for the third '_'-separated field of a totals key.'''
    team_field = k.split('_')[2]
    return team_id_to_str(team_field)

def key_to_string(k) -> str:
    '''
    Format a "<player1>_<player2>_<team>" totals key for display.

    Returns "<player names> (<team name>)", with the names resolved by
    players_to_string() and the team resolved by team_id_to_str().
    '''
    fields = k.split('_')
    first_id, second_id, team_field = fields[0], fields[1], fields[2]
    pair_label = players_to_string(first_id, second_id)
    team_label = team_id_to_str(team_field)
    return f'{pair_label} ({team_label})'

def team_id_to_str(t_id, timeout: float = 30.0) -> str:
    '''
    Return the name of the team with id `t_id`.

    Names come from the module-level `teams_dict` cache. On a miss, the team
    is requested from the teams endpoint and every team in the response is
    added to the cache.

    Args:
        t_id: team id, as a string or an int. teams_dict is keyed by strings,
            so the id is normalised with str() before any lookup.
        timeout: seconds to wait for the HTTP response before giving up.

    Raises:
        requests.HTTPError: if the lookup request answers with an error status.
        KeyError: if the response does not contain the requested team.
    '''
    team_key = str(t_id)
    if team_key not in teams_dict:
        response = requests.get(
            url='https://statsapi.web.nhl.com/api/v1/teams/' + team_key,
            timeout=timeout,
        )
        response.raise_for_status()
        for team_json in response.json()['teams']:
            teams_dict[str(team_json['id'])] = team_json['name']
    return teams_dict[team_key]

def players_to_string(p1, p2) -> str:
    '''Look both ids up in players_with_1000 and return "<name1> - <name2>".'''
    first_name = players_with_1000[p1]
    second_name = players_with_1000[p2]
    return '{} - {}'.format(first_name, second_name)

def players_key_to_string(k) -> str:
    '''Format the first two '_'-separated ids of `k` via players_to_string().'''
    fields = k.split('_')
    return players_to_string(fields[0], fields[1])
    
def process_game(fp, totals, eligible_players=None):
    '''
    Add one game's teammate pairs to `totals`.

    For each side ('home', 'away') of the boxscore, the players listed under
    'goalies' and 'skaters' minus those under 'scratches' are filtered down to
    the eligible players. Every unordered pair of them increments
    totals['<lowId>_<highId>_<teamId>'] by one.

    Args:
        fp: path of a saved live-feed JSON file.
        totals: dict of pair key -> game count; updated in place.
        eligible_players: container of player ids as strings. Defaults to the
            module-level `players_with_1000` mapping.
    '''
    if eligible_players is None:
        eligible_players = players_with_1000

    # Close the handle straight away: this runs once per game file, so leaked
    # handles would otherwise pile up until garbage collection.
    with open(fp, 'r') as f:
        game = json.load(f)

    boxscore = game['liveData']['boxscore']
    for side in ['home', 'away']:
        team_data = boxscore['teams'][side]
        team_id = team_data['team']['id']
        players = set(team_data['goalies'] + team_data['skaters']) - set(team_data['scratches'])
        # Sorting once up front means every pair from combinations() is already
        # ordered, so the same two players always produce the same key.
        thousand_gamers = sorted(p for p in players if str(p) in eligible_players)
        for first, second in combinations(thousand_gamers, 2):
            key = f'{first}_{second}_{team_id}'
            totals[key] = totals.get(key, 0) + 1

In [111]:
reg_season_files = []
playoff_files = []

# write_games() stores each feed under "<season>_<gameType>/<pk>.json", so the
# game type is the suffix of the containing folder. Matching on that suffix
# (instead of searching the whole path for '_R' / '_P') keeps unrelated files
# out of the lists and puts every file in at most one list. Folders ending in
# '_PR' match neither suffix, so they stay excluded.
for root, dirs, files in os.walk('.', topdown=False):
    folder = os.path.basename(root)
    if folder.endswith('_R'):
        bucket = reg_season_files
    elif folder.endswith('_P'):
        bucket = playoff_files
    else:
        continue
    for name in files:
        if name.endswith('.json'):
            bucket.append(os.path.join(root, name))

playoff_totals = dict()
for file in playoff_files:
    process_game(file, playoff_totals)

In [None]:
# Playoff pairs ordered by games played together, highest first.
sorted_totals = sorted(
    playoff_totals.items(),
    key=lambda item: item[1],
    reverse=True,
)

# Only print the pair/team combinations with more than 100 games.
for k, v in sorted_totals:
    if v > 100:
        print(f'{key_to_string(k)} {v}')

In [23]:
# Number of collected game files: playoffs first, then regular season.
for collected in (playoff_files, reg_season_files):
    print(len(collected))

4754
60006


In [29]:
# Accumulate pair counts over every regular-season game file.
reg_season_totals = {}
for game_file in reg_season_files:
    process_game(game_file, reg_season_totals)

In [30]:
# Regular-season pairs ordered by games played together, highest first.
sorted_totals = sorted(
    reg_season_totals.items(),
    key=lambda pair_count: pair_count[1],
    reverse=True,
)

# Only print the pair/team combinations with more than 100 games.
for k, v in sorted_totals:
    if v <= 100:
        continue
    print(key_to_string(k) + ' ' + str(v))

Alex Delvecchio - Gordie Howe (Detroit Red Wings) 1353
Daniel Sedin - Henrik Sedin (Vancouver Canucks) 1276
Dustin Brown - Anze Kopitar (Los Angeles Kings) 1158
Kris Draper - Nicklas Lidstrom (Detroit Red Wings) 1107
Duncan Keith - Brent Seabrook (Chicago Blackhawks) 1069
Bob Gainey - Larry Robinson (Montréal Canadiens) 1058
George Armstrong - Tim Horton (Toronto Maple Leafs) 1026
Alex Ovechkin - Nicklas Backstrom (Washington Capitals) 1023
Anze Kopitar - Drew Doughty (Los Angeles Kings) 995
Nicklas Lidstrom - Tomas Holmstrom (Detroit Red Wings) 992
Duncan Keith - Patrick Kane (Chicago Blackhawks) 986
Joe Thornton - Marc-Edouard Vlasic (San Jose Sharks) 980
Dustin Brown - Drew Doughty (Los Angeles Kings) 980
Gilbert Perreault - Craig Ramsay (Buffalo Sabres) 977
Jonathan Toews - Patrick Kane (Chicago Blackhawks) 971
Gordie Howe - Marcel Pronovost (Detroit Red Wings) 964
Joe Thornton - Patrick Marleau (San Jose Sharks) 957
Eric Nesterenko - Bobby Hull (Chicago Blackhawks) 945
Daniel Alfr

In [54]:
# Regroup the per-team totals by player pair:
#   master_totals['<p1>_<p2>'][game_type] -> list of (totals key, count) entries
master_totals = dict()
totals_by_game_type = (
    ('regular_season', reg_season_totals),
    ('playoffs', playoff_totals),
)

for game_type, dict_to_process in totals_by_game_type:
    for entry in dict_to_process.items():
        key_fields = entry[0].split('_')
        # Drop the team field so the same pair is grouped across teams.
        players_key = f'{key_fields[0]}_{key_fields[1]}'
        per_type = master_totals.setdefault(players_key, dict())
        per_type.setdefault(game_type, []).append(entry)



In [122]:
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

def _team_breakdown(entries):
    '''Return (total games, one "<team name> <games>" line per entry) for a list of (key, games).'''
    total = 0
    lines = []
    for key, games in entries:
        total += games
        lines.append(key_to_team(key) + ' ' + str(games))
    return total, '\n'.join(lines)


# Column order of the summary table.
# NOTE(review): 'games (plaoffs)' looks like a typo for "playoffs"; it is kept
# unchanged so the exported column names stay the same.
_SUMMARY_COLUMNS = [
    'players',
    'games(total)',
    'games (reg. szn)',
    'info (reg. szn)',
    'games (plaoffs)',
    'info (playoffs)',
]

# Collect plain dicts and build the frame once: DataFrame.append was removed in
# pandas 2.0, and appending row by row copies the whole frame on every step.
records = []
for players_key, players_val in master_totals.items():
    rs_total, rs_string = _team_breakdown(players_val.get('regular_season', []))
    playoff_total, playoff_str = _team_breakdown(players_val.get('playoffs', []))
    records.append({
        'players': players_key_to_string(players_key),
        'games(total)': rs_total + playoff_total,
        'games (reg. szn)': rs_total,
        'info (reg. szn)': rs_string,
        'games (plaoffs)': playoff_total,
        'info (playoffs)': playoff_str,
    })

# Passing the columns explicitly keeps the header even when there are no rows.
df = pd.DataFrame(records, columns=_SUMMARY_COLUMNS)
    

In [123]:
from IPython.display import display, HTML

def pretty_print(df):
    '''
    Display `df` as an HTML table.

    Every backslash-n character pair in the generated HTML is replaced with a
    <br> tag before the markup is handed to display().
    '''
    markup = df.to_html()
    markup = markup.replace("\\n", "<br>")
    return display(HTML(markup))
# Show the whole summary table inline.
pretty_print(df)

In [126]:
# Export the summary table to output.xlsx (path is relative to the working directory).
df.to_excel('output.xlsx')