### Imports

In [None]:
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from datetime import datetime
import pandas as pd
import numpy as np
import requests
import random
import json

### Data Gathering Script

In [None]:
### MULTIPLIERS ###
# Scoring weights read by calculate_fantasy_points: every stat total is
# multiplied by its weight and the products are summed. A weight of 0 leaves
# the stat out of the score. The trailing comment names the stat key each
# weight is applied to.

# --- Skaters ---
g_multiplier = 3     # goals
a_multiplier = 2     # assists
pts_multiplier = 0   # points
pm_multiplier = 0    # plusMinus
pim_multiplier = 0   # penaltyMinutes
ppg_multiplier = 0   # ppGoals
ppa_multiplier = 0   # ppAssists
ppp_multiplier = 1   # ppPoints
shg_multiplier = 0   # shGoals
sha_multiplier = 0   # shAssists
shp_multiplier = 2   # shPoints
gwg_multiplier = 0   # gameWinningGoals
fow_multiplier = 0   # totalFaceoffWins
fol_multiplier = 0   # totalFaceoffLosses
shft_multiplier = 0  # shifts
sog_multiplier = 0   # shots
hit_multiplier = 0   # hits
blk_multiplier = 0   # blockedShots

# --- Defencemen only ---
defp_multiplier = 0  # additional weight on points when tag == 'defenceman'

# --- Goalies ---
gs_multiplier = 0    # gamesStarted
w_multiplier = 5     # wins
l_multiplier = 0     # losses
sa_multiplier = 0    # shotsAgainst
ga_multiplier = 0    # goalsAgainst
sv_multiplier = 0    # saves
so_multiplier = 3    # shutouts
otl_multiplier = 2   # otLosses

### CONSTANT STRINGS ###
# Report names substituted into the first '{}' of base_skater_url.
skater_summary = 'summary'
skater_fo_percentage = 'faceoffpercentages'
skater_fo_wl = 'faceoffwins'
skater_gfga = 'goalsForAgainst'
skater_misc = 'realtime'
skater_penalties = 'penalties'
skater_pk = 'penaltykill'
skater_pp = 'powerplay'
skater_puck_possession = 'puckPossessions'
skater_sat_count = 'summaryshooting'
skater_sat_percentages = 'percentages'
skater_scoring_per_60 = 'scoringRates'
skater_scoring_per_game = 'scoringpergame'
skater_toi = 'timeonice'

# Report names substituted into the first '{}' of base_goalie_url.
goalie_summary = 'summary'
goalie_advanced = 'advanced'
goalie_saves_by_strength = 'savesByStrength'

# Position tags: [position code used in the query, plural label used in progress output].
left_wing_tag = ['L', 'Left Wings']
right_wing_tag = ['R', 'Right Wings']
center_tag = ['C', 'Centers']
defenceman_tag = ['D', 'Defencemen']
goalie_tag = ['G', 'Goalies']

# URL templates. Placeholders in order:
#   skater: report name, start offset, position code, seasonId upper bound, seasonId lower bound
#   goalie: report name, start offset, seasonId upper bound, seasonId lower bound
base_skater_url = 'https://api.nhle.com/stats/rest/en/skater/{}?isAggregate=false&isGame=false&start={}&limit=100&factCayenneExp=gamesPlayed%3E=25&cayenneExp=gameTypeId=2%20and%20positionCode%3D%22{}%22%20and%20seasonId%3C={}%20and%20seasonId%3E={}'
base_goalie_url = 'https://api.nhle.com/stats/rest/en/goalie/{}?isAggregate=false&isGame=false&&start={}&limit=100&factCayenneExp=gamesPlayed%3E=15&cayenneExp=gameTypeId=2%20and%20seasonId%3C={}%20and%20seasonId%3E={}'

# Reports are fetched in list order and merged into one dict per player, so
# when two reports share a field the later report's value is the one kept.
skater_report_list = [
    skater_toi,
    skater_scoring_per_game,
    skater_scoring_per_60,
    skater_sat_percentages,
    skater_sat_count,
    skater_puck_possession,
    skater_pp,
    skater_pk,
    skater_penalties,
    skater_misc,
    skater_gfga,
    skater_fo_wl,
    skater_fo_percentage,
    skater_summary,
]
goalie_report_list = [
    goalie_saves_by_strength,
    goalie_advanced,
    goalie_summary,
]

skater_scalable_data = ['assists', 'assists5v5', 'blockedShots', 'defensiveZoneFaceoffLosses', 'defensiveZoneFaceoffWins', 'defensiveZoneFaceoffs',
                		'emptyNetAssists', 'emptyNetGoals', 'emptyNetPoints', 'evFaceoffs', 'evFaceoffsLost', 'evFaceoffsWon', 'evGoals', 'evPoints',
                		'evTimeOnIce', 'evenStrengthGoalDifference', 'evenStrengthGoalsAgainst', 'evenStrengthGoalsFor', 'firstGoals', 'gameMisconductPenalties',
                		'gameWinningGoals', 'giveaways', 'goals', 'goals5v5', 'hits', 'majorPenalties', 'matchPenalties', 'minorPenalties',
                		'misconductPenalties', 'missedShotCrossbar', 'missedShotGoalpost', 'missedShotOverNet', 'missedShotWideOfNet', 'missedShots',
                		'netPenalties', 'neutralZoneFaceoffLosses', 'neutralZoneFaceoffWins', 'neutralZoneFaceoffs', 'offensiveZoneFaceoffLosses',
                		'offensiveZoneFaceoffWins', 'offensiveZoneFaceoffs', 'otGoals', 'otTimeOnIce', 'penalties', 'penaltiesDrawn', 'penaltyMinutes',
               			'plusMinus', 'points', 'points5v5', 'powerPlayGoalFor', 'powerPlayGoalsAgainst', 'ppAssists', 'ppFaceoffs', 'ppFaceoffsLost',
                		'ppFaceoffsWon', 'ppGoals', 'ppIndividualSatFor', 'ppPoints', 'ppPrimaryAssists', 'ppSecondaryAssists', 'ppShots', 'ppTimeOnIce',
                		'primaryAssists5v5', 'satAgainst', 'satAhead', 'satBehind', 'satClose', 'satFor', 'satRelative', 'satRelative5v5', 'satTied',
                		'satTotal', 'secondaryAssists5v5', 'shAssists', 'shFaceoffs', 'shFaceoffsLost', 'shFaceoffsWon', 'shGoals', 'shIndividualSatFor',
                		'shPoints', 'shPrimaryAssists', 'shSecondaryAssists', 'shShots', 'shTimeOnIce', 'shifts', 'shortHandedGoalsAgainst', 'shortHandedGoalsFor',
                		'shots', 'takeaways', 'timeOnIce', 'totalFaceoffLosses', 'totalFaceoffWins', 'totalFaceoffs', 'totalPrimaryAssists', 'totalSecondaryAssists',
               			'usatAgainst', 'usatAhead', 'usatBehind', 'usatClose', 'usatFor', 'usatRelative', 'usatTied', 'usatTotal', 'gamesPlayed']
# Goalie stats that extrapolate_data multiplies by a games ratio. Only counting
# stats belong here: 'shSavePct' is a percentage, so scaling it by season
# length would distort it, and it is deliberately not listed.
goalie_scalable_data = ['assists', 'completeGames', 'evGoalsAgainst', 'evSaves', 'evShotsAgainst', 'gamesStarted', 'goals', 'goalsAgainst', 'goalsFor',
                        'incompleteGames', 'losses', 'otLosses', 'penaltyMinutes', 'points', 'ppGoalsAgainst', 'ppSaves', 'ppShotsAgainst', 'qualityStart',
                        'regulationLosses', 'regulationWins', 'saves', 'shGoalsAgainst', 'shSaves', 'shShotsAgainst', 'shotsAgainst', 'shutouts',
                        'timeOnIce', 'wins', 'gamesPlayed']

def api_main(base_url, tag, report_list, seasonId):
    """Download one season of records for one position group.

    Prints a progress header built from tag[1], delegates the querying to
    api_helper, and returns a fresh {'data': [...], 'total': int} record
    holding what api_helper produced.
    """
    print('Processing ' + tag[1] + ':')
    fetched = api_helper(base_url, tag, report_list, seasonId)

    # Start from an empty record and fold the fetched batch into it.
    final_records = {'data': [], 'total': 0}
    final_records['data'] = final_records['data'] + fetched.get('data')
    final_records['total'] = final_records['total'] + fetched.get('total')

    print()

    return final_records

def api_helper(base_url, tag, report_list, year_bound):
    """Fetch every report in report_list for one season and merge them per player.

    Args:
        base_url: URL template (base_skater_url or base_goalie_url).
        tag: Position tag; tag[0] is the position code placed in the skater
            URL. Passing goalie_tag selects the goalie URL layout, which has
            no position placeholder.
        report_list: Report names to query, in order. When two reports share a
            field, the later report's value is kept.
        year_bound: seasonId used for both the upper and lower season bound.

    Returns:
        dict with 'total' (row count reported by the API) and 'data' (a list
        with one merged stat dict per player, in the order players first
        appear, with a few bookkeeping fields removed).

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    is_goalie = tag == goalie_tag

    def fetch_page(report, start):
        # The goalie template takes one argument fewer than the skater one.
        if is_goalie:
            url = base_url.format(report, start, year_bound, year_bound)
        else:
            url = base_url.format(report, start, tag[0], year_bound, year_bound)
        response = requests.get(url)
        response.raise_for_status()
        return response.json()

    # A probe request tells us how many rows there are to page through.
    total_length = int(fetch_page(report_list[0], 0).get('total'))

    # Merge by playerId rather than by list position: the reports are fetched
    # independently, so nothing guarantees they return rows in the same order.
    merged = {}
    for report in tqdm(report_list, desc=f'Batch Querying for seasonId {year_bound}'):
        # Pages are 100 rows wide (limit=100 in the URL templates).
        for start in range(0, total_length, 100):
            for row in fetch_page(report, start).get('data') or []:
                merged.setdefault(row['playerId'], {}).update(row)

    records = {'total': total_length, 'data': list(merged.values())}

    # Drop fields that are not wanted downstream; tolerate their absence.
    first_dropped = 'ties' if is_goalie else 'positionCode'
    for player in records['data']:
        for field in (first_dropped, 'shootsCatches', 'teamAbbrevs'):
            player.pop(field, None)

    return records

def extrapolate_data(players, tag):
    """Scale each player's scalable stats towards a full season, in place.

    Two passes are applied:

    1. Shortened season: when no player reached the expected number of games,
       every scalable stat is multiplied by expected_games / max(gamesPlayed).
    2. Missed games (skaters only, method described in the README): each
       scalable stat ``a`` becomes
       ``0.4 * a + 0.6 * (expected_games * a / gamesPlayed)``.

    Args:
        players: list of stat dicts; mutated in place. An empty list is a no-op.
        tag: 'goalie' selects goalie_scalable_data and a 68-game season; any
            other value selects skater_scalable_data and an 82-game season.

    Returns:
        None.
    """
    is_goalie = tag == 'goalie'
    scalable_data = set(goalie_scalable_data if is_goalie else skater_scalable_data)
    expected_games = 68 if is_goalie else 82

    if not players:
        return

    # Extrapolates Shortened Seasons
    max_gp = max(player.get('gamesPlayed') or 0 for player in players)
    if 0 < max_gp < expected_games:
        scale_multiplier = expected_games / max_gp
        for player in players:
            for key in scalable_data.intersection(player):
                val = player[key]
                # The API can leave a stat empty; there is nothing to scale then.
                if val is not None:
                    player[key] = val * scale_multiplier

    if is_goalie:
        return

    # Extrapolates for Injuries Using Method Described in README
    for player in players:
        # Captured up front because 'gamesPlayed' is itself rewritten below.
        starting_gp = player.get('gamesPlayed')
        if not starting_gp:
            continue
        for key in scalable_data.intersection(player):
            a = player[key]
            if a:
                player[key] = (0.4 * a) + (0.6 * ((expected_games * a) / starting_gp))

def remove_players(current_year_data, next_year_data):
    """Keep only the players that appear in both seasons.

    Args:
        current_year_data: list of player dicts, each with a 'playerId'.
        next_year_data: list of player dicts, each with a 'playerId'.

    Returns:
        (current, next): two new lists holding the records whose playerId is
        present in both inputs, each in its original order. The inputs are
        not modified.
    """
    # A set keeps the membership tests below O(1) per player.
    matching_ids = ({player['playerId'] for player in current_year_data}
                    & {player['playerId'] for player in next_year_data})

    matching_current_year_players = [
        player for player in current_year_data if player['playerId'] in matching_ids
    ]
    matching_next_year_players = [
        player for player in next_year_data if player['playerId'] in matching_ids
    ]

    return matching_current_year_players, matching_next_year_players

def calculate_fantasy_points(curr_year_players, next_year_players, tag):
    """Attach each player's next-season fantasy score to their current-season record.

    For every player in next_year_players a weighted sum of their stats is
    computed with the module-level *_multiplier values (read when this
    function is called, so reassigning them changes the result). The sum is
    stored under 'fantasy_value' on the record in curr_year_players that has
    the same playerId.

    Args:
        curr_year_players: current-season stat dicts; mutated in place.
        next_year_players: next-season stat dicts supplying the scored stats.
        tag: 'goalie' uses the goalie weights; anything else uses the skater
            weights, and 'defenceman' additionally weights 'points' by
            defp_multiplier.

    Returns:
        curr_year_players (the same list object).
    """
    if tag == 'goalie':
        weights = [('gamesStarted', gs_multiplier), ('wins', w_multiplier), ('losses', l_multiplier), ('shotsAgainst', sa_multiplier),
                   ('goalsAgainst', ga_multiplier), ('saves', sv_multiplier), ('shutouts', so_multiplier), ('otLosses', otl_multiplier)]
    else:
        weights = [('goals', g_multiplier), ('assists', a_multiplier), ('points', pts_multiplier), ('plusMinus', pm_multiplier),
                   ('penaltyMinutes', pim_multiplier), ('ppGoals', ppg_multiplier), ('ppAssists', ppa_multiplier), ('ppPoints', ppp_multiplier),
                   ('shGoals', shg_multiplier), ('shAssists', sha_multiplier), ('shPoints', shp_multiplier), ('gameWinningGoals', gwg_multiplier),
                   ('totalFaceoffWins', fow_multiplier), ('totalFaceoffLosses', fol_multiplier), ('shifts', shft_multiplier), ('shots', sog_multiplier),
                   ('hits', hit_multiplier), ('blockedShots', blk_multiplier)]

    if tag == 'defenceman':
        weights.append(('points', defp_multiplier))

    # Index current-season records once instead of scanning the list for every
    # player. setdefault keeps the first record when a playerId repeats.
    curr_by_id = {}
    for record in curr_year_players:
        curr_by_id.setdefault(record.get('playerId'), record)

    for player in next_year_players:
        target = curr_by_id.get(player.get('playerId'))
        if target is None:
            # No current-season record to attach the score to.
            continue
        # A missing or empty stat contributes nothing to the total.
        target['fantasy_value'] = sum((player.get(stat) or 0) * weight for stat, weight in weights)

    return curr_year_players

### Gather Center Data

In [None]:
def gather_center_data(current_season='20172018', next_season='20182019'):
    """Download and prepare center records for two consecutive seasons.

    Args:
        current_season: seasonId whose stats are used as features.
        next_season: seasonId whose stats are used to score the players.

    Returns:
        (center_records, center_records_next): extrapolated stat dicts,
        restricted to the players present in both seasons.
    """
    # Download data from the API the NHL uses for nhl.com/stats
    center_records = api_main(base_skater_url, center_tag, skater_report_list, current_season).get('data')
    center_records_next = api_main(base_skater_url, center_tag, skater_report_list, next_season).get('data')

    # Extrapolate Data (mutates the records in place)
    extrapolate_data(center_records, 'center')
    extrapolate_data(center_records_next, 'center')

    # Remove players that are missing from either season
    center_records, center_records_next = remove_players(center_records, center_records_next)

    return center_records, center_records_next

### Gather Wing Data

In [None]:
def gather_wing_data(current_season='20172018', next_season='20182019'):
    """Download and prepare wing records (left and right combined) for two seasons.

    Args:
        current_season: seasonId whose stats are used as features.
        next_season: seasonId whose stats are used to score the players.

    Returns:
        (wing_records, wing_records_next): extrapolated stat dicts, left wings
        followed by right wings, restricted to players present in both seasons.
    """
    # Download data from the API the NHL uses for nhl.com/stats
    left_wing_records = api_main(base_skater_url, left_wing_tag, skater_report_list, current_season)
    right_wing_records = api_main(base_skater_url, right_wing_tag, skater_report_list, current_season)
    left_wing_records_next = api_main(base_skater_url, left_wing_tag, skater_report_list, next_season)
    right_wing_records_next = api_main(base_skater_url, right_wing_tag, skater_report_list, next_season)

    # Join Wing Lists Together (only the player rows are needed downstream)
    wing_records = left_wing_records.get('data') + right_wing_records.get('data')
    wing_records_next = left_wing_records_next.get('data') + right_wing_records_next.get('data')

    # Extrapolate Data (mutates the records in place)
    extrapolate_data(wing_records, 'wing')
    extrapolate_data(wing_records_next, 'wing')

    # Remove players that are missing from either season
    wing_records, wing_records_next = remove_players(wing_records, wing_records_next)

    return wing_records, wing_records_next

### Gather Defenceman Data

In [None]:
def gather_defenceman_data(current_season='20172018', next_season='20182019'):
    """Download and prepare defenceman records for two consecutive seasons.

    Args:
        current_season: seasonId whose stats are used as features.
        next_season: seasonId whose stats are used to score the players.

    Returns:
        (defenceman_records, defenceman_records_next): extrapolated stat
        dicts, restricted to the players present in both seasons.
    """
    # Download data from the API the NHL uses for nhl.com/stats
    defenceman_records = api_main(base_skater_url, defenceman_tag, skater_report_list, current_season).get('data')
    defenceman_records_next = api_main(base_skater_url, defenceman_tag, skater_report_list, next_season).get('data')

    # Extrapolate Data (mutates the records in place)
    extrapolate_data(defenceman_records, 'defenceman')
    extrapolate_data(defenceman_records_next, 'defenceman')

    # Remove players that are missing from either season
    defenceman_records, defenceman_records_next = remove_players(defenceman_records, defenceman_records_next)

    return defenceman_records, defenceman_records_next

### Gather Goalie Data

In [None]:
def gather_goalie_data(current_season='20172018', next_season='20182019'):
    """Download, prepare and score goalie records for two consecutive seasons.

    Unlike the skater gather functions, this one also calls
    calculate_fantasy_points, so the returned current-season records already
    carry 'fantasy_value'.

    Args:
        current_season: seasonId whose stats are used as features.
        next_season: seasonId whose stats are used to score the players.

    Returns:
        (goalie_records, goalie_records_next): extrapolated stat dicts,
        restricted to the goalies present in both seasons.
    """
    # Download data from the API the NHL uses for nhl.com/stats
    goalie_records = api_main(base_goalie_url, goalie_tag, goalie_report_list, current_season).get('data')
    goalie_records_next = api_main(base_goalie_url, goalie_tag, goalie_report_list, next_season).get('data')

    # Extrapolate Data (mutates the records in place)
    extrapolate_data(goalie_records, 'goalie')
    extrapolate_data(goalie_records_next, 'goalie')

    # Remove players that are missing from either season
    goalie_records, goalie_records_next = remove_players(goalie_records, goalie_records_next)

    # Calculate Fantasy Points
    goalie_records = calculate_fantasy_points(goalie_records, goalie_records_next, 'goalie')

    return goalie_records, goalie_records_next

### Calculation Methods

In [None]:
def calculate_r2(df, query_text):
    """Return the squared Pearson correlation, rounded to 4 decimals, between
    the column named by query_text and the 'fantasy_value' column of df."""
    predictor = df[query_text].tolist()
    target = df['fantasy_value'].tolist()

    # Off-diagonal entry of the 2x2 correlation matrix is r(predictor, target).
    pearson_r = np.corrcoef(predictor, target)[0, 1]

    return np.round(pearson_r ** 2, 4)

In [None]:
def merge_df(df, r2_df, stat_list, columns):
    """Append one column of R2 values to r2_df.

    An R2 value is computed via calculate_r2 for every stat in stat_list
    against df, placed in a frame indexed by stat_list and labelled with
    columns, and concatenated column-wise onto r2_df. Returns the widened frame.
    """
    scores = [calculate_r2(df, stat) for stat in stat_list]
    new_column = pd.DataFrame(scores, index=stat_list, columns=columns)

    return pd.concat([r2_df, new_column], axis=1)

### Center Data Exploration

In [None]:
# Download Data
# gather_center_data fetches both seasons, extrapolates the stats and keeps
# only the centers present in both; this performs live API requests.
center_records, center_records_next = gather_center_data()

In [None]:
# Score every center with the multipliers currently in effect; the result is
# stored as 'fantasy_value' on the current-season records.
center_records_with_points = calculate_fantasy_points(center_records, center_records_next, 'center')

# Tabulate the records, then discard the identifier columns that carry no
# predictive information.
center_df = pd.DataFrame(center_records_with_points)
center_df = center_df.drop(columns=['lastName', 'playerId', 'seasonId'])

# Stat List to Iterate Through
center_stat_list = ['assists', 'assists5v5', 'assistsPer605v5', 'assistsPerGame', 'blockedShots', 'blockedShotsPer60', 'blocksPerGame',
                    'defensiveZoneFaceoffLosses', 'defensiveZoneFaceoffPct', 'defensiveZoneFaceoffWins', 'defensiveZoneFaceoffs', 'emptyNetAssists',
                    'emptyNetGoals', 'emptyNetPoints', 'evFaceoffPct', 'evFaceoffs', 'evFaceoffsLost', 'evFaceoffsWon', 'evGoals', 'evPoints',
                    'evTimeOnIce', 'evTimeOnIcePerGame', 'evenStrengthGoalDifference', 'evenStrengthGoalsAgainst', 'evenStrengthGoalsFor',
                    'evenStrengthGoalsForPct', 'evenStrengthTimeOnIcePerGame', 'faceoffPct5v5', 'faceoffWinPct', 'firstGoals', 'gameMisconductPenalties',
                    'gameWinningGoals', 'gamesPlayed', 'giveaways', 'giveawaysPer60', 'goals', 'goals5v5', 'goalsPct', 'goalsPer605v5', 'goalsPerGame',
                    'hits', 'hitsPer60', 'hitsPerGame', 'individualSatForPer60', 'individualShotsForPer60', 'majorPenalties', 'matchPenalties',
                    'minorPenalties', 'misconductPenalties', 'missedShotCrossbar', 'missedShotGoalpost', 'missedShotOverNet', 'missedShotWideOfNet',
                    'missedShots', 'netMinorPenaltiesPer60', 'netPenalties', 'netPenaltiesPer60', 'neutralZoneFaceoffLosses', 'neutralZoneFaceoffPct',
                    'neutralZoneFaceoffWins', 'neutralZoneFaceoffs', 'offensiveZoneFaceoffLosses', 'offensiveZoneFaceoffPct', 'offensiveZoneFaceoffWins',
                    'offensiveZoneFaceoffs', 'offensiveZoneStartPct5v5', 'onIceShootingPct', 'onIceShootingPct5v5', 'otGoals', 'otTimeOnIce',
                    'otTimeOnIcePerOtGame', 'penalties', 'penaltiesDrawn', 'penaltiesDrawnPer60', 'penaltiesTakenPer60', 'penaltyMinutes',
                    'penaltyMinutesPerGame', 'penaltyMinutesPerTimeOnIce', 'penaltySecondsPerGame', 'plusMinus', 'points', 'points5v5', 'pointsPer605v5',
                    'pointsPerGame', 'powerPlayGoalFor', 'powerPlayGoalsAgainst', 'powerPlayTimeOnIcePerGame', 'ppAssists', 'ppFaceoffPct', 'ppFaceoffs',
                    'ppFaceoffsLost', 'ppFaceoffsWon', 'ppGoals', 'ppGoalsAgainstPer60', 'ppGoalsForPer60', 'ppGoalsPer60', 'ppIndividualSatFor',
                    'ppIndividualSatForPer60', 'ppPoints', 'ppPointsPer60', 'ppPrimaryAssists', 'ppPrimaryAssistsPer60', 'ppSecondaryAssists',
                    'ppSecondaryAssistsPer60', 'ppShootingPct', 'ppShots', 'ppShotsPer60', 'ppTimeOnIce', 'ppTimeOnIcePctPerGame', 'ppTimeOnIcePerGame',
                    'primaryAssists5v5', 'primaryAssistsPer605v5', 'primaryAssistsPerGame', 'satAgainst', 'satAhead', 'satBehind', 'satClose', 'satFor',
                    'satPct', 'satPercentage', 'satPercentageAhead', 'satPercentageBehind', 'satPercentageClose', 'satPercentageTied', 'satRelative',
                    'satRelative5v5', 'satTied', 'satTotal', 'secondaryAssists5v5', 'secondaryAssistsPer605v5', 'secondaryAssistsPerGame', 'shAssists',
                    'shFaceoffPct', 'shFaceoffs', 'shFaceoffsLost', 'shFaceoffsWon', 'shGoals', 'shGoalsPer60', 'shIndividualSatFor',
                    'shIndividualSatForPer60', 'shPoints', 'shPointsPer60', 'shPrimaryAssists', 'shPrimaryAssistsPer60', 'shSecondaryAssists',
                    'shSecondaryAssistsPer60', 'shShootingPct', 'shShots', 'shShotsPer60', 'shTimeOnIce', 'shTimeOnIcePctPerGame', 'shTimeOnIcePerGame',
                    'shifts', 'shiftsPerGame', 'shootingPct', 'shootingPct5v5', 'shortHandedGoalsAgainst', 'shortHandedGoalsFor', 'shortHandedTimeOnIcePerGame',
                    'shots', 'shotsPerGame', 'skaterSavePct5v5', 'skaterShootingPlusSavePct5v5', 'takeaways', 'takeawaysPer60', 'timeOnIce', 'timeOnIcePerGame',
                    'timeOnIcePerGame5v5', 'timeOnIcePerShift', 'totalFaceoffLosses', 'totalFaceoffWins', 'totalFaceoffs', 'totalPrimaryAssists',
                    'totalSecondaryAssists', 'usatAgainst', 'usatAhead', 'usatBehind', 'usatClose', 'usatFor', 'usatPct', 'usatPercentage',
                    'usatPercentageAhead', 'usatPercentageBehind', 'usatPercentageTied', 'usatPrecentageClose', 'usatRelative', 'usatTied', 'usatTotal',
                    'zoneStartPct', 'zoneStartPct5v5']

def _r2_column_label():
    """Build the R2 column label from the skater multipliers currently in effect."""
    return (f'R2 Value (g={g_multiplier},a={a_multiplier},pts={pts_multiplier},pm={pm_multiplier},pim={pim_multiplier},ppg={ppg_multiplier},'
            f'ppa={ppa_multiplier},ppp={ppp_multiplier},shg={shg_multiplier},sha={sha_multiplier},shp={shp_multiplier},gwg={gwg_multiplier},'
            f'fow={fow_multiplier},fol={fol_multiplier},shft={shft_multiplier},sog={sog_multiplier},hit={hit_multiplier},blk={blk_multiplier})')


# Define Columns for DataFrame
columns = [_r2_column_label()]

# Create Initial Data Frame Using Realistic Multipliers
r2_tuples = [calculate_r2(center_df, stat) for stat in center_stat_list]
center_r2_df = pd.DataFrame(r2_tuples, index=center_stat_list, columns=columns)

# Add Columns Using Random Multipliers
# calculate_fantasy_points reads the module-level multipliers, so the sweep has
# to reassign them. Remember the realistic values and put them back afterwards;
# otherwise every later cell would run with the last random draw.
skater_multiplier_names = ['g_multiplier', 'a_multiplier', 'pts_multiplier', 'pm_multiplier', 'pim_multiplier', 'ppg_multiplier',
                           'ppa_multiplier', 'ppp_multiplier', 'shg_multiplier', 'sha_multiplier', 'shp_multiplier', 'gwg_multiplier',
                           'fow_multiplier', 'fol_multiplier', 'shft_multiplier', 'sog_multiplier', 'hit_multiplier', 'blk_multiplier']
realistic_multipliers = {name: globals()[name] for name in skater_multiplier_names}

try:
    for _ in range(199):
        for name in skater_multiplier_names:
            globals()[name] = random.randint(-100, 100)

        # Calculate Fantasy Points
        center_records_with_points = calculate_fantasy_points(center_records, center_records_next, 'center')

        # Create DataFrame
        center_df = pd.DataFrame(center_records_with_points)

        # Remove Useless Data
        center_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

        # Merge DataFrames Together, labelling the column with the multipliers
        # that actually produced it.
        center_r2_df = merge_df(center_df, center_r2_df, center_stat_list, [_r2_column_label()])
finally:
    globals().update(realistic_multipliers)

# Leave the records and frame scored with the realistic multipliers again.
center_records_with_points = calculate_fantasy_points(center_records, center_records_next, 'center')
center_df = pd.DataFrame(center_records_with_points)
center_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

In [None]:
# Stats that feed the fantasy score directly are always kept.
required_center_stats_for_multipliers = [
    'goals', 'assists', 'points', 'plusMinus', 'penaltyMinutes', 'ppGoals',
    'ppAssists', 'ppPoints', 'shGoals', 'shAssists', 'shPoints', 'gameWinningGoals',
    'totalFaceoffWins', 'totalFaceoffLosses', 'shifts', 'shots', 'hits', 'blockedShots',
]

# Summarise each stat's R2 values across all multiplier sets (one row per stat).
center_comp_stats_df = center_r2_df.apply(pd.Series.describe, axis=1)

# Keep a stat when its mean R2 is at least 0.125 and its best R2 at least 0.375.
center_mean_ok = center_comp_stats_df['mean'] >= 0.125
center_max_ok = center_comp_stats_df['max'] >= 0.375
center_stats_to_use = center_comp_stats_df.index[center_mean_ok & center_max_ok].tolist()

# Union with the required stats, without duplicates.
center_stats_to_use = list(set(required_center_stats_for_multipliers + center_stats_to_use))

print(f'Center Stats to Use: {center_stats_to_use}')

### Wing Data Exploration

In [None]:
# Download Data
# gather_wing_data fetches left and right wings for both seasons, joins them,
# extrapolates the stats and keeps only players present in both seasons; this
# performs live API requests.
wing_records, wing_records_next = gather_wing_data()

In [None]:
# Score every wing with the multipliers currently in effect; the result is
# stored as 'fantasy_value' on the current-season records.
wing_records_with_points = calculate_fantasy_points(wing_records, wing_records_next, 'wing')

# Tabulate the records, then discard the identifier columns that carry no
# predictive information.
wing_df = pd.DataFrame(wing_records_with_points)
wing_df = wing_df.drop(columns=['lastName', 'playerId', 'seasonId'])

# Stat List to Iterate Through
wing_stat_list = ['assists', 'assists5v5', 'assistsPer605v5', 'assistsPerGame', 'blockedShots', 'blockedShotsPer60', 'blocksPerGame',
                  'defensiveZoneFaceoffLosses', 'defensiveZoneFaceoffPct', 'defensiveZoneFaceoffWins', 'defensiveZoneFaceoffs', 'emptyNetAssists',
                  'emptyNetGoals', 'emptyNetPoints', 'evFaceoffPct', 'evFaceoffs', 'evFaceoffsLost', 'evFaceoffsWon', 'evGoals', 'evPoints',
                  'evTimeOnIce', 'evTimeOnIcePerGame', 'evenStrengthGoalDifference', 'evenStrengthGoalsAgainst', 'evenStrengthGoalsFor',
                  'evenStrengthGoalsForPct', 'evenStrengthTimeOnIcePerGame', 'faceoffPct5v5', 'faceoffWinPct', 'firstGoals', 'gameMisconductPenalties',
                  'gameWinningGoals', 'gamesPlayed', 'giveaways', 'giveawaysPer60', 'goals', 'goals5v5', 'goalsPct', 'goalsPer605v5', 'goalsPerGame',
                  'hits', 'hitsPer60', 'hitsPerGame', 'individualSatForPer60', 'individualShotsForPer60', 'majorPenalties', 'matchPenalties',
                  'minorPenalties', 'misconductPenalties', 'missedShotCrossbar', 'missedShotGoalpost', 'missedShotOverNet', 'missedShotWideOfNet',
                  'missedShots', 'netMinorPenaltiesPer60', 'netPenalties', 'netPenaltiesPer60', 'neutralZoneFaceoffLosses', 'neutralZoneFaceoffPct',
                  'neutralZoneFaceoffWins', 'neutralZoneFaceoffs', 'offensiveZoneFaceoffLosses', 'offensiveZoneFaceoffPct', 'offensiveZoneFaceoffWins',
                  'offensiveZoneFaceoffs', 'offensiveZoneStartPct5v5', 'onIceShootingPct', 'onIceShootingPct5v5', 'otGoals', 'otTimeOnIce',
                  'otTimeOnIcePerOtGame', 'penalties', 'penaltiesDrawn', 'penaltiesDrawnPer60', 'penaltiesTakenPer60', 'penaltyMinutes',
                  'penaltyMinutesPerGame', 'penaltyMinutesPerTimeOnIce', 'penaltySecondsPerGame', 'plusMinus', 'points', 'points5v5', 'pointsPer605v5',
                  'pointsPerGame', 'powerPlayGoalFor', 'powerPlayGoalsAgainst', 'powerPlayTimeOnIcePerGame', 'ppAssists', 'ppFaceoffPct', 'ppFaceoffs',
                  'ppFaceoffsLost', 'ppFaceoffsWon', 'ppGoals', 'ppGoalsAgainstPer60', 'ppGoalsForPer60', 'ppGoalsPer60', 'ppIndividualSatFor',
                  'ppIndividualSatForPer60', 'ppPoints', 'ppPointsPer60', 'ppPrimaryAssists', 'ppPrimaryAssistsPer60', 'ppSecondaryAssists',
                  'ppSecondaryAssistsPer60', 'ppShootingPct', 'ppShots', 'ppShotsPer60', 'ppTimeOnIce', 'ppTimeOnIcePctPerGame', 'ppTimeOnIcePerGame',
                  'primaryAssists5v5', 'primaryAssistsPer605v5', 'primaryAssistsPerGame', 'satAgainst', 'satAhead', 'satBehind', 'satClose', 'satFor',
                  'satPct', 'satPercentage', 'satPercentageAhead', 'satPercentageBehind', 'satPercentageClose', 'satPercentageTied', 'satRelative',
                  'satRelative5v5', 'satTied', 'satTotal', 'secondaryAssists5v5', 'secondaryAssistsPer605v5', 'secondaryAssistsPerGame', 'shAssists',
                  'shFaceoffPct', 'shFaceoffs', 'shFaceoffsLost', 'shFaceoffsWon', 'shGoals', 'shGoalsPer60', 'shIndividualSatFor', 'shIndividualSatForPer60',
                  'shPoints', 'shPointsPer60', 'shPrimaryAssists', 'shPrimaryAssistsPer60', 'shSecondaryAssists', 'shSecondaryAssistsPer60',
                  'shShootingPct', 'shShots', 'shShotsPer60', 'shTimeOnIce', 'shTimeOnIcePctPerGame', 'shTimeOnIcePerGame', 'shifts', 'shiftsPerGame',
                  'shootingPct', 'shootingPct5v5', 'shortHandedGoalsAgainst', 'shortHandedGoalsFor', 'shortHandedTimeOnIcePerGame', 'shots',
                  'shotsPerGame', 'skaterSavePct5v5', 'skaterShootingPlusSavePct5v5', 'takeaways', 'takeawaysPer60', 'timeOnIce', 'timeOnIcePerGame',
                  'timeOnIcePerGame5v5', 'timeOnIcePerShift', 'totalFaceoffLosses', 'totalFaceoffWins', 'totalFaceoffs', 'totalPrimaryAssists',
                  'totalSecondaryAssists', 'usatAgainst', 'usatAhead', 'usatBehind', 'usatClose', 'usatFor', 'usatPct', 'usatPercentage',
                  'usatPercentageAhead', 'usatPercentageBehind', 'usatPercentageTied', 'usatPrecentageClose', 'usatRelative', 'usatTied',
                  'usatTotal', 'zoneStartPct', 'zoneStartPct5v5']

def _r2_column_label():
    """Build the R2 column label from the skater multipliers currently in effect."""
    return (f'R2 Value (g={g_multiplier},a={a_multiplier},pts={pts_multiplier},pm={pm_multiplier},pim={pim_multiplier},ppg={ppg_multiplier},'
            f'ppa={ppa_multiplier},ppp={ppp_multiplier},shg={shg_multiplier},sha={sha_multiplier},shp={shp_multiplier},gwg={gwg_multiplier},'
            f'fow={fow_multiplier},fol={fol_multiplier},shft={shft_multiplier},sog={sog_multiplier},hit={hit_multiplier},blk={blk_multiplier})')


# Define Columns for DataFrame
columns = [_r2_column_label()]

# Create Initial Data Frame Using Realistic Multipliers
r2_tuples = [calculate_r2(wing_df, stat) for stat in wing_stat_list]
wing_r2_df = pd.DataFrame(r2_tuples, index=wing_stat_list, columns=columns)

# Add Columns Using Random Multipliers
# calculate_fantasy_points reads the module-level multipliers, so the sweep has
# to reassign them. Remember the realistic values and put them back afterwards;
# otherwise every later cell would run with the last random draw.
skater_multiplier_names = ['g_multiplier', 'a_multiplier', 'pts_multiplier', 'pm_multiplier', 'pim_multiplier', 'ppg_multiplier',
                           'ppa_multiplier', 'ppp_multiplier', 'shg_multiplier', 'sha_multiplier', 'shp_multiplier', 'gwg_multiplier',
                           'fow_multiplier', 'fol_multiplier', 'shft_multiplier', 'sog_multiplier', 'hit_multiplier', 'blk_multiplier']
realistic_multipliers = {name: globals()[name] for name in skater_multiplier_names}

try:
    for _ in range(99):
        for name in skater_multiplier_names:
            globals()[name] = random.randint(-100, 100)

        # Calculate Fantasy Points
        wing_records_with_points = calculate_fantasy_points(wing_records, wing_records_next, 'wing')

        # Create DataFrame
        wing_df = pd.DataFrame(wing_records_with_points)

        # Remove Useless Data
        wing_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

        # Merge DataFrames Together, labelling the column with the multipliers
        # that actually produced it.
        wing_r2_df = merge_df(wing_df, wing_r2_df, wing_stat_list, [_r2_column_label()])
finally:
    globals().update(realistic_multipliers)

# Leave the records and frame scored with the realistic multipliers again.
wing_records_with_points = calculate_fantasy_points(wing_records, wing_records_next, 'wing')
wing_df = pd.DataFrame(wing_records_with_points)
wing_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

In [None]:
# Stats that feed the fantasy score directly are always kept.
required_wing_stats_for_multipliers = [
    'goals', 'assists', 'points', 'plusMinus', 'penaltyMinutes', 'ppGoals',
    'ppAssists', 'ppPoints', 'shGoals', 'shAssists', 'shPoints', 'gameWinningGoals',
    'totalFaceoffWins', 'totalFaceoffLosses', 'shifts', 'shots', 'hits', 'blockedShots',
]

# Summarise each stat's R2 values across all multiplier sets (one row per stat).
wing_comp_stats_df = wing_r2_df.apply(pd.Series.describe, axis=1)

# Keep a stat when its mean R2 is at least 0.125 and its best R2 at least 0.375.
wing_mean_ok = wing_comp_stats_df['mean'] >= 0.125
wing_max_ok = wing_comp_stats_df['max'] >= 0.375
wing_stats_to_use = wing_comp_stats_df.index[wing_mean_ok & wing_max_ok].tolist()

# Union with the required stats, without duplicates.
wing_stats_to_use = list(set(required_wing_stats_for_multipliers + wing_stats_to_use))

print(f'Wing Stats to Use: {wing_stats_to_use}')

### Defenceman Data Exploration

In [None]:
# Download Data
# gather_defenceman_data fetches both seasons, extrapolates the stats and keeps
# only the defencemen present in both; this performs live API requests.
defenceman_records, defenceman_records_next = gather_defenceman_data()

In [None]:
# Score every defenceman with the multipliers currently in effect; the result
# is stored as 'fantasy_value' on the current-season records.
defenceman_records_with_points = calculate_fantasy_points(defenceman_records, defenceman_records_next, 'defenceman')

# Tabulate the records, then discard the identifier columns that carry no
# predictive information.
defenceman_df = pd.DataFrame(defenceman_records_with_points)
defenceman_df = defenceman_df.drop(columns=['lastName', 'playerId', 'seasonId'])

# Stat List to Iterate Through
defenceman_stat_list = ['assists', 'assists5v5', 'assistsPer605v5', 'assistsPerGame', 'blockedShots', 'blockedShotsPer60', 'blocksPerGame',
                        'defensiveZoneFaceoffLosses', 'defensiveZoneFaceoffPct', 'defensiveZoneFaceoffWins', 'defensiveZoneFaceoffs',
                        'emptyNetAssists', 'emptyNetGoals', 'emptyNetPoints', 'evFaceoffPct', 'evFaceoffs', 'evFaceoffsLost', 'evFaceoffsWon',
                        'evGoals', 'evPoints', 'evTimeOnIce', 'evTimeOnIcePerGame', 'evenStrengthGoalDifference', 'evenStrengthGoalsAgainst',
                        'evenStrengthGoalsFor', 'evenStrengthGoalsForPct', 'evenStrengthTimeOnIcePerGame', 'faceoffPct5v5', 'faceoffWinPct',
                        'firstGoals', 'gameMisconductPenalties', 'gameWinningGoals', 'gamesPlayed', 'giveaways', 'giveawaysPer60', 'goals',
                        'goals5v5', 'goalsPct', 'goalsPer605v5', 'goalsPerGame', 'hits', 'hitsPer60', 'hitsPerGame', 'individualSatForPer60',
                        'individualShotsForPer60', 'majorPenalties', 'matchPenalties', 'minorPenalties', 'misconductPenalties', 'missedShotCrossbar',
                        'missedShotGoalpost', 'missedShotOverNet', 'missedShotWideOfNet', 'missedShots', 'netMinorPenaltiesPer60', 'netPenalties',
                        'netPenaltiesPer60', 'neutralZoneFaceoffLosses', 'neutralZoneFaceoffPct', 'neutralZoneFaceoffWins', 'neutralZoneFaceoffs',
                        'offensiveZoneFaceoffLosses', 'offensiveZoneFaceoffPct', 'offensiveZoneFaceoffWins', 'offensiveZoneFaceoffs',
                        'offensiveZoneStartPct5v5', 'onIceShootingPct', 'onIceShootingPct5v5', 'otGoals', 'otTimeOnIce', 'otTimeOnIcePerOtGame',
                        'penalties', 'penaltiesDrawn', 'penaltiesDrawnPer60', 'penaltiesTakenPer60', 'penaltyMinutes', 'penaltyMinutesPerGame',
                        'penaltyMinutesPerTimeOnIce', 'penaltySecondsPerGame', 'plusMinus', 'points', 'points5v5', 'pointsPer605v5', 'pointsPerGame',
                        'powerPlayGoalFor', 'powerPlayGoalsAgainst', 'powerPlayTimeOnIcePerGame', 'ppAssists', 'ppFaceoffPct', 'ppFaceoffs',
                        'ppFaceoffsLost', 'ppFaceoffsWon', 'ppGoals', 'ppGoalsAgainstPer60', 'ppGoalsForPer60', 'ppGoalsPer60', 'ppIndividualSatFor',
                        'ppIndividualSatForPer60', 'ppPoints', 'ppPointsPer60', 'ppPrimaryAssists', 'ppPrimaryAssistsPer60', 'ppSecondaryAssists',
                        'ppSecondaryAssistsPer60', 'ppShootingPct', 'ppShots', 'ppShotsPer60', 'ppTimeOnIce', 'ppTimeOnIcePctPerGame',
                        'ppTimeOnIcePerGame', 'primaryAssists5v5', 'primaryAssistsPer605v5', 'primaryAssistsPerGame', 'satAgainst', 'satAhead',
                        'satBehind', 'satClose', 'satFor', 'satPct', 'satPercentage', 'satPercentageAhead', 'satPercentageBehind', 'satPercentageClose',
                        'satPercentageTied', 'satRelative', 'satRelative5v5', 'satTied', 'satTotal', 'secondaryAssists5v5', 'secondaryAssistsPer605v5',
                        'secondaryAssistsPerGame', 'shAssists', 'shFaceoffPct', 'shFaceoffs', 'shFaceoffsLost', 'shFaceoffsWon', 'shGoals', 'shGoalsPer60',
                        'shIndividualSatFor', 'shIndividualSatForPer60', 'shPoints', 'shPointsPer60', 'shPrimaryAssists', 'shPrimaryAssistsPer60',
                        'shSecondaryAssists', 'shSecondaryAssistsPer60', 'shShootingPct', 'shShots', 'shShotsPer60', 'shTimeOnIce', 'shTimeOnIcePctPerGame',
                        'shTimeOnIcePerGame', 'shifts', 'shiftsPerGame', 'shootingPct', 'shootingPct5v5', 'shortHandedGoalsAgainst', 'shortHandedGoalsFor',
                        'shortHandedTimeOnIcePerGame', 'shots', 'shotsPerGame', 'skaterSavePct5v5', 'skaterShootingPlusSavePct5v5', 'takeaways',
                        'takeawaysPer60', 'timeOnIce', 'timeOnIcePerGame', 'timeOnIcePerGame5v5', 'timeOnIcePerShift', 'totalFaceoffLosses',
                        'totalFaceoffWins', 'totalFaceoffs', 'totalPrimaryAssists', 'totalSecondaryAssists', 'usatAgainst', 'usatAhead', 'usatBehind',
                        'usatClose', 'usatFor', 'usatPct', 'usatPercentage', 'usatPercentageAhead', 'usatPercentageBehind', 'usatPercentageTied',
                        'usatPrecentageClose', 'usatRelative', 'usatTied', 'usatTotal', 'zoneStartPct', 'zoneStartPct5v5']

# Define Columns for DataFrame
columns = [(f'R2 Value (g={g_multiplier},a={a_multiplier},pts={pts_multiplier},pm={pm_multiplier},pim={pim_multiplier},ppg={ppg_multiplier},'
            f'ppa={ppa_multiplier},ppp={ppp_multiplier},shg={shg_multiplier},sha={sha_multiplier},shp={shp_multiplier},gwg={gwg_multiplier},'
            f'fow={fow_multiplier},fol={fol_multiplier},shft={shft_multiplier},sog={sog_multiplier},hit={hit_multiplier},blk={blk_multiplier},'
            f'defp={defp_multiplier})')]

# Create Initial Data Frame Using Realistic Multipliers
r2_tuples = []
for stat in defenceman_stat_list:
    r2_tuples.append(calculate_r2(defenceman_df, stat))

defenceman_r2_df = pd.DataFrame(r2_tuples, index=defenceman_stat_list, columns=columns)

# Add Columns Using Random Multipliers
for i in range(199):
    g_multiplier    = random.randint(-100, 100)
    a_multiplier    = random.randint(-100, 100)
    pts_multiplier  = random.randint(-100, 100)
    pm_multiplier   = random.randint(-100, 100)
    pim_multiplier  = random.randint(-100, 100)
    ppg_multiplier  = random.randint(-100, 100)
    ppa_multiplier  = random.randint(-100, 100)
    ppp_multiplier  = random.randint(-100, 100)
    shg_multiplier  = random.randint(-100, 100)
    sha_multiplier  = random.randint(-100, 100)
    shp_multiplier  = random.randint(-100, 100)
    gwg_multiplier  = random.randint(-100, 100)
    fow_multiplier  = random.randint(-100, 100)
    fol_multiplier  = random.randint(-100, 100)
    shft_multiplier = random.randint(-100, 100)
    sog_multiplier  = random.randint(-100, 100)
    hit_multiplier  = random.randint(-100, 100)
    blk_multiplier  = random.randint(-100, 100)
    defp_multiplier = random.randint(-100, 100)
    
    # Calculate Fantasy Points
    defenceman_records_with_points = calculate_fantasy_points(defenceman_records, defenceman_records_next, 'defenceman')

    # Create DataFrame
    defenceman_df = pd.DataFrame(defenceman_records_with_points)

    # Remove Useless Data
    defenceman_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

    # Merge DataFrames Together
    defenceman_r2_df = merge_df(defenceman_df, defenceman_r2_df, defenceman_stat_list, columns)

In [None]:
# Stats that must always be kept, whatever their R2 summary looks like.
# NOTE(review): 18 stats are listed here while the skater search varies 19 multipliers;
# confirm whether `defp_multiplier` needs its own entry.
required_defenceman_stats_for_multipliers = ['goals', 'assists', 'points', 'plusMinus', 'penaltyMinutes', 'ppGoals', 'ppAssists', 'ppPoints', 'shGoals', 'shAssists',
                                             'shPoints', 'gameWinningGoals', 'totalFaceoffWins', 'totalFaceoffLosses', 'shifts', 'shots', 'hits', 'blockedShots']

# Summarise every stat's row of R2 values (count/mean/std/min/quartiles/max per stat).
defenceman_comp_stats_df = defenceman_r2_df.apply(pd.Series.describe, axis=1)

# Keep stats with a mean of at least 0.125 and a max of at least 0.375 across the multiplier sets.
defenceman_stats_to_use = defenceman_comp_stats_df.index[(defenceman_comp_stats_df['mean'] >= 0.125) & (defenceman_comp_stats_df['max'] >= 0.375)].tolist()

# Merge with the required stats and de-duplicate. dict.fromkeys keeps first-seen order,
# so the list is the same on every run; list(set(...)) would reorder the strings
# differently in each kernel session because of hash randomisation.
defenceman_stats_to_use = list(dict.fromkeys(required_defenceman_stats_for_multipliers + defenceman_stats_to_use))

print(f'Defenceman Stats to Use: {defenceman_stats_to_use}')

### Goalie Data Exploration

In [None]:
# Download Data
# gather_goalie_data() is defined elsewhere in this notebook; its two return values
# are unpacked here and later passed together to calculate_fantasy_points().
# NOTE(review): presumably `goalie_records_next` holds the following-season stats
# for the same goalies - confirm against gather_goalie_data().
goalie_records, goalie_records_next = gather_goalie_data()

In [None]:
# Build the goalie R2 table: one row per stat in `goalie_stat_list`, one column per set
# of fantasy-point multipliers (the multipliers currently in scope first, then 199
# randomly drawn sets).

# Calculate Fantasy Points
goalie_records_with_points = calculate_fantasy_points(goalie_records, goalie_records_next, 'goalie')

# Create DataFrame
goalie_df = pd.DataFrame(goalie_records_with_points)

# Remove Useless Data
goalie_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

# Stat List to Iterate Through
goalie_stat_list = ['assists', 'completeGamePct', 'completeGames', 'evGoalsAgainst', 'evSavePct', 'evSaves', 'evShotsAgainst', 'gamesPlayed',
                    'gamesStarted', 'goalsAgainst', 'goalsAgainstAverage', 'goalsFor', 'goalsForAverage', 'incompleteGames', 'losses',
                    'otLosses', 'penaltyMinutes', 'points', 'ppGoalsAgainst', 'ppSavePct', 'ppSaves', 'ppShotsAgainst', 'qualityStart',
                    'qualityStartsPct', 'regulationLosses', 'regulationWins', 'savePct', 'saves', 'shGoalsAgainst', 'shSavePct', 'shSaves',
                    'shShotsAgainst', 'shotsAgainst', 'shotsAgainstPer60', 'shutouts', 'timeOnIce', 'wins']

# Define Columns for DataFrame
# The single column label records the multiplier values in effect at this point.
columns = [(f'R2 Value (gs={gs_multiplier},w={w_multiplier},l={l_multiplier},sa={sa_multiplier},ga={ga_multiplier},sv={sv_multiplier},'
            f'so={so_multiplier},otl={otl_multiplier})')]

# Create Initial Data Frame Using the Multipliers Currently in Scope
r2_tuples = [calculate_r2(goalie_df, stat) for stat in goalie_stat_list]

goalie_r2_df = pd.DataFrame(r2_tuples, index=goalie_stat_list, columns=columns)

# Snapshot the goalie multipliers before the random search. The loop below reassigns
# these module-level names, and without a restore every later cell would silently keep
# the last random draw instead of the configured values.
saved_goalie_multipliers = (gs_multiplier, w_multiplier, l_multiplier, sa_multiplier,
                            ga_multiplier, sv_multiplier, so_multiplier, otl_multiplier)

# Add Columns Using Random Multipliers
try:
    for trial in range(199):
        # Draw order is kept fixed so a seeded `random` reproduces the same multiplier sets.
        gs_multiplier   = random.randint(-100, 100)
        w_multiplier    = random.randint(-100, 100)
        l_multiplier    = random.randint(-100, 100)
        sa_multiplier   = random.randint(-100, 100)
        ga_multiplier   = random.randint(-100, 100)
        sv_multiplier   = random.randint(-100, 100)
        so_multiplier   = random.randint(-100, 100)
        otl_multiplier  = random.randint(-100, 100)

        # Calculate Fantasy Points
        # NOTE(review): presumably calculate_fantasy_points reads the module-level
        # *_multiplier names assigned above - confirm in its definition.
        goalie_records_with_points = calculate_fantasy_points(goalie_records, goalie_records_next, 'goalie')

        # Create DataFrame
        goalie_df = pd.DataFrame(goalie_records_with_points)

        # Remove Useless Data
        goalie_df.drop(['lastName', 'playerId', 'seasonId'], axis=1, inplace=True)

        # Merge DataFrames Together
        # NOTE(review): `columns` still carries the label built from the initial multipliers;
        # confirm that merge_df derives the new column's label itself.
        goalie_r2_df = merge_df(goalie_df, goalie_r2_df, goalie_stat_list, columns)
finally:
    # Put the original multipliers back, even if a trial raised part-way through.
    (gs_multiplier, w_multiplier, l_multiplier, sa_multiplier,
     ga_multiplier, sv_multiplier, so_multiplier, otl_multiplier) = saved_goalie_multipliers

In [None]:
# Stats that must always be kept, whatever their R2 summary looks like.
required_goalie_stats_for_multipliers = ['gamesStarted', 'wins', 'losses', 'shotsAgainst', 'goalsAgainst', 'saves', 'shutouts', 'otLosses']

# Summarise every stat's row of R2 values (count/mean/std/min/quartiles/max per stat).
goalie_comp_stats_df = goalie_r2_df.apply(pd.Series.describe, axis=1)

# Keep stats with a mean of at least 0.125 and a max of at least 0.375 across the multiplier sets.
goalie_stats_to_use = goalie_comp_stats_df.index[(goalie_comp_stats_df['mean'] >= 0.125) & (goalie_comp_stats_df['max'] >= 0.375)].tolist()

# Merge with the required stats and de-duplicate. dict.fromkeys keeps first-seen order,
# so the list is the same on every run; list(set(...)) would reorder the strings
# differently in each kernel session because of hash randomisation.
goalie_stats_to_use = list(dict.fromkeys(required_goalie_stats_for_multipliers + goalie_stats_to_use))

print(f'Goalie Stats to Use: {goalie_stats_to_use}')