In [74]:
import pandas as pd
import datetime
from random import randint
from time import sleep
import os
import requests
import json
import matplotlib.pyplot as plt
import numpy as np
import hvplot.pandas
import geopandas as gpd
import seaborn as sns
from scipy.stats import pearsonr, mannwhitneyu

In [75]:
# Load the Stanley Cup finals results (one row per year) from Wikipedia.

url = "https://en.wikipedia.org/wiki/List_of_Stanley_Cup_champions"

# pd.read_html returns a list with one DataFrame per HTML table on the page.
stanleycup_champs = pd.read_html(url)

# Table index 2 is the by-year finals table shown below (it starts at 1927).
# NOTE(review): the index depends on the live page layout — confirm it still
# selects this table before trusting downstream results.
stanleycup_byyear_df = stanleycup_champs[2]
stanleycup_byyear_df.head()

Unnamed: 0,Year,Winning team,Coach,Games,Losing team,Coach.1,Winning goal
0,1927,"Ottawa Senators (C) (5, 4–1)",Dave Gill,2–0,"Boston Bruins (A) (1, 0–1)",Art Ross,"Cy Denneny (7:30, second)"
1,1928,"New York Rangers (A) (1, 1–0)",Lester Patrick-playing,3–2,"Montreal Maroons (C) (2, 1–1)",Eddie Gerard,"Frank Boucher (3:35, third)"
2,1929,"Boston Bruins (A) (2, 1–1)",Art Ross,2–0,"New York Rangers (A) (2, 1–1)",Lester Patrick,"Bill Carson (18:02, third)"
3,1930,"Montreal Canadiens (C) (6, 3–2)",Cecil Hart,2–0,"Boston Bruins (A) (3, 1–2)",Art Ross,"Howie Morenz (1:00, second)"
4,1931,"Montreal Canadiens (C) (7, 4–2)",Cecil Hart,3–2,"Chicago Black Hawks (A) (1, 0–1)",Dick Irvin,"Johnny Gagnon (9:59, second)"


In [76]:
# Find the winning team of every final from 1992 through 2022
# (season keys "19911992" ... "20212022").

# Select rows and columns in a single .loc call and reset the index; the
# derived columns below are computed as new Series instead of being written
# into a slice of stanleycup_byyear_df (avoids SettingWithCopyWarning).
finals_won = stanleycup_byyear_df.loc[
    stanleycup_byyear_df["Year"].between(1992, 2022), ["Year", "Winning team"]
].reset_index(drop=True)

# Season key in the 8-digit form used by the NHL API URLs further down,
# e.g. final year 1992 -> "19911992".
season_key = (finals_won["Year"] - 1).map(str) + finals_won["Year"].map(str)

# Keep only the text before the first "(" (drops the bracketed notes after
# the team name), trim whitespace, and use the accented spelling that the
# team lookup table uses for Montréal.
winner_name = (
    finals_won["Winning team"]
    .str.split("(")
    .str[0]
    .str.strip()
    .str.replace("Montreal Canadiens", "Montréal Canadiens", regex=False)
)

stanleycup_winner_cleaned_df = pd.DataFrame(
    {"Year": season_key, "Team Name": winner_name}
)

# Drop the "20042005" entry.
# NOTE(review): presumably the cancelled 2004-05 season — confirm.
stanleycup_winner_cleaned_df = stanleycup_winner_cleaned_df.loc[
    stanleycup_winner_cleaned_df["Year"] != "20042005"
].copy()
stanleycup_winner_cleaned_df

Unnamed: 0,Year,Team Name
0,19911992,Pittsburgh Penguins
1,19921993,Montréal Canadiens
2,19931994,New York Rangers
3,19941995,New Jersey Devils
4,19951996,Colorado Avalanche
5,19961997,Detroit Red Wings
6,19971998,Detroit Red Wings
7,19981999,Dallas Stars
8,19992000,New Jersey Devils
9,20002001,Colorado Avalanche


In [77]:
# Find the losing team of every final from 1992 through 2022
# (season keys "19911992" ... "20212022").

# Select rows and columns in a single .loc call and reset the index; the
# derived columns below are computed as new Series instead of being written
# into a slice of stanleycup_byyear_df (avoids SettingWithCopyWarning).
finals_lost = stanleycup_byyear_df.loc[
    stanleycup_byyear_df["Year"].between(1992, 2022), ["Year", "Losing team"]
].reset_index(drop=True)

# Season key in the 8-digit form used by the NHL API URLs further down,
# e.g. final year 1992 -> "19911992".
season_key = (finals_lost["Year"] - 1).map(str) + finals_lost["Year"].map(str)

# Keep only the text before the first "(" and trim whitespace.
loser_name = finals_lost["Losing team"].str.split("(").str[0].str.strip()

# Change names so they can be matched against the team lookup table.
# NOTE(review): the two franchise renames are assumed to map historical names
# to the names the NHL API returns today — confirm against team_info_df.
name_fixes = {
    "Montreal Canadiens": "Montréal Canadiens",
    "Minnesota North Stars": "Dallas Stars",
    "Mighty Ducks of Anaheim": "Anaheim Ducks",
}
for old_name, new_name in name_fixes.items():
    loser_name = loser_name.str.replace(old_name, new_name, regex=False)

stanleycup_losing_cleaned_df = pd.DataFrame(
    {"Year": season_key, "Team Name": loser_name}
)

# Drop the "20042005" entry.
# NOTE(review): presumably the cancelled 2004-05 season — confirm.
stanleycup_losing_cleaned_df = stanleycup_losing_cleaned_df.loc[
    stanleycup_losing_cleaned_df["Year"] != "20042005"
].copy()

stanleycup_losing_cleaned_df

Unnamed: 0,Year,Team Name
0,19911992,Chicago Blackhawks
1,19921993,Los Angeles Kings
2,19931994,Vancouver Canucks
3,19941995,Detroit Red Wings
4,19951996,Florida Panthers
5,19961997,Philadelphia Flyers
6,19971998,Washington Capitals
7,19981999,Buffalo Sabres
8,19992000,Dallas Stars
9,20002001,New Jersey Devils


In [78]:
# Build a lookup table of team names and their IDs from the NHL API.

teamID_url = "https://statsapi.web.nhl.com/api/v1/teams"

# timeout: fail instead of hanging forever on a dead connection;
# raise_for_status: surface HTTP errors here rather than as a KeyError below.
response = requests.get(teamID_url, timeout=30)
response.raise_for_status()

response_json = response.json()

# Map team ID -> team name for every team in the response.
team_info = {team["id"]: team["name"] for team in response_json["teams"]}

team_info_df = pd.DataFrame(
    list(team_info.items()), columns=["Team ID", "Team Name"]
)

team_info_df

Unnamed: 0,Team ID,Team Name
0,1,New Jersey Devils
1,2,New York Islanders
2,3,New York Rangers
3,4,Philadelphia Flyers
4,5,Pittsburgh Penguins
5,6,Boston Bruins
6,7,Buffalo Sabres
7,8,Montréal Canadiens
8,9,Ottawa Senators
9,10,Toronto Maple Leafs


In [79]:
# Attach the Team ID to each winning team/season by matching on "Team Name".
# A left join keeps every season row even if a name has no match in
# team_info_df (its Team ID is then NaN).

stanleycup_winning_merge_byname_ID = stanleycup_winner_cleaned_df.merge(
    team_info_df,
    how='left',
    on='Team Name',
)
stanleycup_winning_merge_byname_ID

Unnamed: 0,Year,Team Name,Team ID
0,19911992,Pittsburgh Penguins,5
1,19921993,Montréal Canadiens,8
2,19931994,New York Rangers,3
3,19941995,New Jersey Devils,1
4,19951996,Colorado Avalanche,21
5,19961997,Detroit Red Wings,17
6,19971998,Detroit Red Wings,17
7,19981999,Dallas Stars,25
8,19992000,New Jersey Devils,1
9,20002001,Colorado Avalanche,21


In [80]:
# Attach the Team ID to each losing team/season by matching on "Team Name".
# A left join keeps every season row even if a name has no match in
# team_info_df (its Team ID is then NaN).

stanleycup_losing_merge_byname_ID = stanleycup_losing_cleaned_df.merge(
    team_info_df,
    how='left',
    on="Team Name",
)
stanleycup_losing_merge_byname_ID

Unnamed: 0,Year,Team Name,Team ID
0,19911992,Chicago Blackhawks,16
1,19921993,Los Angeles Kings,26
2,19931994,Vancouver Canucks,23
3,19941995,Detroit Red Wings,17
4,19951996,Florida Panthers,13
5,19961997,Philadelphia Flyers,4
6,19971998,Washington Capitals,15
7,19981999,Buffalo Sabres,7
8,19992000,Dallas Stars,25
9,20002001,New Jersey Devils,1


In [81]:
# Get the roster of each winning team for the season it won.
# Produces winning_df with one row per (player, team, season).

player_list = []
base_url = "https://statsapi.web.nhl.com/api/v1/teams/{}?expand=team.roster&season={}"
for index, row in stanleycup_winning_merge_byname_ID.iterrows():
    team_ids = row["Team ID"]
    year_cl = row["Year"]
    url = base_url.format(team_ids, year_cl)

    # timeout: do not hang on a stalled connection; raise_for_status: report
    # a failed request as an HTTP error instead of a KeyError on the payload.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    # One record per roster entry, tagged with the team and season requested.
    player_list.extend(
        {
            "Name": player["person"]["fullName"],
            "Player ID": player["person"]["id"],
            "Team ID": team_ids,
            "Season": year_cl,
        }
        for player in data["teams"][0]["roster"]["roster"]
    )
winning_df = pd.DataFrame(player_list)
winning_df.head()
     

Unnamed: 0,Name,Player ID,Team ID,Season
0,Phil Bourque,8445629,5,19911992
1,Jay Caufield,8446013,5,19911992
2,Jeff Chychrun,8446050,5,19911992
3,Paul Coffey,8446117,5,19911992
4,Jeff Daniels,8446305,5,19911992


In [82]:
# Get the roster of each losing team for the season it lost.
# Produces losing_df with one row per (player, team, season).
player_list = []
base_url = "https://statsapi.web.nhl.com/api/v1/teams/{}?expand=team.roster&season={}"
for index, row in stanleycup_losing_merge_byname_ID.iterrows():
    team_ids = row["Team ID"]
    year_cl = row["Year"]
    url = base_url.format(team_ids, year_cl)

    # timeout: do not hang on a stalled connection; raise_for_status: report
    # a failed request as an HTTP error instead of a KeyError on the payload.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    # One record per roster entry, tagged with the team and season requested.
    player_list.extend(
        {
            "Name": player["person"]["fullName"],
            "Player ID": player["person"]["id"],
            "Team ID": team_ids,
            "Season": year_cl,
        }
        for player in data["teams"][0]["roster"]["roster"]
    )
losing_df = pd.DataFrame(player_list)
losing_df.head()

Unnamed: 0,Name,Player ID,Team ID,Season
0,Rob Brown,8445689,16,19911992
1,Keith Brown,8445713,16,19911992
2,Rod Buskas,8445796,16,19911992
3,Shawn Byram,8445823,16,19911992
4,Chris Chelios,8446053,16,19911992


In [83]:
   
# Player Profile - Winning Team per Season
# Builds player_ages / player_nationalities, one entry per row of winning_df.

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}"
player_ages = []
player_nationalities = []

# The same Player ID may occur in more than one season's roster; keep each
# fetched profile so every player is requested only once.
profile_by_id = {}

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    if player_id not in profile_by_id:
        response = requests.get(base_url.format(player_id), timeout=30)
        response.raise_for_status()
        profile_by_id[player_id] = response.json()['people'][0]
    profile = profile_by_id[player_id]

    # birthDate starts with the 4-digit birth year; the last 4 digits of the
    # season key are the year the season ended.
    player_birthyear_yyyy = profile['birthDate'][:4]
    winning_season_yyyy = row["Season"][4:]

    # Age is a year difference only (birth month/day are ignored).
    winning_age = int(winning_season_yyyy) - int(player_birthyear_yyyy)

    player_ages.append(winning_age)
    player_nationalities.append(profile['nationality'])


In [84]:
# Attach the profile lists built in the previous cell; each list must have
# exactly one entry per row of winning_df.
for profile_column, profile_values in (
    ("Age", player_ages),
    ("Nationality", player_nationalities),
):
    winning_df[profile_column] = profile_values
winning_df.head()

Unnamed: 0,Name,Player ID,Team ID,Season,Age,Nationality
0,Phil Bourque,8445629,5,19911992,30,USA
1,Jay Caufield,8446013,5,19911992,32,USA
2,Jeff Chychrun,8446050,5,19911992,26,CAN
3,Paul Coffey,8446117,5,19911992,31,CAN
4,Jeff Daniels,8446305,5,19911992,24,CAN


In [85]:
# Player Profile - Losing Team per Season
# Builds player_ages / player_nationalities, one entry per row of losing_df.
base_url = "https://statsapi.web.nhl.com/api/v1/people/{}"
player_ages = []
player_nationalities = []

# The same Player ID may occur in more than one season's roster; keep each
# fetched profile so every player is requested only once.
profile_by_id = {}

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    if player_id not in profile_by_id:
        response = requests.get(base_url.format(player_id), timeout=30)
        response.raise_for_status()
        profile_by_id[player_id] = response.json()['people'][0]
    profile = profile_by_id[player_id]

    # birthDate starts with the 4-digit birth year; the last 4 digits of the
    # season key are the year the season ended.
    player_birthyear_yyyy = profile['birthDate'][:4]
    losing_season_yyyy = row["Season"][4:]

    # Age is a year difference only (birth month/day are ignored).
    losing_age = int(losing_season_yyyy) - int(player_birthyear_yyyy)

    player_ages.append(losing_age)
    player_nationalities.append(profile['nationality'])

KeyboardInterrupt: 

In [None]:
# Attach the profile lists built in the previous cell; each list must have
# exactly one entry per row of losing_df (the loop above has to run to
# completion first).
for profile_column, profile_values in (
    ("Age", player_ages),
    ("Nationality", player_nationalities),
):
    losing_df[profile_column] = profile_values
losing_df.head()

In [None]:
# Get average player age per season for the winning teams
# (mean age within each season, rounded to a whole number).

avg_player_age = winning_df.groupby(by='Season')['Age'].mean().round()
avg_player_age_winning_df = pd.DataFrame(avg_player_age)
avg_player_age_winning_df

In [None]:
# Get average player age per season for the losing teams
# (mean age within each season, rounded to a whole number).
avg_player_age = losing_df.groupby(by='Season')['Age'].mean().round()

avg_player_age_losing_df = pd.DataFrame(avg_player_age)
avg_player_age_losing_df

In [None]:
# Nationality counts for the winning roster of the 19911992 season

in_1992_season = winning_df['Season'] == "19911992"
player_nat_1992 = winning_df.loc[in_1992_season, "Nationality"]
player_nat_1992.value_counts()


In [None]:
# Nationality counts for the winning roster of the 20212022 season

in_2022_season = winning_df['Season'] == "20212022"
player_nat_2022 = winning_df.loc[in_2022_season, "Nationality"]
player_nat_2022.value_counts()


In [None]:
# Building Penalty In Minutes (PIM) for Winning DF
# pim gets one value per row of winning_df, in row order.

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
pim = []

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        pim_data = data["stats"][0]["splits"][0]['stat']['pim']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        pim_data = 0
    else:
        print(pim_data)
    pim.append(pim_data)

In [None]:
# Building Penalty In Minutes (PIM) for Losing DF
# Lpim gets one value per row of losing_df, in row order.

# Defined here so this cell does not depend on an earlier cell's base_url.
base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
Lpim = []

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        pim_data = data["stats"][0]["splits"][0]['stat']['pim']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        pim_data = 0
    else:
        print(pim_data)
    Lpim.append(pim_data)

In [None]:
# Building Power Play Goals for Winning DF
# power_play_goal gets one value per row of winning_df, in row order.

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
power_play_goal = []

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        ppg_data = data["stats"][0]["splits"][0]['stat']['powerPlayGoals']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        ppg_data = 0
    else:
        print(ppg_data)
    power_play_goal.append(ppg_data)

In [None]:
# Building Power Play Goals for Losing DF
# Lpower_play_goal gets one value per row of losing_df, in row order.

# Defined here so this cell does not depend on an earlier cell's base_url.
base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
Lpower_play_goal = []

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        ppg_data = data["stats"][0]["splits"][0]['stat']['powerPlayGoals']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        ppg_data = 0
    else:
        print(ppg_data)
    Lpower_play_goal.append(ppg_data)

In [None]:
# Building Over Time Goal for Winning DF
# over_time_goal gets one value per row of winning_df, in row order.

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
over_time_goal = []

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        otg_data = data["stats"][0]["splits"][0]['stat']['overTimeGoals']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        otg_data = 0
    else:
        print(otg_data)
    over_time_goal.append(otg_data)

In [None]:
# Building Over Time Goal for Losing DF
# Lover_time_goal gets one value per row of losing_df, in row order.

# Defined here so this cell does not depend on an earlier cell's base_url.
base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
Lover_time_goal = []

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        otg_data = data["stats"][0]["splits"][0]['stat']['overTimeGoals']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        otg_data = 0
    else:
        print(otg_data)
    Lover_time_goal.append(otg_data)

In [None]:
# Building Plus Minus (+/-) for Winning DF
# plus_mins gets one value per row of winning_df, in row order.

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
plus_mins = []

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        pm_data = data["stats"][0]["splits"][0]['stat']['plusMinus']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        pm_data = 0
    else:
        print(pm_data)
    plus_mins.append(pm_data)

In [None]:
# Building Plus Minus (+/-) for Losing DF
# Lplus_mins gets one value per row of losing_df, in row order.

# Defined here so this cell does not depend on an earlier cell's base_url.
base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
Lplus_mins = []

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        pm_data = data["stats"][0]["splits"][0]['stat']['plusMinus']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        pm_data = 0
    else:
        print(pm_data)
    Lplus_mins.append(pm_data)

In [None]:
# Building Points for Winning DF
# points gets one value per row of winning_df, in row order.

base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
points = []

for index, row in winning_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        points_data = data["stats"][0]["splits"][0]['stat']['points']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        points_data = 0
    else:
        print(points_data)
    points.append(points_data)

In [None]:
# Building Points for Losing DF
# Lpoints gets one value per row of losing_df, in row order.

# Defined here so this cell does not depend on an earlier cell's base_url.
base_url = "https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=statsSingleSeason&season={}"
Lpoints = []

for index, row in losing_df.iterrows():
    player_id = row["Player ID"]
    year = row["Season"]
    url = base_url.format(player_id, year)
    response = requests.get(url, timeout=30)
    data = response.json()
    try:
        points_data = data["stats"][0]["splits"][0]['stat']['points']
    except (KeyError, IndexError, TypeError):
        # The value is absent from the response (missing key or empty list
        # along the path): record 0. Only lookup errors are caught, so
        # Ctrl-C and unrelated bugs still propagate.
        print("not found")
        points_data = 0
    else:
        print(points_data)
    Lpoints.append(points_data)

In [None]:
# Attach the per-player season stats collected above as new columns of
# winning_df (each list holds one value per row, in row order).
winning_stat_columns = {
    'PIM': pim,
    'PPG': power_play_goal,
    'OTG': over_time_goal,
    '+/-': plus_mins,
    'Pts': points,
}
for stat_name, stat_values in winning_stat_columns.items():
    winning_df[stat_name] = stat_values
winning_df.head()

In [None]:
# Attach the per-player season stats collected above as new columns of
# losing_df (each list holds one value per row, in row order).
losing_stat_columns = {
    'PIM': Lpim,
    'PPG': Lpower_play_goal,
    'OTG': Lover_time_goal,
    '+/-': Lplus_mins,
    'Pts': Lpoints,
}
for stat_name, stat_values in losing_stat_columns.items():
    losing_df[stat_name] = stat_values
losing_df.head()

In [None]:
# Get average PIM of the winning teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_pim = winning_df.groupby(by='Season')['PIM'].mean().round()
avg_player_pim_winning_df = pd.DataFrame(avg_player_pim)
avg_player_pim_winning_df.mean()

In [None]:
# Get average PIM of the losing teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_pim = losing_df.groupby(by='Season')['PIM'].mean().round()
avg_player_pim_losing_df = pd.DataFrame(avg_player_pim)
avg_player_pim_losing_df.mean()

In [None]:
# Get average OT goals of the winning teams across all seasons:
# mean per season (not rounded), then the mean of those values.

avg_player_otg = winning_df.groupby(by='Season')['OTG'].mean()
avg_player_otg_winning_df = pd.DataFrame(avg_player_otg)
avg_player_otg_winning_df.mean()

In [None]:
# Get average OT goals of the losing teams across all seasons:
# mean per season (not rounded), then the mean of those values.

avg_player_otg = losing_df.groupby(by='Season')['OTG'].mean()
avg_player_otg_losing_df = pd.DataFrame(avg_player_otg)
avg_player_otg_losing_df.mean()

In [None]:
# Get average PP goals of the winning teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_ppg = winning_df.groupby(by='Season')['PPG'].mean().round()
avg_player_ppg_winning_df = pd.DataFrame(avg_player_ppg)
avg_player_ppg_winning_df.mean()

In [None]:
# Get average PP goals of the losing teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_ppg = losing_df.groupby(by='Season')['PPG'].mean().round()
avg_player_ppg_losing_df = pd.DataFrame(avg_player_ppg)
avg_player_ppg_losing_df.mean()

In [None]:
# Get average points of the winning teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_pts = winning_df.groupby(by='Season')['Pts'].mean().round()
avg_player_pts_winning_df = pd.DataFrame(avg_player_pts)
avg_player_pts_winning_df.mean()

In [None]:
# Get average points of the losing teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_pts = losing_df.groupby(by='Season')['Pts'].mean().round()
avg_player_pts_losing_df = pd.DataFrame(avg_player_pts)
avg_player_pts_losing_df.mean()

In [None]:
# Get average +/- of the winning teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_plusminus = winning_df.groupby(by='Season')['+/-'].mean().round()
avg_player_plusminus_winning_df = pd.DataFrame(avg_player_plusminus)
avg_player_plusminus_winning_df.mean()

In [None]:
# Get average +/- of the losing teams across all seasons:
# mean per season (rounded to a whole number), then the mean of those values.

avg_player_plusminus = losing_df.groupby(by='Season')['+/-'].mean().round()
avg_player_plusminus_losing_df = pd.DataFrame(avg_player_plusminus)
avg_player_plusminus_losing_df.mean()

In [None]:
# Then-and-now pie charts: nationality breakdown of the 1992 and 2022
# winning teams, drawn side by side.
# NOTE(review): the sizes are typed in by hand — presumably taken from the
# value_counts() cells above; confirm they are still current.

# 1992 chart: wedge labels, player counts, wedge colors, and wedge offsets
# (only the first wedge, "Europe", is pulled out from the center).
labels = ["Europe", "Canada", 'USA']
sizes = [6, 21, 10]
colors = ["yellow", "red", 'lightblue']
explode = (0.1, 0, 0)

# 2022 chart: same layout with that season's counts.
labels2 = ["Europe", "Canada", 'USA']
sizes2 = [12, 17, 10]
colors2 = ["yellow", "red", "lightblue"]
explode2 = (0.1, 0, 0)

# One figure with two subplots arranged side by side
fig, (ax1, ax2) = plt.subplots(1, 2)

# (axes, sizes, explode, labels, colors, title) for the left and right chart
pie_panels = (
    (ax1, sizes, explode, labels, colors, "1992 player distribution"),
    (ax2, sizes2, explode2, labels2, colors2, "2022 player distribution"),
)
for axis, wedge_sizes, wedge_offsets, wedge_labels, wedge_colors, heading in pie_panels:
    axis.pie(wedge_sizes, explode=wedge_offsets, labels=wedge_labels,
             colors=wedge_colors, autopct='%1.1f%%', startangle=90)
    axis.set_title(heading)

# Adjust the spacing between subplots
fig.tight_layout()

# Save the figure
##plt.savefig("output_data/Fig1.png")

# Display the figure with the two pie charts
plt.show()

In [None]:
# Line graph with linear trend lines showing how the nationality mix of
# winning-team players shifts over time.
# NOTE(review): the counts below are typed in by hand — confirm their source.
nationalities = ['USA', 'Canada', 'Europe']
decades = [2002, 2012, 2022]
data_points = {
    'USA': [73, 67, 88],
    'Canada': [211, 162, 163],
    'Europe': [86, 65, 91],
}

# For each nationality draw the observed counts (solid, in the legend) and a
# degree-1 least-squares fit (dashed). The fit reuses the color of its series
# so each trend line can be matched to the data it belongs to.
for nationality in nationalities:
    counts = data_points[nationality]
    (series_line,) = plt.plot(decades, counts, label=nationality)

    z = np.polyfit(decades, counts, 1)
    p = np.poly1d(z)
    plt.plot(decades, p(decades), '--', color=series_line.get_color())

# Set labels and title
plt.xlabel('Decades')
plt.ylabel('Number of Players')
plt.title('Winning Team Player Nationality Breakdown Trend')

# Set x-axis tick positions and labels
plt.xticks(decades, ['2002', '2012', '2022'])

# Remove decimals from y-axis
plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda value, tick_number: f'{int(value)}'))

# Add legend (only the solid data lines carry labels)
plt.legend()

# Save the figure
##plt.savefig("output_data/Fig2.png")

# Display the plot
plt.show()

In [None]:
# plus minus win
# The per-player plus/minus values were already fetched and stored in
# winning_df['+/-'] (same endpoint and key, 0 where the value was missing),
# so reuse that column instead of repeating one HTTP request per player.
plus_mins_win = winning_df['+/-'].tolist()

# Mean plus/minus over every winning-team player row
overall_mean_win = sum(plus_mins_win) / len(plus_mins_win)
overall_mean_win

In [None]:
# plus minus lose
# The per-player plus/minus values were already fetched and stored in
# losing_df['+/-'] (same endpoint and key, 0 where the value was missing),
# so reuse that column instead of repeating one HTTP request per player.
plus_mins_lose = losing_df['+/-'].tolist()

# Mean plus/minus over every losing-team player row
overall_mean_lose = sum(plus_mins_lose) / len(plus_mins_lose)
overall_mean_lose


In [None]:
# Scatter plot of losing-team plus/minus against list position

# x is simply each value's position in plus_mins_lose
losing_positions = range(len(plus_mins_lose))

# Pearson correlation (and its p-value) between position and plus/minus
corr, p_value = pearsonr(losing_positions, plus_mins_lose)

fig, ax = plt.subplots()
ax.grid(False)  # no gridlines
ax.scatter(losing_positions, plus_mins_lose, color='red', label='Losing team')

# Correlation and p-value in a boxed note at the lower right of the axes
ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}",
        transform=ax.transAxes, ha='right', va='bottom',
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round'))

ax.set_title('Plus/Minus Data')
ax.set_xlabel('Index')
ax.set_ylabel('Plus/Minus')
ax.legend()

# Show the plot
plt.show()

# Save the figure
##plt.savefig("output_data/Fig3.png")

In [None]:
# Scatter plot of winning-team plus/minus against list position

# x is simply each value's position in plus_mins_win
winning_positions = range(len(plus_mins_win))

# Pearson correlation (and its p-value) between position and plus/minus
corr, p_value = pearsonr(winning_positions, plus_mins_win)

fig, ax = plt.subplots()
ax.grid(False)  # no gridlines
ax.scatter(winning_positions, plus_mins_win, color='blue', label='Winning team')

# Correlation and p-value in a boxed note at the lower right of the axes
ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}",
        transform=ax.transAxes, ha='right', va='bottom',
        bbox=dict(facecolor='white', edgecolor='black', boxstyle='round'))

ax.set_title('Plus/Minus Data')
ax.set_xlabel('Index')
ax.set_ylabel('Plus/Minus')
ax.legend()

# Show the plot
plt.show()


# Save the figure
##plt.savefig("output_data/Fig4.png")

In [None]:
# Scatter plot comparing winning and losing plus/minus, annotated with a
# Mann-Whitney U p-value.

# Statistics first. The Pearson correlation shown on the plot is computed for
# the losing-team values only (position in the list vs plus/minus).
corr, p_value = pearsonr(range(len(plus_mins_lose)), plus_mins_lose)

# Two-sided Mann-Whitney U test: winning vs losing plus/minus values
mwu, mwu_pvalue = mannwhitneyu(plus_mins_win, plus_mins_lose, alternative='two-sided')

fig, ax = plt.subplots()
ax.grid(False)  # no gridlines

# Losing team is drawn first, winning team on top of it
ax.scatter(range(len(plus_mins_lose)), plus_mins_lose, color='red', label='Losing team')
ax.scatter(range(len(plus_mins_win)), plus_mins_win, color='blue', label='Winning team')

# Boxed note with the statistics at the lower right of the axes
text_box_props = dict(facecolor='white', edgecolor='black', boxstyle='round')
ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nMann-Whitney U p-value: {mwu_pvalue:.2e}",
        transform=ax.transAxes, ha='right', va='bottom', bbox=text_box_props)

# Title and axis labels
ax.set_title('Plus/Minus Data')
ax.set_xlabel('Index')
ax.set_ylabel('Plus/Minus')

# Legend in the upper-right corner; legend_box_props is defined but not
# passed to any call.
legend_box_props = dict(facecolor='white', edgecolor='black', boxstyle='round')
legend_box = ax.legend(prop={'size': 12}, loc='upper right', bbox_to_anchor=(1.0, 1.0),
                       borderaxespad=0., framealpha=1.0, frameon=True, handlelength=1,
                       handletextpad=0.5, labelspacing=0.5, borderpad=0.5, fancybox=True)

# Solid black-on-white frame around the legend
legend_box.set_frame_on(True)
legend_frame = legend_box.get_frame()
legend_frame.set_edgecolor('black')
legend_frame.set_facecolor('white')
legend_frame.set_linestyle('-')

# Show the plot
fig.tight_layout()
plt.show()
# Save the figure
##plt.savefig("output_data/Fig5.png")

In [None]:
# Box plot of winning-team plus/minus
fig, ax = plt.subplots(figsize=(20, 15))

# patch_artist=True makes the box a fillable patch
boxplot = ax.boxplot(plus_mins_win, labels=["Winning"], patch_artist=True)

# Blue fill for the box
colors = ['blue']
for box_patch, fill_color in zip(boxplot['boxes'], colors):
    box_patch.set_facecolor(fill_color)

# Outlier markers: opaque blue circles
plt.setp(boxplot['fliers'], marker='o', color='blue', alpha=1.0)

# Median line: orange and thicker
plt.setp(boxplot['medians'], color='orange', linewidth=2)

# Pearson correlation (and p-value) between list position and plus/minus
corr, p_value = pearsonr(range(len(plus_mins_win)), plus_mins_win)

# Statistics note at the lower right, in a larger font
ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}", transform=ax.transAxes,
        ha='right', va='bottom', fontsize=20)

# Title, axis labels and tick labels in a larger font
ax.set_title("Plus/Minus Boxplot (Winning Team)", fontsize=30)
ax.set_xlabel("Team", fontsize=30)
ax.set_ylabel("Plus/Minus", fontsize=30)
ax.tick_params(axis='both', labelsize=30)

# Display the plot
plt.show()


# Save the figure
##plt.savefig("output_data/Fig6.png")

In [None]:
# Box plot of losing-team plus/minus
fig, ax = plt.subplots(figsize=(20, 15))

# patch_artist=True makes the box a fillable patch
boxplot = ax.boxplot(plus_mins_lose, labels=["Losing"], patch_artist=True)

# Orange fill for the box
colors = ['orange']
for box_patch, fill_color in zip(boxplot['boxes'], colors):
    box_patch.set_facecolor(fill_color)

# Outlier markers: opaque orange circles
plt.setp(boxplot['fliers'], marker='o', color='orange', alpha=1.0)

# Median line: black and thicker
plt.setp(boxplot['medians'], color='black', linewidth=2)

# Pearson correlation (and p-value) between list position and plus/minus
corr, p_value = pearsonr(range(len(plus_mins_lose)), plus_mins_lose)

# Statistics note at the lower right, in a larger font
ax.text(0.95, 0.05, f"Correlation: {corr:.2f}\nP-value: {p_value:.2e}", transform=ax.transAxes,
        ha='right', va='bottom', fontsize=20)

# Title, axis labels and tick labels in a larger font
ax.set_title("Plus/Minus Boxplot (Losing Team)", fontsize=30)
ax.set_xlabel("Team", fontsize=30)
ax.set_ylabel("Plus/Minus", fontsize=30)
ax.tick_params(axis='both', labelsize=30)

# Display the plot
plt.show()
# Save the figure
##plt.savefig("output_data/Fig7.png")

In [None]:
# Side-by-side box plots of winning vs losing plus/minus, with a
# Mann-Whitney U p-value.
fig, ax = plt.subplots(figsize=(20, 15))

# Winning team at x=1: blue box, blue outlier circles, thick orange median
boxplot1 = ax.boxplot(plus_mins_win, positions=[1], labels=["Winning"], patch_artist=True)
colors1 = ['blue']
for box_patch, fill_color in zip(boxplot1['boxes'], colors1):
    box_patch.set_facecolor(fill_color)
plt.setp(boxplot1['fliers'], marker='o', color='blue', alpha=1.0)
plt.setp(boxplot1['medians'], color='orange', linewidth=2)

# Losing team at x=2: orange box, orange outlier circles, thick black median
boxplot2 = ax.boxplot(plus_mins_lose, positions=[2], labels=["Losing"], patch_artist=True)
colors2 = ['orange']
for box_patch, fill_color in zip(boxplot2['boxes'], colors2):
    box_patch.set_facecolor(fill_color)
plt.setp(boxplot2['fliers'], marker='o', color='orange', alpha=1.0)
plt.setp(boxplot2['medians'], color='black', linewidth=2)

# Two-sided Mann-Whitney U test between the two groups (no correlation is
# computed here, hence "N/A" in the note)
statistic, p_value = mannwhitneyu(plus_mins_win, plus_mins_lose, alternative='two-sided')

# Statistics note at the lower right, in a larger font
ax.text(0.95, 0.05, f"Mann-Whitney U test:\nCorrelation: N/A\nP-value: {p_value:.2e}",
        transform=ax.transAxes, ha='right', va='bottom', fontsize=20)

# Title, axis labels and tick labels in a larger font
ax.set_title("Plus/Minus Boxplots", fontsize=30)
ax.set_xlabel("Team", fontsize=30)
ax.set_ylabel("Plus/Minus", fontsize=30)
ax.tick_params(axis='both', labelsize=30)

# Leave half a unit of margin on each side of the two boxes
ax.set_xlim(0.5, 2.5)

# Display the plot
plt.show()
# Save the figure
##plt.savefig("output_data/Fig8.png")

In [None]:
# Grouped bar chart comparing average stats of winning vs losing teams.
# NOTE(review): the values are typed in by hand — presumably copied from the
# averaging cells above; confirm they are still current.
categories = ['Age','Plus/Minus', 'PIM', 'OT Goals', 'PP Goals', 'Points']
winning_stats = [28.1, 4.5, 34.36, 0, 2.0, 21.9]
losing_stats = [27.8, 2.1, 35.03, 0, 2.1, 20.3]

# One group per category; each group holds two bars of width bar_width
bar_width = 0.35
bar_positions = np.arange(len(categories))

fig, ax = plt.subplots()

# Winning bars sit on the group position, losing bars one bar-width to the right
bar_series = (
    (0, winning_stats, 'Winning Team'),
    (bar_width, losing_stats, 'Losing Team'),
)
for x_shift, stat_values, series_label in bar_series:
    ax.bar(bar_positions + x_shift, stat_values, bar_width, label=series_label)

# Labels and title
ax.set_xlabel('Categories')
ax.set_ylabel('Average result per statistic')
ax.set_title('Stats Comparison: Winning Team vs Losing Team')

# Center each tick between the two bars of its group
ax.set_xticks(bar_positions + bar_width / 2)
ax.set_xticklabels(categories)

ax.legend()

# Save the figure
##plt.savefig("output_data/Fig9.png")

# Display the plot
plt.show()