In [1]:
# Load libraries
import pandas as pd
import glob
import os
import numpy as np
from IPython.display import display, HTML
from datetime import datetime

In [2]:
# Define the folder path containing the Excel files
folder_path = "../../data/instat/"

# Pattern matching Excel files starting with "Games" and ending with ".xlsx"
file_pattern = os.path.join(folder_path, "Skaters*.xlsx")
files = glob.glob(file_pattern)

print("Found files:")
for f in files:
    print(f)

# Define the mapping from part of the filename to Skaters type label
type_mapping = {
    "Total": "Total",
    "EV": "Even Strength",
    "PP": "Power Play",
    "PK": "Penalty Kill"
}

# List to collect DataFrames after processing each file
dfs = []

# Process each file
for file in files:
    # Read the file using the openpyxl engine
    df = pd.read_excel(file, engine='openpyxl')
    print(f"\nProcessing file: {file}")
    print("Original DataFrame:")
    print(df.head())

    # Infer the game type from the filename.
    base_name = os.path.basename(file)
    skater_type = None
    for key, label in type_mapping.items():
        if key in base_name:
            skater_type = label
            break
    # Fallback if no matching key is found.
    if skater_type is None:
        skater_type = "Unknown"

    # Add a new column indicating what the file represents
    df['Type'] = skater_type

    # Append the processed DataFrame to our list
    dfs.append(df)

Found files:
../../data/instat\SkatersEV.xlsx
../../data/instat\SkatersPK.xlsx
../../data/instat\SkatersPP.xlsx
../../data/instat\SkatersTotal.xlsx

Processing file: ../../data/instat\SkatersEV.xlsx
Original DataFrame:
   Shirt number            Player Position Time on ice  Games played  \
0             6     Alyssa Wilson        F       15:20            23   
1            12        Ella Bynan        F       13:58            24   
2            88        Reece Gall        F       11:07            24   
3            42      Averi Reider        F       06:21            23   
4            91  Olivia Sharkoski        F       10:54            24   

   All shifts Goals First assist Second assist Assists  ...  \
0          17  0.04         0.04          0.04    0.09  ...   
1          16     -         0.08          0.13    0.21  ...   
2          12  0.08            -             -       -  ...   
3           7  0.04            -             -       -  ...   
4          15  0.13         0.04 

In [3]:
skaters_df = pd.concat(dfs, ignore_index=True)

# Display the DataFrame as a scrollable HTML table
display(HTML(skaters_df.to_html(index=False, max_rows=5, max_cols=None)))

Shirt number,Player,Position,Time on ice,Games played,All shifts,Goals,First assist,Second assist,Assists,Points,+/-,Scoring chances,Penalties drawn,Penalty time,Faceoffs,Faceoffs won,"Faceoffs won, %",Hits,Shots,Shots on goal,Blocked shots,Power play shots,Short-handed shots,Passes to the slot,Faceoffs in DZ,Faceoffs won in DZ,"Faceoffs won in DZ, %",Faceoffs in NZ,Faceoffs won in NZ,"Faceoffs won in NZ, %",Faceoffs in OZ,Faceoffs won in OZ,"Faceoffs won in OZ, %",Puck touches,Puck control time,Plus,Minus,Penalties,Faceoffs lost,Hits against,Error leading to goal,Dump ins,Dump outs,Team goals when on ice,Opponent's goals when on ice,Power play,Successful power play,Power play time,Short-handed,Penalty killing,Short-handed time,Missed shots,% shots on goal,Slapshot,Wrist shot,Shootouts,Shootouts scored,Shootouts missed,1-on-1 shots,1-on-1 goals,"Shots conversion 1 on 1, %",Positional attack shots,Shots 5 v 5,Counter-attack shots,xG per shot,xG (Expected goals),xG per goal,Net xG (xG player on - opp. team's xG),Team xG when on ice,Opponent's xG when on ice,xG conversion,CORSI,CORSI-,CORSI+,"CORSI for, %",Fenwick for,Fenwick against,"Fenwick for, %",Playing in attack,Playing in defense,OZ possession,NZ possession,DZ possession,Puck battles,Puck battles won,"Puck battles won, %",Puck battles in DZ,Puck battles in NZ,Puck battles in OZ,Shots blocking,Dekes,Dekes successful,Dekes unsuccessful,"Dekes successful, %",Passes,Accurate passes,"Accurate passes, %",Pre-shots passes,Pass receptions,Scoring chances - total,Scoring chances - scored,Scoring chances missed,Scoring chances saved,"Scoring Chances, %",Inner slot shots - total,Inner slot shots - scored,Inner slot shots - missed,Inner slot shots - saved,"Inner slot shots, %",Outer slot shots - total,Outer slot shots - scored,Outer slot shots - missed,Outer slot shots - saved,"Outer slot shots, %",Blocked shots from the slot,Blocked shots outside of the slot,Takeaways,Puck retrievals after shots,Opponent’s dump-in retrievals,Takeaways in DZ,Loose puck recovery,Takeaways in NZ,Takeaways in OZ,EV DZ retrievals,Puck losses,Puck losses in DZ,EV OZ retrievals,Puck losses in NZ,Power play retrievals,Penalty kill retrievals,Puck losses in OZ,Entries,Entries via pass,Entries via dump in,Entries via stickhandling,Breakouts,Breakouts via pass,Breakouts via dump out,Breakouts via stickhandling,Date of birth,Nationality,National team,Height,Weight,Contract,Active hand,Type
6,Alyssa Wilson,F,15:20,23,17.0,0.04,0.04,0.04,0.09,0.13,-1.04,0.04,0.04,00:26,0.09,0.04,50%,0.13,2.0,0.74,0.61,-,-,0.17,0.04,0.04,100%,0.04,-,-,-,-,-,57,00:16,0.22,1.26,0.17,0.04,0.04,0.96,1.61,1.96,0.22,1.26,-,-,-,-,-,-,0.65,37%,0.74,0.87,-,-,-,-,-,-,1.17,2.0,0.04,0.31,0.05,0.29,-0.75,0.46,1.22,0.91%,-8.0,18,10.0,35%,7.0,14.0,35%,06:15,07:07,04:56,03:08,07:16,10.0,5.2,54%,7.0,1.61,1.39,0.78,0.26,0.22,0.04,83%,12.0,10.0,83%,0.22,6.0,0.26,0.04,0.04,0.17,17%,0.04,0.04,-,-,100%,0.22,-,0.04,0.17,-,-,0.61,7.0,1.87,1.09,5.2,1.48,1.61,0.39,1.39,4.2,1.3,0.48,1.04,-,-,1.87,1.48,0.52,0.22,0.74,2.3,1.48,0.13,0.7,2006-08-29,[object Object],-,-,-,-,Right,Even Strength
12,Ella Bynan,F,13:58,24,16.0,-,0.08,0.13,0.21,0.21,-0.58,0.38,0.13,00:10,9,4.3,48%,-,1.54,1.17,0.21,-,-,0.25,3.8,1.79,47%,2.8,1.38,49%,2.4,1.17,48%,55,00:14,0.21,0.79,0.04,4.7,0.04,0.08,1.79,0.5,0.21,0.79,-,-,-,-,-,-,0.17,76%,0.54,0.38,-,-,-,-,-,-,0.5,1.54,0.08,1.33,0.12,-,-0.6,0.43,1.03,-,-7.0,17,9.0,36%,7.0,13.0,35%,05:49,06:18,04:28,02:49,06:40,14.0,5.9,42%,6.0,2.0,5.7,0.92,0.63,0.42,0.21,67%,9.0,7.0,71%,0.29,8.0,0.71,-,0.08,0.63,-,0.38,-,0.04,0.33,-,0.33,-,0.04,0.29,-,-,0.21,3.4,1.04,0.04,2.5,0.21,0.67,0.29,0.54,5.3,0.54,0.5,1.63,-,-,3.1,2.0,0.33,0.25,1.46,2.7,1.04,0.04,1.58,-,[object Object],-,-,-,-,Right,Even Strength
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,Lillian Scardicchio,D,10:45,24,13.0,-,0.08,0.08,0.17,0.17,-0.21,0.13,0.08,00:30,0.04,-,-,-,1.29,0.71,0.25,0.08,-,0.25,0.04,-,-,-,-,-,-,-,-,36,00:09,0.25,0.46,0.25,0.04,0.04,0.04,0.92,0.75,0.29,0.46,1.42,0.04,00:47,0.04,0.04,00:00,0.33,55%,0.46,0.38,-,-,-,0.04,-,-,0.54,1.21,0.08,1.07,0.08,-,-0.23,0.38,0.64,-,-2.7,11,8.0,42%,5.3,8.0,40%,04:33,04:40,04:04,02:09,04:32,9.0,4.2,46%,3.5,1.92,3.8,0.33,0.25,0.21,0.04,83%,7.0,5.0,72%,0.29,6.0,0.5,-,0.17,0.21,-,0.13,-,0.04,0.08,-,0.38,-,0.13,0.13,-,0.13,0.13,2.2,1.17,0.08,1.21,1.13,0.71,0.25,0.46,3.9,0.63,0.67,1.25,0.04,-,2.0,1.83,0.33,0.17,1.33,1.38,0.63,0.04,0.71,2006-08-01,[object Object],-,-,-,-,Right,Total
19,Lucianna Szczubiala,D,08:11,24,10.0,-,0.04,0.04,0.08,0.08,-0.29,-,0.04,00:30,-,-,-,0.04,1.13,0.25,0.5,0.21,-,0.08,-,-,-,-,-,-,-,-,-,24,00:04,0.21,0.5,0.25,-,-,0.25,0.29,0.42,0.21,0.5,0.67,-,00:20,0.67,0.67,00:16,0.38,22%,0.71,0.38,-,-,-,-,-,-,0.58,0.92,-,0.22,0.02,-,-0.38,0.23,0.62,-,-4.8,9,4.2,32%,3.1,7.0,32%,03:12,03:55,02:25,01:43,04:02,4.2,2.9,69%,3.2,0.58,0.42,0.58,0.04,0.04,-,100%,5.8,4.2,72%,0.08,2.4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,0.5,3.8,0.83,0.71,3.2,1.25,0.38,0.17,0.63,1.75,0.63,-,0.71,0.04,0.17,0.42,0.42,0.21,0.08,0.13,0.79,0.5,-,0.29,2006-06-05,[object Object],-,7,2.6,-,Right,Total


In [4]:

skaters_df.to_excel("SkatersCombined.xlsx", index=False)
# skaters_df.to_csv("SkatersCombined.csv", index=False)

In [5]:
skaters_df =  pd.read_excel("SkatersCombined.xlsx")

In [6]:
skaters_df.replace('-', np.nan, inplace=True)

  skaters_df.replace('-', np.nan, inplace=True)


In [7]:
# Count the number of null values in each column
null_counts = skaters_df.isnull().sum()

# Display columns with null values and their counts
null_info = null_counts[null_counts > 0]
print("Columns with null values and their counts:")
print(null_info)

Columns with null values and their counts:
Time on ice        1
Goals             52
First assist      46
Second assist     53
Assists           40
                  ..
Date of birth     48
National team     80
Height            64
Weight            72
Contract          80
Length: 140, dtype: int64


In [8]:
# Replace NAs in time-related columns with "0:00"
# Define time columns to be split
time_columns = [
    'Time on ice', 'Penalty time', 'Puck control time', 'Power play time',
    'Short-handed time', 'Playing in attack', 'Playing in defense',
    'OZ possession', 'NZ possession', 'DZ possession'
]

skaters_df[time_columns] = skaters_df[time_columns].fillna("0:00")

# Replace NAs in percentage-related columns with "0%"
percentage_columns = [col for col in skaters_df.columns if '%' in col]
skaters_df[percentage_columns] = skaters_df[percentage_columns].fillna("0%")

# Replace all remaining NAs with 0
skaters_df.fillna(0, inplace=True)

In [9]:
# Get the data types of the columns in the DataFrame
column_dtypes = skaters_df.dtypes

# Convert the data types Series to a DataFrame for better formatting
column_dtypes_df = column_dtypes.reset_index()
column_dtypes_df.columns = ['Column', 'Data Type']

# Display the DataFrame as a scrollable HTML table
display(HTML(column_dtypes_df[column_dtypes_df['Data Type'] == 'object'].to_html(index=False, max_rows=None, max_cols=None)))

Column,Data Type
Player,object
Position,object
Time on ice,object
Penalty time,object
"Faceoffs won, %",object
"Faceoffs won in DZ, %",object
"Faceoffs won in NZ, %",object
"Faceoffs won in OZ, %",object
Puck control time,object
Power play time,object


In [10]:
# Remove '%' and convert percentage columns to numeric
for col in percentage_columns:
    skaters_df[col] = skaters_df[col].str.rstrip('%').astype(float) * 0.01

print("Percentage columns converted to numeric:")
print(skaters_df[percentage_columns].dtypes)

Percentage columns converted to numeric:
Faceoffs won, %               float64
Faceoffs won in DZ, %         float64
Faceoffs won in NZ, %         float64
Faceoffs won in OZ, %         float64
% shots on goal               float64
Shots conversion 1 on 1, %    float64
CORSI for, %                  float64
Fenwick for, %                float64
Puck battles won, %           float64
Dekes successful, %           float64
Accurate passes, %            float64
Scoring Chances, %            float64
Inner slot shots, %           float64
Outer slot shots, %           float64
dtype: object


In [11]:
# Filter non-numeric columns
non_numeric_columns = skaters_df.select_dtypes(include='object')

# Display the non-numeric columns as a scrollable HTML table
display(HTML(non_numeric_columns.to_html(index=False, max_rows=5, max_cols=None)))

Player,Position,Time on ice,Penalty time,Puck control time,Power play time,Short-handed time,xG conversion,Playing in attack,Playing in defense,OZ possession,NZ possession,DZ possession,Date of birth,Nationality,Active hand,Type
Alyssa Wilson,F,15:20,00:26,00:16,0:00,0:00,0.91%,06:15,07:07,04:56,03:08,07:16,2006-08-29,[object Object],Right,Even Strength
Ella Bynan,F,13:58,00:10,00:14,0:00,0:00,0,05:49,06:18,04:28,02:49,06:40,0,[object Object],Right,Even Strength
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Lillian Scardicchio,D,10:45,00:30,00:09,00:47,00:00,0,04:33,04:40,04:04,02:09,04:32,2006-08-01,[object Object],Right,Total
Lucianna Szczubiala,D,08:11,00:30,00:04,00:20,00:16,0,03:12,03:55,02:25,01:43,04:02,2006-06-05,[object Object],Right,Total


In [12]:
skaters_df['xG conversion'] = skaters_df['xG conversion'].str.rstrip('%').astype(float) * 0.01
print("xG conversion column converted to numeric:")
print(skaters_df['xG conversion'].dtypes)

xG conversion column converted to numeric:
float64


In [13]:
# Define time columns to be split
time_columns = [
    'Time on ice', 'Penalty time', 'Puck control time', 'Power play time',
    'Short-handed time', 'Playing in attack', 'Playing in defense',
    'OZ possession', 'NZ possession', 'DZ possession'
]

# Split the time columns into minutes and seconds
for col in time_columns:
    split_time = skaters_df[col].str.split(':', expand=True)
    skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
    skaters_df[f'{col}_seconds'] = split_time[1].astype(int)

# Display the updated DataFrame with new columns
print(skaters_df[[col] + [f'{col}_minutes', f'{col}_seconds']].head())

  DZ possession  DZ possession_minutes  DZ possession_seconds
0         07:16                      7                     16
1         06:40                      6                     40
2         05:20                      5                     20
3         02:54                      2                     54
4         05:18                      5                     18


  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(int)
  skaters_df[f'{col}_seconds'] = split_time[1].astype(int)
  skaters_df[f'{col}_minutes'] = split_time[0].astype(in

In [14]:
# Drop the time columns from the DataFrame
skaters_df.drop(columns=time_columns, inplace=True)

# Display the updated DataFrame
display(HTML(skaters_df.to_html(index=False, max_rows=5, max_cols=None, notebook=True)))

Shirt number,Player,Position,Games played,All shifts,Goals,First assist,Second assist,Assists,Points,+/-,Scoring chances,Penalties drawn,Faceoffs,Faceoffs won,"Faceoffs won, %",Hits,Shots,Shots on goal,Blocked shots,Power play shots,Short-handed shots,Passes to the slot,Faceoffs in DZ,Faceoffs won in DZ,"Faceoffs won in DZ, %",Faceoffs in NZ,Faceoffs won in NZ,"Faceoffs won in NZ, %",Faceoffs in OZ,Faceoffs won in OZ,"Faceoffs won in OZ, %",Puck touches,Plus,Minus,Penalties,Faceoffs lost,Hits against,Error leading to goal,Dump ins,Dump outs,Team goals when on ice,Opponent's goals when on ice,Power play,Successful power play,Short-handed,Penalty killing,Missed shots,% shots on goal,Slapshot,Wrist shot,Shootouts,Shootouts scored,Shootouts missed,1-on-1 shots,1-on-1 goals,"Shots conversion 1 on 1, %",Positional attack shots,Shots 5 v 5,Counter-attack shots,xG per shot,xG (Expected goals),xG per goal,Net xG (xG player on - opp. team's xG),Team xG when on ice,Opponent's xG when on ice,xG conversion,CORSI,CORSI-,CORSI+,"CORSI for, %",Fenwick for,Fenwick against,"Fenwick for, %",Puck battles,Puck battles won,"Puck battles won, %",Puck battles in DZ,Puck battles in NZ,Puck battles in OZ,Shots blocking,Dekes,Dekes successful,Dekes unsuccessful,"Dekes successful, %",Passes,Accurate passes,"Accurate passes, %",Pre-shots passes,Pass receptions,Scoring chances - total,Scoring chances - scored,Scoring chances missed,Scoring chances saved,"Scoring Chances, %",Inner slot shots - total,Inner slot shots - scored,Inner slot shots - missed,Inner slot shots - saved,"Inner slot shots, %",Outer slot shots - total,Outer slot shots - scored,Outer slot shots - missed,Outer slot shots - saved,"Outer slot shots, %",Blocked shots from the slot,Blocked shots outside of the slot,Takeaways,Puck retrievals after shots,Opponent’s dump-in retrievals,Takeaways in DZ,Loose puck recovery,Takeaways in NZ,Takeaways in OZ,EV DZ retrievals,Puck losses,Puck losses in DZ,EV OZ retrievals,Puck losses in NZ,Power play retrievals,Penalty kill retrievals,Puck losses in OZ,Entries,Entries via pass,Entries via dump in,Entries via stickhandling,Breakouts,Breakouts via pass,Breakouts via dump out,Breakouts via stickhandling,Date of birth,Nationality,National team,Height,Weight,Contract,Active hand,Type,Time on ice_minutes,Time on ice_seconds,Penalty time_minutes,Penalty time_seconds,Puck control time_minutes,Puck control time_seconds,Power play time_minutes,Power play time_seconds,Short-handed time_minutes,Short-handed time_seconds,Playing in attack_minutes,Playing in attack_seconds,Playing in defense_minutes,Playing in defense_seconds,OZ possession_minutes,OZ possession_seconds,NZ possession_minutes,NZ possession_seconds,DZ possession_minutes,DZ possession_seconds
6,Alyssa Wilson,F,23,17.0,0.04,0.04,0.04,0.09,0.13,-1.04,0.04,0.04,0.09,0.04,0.50,0.13,2.00,0.74,0.61,0.00,0.0,0.17,0.04,0.04,1.00,0.04,0.00,0.00,0.0,0.00,0.00,57.0,0.22,1.26,0.17,0.04,0.04,0.96,1.61,1.96,0.22,1.26,0.00,0.00,0.00,0.00,0.65,0.37,0.74,0.87,0.0,0.0,0.0,0.00,0.0,0.0,1.17,2.00,0.04,0.31,0.05,0.29,-0.75,0.46,1.22,0.0091,-8.0,18.0,10.0,0.35,7.0,14.0,0.35,10.0,5.2,0.54,7.0,1.61,1.39,0.78,0.26,0.22,0.04,0.83,12.0,10.0,0.83,0.22,6.0,0.26,0.04,0.04,0.17,0.17,0.04,0.04,0.00,0.00,1.0,0.22,0.0,0.04,0.17,0.0,0.00,0.61,7.0,1.87,1.09,5.20,1.48,1.61,0.39,1.39,4.20,1.30,0.48,1.04,0.00,0.00,1.87,1.48,0.52,0.22,0.74,2.30,1.48,0.13,0.70,2006-08-29,[object Object],0.0,0.0,0.0,0.0,Right,Even Strength,15,20,0,26,0,16,0,0,0,0,6,15,7,7,4,56,3,8,7,16
12,Ella Bynan,F,24,16.0,0.00,0.08,0.13,0.21,0.21,-0.58,0.38,0.13,9.00,4.30,0.48,0.00,1.54,1.17,0.21,0.00,0.0,0.25,3.80,1.79,0.47,2.80,1.38,0.49,2.4,1.17,0.48,55.0,0.21,0.79,0.04,4.70,0.04,0.08,1.79,0.50,0.21,0.79,0.00,0.00,0.00,0.00,0.17,0.76,0.54,0.38,0.0,0.0,0.0,0.00,0.0,0.0,0.50,1.54,0.08,1.33,0.12,0.00,-0.60,0.43,1.03,,-7.0,17.0,9.0,0.36,7.0,13.0,0.35,14.0,5.9,0.42,6.0,2.00,5.70,0.92,0.63,0.42,0.21,0.67,9.0,7.0,0.71,0.29,8.0,0.71,0.00,0.08,0.63,0.00,0.38,0.00,0.04,0.33,0.0,0.33,0.0,0.04,0.29,0.0,0.00,0.21,3.4,1.04,0.04,2.50,0.21,0.67,0.29,0.54,5.30,0.54,0.50,1.63,0.00,0.00,3.10,2.00,0.33,0.25,1.46,2.70,1.04,0.04,1.58,0,[object Object],0.0,0.0,0.0,0.0,Right,Even Strength,13,58,0,10,0,14,0,0,0,0,5,49,6,18,4,28,2,49,6,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,Lillian Scardicchio,D,24,13.0,0.00,0.08,0.08,0.17,0.17,-0.21,0.13,0.08,0.04,0.00,0.00,0.00,1.29,0.71,0.25,0.08,0.0,0.25,0.04,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,36.0,0.25,0.46,0.25,0.04,0.04,0.04,0.92,0.75,0.29,0.46,1.42,0.04,0.04,0.04,0.33,0.55,0.46,0.38,0.0,0.0,0.0,0.04,0.0,0.0,0.54,1.21,0.08,1.07,0.08,0.00,-0.23,0.38,0.64,,-2.7,11.0,8.0,0.42,5.3,8.0,0.40,9.0,4.2,0.46,3.5,1.92,3.80,0.33,0.25,0.21,0.04,0.83,7.0,5.0,0.72,0.29,6.0,0.50,0.00,0.17,0.21,0.00,0.13,0.00,0.04,0.08,0.0,0.38,0.0,0.13,0.13,0.0,0.13,0.13,2.2,1.17,0.08,1.21,1.13,0.71,0.25,0.46,3.90,0.63,0.67,1.25,0.04,0.00,2.00,1.83,0.33,0.17,1.33,1.38,0.63,0.04,0.71,2006-08-01,[object Object],0.0,0.0,0.0,0.0,Right,Total,10,45,0,30,0,9,0,47,0,0,4,33,4,40,4,4,2,9,4,32
19,Lucianna Szczubiala,D,24,10.0,0.00,0.04,0.04,0.08,0.08,-0.29,0.00,0.04,0.00,0.00,0.00,0.04,1.13,0.25,0.50,0.21,0.0,0.08,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,24.0,0.21,0.50,0.25,0.00,0.00,0.25,0.29,0.42,0.21,0.50,0.67,0.00,0.67,0.67,0.38,0.22,0.71,0.38,0.0,0.0,0.0,0.00,0.0,0.0,0.58,0.92,0.00,0.22,0.02,0.00,-0.38,0.23,0.62,,-4.8,9.0,4.2,0.32,3.1,7.0,0.32,4.2,2.9,0.69,3.2,0.58,0.42,0.58,0.04,0.04,0.00,1.00,5.8,4.2,0.72,0.08,2.4,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.00,0.50,3.8,0.83,0.71,3.20,1.25,0.38,0.17,0.63,1.75,0.63,0.00,0.71,0.04,0.17,0.42,0.42,0.21,0.08,0.13,0.79,0.50,0.00,0.29,2006-06-05,[object Object],0.0,7.0,2.6,0.0,Right,Total,8,11,0,30,0,4,0,20,0,16,3,12,3,55,2,25,1,43,4,2


In [15]:
non_numeric_columns = skaters_df.select_dtypes(include='object')

# Display all non-numeric columns as a scrollable HTML table
display(HTML(non_numeric_columns.to_html(index=False, max_rows=10, max_cols=None)))

Player,Position,Date of birth,Nationality,Active hand,Type
Alyssa Wilson,F,2006-08-29,[object Object],Right,Even Strength
Ella Bynan,F,0,[object Object],Right,Even Strength
Reece Gall,F,0,[object Object],Right,Even Strength
Averi Reider,F,0,[object Object],Right,Even Strength
Olivia Sharkoski,F,0,[object Object],Right,Even Strength
...,...,...,...,...,...
Kayla MacKinnon,D,0,[object Object],Right,Total
Kaytlen Kratzer,D,2004-11-30,[object Object],Left,Total
Julia Marano,D,0,[object Object],Left,Total
Lillian Scardicchio,D,2006-08-01,[object Object],Right,Total


In [16]:
skaters_df.drop(columns=['Nationality', 'Date of birth'], inplace=True)

# Display the updated DataFrame
display(HTML(skaters_df.to_html(index=False, max_rows=5, max_cols=None, notebook=True)))

Shirt number,Player,Position,Games played,All shifts,Goals,First assist,Second assist,Assists,Points,+/-,Scoring chances,Penalties drawn,Faceoffs,Faceoffs won,"Faceoffs won, %",Hits,Shots,Shots on goal,Blocked shots,Power play shots,Short-handed shots,Passes to the slot,Faceoffs in DZ,Faceoffs won in DZ,"Faceoffs won in DZ, %",Faceoffs in NZ,Faceoffs won in NZ,"Faceoffs won in NZ, %",Faceoffs in OZ,Faceoffs won in OZ,"Faceoffs won in OZ, %",Puck touches,Plus,Minus,Penalties,Faceoffs lost,Hits against,Error leading to goal,Dump ins,Dump outs,Team goals when on ice,Opponent's goals when on ice,Power play,Successful power play,Short-handed,Penalty killing,Missed shots,% shots on goal,Slapshot,Wrist shot,Shootouts,Shootouts scored,Shootouts missed,1-on-1 shots,1-on-1 goals,"Shots conversion 1 on 1, %",Positional attack shots,Shots 5 v 5,Counter-attack shots,xG per shot,xG (Expected goals),xG per goal,Net xG (xG player on - opp. team's xG),Team xG when on ice,Opponent's xG when on ice,xG conversion,CORSI,CORSI-,CORSI+,"CORSI for, %",Fenwick for,Fenwick against,"Fenwick for, %",Puck battles,Puck battles won,"Puck battles won, %",Puck battles in DZ,Puck battles in NZ,Puck battles in OZ,Shots blocking,Dekes,Dekes successful,Dekes unsuccessful,"Dekes successful, %",Passes,Accurate passes,"Accurate passes, %",Pre-shots passes,Pass receptions,Scoring chances - total,Scoring chances - scored,Scoring chances missed,Scoring chances saved,"Scoring Chances, %",Inner slot shots - total,Inner slot shots - scored,Inner slot shots - missed,Inner slot shots - saved,"Inner slot shots, %",Outer slot shots - total,Outer slot shots - scored,Outer slot shots - missed,Outer slot shots - saved,"Outer slot shots, %",Blocked shots from the slot,Blocked shots outside of the slot,Takeaways,Puck retrievals after shots,Opponent’s dump-in retrievals,Takeaways in DZ,Loose puck recovery,Takeaways in NZ,Takeaways in OZ,EV DZ retrievals,Puck losses,Puck losses in DZ,EV OZ retrievals,Puck losses in NZ,Power play retrievals,Penalty kill retrievals,Puck losses in OZ,Entries,Entries via pass,Entries via dump in,Entries via stickhandling,Breakouts,Breakouts via pass,Breakouts via dump out,Breakouts via stickhandling,National team,Height,Weight,Contract,Active hand,Type,Time on ice_minutes,Time on ice_seconds,Penalty time_minutes,Penalty time_seconds,Puck control time_minutes,Puck control time_seconds,Power play time_minutes,Power play time_seconds,Short-handed time_minutes,Short-handed time_seconds,Playing in attack_minutes,Playing in attack_seconds,Playing in defense_minutes,Playing in defense_seconds,OZ possession_minutes,OZ possession_seconds,NZ possession_minutes,NZ possession_seconds,DZ possession_minutes,DZ possession_seconds
6,Alyssa Wilson,F,23,17.0,0.04,0.04,0.04,0.09,0.13,-1.04,0.04,0.04,0.09,0.04,0.50,0.13,2.00,0.74,0.61,0.00,0.0,0.17,0.04,0.04,1.00,0.04,0.00,0.00,0.0,0.00,0.00,57.0,0.22,1.26,0.17,0.04,0.04,0.96,1.61,1.96,0.22,1.26,0.00,0.00,0.00,0.00,0.65,0.37,0.74,0.87,0.0,0.0,0.0,0.00,0.0,0.0,1.17,2.00,0.04,0.31,0.05,0.29,-0.75,0.46,1.22,0.0091,-8.0,18.0,10.0,0.35,7.0,14.0,0.35,10.0,5.2,0.54,7.0,1.61,1.39,0.78,0.26,0.22,0.04,0.83,12.0,10.0,0.83,0.22,6.0,0.26,0.04,0.04,0.17,0.17,0.04,0.04,0.00,0.00,1.0,0.22,0.0,0.04,0.17,0.0,0.00,0.61,7.0,1.87,1.09,5.20,1.48,1.61,0.39,1.39,4.20,1.30,0.48,1.04,0.00,0.00,1.87,1.48,0.52,0.22,0.74,2.30,1.48,0.13,0.70,0.0,0.0,0.0,0.0,Right,Even Strength,15,20,0,26,0,16,0,0,0,0,6,15,7,7,4,56,3,8,7,16
12,Ella Bynan,F,24,16.0,0.00,0.08,0.13,0.21,0.21,-0.58,0.38,0.13,9.00,4.30,0.48,0.00,1.54,1.17,0.21,0.00,0.0,0.25,3.80,1.79,0.47,2.80,1.38,0.49,2.4,1.17,0.48,55.0,0.21,0.79,0.04,4.70,0.04,0.08,1.79,0.50,0.21,0.79,0.00,0.00,0.00,0.00,0.17,0.76,0.54,0.38,0.0,0.0,0.0,0.00,0.0,0.0,0.50,1.54,0.08,1.33,0.12,0.00,-0.60,0.43,1.03,,-7.0,17.0,9.0,0.36,7.0,13.0,0.35,14.0,5.9,0.42,6.0,2.00,5.70,0.92,0.63,0.42,0.21,0.67,9.0,7.0,0.71,0.29,8.0,0.71,0.00,0.08,0.63,0.00,0.38,0.00,0.04,0.33,0.0,0.33,0.0,0.04,0.29,0.0,0.00,0.21,3.4,1.04,0.04,2.50,0.21,0.67,0.29,0.54,5.30,0.54,0.50,1.63,0.00,0.00,3.10,2.00,0.33,0.25,1.46,2.70,1.04,0.04,1.58,0.0,0.0,0.0,0.0,Right,Even Strength,13,58,0,10,0,14,0,0,0,0,5,49,6,18,4,28,2,49,6,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,Lillian Scardicchio,D,24,13.0,0.00,0.08,0.08,0.17,0.17,-0.21,0.13,0.08,0.04,0.00,0.00,0.00,1.29,0.71,0.25,0.08,0.0,0.25,0.04,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,36.0,0.25,0.46,0.25,0.04,0.04,0.04,0.92,0.75,0.29,0.46,1.42,0.04,0.04,0.04,0.33,0.55,0.46,0.38,0.0,0.0,0.0,0.04,0.0,0.0,0.54,1.21,0.08,1.07,0.08,0.00,-0.23,0.38,0.64,,-2.7,11.0,8.0,0.42,5.3,8.0,0.40,9.0,4.2,0.46,3.5,1.92,3.80,0.33,0.25,0.21,0.04,0.83,7.0,5.0,0.72,0.29,6.0,0.50,0.00,0.17,0.21,0.00,0.13,0.00,0.04,0.08,0.0,0.38,0.0,0.13,0.13,0.0,0.13,0.13,2.2,1.17,0.08,1.21,1.13,0.71,0.25,0.46,3.90,0.63,0.67,1.25,0.04,0.00,2.00,1.83,0.33,0.17,1.33,1.38,0.63,0.04,0.71,0.0,0.0,0.0,0.0,Right,Total,10,45,0,30,0,9,0,47,0,0,4,33,4,40,4,4,2,9,4,32
19,Lucianna Szczubiala,D,24,10.0,0.00,0.04,0.04,0.08,0.08,-0.29,0.00,0.04,0.00,0.00,0.00,0.04,1.13,0.25,0.50,0.21,0.0,0.08,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,24.0,0.21,0.50,0.25,0.00,0.00,0.25,0.29,0.42,0.21,0.50,0.67,0.00,0.67,0.67,0.38,0.22,0.71,0.38,0.0,0.0,0.0,0.00,0.0,0.0,0.58,0.92,0.00,0.22,0.02,0.00,-0.38,0.23,0.62,,-4.8,9.0,4.2,0.32,3.1,7.0,0.32,4.2,2.9,0.69,3.2,0.58,0.42,0.58,0.04,0.04,0.00,1.00,5.8,4.2,0.72,0.08,2.4,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0.00,0.50,3.8,0.83,0.71,3.20,1.25,0.38,0.17,0.63,1.75,0.63,0.00,0.71,0.04,0.17,0.42,0.42,0.21,0.08,0.13,0.79,0.50,0.00,0.29,0.0,7.0,2.6,0.0,Right,Total,8,11,0,30,0,4,0,20,0,16,3,12,3,55,2,25,1,43,4,2


In [17]:
skaters_df.to_csv("SkatersCleaned.csv", index=False)
skaters_df.to_excel("SkatersCleaned.xlsx", index=False)