In [1]:
from dataclasses import dataclass
from dash import html, dash_table, Input, Output, callback, dcc
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd

from app import app

# --------------------------- Column Filters ------------------------------------------------------

dropped_columns = [
   'birthDate',
   'nationality',
   'id',
   'jerseyNumber',
   'code',
   'type', 
   'abbreviation',
   'birthStateProvince',
   'Team_Number',
   'Unnamed: 0',
   'index',
   'alternateCaptain',
   'captain',
   'active',
   'rookie',
   'rosterStatus',
   'birthCity'
]
offense = [
     'fullName',
     'Salary_2021-22',
     'name',
     'assists22',
     'goals22',
     'shots22',
     'faceOffPct22',
     'shotPct22',
     'gameWinningGoals22',
     'overTimeGoals22',
     'points22',
     'plusMinus22',
]
special_teams = [
     'fullName',
     'Salary_2021-22',
     'name',
     'powerPlayGoals22',
     'powerPlayPoints22',
     'powerPlayTimeOnIce22',
     'shortHandedGoals22',
     'shortHandedPoints22',
     'shortHandedTimeOnIce22',
]
enforcer = [
     'fullName',
     'Salary_2021-22',
     'name',
     'hits22',
     'penaltyMinutes22',
]
endurance = [
         'fullName',
         'Salary_2021-22',
         'name',
         'timeOnIce22',
         'games22',
         'shifts22',
         'blocked22',
         'timeOnIcePerGame22',
         'evenTimeOnIcePerGame22',
         'shortHandedTimeOnIcePerGame22',
         'powerPlayTimeOnIcePerGame22',
]

# --------------------------- Base DataFrame -------------------------------------------------

df = pd.read_csv('~/Desktop/NHL-Salary-Predictions/data/cleaned_player_df_dash.csv').drop(dropped_columns, axis=1)
df['shootsCatches'] = df['shootsCatches'].replace('L', 'Left').replace('R', 'Right')
df = df[df['Salary_2021-22'] != 0.0]
df['id'] = df['fullName']
df.set_index('id', inplace=True, drop=False)

active_cell = {'row': 0, 'column': 1, 'column_id': 'Player Name', 'row_id': 0}

# --------------------------- Inital Data Table DataFrame -------------------------------------
active_cell = {'row': 0, 'column': 1, 'column_id': 'Player Name', 'row_id': 0}

basic_player_data = df[['fullName',
                     'Salary_2021-22',
                     'name',
                     'currentAge',
                     'height',
                     'weight',
                     'shootsCatches',
                     'birthCountry']] \
                    .sort_values('Salary_2021-22', ascending=False)
basic_player_data['Salary_Rank'] = basic_player_data['Salary_2021-22'].rank(method='first', ascending=False)
basic_player_data = basic_player_data[['Salary_Rank',
                                 'fullName',
                                 'Salary_2021-22',
                                 'name',
                                 'currentAge',
                                 'height',
                                 'weight',
                                 'shootsCatches',
                                 'birthCountry']]



# Dash Formatting
money = dash_table.FormatTemplate.money(2)

basic_player_columns = [
    dict(id='Salary_Rank',
         name='Salary Rank',
         type='numeric'),
    dict(id='fullName', 
         name='Player Name'),
    dict(id='Salary_2021-22',
         name='Salary 2021-22',
         type='numeric',
         format=money),
    dict(id='name', 
         name='Position'),
    dict(id='currentAge',
         name='Age'),
    dict(id='height',
         name='Height',
         type='any'),
    dict(id='weight',
         name='Weight'),
    dict(id='shootsCatches',
         name='Shoots'),
    dict(id='birthCountry',
         name='Nationality')
]

# --------------------------- Offensive Stat's -------------------------------------------------

offense_data = df[offense].copy()

# Dash Formatting
offensive_columns = [
     dict(id='fullName', name='Player Name'),
     dict(id='Salary_2021-22',
          name='Salary 2021-22',
          type='numeric',
          format=money),
     dict(id='name',
          name='Position'),
     dict(id='assists22',
          name='Total Assists',
          type='numeric'),
     dict(id='goals22',
          name='Total Goals',
          type='numeric'),
     dict(id='shots22',
          name='Total Shots',
          type='numeric'),
     dict(id='faceOffPct22',
          name='Face Off Percentage',
          type='numeric',
          format=dash_table.Format.Format(precision=2,
                                          scheme=dash_table.Format.Scheme.percentage)),
     dict(id='shotPct22',
          name='Shot Percentage',
          type='numeric',
          format=dash_table.Format.Format(precision=2,
                                          scheme=dash_table.Format.Scheme.percentage)),
     dict(id='gameWinningGoals22',
          name='Game Winning Goals',
          type='numeric'),
     dict(id='overTimeGoals22',
          name='Over Time Goals',
          type='numeric'),
     dict(id='points22',
          name='Points',
          type='numeric'),
     dict(id='plusMinus22',
          name='Plus Minus',
          type='numeric')
]

# --------------------------- Special Team Stat's ----------------------------------------------

special_team_data = df[special_teams].copy()
special_team_data['powerPlayTimeOnIce22'] = special_team_data['powerPlayTimeOnIce22'].str.replace(':', '.').astype(float)
special_team_data['shortHandedTimeOnIce22'] = special_team_data['shortHandedTimeOnIce22'].str.replace(':', '.').astype(float)

# Dash Formatting
special_teams_columns = [
     dict(id='fullName',
          name='Player Name'),
     dict(id='Salary_2021-22',
          name='Salary 2021-22',
          type='numeric',
          format=money),
     dict(id='name',
          name='Position'),
     dict(id='powerPlayGoals22',
          name='Power Play Goals', 
          type='numeric'),
     dict(id='powerPlayPoints22',
          name='Power Play Points',
          type='numeric'),
     dict(id='powerPlayTimeOnIce22',
          name='Power Play Time On Ice',
          type='numeric',
          format=dash_table.Format.Format(decimal_delimiter=':').scheme('f').precision(2)),
     dict(id='shortHandedGoals22',
          name='Short Handed Goals',
          type='numeric'),
     dict(id='shortHandedPoints22',
          name='Short Handed Points',
          type='numeric'),
     dict(id='shortHandedTimeOnIce22',
          name='Short Handed Time On Ice', type='numeric',
          format=dash_table.Format.Format(decimal_delimiter=':').scheme('f').precision(2)),
]

# --------------------------- Enforcer Stat's ----------------------------------------------

enforcer_data = df[enforcer].copy()

# Dash Formatting
enforcer_columns = [
     dict(id='fullName', name='Player Name'),
     dict(id='Salary_2021-22',
          name='Salary 2021-22',
          type='numeric',
          format=money),
     dict(id='name',
          name='Position'),
     dict(id='hits22', name='Total Hits', type='numeric'),
     dict(id='penaltyMinutes22', name='Total Penalty Minutes', type='numeric')
]

# --------------------------- Endurance'hits22' Stat's ----------------------------------------------

endurance_data = df[endurance].copy()

endurance_data['timeOnIce22'] = endurance_data['timeOnIce22'] \
                                        .str.replace(':', '.').astype(float)
endurance_data['timeOnIcePerGame22'] = endurance_data['timeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
endurance_data['evenTimeOnIcePerGame22'] = endurance_data['evenTimeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
endurance_data['shortHandedTimeOnIcePerGame22'] = endurance_data['shortHandedTimeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
endurance_data['powerPlayTimeOnIcePerGame22'] = endurance_data['powerPlayTimeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)

In [4]:
dropped_columns = [
   'birthDate',
   'nationality',
   'id',
   'jerseyNumber',
   'code',
   'type', 
   'abbreviation',
   'birthStateProvince',
   'Team_Number',
   'Unnamed: 0',
   'index',
   'alternateCaptain',
   'captain',
   'active',
   'rookie',
   'rosterStatus',
   'birthCity'
]

basic_player = ['Salary_Rank',
                'fullName',
                'Salary_2021-22',
                'name',
                'currentAge',
                'height',
                'weight',
                'shootsCatches',
                'birthCountry'
]
offense = [
     'fullName',
     'Salary_Rank',
     'Salary_2021-22',
     'name',
     'assists22',
     'goals22',
     'shots22',
     'faceOffPct22',
     'shotPct22',
     'gameWinningGoals22',
     'overTimeGoals22',
     'points22',
     'plusMinus22',
]
special_teams = [
     'fullName',
     'Salary_2021-22',
     'Salary_Rank',
     'name',
     'powerPlayGoals22',
     'powerPlayPoints22',
     'powerPlayTimeOnIce22',
     'shortHandedGoals22',
     'shortHandedPoints22',
     'shortHandedTimeOnIce22',
]
enforcer = [
     'fullName',
     'Salary_2021-22',
     'Salary_Rank',
     'name',
     'hits22',
     'penaltyMinutes22',
]
endurance = [
          'fullName',
          'Salary_2021-22',
           'name',
          'timeOnIce22',
          'games22',
          'shifts22',
          'blocked22',
          'timeOnIcePerGame22',
          'evenTimeOnIcePerGame22',
          'shortHandedTimeOnIcePerGame22',                  
          'powerPlayTimeOnIcePerGame22',

]



filter_list = [offense, special_teams, enforcer, endurance]
filter_str = ['offense', 'special_teams', 'enforcer', 'endurance']


df = pd.read_csv('~/Desktop/NHL-Salary-Predictions/data/cleaned_player_df_dash.csv').drop(dropped_columns, axis=1)
df['shootsCatches'] = df['shootsCatches'].replace('L', 'Left').replace('R', 'Right')
df['Salary_Rank'] = df['Salary_2021-22'].rank(method='first', ascending=False)
df = df[df['Salary_2021-22'] != 0.0]
df['timeOnIce22'] = df['timeOnIce22'] \
                                        .str.replace(':', '.').astype(float)
df['timeOnIcePerGame22'] = df['timeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
df['evenTimeOnIcePerGame22'] = df['evenTimeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
df['shortHandedTimeOnIcePerGame22'] = df['shortHandedTimeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
df['powerPlayTimeOnIcePerGame22'] = df['powerPlayTimeOnIcePerGame22'] \
                                        .str.replace(':', '.').astype(float)
df['powerPlayTimeOnIce22'] = df['powerPlayTimeOnIce22'].str.replace(':', '.').astype(float)
df['shortHandedTimeOnIce22'] = df['shortHandedTimeOnIce22'].str.replace(':', '.').astype(float)

df['id'] = df['fullName']
df.set_index('id', inplace=True, drop=False)

# Formatting for % in data table and creating quantiles
for col in df.columns:
     if 'Pct' in col:
          df[col] = df[col]/100

offense_columns_ = []
special_teams_columns_ = []
enforcer_columns_ = []
endurance_columns_ = []

for idx, filter_ in enumerate(filter_list):
     dff = df[filter_].copy()
     for col in dff.columns:
          if dff[col].dtype != 'object' and 'Rank' not in col:
               df[f'{col}_quantile'] = pd.qcut(dff[col].rank(method='first'),5,labels=False).copy()
     for col_str in filter_:
          for col in df.columns:
               if 'quantile' in col and col_str in col:
                    if idx == 0:
                         offense_columns_.append(f'{col_str}_quantile')
                    elif idx == 1:
                         special_teams_columns_.append(f'{col_str}_quantile')
                    elif idx == 2:
                         enforcer_columns_.append(f'{col_str}_quantile')
                    else:
                         endurance_columns_.append(f'{col_str}_quantile')
     for filter_string in filter_str:
          if filter_string == 'offense':
               dff = df[offense_columns_]
          elif filter_string == 'special_teams':
               dff = df[special_teams_columns_]
          elif filter_string == 'enforcer':
               dff = df[enforcer_columns_]
          else:
               dff = df[endurance_columns_]
          df[f"{filter_string}_quantiles_total"] = dff.sum(axis=1)
          df[f"{filter_string}_overall_rank"] = df[f"{filter_string}_quantiles_total"].rank(method='first', ascending=False).astype('int64')
      

In [10]:
test = {'overall_rank_bins': ([  1.        , 147.66666667, 294.33333333, 441.        ])}

NameError: name 'array' is not defined

In [8]:
dff['testing'] = dff.sum(axis=1)

In [9]:
dff

Unnamed: 0_level_0,Salary_2021-22_quantile,assists22_quantile,goals22_quantile,shots22_quantile,faceOffPct22_quantile,shotPct22_quantile,gameWinningGoals22_quantile,overTimeGoals22_quantile,points22_quantile,plusMinus22_quantile,testing
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Blake Lizotte,1,2,2,1,4,3,3,3,2,4,25
Dmitry Orlov,3,3,3,2,0,2,4,4,3,4,28
Jesse Puljujarvi,1,3,3,3,1,2,2,2,3,4,24
Ondrej Palat,3,3,3,3,1,3,4,4,3,4,31
Alexander Kerfoot,2,4,3,2,2,3,1,1,3,4,25
...,...,...,...,...,...,...,...,...,...,...,...
Gustav Lindstrom,0,1,0,0,1,0,0,2,0,0,4
Jake McCabe,2,2,1,1,0,1,0,0,2,0,9
Kevin Hayes,4,2,2,2,2,2,1,0,2,0,17
Nathan Bastian,0,0,2,2,3,3,1,0,1,0,12


In [144]:
df[['rank', 'fullName']].sort_values('rank', ascending=True)

Unnamed: 0_level_0,rank,fullName
id,Unnamed: 1_level_1,Unnamed: 2_level_1
Connor McDavid,1.0,Connor McDavid
Artemi Panarin,2.0,Artemi Panarin
Tyler Seguin,3.0,Tyler Seguin
Mark Stone,4.0,Mark Stone
Roman Josi,5.0,Roman Josi
...,...,...
Jeremy Davies,437.0,Jeremy Davies
Jake Walman,438.0,Jake Walman
Dominik Simon,439.0,Dominik Simon
William Lagesson,440.0,William Lagesson


In [145]:
text = {'points': [{'curveNumber': 0, 'pointNumber': 82, 'pointIndex': 82, 'x': 27, 'y': 5400000, 'text': 'Brady Skjei', 'customdata': [352, 83, 'Age']}]}

In [151]:
df = df.rename(columns={'Salary_Rank': 'salary_rank'})

In [152]:
df['salary_rank']

id
P.K. Subban         23.0
Ryan Murray        223.0
Damon Severson      97.0
Will Butcher       132.0
Miles Wood         158.0
                   ...  
Alex Tuch          130.0
Keegan Kolesar     383.0
Nicolas Roy        384.0
Dylan Coghlan      385.0
Zach Whitecloud    386.0
Name: salary_rank, Length: 441, dtype: float64

In [155]:
df

Unnamed: 0_level_0,fullName,link,name,currentAge,birthCountry,height,weight,shootsCatches,timeOnIce22,assists22,...,penaltyMinutes22_quantile,timeOnIce22_quantile,games22_quantile,shifts22_quantile,blocked22_quantile,timeOnIcePerGame22_quantile,evenTimeOnIcePerGame22_quantile,shortHandedTimeOnIcePerGame22_quantile,powerPlayTimeOnIcePerGame22_quantile,rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P.K. Subban,P.K. Subban,/api/v1/people/8474056,Defenseman,32,CAN,"6' 0""",210,Right,1408.48,17.0,...,4,3,3,3,4,3,4,2,2,23.0
Ryan Murray,Ryan Murray,/api/v1/people/8476850,Defenseman,27,CAN,"6' 1""",206,Left,521.14,4.0,...,0,0,0,0,2,1,1,2,0,223.0
Damon Severson,Damon Severson,/api/v1/people/8476923,Defenseman,26,CAN,"6' 2""",205,Right,1888.23,35.0,...,4,4,4,4,4,4,4,4,3,97.0
Will Butcher,Will Butcher,/api/v1/people/8477355,Defenseman,26,USA,"5' 10""",190,Left,609.53,6.0,...,0,0,0,0,2,2,3,0,2,132.0
Miles Wood,Miles Wood,/api/v1/people/8477425,Left Wing,25,USA,"6' 2""",195,Left,44.17,0.0,...,0,0,0,0,0,1,1,1,2,158.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Alex Tuch,Alex Tuch,/api/v1/people/8477949,Right Wing,25,USA,"6' 4""",220,Right,920.25,26.0,...,1,1,0,1,3,3,2,3,4,130.0
Keegan Kolesar,Keegan Kolesar,/api/v1/people/8478434,Right Wing,24,CAN,"6' 2""",217,Right,974.05,17.0,...,4,1,3,1,3,0,0,1,1,383.0
Nicolas Roy,Nicolas Roy,/api/v1/people/8478462,Center,24,CAN,"6' 4""",205,Right,1267.39,24.0,...,4,3,3,3,2,2,1,3,3,384.0
Dylan Coghlan,Dylan Coghlan,/api/v1/people/8479639,Defenseman,23,CAN,"6' 2""",206,Right,837.49,10.0,...,1,1,1,1,3,1,1,1,2,385.0


In [166]:
import numpy as np

values, bins = pd.qcut(df['currentAge'], 3, retbins=True)

In [167]:
qcut_bins = {'salary_rank_quantile': array([  1.,  89., 177., 265., 353., 441.]),
             'salary_2021-22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'assists22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'goals22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shots22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'faceOffPct22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shotPct22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'gameWinningGoals22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'overTimeGoals22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'points22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'plusMinus22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'powerPlayGoals22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'powerPlayPoints22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'powerPlayTimeOnIce22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shortHandedGoals22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shortHandedPoints22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shortHandedTimeOnIce22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'hits22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'penaltyMinutes22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'timeOnIce22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'games22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shifts22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'blocked22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'timeOnIcePerGame22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'evenTimeOnIcePerGame22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'shortHandedTimeOnIcePerGame22_quantile': array([  1.,  89., 177., 265., 353., 441.]), 
             'powerPlayTimeOnIcePerGame22_quantile': array([  1.,  89., 177., 265., 353., 441.])}


array([19., 25., 29., 44.])

In [86]:
custom_data = [data_['goals22']]

hover_template = "<b>Player Name: </b> %{text} <br><br>"
hover_template += "<b>{}: </b> %{x} <br>"
hover_template += "<b>Salary: </b> %{y} <br>"
# hover_template += "<b>Salary Rank: </b> %{customdata[1]} <br>"
hover_template += "<b>Player Rank: </b> %{customdata[0]}"

fig = go.Figure()

fig.add_trace(go.Scatter(
                    x=data_[str(col_name)],
                    y=data_['Salary_2021-22'],
                    mode='markers',
                    text=data_['fullName'],
                    hovertemplate=hover_template,
                    showlegend=False,
                    name='playerName',
                    customdata=custom_data
                    ))
fig.show()

In [89]:
import numpy as np

In [117]:
x = [{'id': 'fullName', 'name': 'Player Name'}, {'id': 'Salary_2021-22', 'name': 'Salary', 'type': 'numeric', 'format': <dash.dash_table.Format.Format object at 0x7f678b4d2af0>}, {'id': 'Salary_Rank', 'name': 'Salary Rank', 'type': 'numeric'}, {'id': 'overall_rank', 'name': 'Player Rank', 'type': 'numeric'}, {'id': 'name', 'name': 'Position'}, {'id': 'assists22', 'name': 'Total Assists', 'type': 'numeric'}, {'id': 'goals22', 'name': 'Total Goals', 'type': 'numeric'}, {'id': 'shots22', 'name': 'Total Shots', 'type': 'numeric'}, {'id': 'faceOffPct22', 'name': 'Face Off Percentage', 'type': 'numeric', 'format': <dash.dash_table.Format.Format object at 0x7f6797ffff70>}, {'id': 'shotPct22', 'name': 'Shot Percentage', 'type': 'numeric', 'format': <dash.dash_table.Format.Format object at 0x7f677dc48af0>}, {'id': 'gameWinningGoals22', 'name': 'Game Winning Goals', 'type': 'numeric'}, {'id': 'overTimeGoals22', 'name': 'Over Time Goals', 'type': 'numeric'}, {'id': 'points22', 'name': 'Points', 'type': 'numeric'}, {'id': 'plusMinus22', 'name': 'Plus Minus', 'type': 'numeric'}]

SyntaxError: invalid syntax (4235701462.py, line 1)

In [113]:
x

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,293,294,295,296,297,298,299,300,301,302
0,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
1,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
2,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
3,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
4,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
436,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
437,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
438,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22
439,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,...,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22,assists22


In [116]:
data_

Unnamed: 0_level_0,fullName,link,name,currentAge,birthCountry,height,weight,shootsCatches,timeOnIce22,assists22,...,powerPlayTimeOnIcePerGame14,Salary_2014-15,Salary_2015-16,Salary_2016-17,Salary_2017-18,Salary_2018-19,Salary_2019-20,Salary_2020-21,Salary_2021-22,id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P.K. Subban,P.K. Subban,/api/v1/people/8474056,Defenseman,32,CAN,"6' 0""",210,Right,1408:48,17.0,...,04:39,7000000.0,7000000.0,11000000.0,11000000.0,10000000.0,10000000.0,8000000.0,8000000.0,P.K. Subban


In [109]:
data_.shape

(441, 303)

In [104]:
len(x)

441

In [105]:
len(data_)

441

In [111]:
np.stack((data_['assists22'], data_['goals22']), axis=-1)

array([[17.,  5.],
       [ 4.,  0.],
       [35., 11.],
       [ 6.,  2.],
       [ 0.,  0.],
       [22.,  6.],
       [13.,  1.],
       [21., 15.],
       [47., 26.],
       [ 6., 12.],
       [11.,  6.],
       [39., 21.],
       [22., 24.],
       [30., 26.],
       [ 8.,  2.],
       [ 9.,  6.],
       [30., 14.],
       [ 4.,  3.],
       [18., 15.],
       [ 6., 10.],
       [22., 37.],
       [15.,  3.],
       [25.,  3.],
       [16.,  5.],
       [ 5.,  2.],
       [44., 15.],
       [22., 12.],
       [13.,  6.],
       [ 2.,  0.],
       [10.,  2.],
       [ 4.,  4.],
       [25., 52.],
       [10., 10.],
       [33., 21.],
       [52., 29.],
       [20., 13.],
       [28., 11.],
       [74., 22.],
       [ 6.,  6.],
       [63., 11.],
       [11.,  4.],
       [ 4.,  3.],
       [ 1.,  0.],
       [14.,  8.],
       [13.,  7.],
       [11.,  7.],
       [12., 19.],
       [44., 21.],
       [12.,  6.],
       [56.,  6.],
       [21., 10.],
       [19., 11.],
       [37.,

In [46]:
# for col in offense_data.columns:
#     if offense_data[col].dtype != 'object':
#         offense_data[f'{col}_quantile'] = pd.qcut(offense_data[col].rank(method='first'),5,labels=False).copy()

# offense_data['sum_quantiles'] = offense_data[['assists22_quantile',
#                                               'goals22_quantile',
#                                               'shots22_quantile',
#                                               'faceOffPct22_quantile', 
#                                               'shotPct22_quantile',
#                                               'gameWinningGoals22_quantile', 
#                                               'overTimeGoals22_quantile',
#                                               'points22_quantile', 
#                                               'plusMinus22_quantile']].sum(axis=1)

# offense_data['overall_rank'] = offense_data['sum_quantiles'].rank(method='first', ascending=False).astype('int64')

In [50]:
for col in offense_data.columns:
    if 'Pct' in col:
        offense_data[col] = offense_data[col]/100
    if offense_data[col].dtype != 'object':
        offense_data[f'{col}_quantile'] = pd.qcut(offense_data[col].rank(method='first', ascending=False),5,labels=False).copy()

columns = []

for col in offense_data.columns:
    if 'quantile' in col:
        columns.append(col)
offense_data['sum_quantiles'] = offense_data[columns].sum(axis=1)

In [51]:
offense_data[offense_data['name'] == 'Defenseman']

Unnamed: 0_level_0,fullName,Salary_2021-22,name,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,...,assists22_quantile_quantile,goals22_quantile_quantile,shots22_quantile_quantile,faceOffPct22_quantile_quantile,shotPct22_quantile_quantile,gameWinningGoals22_quantile_quantile,overTimeGoals22_quantile_quantile,points22_quantile_quantile,plusMinus22_quantile_quantile,sum_quantiles_quantile
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P.K. Subban,P.K. Subban,8000000.0,Defenseman,17.0,5.0,114.0,0.0,0.00044,1.0,0.0,...,1,0,1,0,0,1,2,1,0,3
Ryan Murray,Ryan Murray,2000000.0,Defenseman,4.0,0.0,26.0,0.0,0.00000,0.0,0.0,...,0,0,0,1,0,0,3,0,1,4
Damon Severson,Damon Severson,5050000.0,Defenseman,35.0,11.0,158.0,0.0,0.00070,0.0,0.0,...,3,1,2,1,1,1,3,2,0,2
Will Butcher,Will Butcher,4100000.0,Defenseman,6.0,2.0,35.0,0.0,0.00057,0.0,0.0,...,0,0,0,1,1,1,3,0,1,4
Ryan Graves,Ryan Graves,3000000.0,Defenseman,22.0,6.0,111.0,0.0,0.00054,0.0,0.0,...,2,1,2,1,1,1,3,2,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Alex Pietrangelo,Alex Pietrangelo,5000000.0,Defenseman,31.0,13.0,227.0,0.0,0.00057,0.0,0.0,...,3,3,4,0,1,0,0,3,3,1
Brayden McNabb,Brayden McNabb,2000000.0,Defenseman,15.0,3.0,85.0,0.0,0.00035,0.0,0.0,...,2,0,1,0,0,0,0,1,3,3
Shea Theodore,Shea Theodore,5200000.0,Defenseman,38.0,14.0,205.0,0.0,0.00068,5.0,3.0,...,4,3,4,0,1,4,4,3,3,1
Dylan Coghlan,Dylan Coghlan,750000.0,Defenseman,10.0,3.0,108.0,0.0,0.00028,1.0,0.0,...,1,0,2,0,0,1,0,0,1,3


In [1]:
columns = [{'id': 'Salary_Rank', 'name': 'Salary Rank', 'type': 'numeric'}, {'id': 'fullName', 'name': 'Player Name'}, {'id': 'Salary_2021-22', 'name': 'Salary', 'type': 'numeric', 'format': <dash.dash_table.Format.Format object at 0x7fa9be2c6be0>}, {'id': 'name', 'name': 'Position'}, {'id': 'currentAge', 'name': 'Age'}, {'id': 'height', 'name': 'Height', 'type': 'any'}, {'id': 'weight', 'name': 'Weight'}, {'id': 'shootsCatches', 'name': 'Shoots'}, {'id': 'birthCountry', 'name': 'Nationality'}]

SyntaxError: invalid syntax (3605217480.py, line 1)

In [8]:
import plotly.express as px

In [9]:
offense_data

Unnamed: 0_level_0,fullName,Salary_2021-22,name,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,points22,plusMinus22
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
P.K. Subban,P.K. Subban,8000000.0,Defenseman,17.0,5.0,114.0,0.00,4.4,1.0,0.0,22.0,-8.0
Ryan Murray,Ryan Murray,2000000.0,Defenseman,4.0,0.0,26.0,0.00,0.0,0.0,0.0,4.0,-3.0
Damon Severson,Damon Severson,5050000.0,Defenseman,35.0,11.0,158.0,0.00,7.0,0.0,0.0,46.0,-14.0
Will Butcher,Will Butcher,4100000.0,Defenseman,6.0,2.0,35.0,0.00,5.7,0.0,0.0,8.0,-10.0
Miles Wood,Miles Wood,3500000.0,Left Wing,0.0,0.0,2.0,0.00,0.0,0.0,0.0,0.0,-2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Alex Tuch,Alex Tuch,4180000.0,Right Wing,26.0,12.0,139.0,39.13,8.6,0.0,0.0,38.0,-3.0
Keegan Kolesar,Keegan Kolesar,750000.0,Right Wing,17.0,7.0,94.0,40.42,7.4,0.0,0.0,24.0,-6.0
Nicolas Roy,Nicolas Roy,750000.0,Center,24.0,15.0,145.0,48.76,10.3,3.0,0.0,39.0,12.0
Dylan Coghlan,Dylan Coghlan,750000.0,Defenseman,10.0,3.0,108.0,0.00,2.8,1.0,0.0,13.0,-5.0


In [15]:
offense_data.select_dtypes(['float64', 'int64'])

Unnamed: 0_level_0,Salary_2021-22,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,points22,plusMinus22
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
P.K. Subban,8000000.0,17.0,5.0,114.0,0.00,4.4,1.0,0.0,22.0,-8.0
Ryan Murray,2000000.0,4.0,0.0,26.0,0.00,0.0,0.0,0.0,4.0,-3.0
Damon Severson,5050000.0,35.0,11.0,158.0,0.00,7.0,0.0,0.0,46.0,-14.0
Will Butcher,4100000.0,6.0,2.0,35.0,0.00,5.7,0.0,0.0,8.0,-10.0
Miles Wood,3500000.0,0.0,0.0,2.0,0.00,0.0,0.0,0.0,0.0,-2.0
...,...,...,...,...,...,...,...,...,...,...
Alex Tuch,4180000.0,26.0,12.0,139.0,39.13,8.6,0.0,0.0,38.0,-3.0
Keegan Kolesar,750000.0,17.0,7.0,94.0,40.42,7.4,0.0,0.0,24.0,-6.0
Nicolas Roy,750000.0,24.0,15.0,145.0,48.76,10.3,3.0,0.0,39.0,12.0
Dylan Coghlan,750000.0,10.0,3.0,108.0,0.00,2.8,1.0,0.0,13.0,-5.0


In [14]:
px.scatter(data_frame=offense_data,
        x='goals22', 
        y='Salary_2021-22',
        hover_data=['fullName', 'Salary_2021-22'])

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 441 entries, P.K. Subban to Zach Whitecloud
Columns: 324 entries, fullName to shifts22_quantile
dtypes: float64(222), int64(23), object(79)
memory usage: 1.1+ MB


In [25]:
columns

['overall_rank_quantile']

In [9]:
offense_data.sort_values('Salary_2021-22', ascending=False)

Unnamed: 0_level_0,fullName,Salary_2021-22,name,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,...,goals22_quantile,shots22_quantile,faceOffPct22_quantile,shotPct22_quantile,gameWinningGoals22_quantile,overTimeGoals22_quantile,points22_quantile,plusMinus22_quantile,sum_quantiles,overall_rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Connor McDavid,Connor McDavid,15000000.0,Center,79.0,44.0,314.0,53.72,14.0,9.0,4.0,...,4,4,4,4,4,4,4,4,36,7
Artemi Panarin,Artemi Panarin,13000000.0,Left Wing,74.0,22.0,177.0,50.00,12.4,5.0,1.0,...,4,3,3,3,4,3,4,4,32,25
Tyler Seguin,Tyler Seguin,13000000.0,Center,25.0,24.0,218.0,56.03,11.0,2.0,1.0,...,4,4,4,3,3,4,3,0,28,87
Mark Stone,Mark Stone,12000000.0,Right Wing,21.0,9.0,71.0,44.82,12.7,2.0,0.0,...,2,1,3,3,3,3,2,3,23,144
Roman Josi,Roman Josi,10750000.0,Defenseman,73.0,23.0,281.0,0.00,8.2,2.0,0.0,...,4,4,1,2,3,2,4,3,27,93
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Dominik Simon,Dominik Simon,146250.0,Center,10.0,3.0,88.0,42.85,3.4,1.0,0.0,...,0,1,2,0,2,2,0,2,10,342
Jeremy Davies,Jeremy Davies,146250.0,Defenseman,2.0,0.0,5.0,0.00,0.0,0.0,0.0,...,0,0,1,0,0,2,0,1,4,424
William Lagesson,William Lagesson,146250.0,Defenseman,5.0,0.0,33.0,0.00,0.0,0.0,0.0,...,0,0,1,0,1,2,0,3,7,385
Jake Walman,Jake Walman,146250.0,Defenseman,7.0,3.0,87.0,0.00,3.4,0.0,0.0,...,0,1,1,0,0,2,0,2,6,394


In [11]:
offense_data['Salary_2021-22'].rank(method='first', ascending=False).astype('int64')

id
P.K. Subban         23
Ryan Murray        223
Damon Severson      97
Will Butcher       132
Miles Wood         158
                  ... 
Alex Tuch          130
Keegan Kolesar     383
Nicolas Roy        384
Dylan Coghlan      385
Zach Whitecloud    386
Name: Salary_2021-22, Length: 441, dtype: int64

In [12]:
offense_data

Unnamed: 0_level_0,fullName,Salary_2021-22,name,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,...,goals22_quantile,shots22_quantile,faceOffPct22_quantile,shotPct22_quantile,gameWinningGoals22_quantile,overTimeGoals22_quantile,points22_quantile,plusMinus22_quantile,sum_quantiles,overall_rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P.K. Subban,P.K. Subban,8000000.0,Defenseman,17.0,5.0,114.0,0.00,4.4,1.0,0.0,...,1,2,0,1,1,0,2,1,10,332
Ryan Murray,Ryan Murray,2000000.0,Defenseman,4.0,0.0,26.0,0.00,0.0,0.0,0.0,...,0,0,0,0,0,0,0,1,1,436
Damon Severson,Damon Severson,5050000.0,Defenseman,35.0,11.0,158.0,0.00,7.0,0.0,0.0,...,2,3,0,1,0,0,3,0,13,287
Will Butcher,Will Butcher,4100000.0,Defenseman,6.0,2.0,35.0,0.00,5.7,0.0,0.0,...,0,0,0,1,0,0,0,0,1,437
Miles Wood,Miles Wood,3500000.0,Left Wing,0.0,0.0,2.0,0.00,0.0,0.0,0.0,...,0,0,0,0,0,0,0,2,2,430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Alex Tuch,Alex Tuch,4180000.0,Right Wing,26.0,12.0,139.0,39.13,8.6,0.0,0.0,...,3,3,2,2,1,3,3,2,22,159
Keegan Kolesar,Keegan Kolesar,750000.0,Right Wing,17.0,7.0,94.0,40.42,7.4,0.0,0.0,...,2,1,2,2,1,3,2,1,16,246
Nicolas Roy,Nicolas Roy,750000.0,Center,24.0,15.0,145.0,48.76,10.3,3.0,0.0,...,3,3,3,3,4,3,3,3,28,88
Dylan Coghlan,Dylan Coghlan,750000.0,Defenseman,10.0,3.0,108.0,0.00,2.8,1.0,0.0,...,1,2,1,0,2,3,1,1,12,322


In [14]:
offense_data[offense_data['name'] == 'Center']

Unnamed: 0_level_0,fullName,Salary_2021-22,name,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,...,goals22_quantile,shots22_quantile,faceOffPct22_quantile,shotPct22_quantile,gameWinningGoals22_quantile,overTimeGoals22_quantile,points22_quantile,plusMinus22_quantile,sum_quantiles,overall_rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Pavel Zacha,Pavel Zacha,3000000.0,Center,21.0,15.0,148.0,50.13,10.1,1.0,1.0,...,3,3,4,2,1,3,3,0,21,160
Nico Hischier,Nico Hischier,7250000.0,Center,39.0,21.0,153.0,52.06,13.7,2.0,1.0,...,3,3,4,4,2,3,4,2,29,67
Yegor Sharangovich,Yegor Sharangovich,1800000.0,Center,22.0,24.0,168.0,40.45,14.3,3.0,0.0,...,4,3,2,4,3,0,3,0,22,145
Jack Hughes,Jack Hughes,925000.0,Center,30.0,26.0,165.0,34.56,15.8,2.0,2.0,...,4,3,2,4,2,4,4,0,26,97
Casey Cizikas,Casey Cizikas,2500000.0,Center,6.0,10.0,91.0,55.56,11.0,3.0,0.0,...,2,1,4,3,3,0,1,1,15,247
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Jonathan Marchessault,Jonathan Marchessault,5000000.0,Center,36.0,30.0,251.0,43.00,12.0,4.0,0.0,...,4,4,2,3,4,3,4,3,31,47
Chandler Stephenson,Chandler Stephenson,3025000.0,Center,43.0,21.0,125.0,52.03,16.8,3.0,0.0,...,4,2,4,4,4,3,4,3,32,33
Mattias Janmark,Mattias Janmark,2000000.0,Center,16.0,9.0,89.0,40.74,10.1,1.0,0.0,...,2,1,2,3,2,3,2,3,20,191
Patrick Brown,Patrick Brown,750000.0,Center,5.0,4.0,51.0,58.45,7.8,0.0,0.0,...,1,0,4,2,1,3,0,0,11,331


In [20]:
for col in offense_data.columns:
    if 'Pct' in col and 'quantile' not in col:
        offense_data[col] = offense_data[col]/100

In [17]:
offense_data

Unnamed: 0_level_0,fullName,Salary_2021-22,name,assists22,goals22,shots22,faceOffPct22,shotPct22,gameWinningGoals22,overTimeGoals22,...,goals22_quantile,shots22_quantile,faceOffPct22_quantile,shotPct22_quantile,gameWinningGoals22_quantile,overTimeGoals22_quantile,points22_quantile,plusMinus22_quantile,sum_quantiles,overall_rank
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P.K. Subban,P.K. Subban,8000000.0,Defenseman,17.0,5.0,114.0,0.00,0.044,1.0,0.0,...,1,2,0,1,1,0,2,1,10,332
Ryan Murray,Ryan Murray,2000000.0,Defenseman,4.0,0.0,26.0,0.00,0.000,0.0,0.0,...,0,0,0,0,0,0,0,1,1,436
Damon Severson,Damon Severson,5050000.0,Defenseman,35.0,11.0,158.0,0.00,0.070,0.0,0.0,...,2,3,0,1,0,0,3,0,13,287
Will Butcher,Will Butcher,4100000.0,Defenseman,6.0,2.0,35.0,0.00,0.057,0.0,0.0,...,0,0,0,1,0,0,0,0,1,437
Miles Wood,Miles Wood,3500000.0,Left Wing,0.0,0.0,2.0,0.00,0.000,0.0,0.0,...,0,0,0,0,0,0,0,2,2,430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Alex Tuch,Alex Tuch,4180000.0,Right Wing,26.0,12.0,139.0,39.13,0.086,0.0,0.0,...,3,3,2,2,1,3,3,2,22,159
Keegan Kolesar,Keegan Kolesar,750000.0,Right Wing,17.0,7.0,94.0,40.42,0.074,0.0,0.0,...,2,1,2,2,1,3,2,1,16,246
Nicolas Roy,Nicolas Roy,750000.0,Center,24.0,15.0,145.0,48.76,0.103,3.0,0.0,...,3,3,3,3,4,3,3,3,28,88
Dylan Coghlan,Dylan Coghlan,750000.0,Defenseman,10.0,3.0,108.0,0.00,0.028,1.0,0.0,...,1,2,1,0,2,3,1,1,12,322


In [None]:
    df = basic_player_data[basic_player_data['name'] == str(position_dropdown)].copy()


In [4]:
offense_data.columns

Index(['fullName', 'name', 'assists22', 'goals22', 'shots22', 'faceOffPct22',
       'shotPct22', 'gameWinningGoals22', 'overTimeGoals22', 'points22',
       'plusMinus22', 'assists22_quantile', 'goals22_quantile',
       'shots22_quantile', 'faceOffPct22_quantile', 'shotPct22_quantile',
       'gameWinningGoals22_quantile', 'overTimeGoals22_quantile',
       'points22_quantile', 'plusMinus22_quantile'],
      dtype='object')

In [30]:
offense.columns

Index(['fullName', 'timeOnIce22', 'assists22', 'goals22', 'pim22', 'shots22',
       'games22', 'hits22', 'timeOnIce22_quantile', 'assists22_quantile',
       'goals22_quantile', 'pim22_quantile', 'shots22_quantile',
       'games22_quantile', 'hits22_quantile'],
      dtype='object')

In [39]:
offense['sum'] = offense[['timeOnIce22_quantile', 'assists22_quantile',
       'goals22_quantile', 'pim22_quantile', 'shots22_quantile',
       'games22_quantile', 'hits22_quantile']].sum(axis=1)

In [40]:
offense

Unnamed: 0_level_0,fullName,timeOnIce22,assists22,goals22,pim22,shots22,games22,hits22,timeOnIce22_quantile,assists22_quantile,goals22_quantile,pim22_quantile,shots22_quantile,games22_quantile,hits22_quantile,sum
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
P.K. Subban,P.K. Subban,1408.48,17.0,5.0,82.0,114.0,77.0,69.0,3,2,1,4,2,3,2,17
Ryan Murray,Ryan Murray,521.14,4.0,0.0,2.0,26.0,37.0,32.0,0,0,0,0,0,0,0,0
Damon Severson,Damon Severson,1888.23,35.0,11.0,57.0,158.0,80.0,91.0,4,4,2,4,3,4,3,24
Will Butcher,Will Butcher,609.53,6.0,2.0,0.0,35.0,37.0,11.0,0,0,0,0,0,0,0,0
Miles Wood,Miles Wood,44.17,0.0,0.0,4.0,2.0,3.0,5.0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Alex Tuch,Alex Tuch,920.25,26.0,12.0,14.0,139.0,50.0,32.0,1,3,2,0,3,0,0,9
Keegan Kolesar,Keegan Kolesar,974.05,17.0,7.0,68.0,94.0,77.0,246.0,1,2,1,4,1,3,4,16
Nicolas Roy,Nicolas Roy,1267.39,24.0,15.0,51.0,145.0,78.0,85.0,3,3,3,4,3,3,2,21
Dylan Coghlan,Dylan Coghlan,837.49,10.0,3.0,18.0,108.0,59.0,62.0,1,1,0,1,2,1,1,7


In [24]:
offense.columns.str.contains('|'.join(['quantile']), regex=True)

array([False, False, False, False, False, False, False, False,  True,
        True,  True,  True,  True,  True,  True])

In [4]:
offense

Unnamed: 0_level_0,fullName,timeOnIce22,assists22,goals22,pim22,shots22,games22,hits22
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
P.K. Subban,P.K. Subban,1408.48,17.0,5.0,82.0,114.0,77.0,69.0
Ryan Murray,Ryan Murray,521.14,4.0,0.0,2.0,26.0,37.0,32.0
Damon Severson,Damon Severson,1888.23,35.0,11.0,57.0,158.0,80.0,91.0
Will Butcher,Will Butcher,609.53,6.0,2.0,0.0,35.0,37.0,11.0
Miles Wood,Miles Wood,44.17,0.0,0.0,4.0,2.0,3.0,5.0
...,...,...,...,...,...,...,...,...
Alex Tuch,Alex Tuch,920.25,26.0,12.0,14.0,139.0,50.0,32.0
Keegan Kolesar,Keegan Kolesar,974.05,17.0,7.0,68.0,94.0,77.0,246.0
Nicolas Roy,Nicolas Roy,1267.39,24.0,15.0,51.0,145.0,78.0,85.0
Dylan Coghlan,Dylan Coghlan,837.49,10.0,3.0,18.0,108.0,59.0,62.0


In [7]:
offense = [
    'fullName',
     'assists22',
     'goals22',
     'shots22',
     'faceOffPct22',
     'shotPct22',
     'gameWinningGoals22',
     'overTimeGoals22',
     'points22',
     'plusMinus22',
]
special_teams = [
    'fullName',
     'powerPlayGoals22',
     'powerPlayPoints22',
     'powerPlayTimeOnIce22',
     'shortHandedGoals22',
     'shortHandedPoints22',
     'shortHandedTimeOnIce22',
]
enforcer = [
    'fullName',
     'hits22',
     'penaltyMinutes22',
]
endurance = [
    'fullName',
        'timeOnIce22',
         'games22',
         'shifts22',
         'blocked22',
         'timeOnIcePerGame22',
         'evenTimeOnIcePerGame22',
         'shortHandedTimeOnIcePerGame22',
         'powerPlayTimeOnIcePerGame22',
]

In [15]:
offense_data = df[offense].copy()
offense_data['id'] = offense_data['fullName'] 
offense_data.set_index('id', inplace=True, drop=False)

# --------------------------- Special Team Stat's ----------------------------------------------

special_team_data = df[special_teams].copy()
special_team_data['id'] = special_team_data['fullName'] 
special_team_data.set_index('id', inplace=True, drop=False)
special_team_data['powerPlayTimeOnIce22'] = special_team_data['powerPlayTimeOnIce22'].str.replace(':', '.').astype(float)
special_team_data['shortHandedTimeOnIce22'] = special_team_data['shortHandedTimeOnIce22'].str.replace(':', '.').astype(float)

# --------------------------- Enforcer Stat's ----------------------------------------------

enforcer_data = offense_data[enforcer].copy()
enforcer_data['id'] = enforcer_data['fullName'] 
enforcer_data.set_index('id', inplace=True, drop=False)

# --------------------------- Endurance Stat's ----------------------------------------------

endurance_data = df[endurance].copy()
endurance_data['id'] = endurance_data['fullName'] 
endurance_data.set_index('id', inplace=True, drop=False)

endurance_data['timeOnIce22'] = endurance_data['timeOnIce22'].str.replace(':', '.').astype(float)
endurance_data['timeOnIcePerGame22'] = endurance_data['timeOnIcePerGame22'].str.replace(':', '.').astype(float)
endurance_data['evenTimeOnIcePerGame22'] = endurance_data['evenTimeOnIcePerGame22'].str.replace(':', '.').astype(float)
endurance_data['shortHandedTimeOnIcePerGame22'] = endurance_data['shortHandedTimeOnIcePerGame22'].str.replace(':', '.').astype(float)
endurance_data['powerPlayTimeOnIcePerGame22'] = endurance_data['powerPlayTimeOnIcePerGame22'].str.replace(':', '.').astype(float)



In [18]:
enforcer_data

Unnamed: 0_level_0,fullName,pim22,hits22,penaltyMinutes22,id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
P.K. Subban,P.K. Subban,82.0,69.0,82.0,P.K. Subban
Ryan Murray,Ryan Murray,2.0,32.0,2.0,Ryan Murray
Damon Severson,Damon Severson,57.0,91.0,57.0,Damon Severson
Will Butcher,Will Butcher,0.0,11.0,0.0,Will Butcher
Miles Wood,Miles Wood,4.0,5.0,4.0,Miles Wood
...,...,...,...,...,...
Alex Tuch,Alex Tuch,14.0,32.0,14.0,Alex Tuch
Keegan Kolesar,Keegan Kolesar,68.0,246.0,68.0,Keegan Kolesar
Nicolas Roy,Nicolas Roy,51.0,85.0,51.0,Nicolas Roy
Dylan Coghlan,Dylan Coghlan,18.0,62.0,18.0,Dylan Coghlan
