#### Calculation CLUTCH with different weight of features

Original paper: https://www.inpredictable.com/2023/01/the-jerry-west-clutch-player-of-year.html

CLUTCH trecker: https://stats.inpredictable.com/nba/jerryWest.php

Features and available weights for them:

 1. **SH - Field Goal Shooting**. Weight from 0.5 to 2
 2. **DF - Drawn Folus**. Weight from 0.5 to 2
 3. **FT - Free Throw Shooting**. Weight from 0.5 to 2
 4. **TOV - Turnovers**. Weight from 0.5 to 2


 5. **REB - Rebounds**. Weight from 0 to 0.5
 6. **AST - Assists**. Weight from 0 to 0.5
 7. **STL - Steals**. Weight from 0 to 0.5
 8. **BLK - Blocks**. Weight from 0 to 0.5
 
Gap between weights - 0.05

In [1]:
from itertools import product
from collections import Counter

import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv("./data/clutch_perfomance.csv")

In [3]:
data.head(2)

Unnamed: 0,Rnk,Player,Team,Gms,CLUTCH,SH,DF,FT,TO,REB,AST,STL,BLK
0,1,AJ Griffin,ATL,37,1.23,1.48,0.05,0.01,-0.31,0.33,0.31,0.16,0.09
1,2,Trae Young,ATL,38,1.05,0.55,1.13,0.67,-1.29,0.31,3.11,0.12,0.01


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 524 entries, 0 to 523
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Rnk     524 non-null    int64  
 1   Player  524 non-null    object 
 2   Team    524 non-null    object 
 3   Gms     524 non-null    int64  
 4   CLUTCH  524 non-null    float64
 5   SH      524 non-null    float64
 6   DF      524 non-null    float64
 7   FT      524 non-null    float64
 8   TO      524 non-null    float64
 9   REB     524 non-null    float64
 10  AST     524 non-null    float64
 11  STL     524 non-null    float64
 12  BLK     524 non-null    float64
dtypes: float64(9), int64(2), object(2)
memory usage: 53.3+ KB


In [5]:
sh_arr = np.arange(0.5, 2.05, 0.1)
df_arr = np.arange(0.5, 2.05, 0.1)
ft_arr = np.arange(0.5, 2.05, 0.1)
tov_arr = np.arange(0.5, 2.05, 0.1)

reb_arr = np.arange(0, 0.55, 0.1)
ast_arr = np.arange(0, 0.55, 0.1)
stl_arr = np.arange(0, 0.55, 0.1)
blk_arr = np.arange(0, 0.55, 0.1)

In [6]:
values = data.iloc[:, 5:].to_numpy()

In [7]:
it = product(sh_arr, df_arr, ft_arr, tov_arr, reb_arr, ast_arr, stl_arr, blk_arr)

In [8]:
%%time
len_loop = 1000000
dt = pd.DataFrame()
j = np.empty(len_loop,)
g = np.empty((len_loop, 8))

for i, weight in enumerate(it):
    m = np.sum(np.tile(np.array(weight), values.shape[0]).reshape(-1, 8) * values, axis=1)
    j[(i % len_loop)] = np.argmax(m)
    g[(i % len_loop)] = np.array(weight)
    
    if (i + 1) % len_loop == 0:
        d = Counter(j)
        df = pd.DataFrame.from_dict(d, orient="index").reset_index()
        df.columns = ["IDX", "CNT"]
        df1 = pd.DataFrame(np.hstack((j.reshape(-1, 1), g))).groupby(0, as_index=False).mean()
        df1.columns = ["IDX", "SH", "DF", "FT", "TOV", "REB", "AST", "STL", "BLK"]
        dd = df.merge(df1, how="inner", on="IDX")
        dt = pd.concat([dt, dd], axis=0, ignore_index=True)
        j = np.empty(len_loop,)
        g = np.empty((len_loop, 8))

CPU times: user 51min 19s, sys: 7.43 s, total: 51min 26s
Wall time: 51min 34s


In [9]:
wm = lambda x: np.average(x, weights=dt.loc[x.index, "CNT"])

In [10]:
gr_res = dt.groupby(["IDX"], as_index=False).agg(CNT = ("CNT", "sum"),
                                                 SH = ("SH", wm),
                                                 DF = ("DF", wm),
                                                 FT = ("FT", wm),
                                                 TOV = ("TOV", wm),
                                                 REB = ("REB", wm),
                                                 AST = ("AST", wm),
                                                 STL = ("STL", wm),
                                                 BLK = ("BLK", wm))
gr_res["CNT"] = np.clip(gr_res["CNT"].transform(lambda x: x/x.sum()*100), a_min=0.01, a_max=None)

In [11]:
gr_res.iloc[:, 1:] = gr_res.iloc[:, 1:].apply(lambda x: round(x, 2), axis=1)

In [12]:
player = data.iloc[gr_res.loc[:, "IDX"]].loc[:, "Player"].reset_index(drop=True)
team = data.iloc[gr_res.loc[:, "IDX"]].loc[:, "Team"].reset_index(drop=True)

In [13]:
gr_res.insert(0, "Team", team)
gr_res.insert(0, "Player", player)

In [14]:
main_tbl = gr_res.sort_values(by="CNT", ascending=False)
main_tbl

Unnamed: 0,Player,Team,IDX,CNT,SH,DF,FT,TOV,REB,AST,STL,BLK
0,DeMar DeRozan,CHI,68.0,89.23,1.18,1.3,1.29,1.26,0.25,0.25,0.25,0.25
7,De'Aaron Fox,SAC,432.0,9.85,1.79,0.78,0.94,1.12,0.22,0.22,0.25,0.25
3,Tyrese Haliburton,IND,188.0,0.87,1.02,0.59,0.71,1.61,0.19,0.46,0.28,0.25
2,Luka Doncic,DAL,102.0,0.05,1.15,0.66,0.55,0.73,0.4,0.48,0.4,0.26
1,Derrick Jones Jr.,CHI,69.0,0.01,0.51,0.52,0.5,1.98,0.02,0.0,0.17,0.18
4,Zion Williamson,NOP,311.0,0.01,1.13,0.59,0.51,1.93,0.03,0.01,0.31,0.31
5,Trey Murphy III,NOP,312.0,0.01,1.07,0.5,0.53,1.98,0.08,0.0,0.09,0.19
6,Georges Niang,PHI,381.0,0.01,0.69,0.5,0.52,1.98,0.02,0.0,0.1,0.37
8,Walker Kessler,UTA,491.0,0.01,0.62,0.5,0.5,1.98,0.47,0.0,0.07,0.42


In [15]:
main_tbl.to_csv("data/final_table.csv", index=False)