In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LassoCV

In [2]:
# Read the "Averages" sheet of the workbook, then keep only the rows whose
# 'GP' value is strictly greater than 65.
data = (
    pd.read_excel("2024_Defense_Stats.xlsx", sheet_name="Averages")
    .loc[lambda frame: frame['GP'] > 65]
)

In [3]:
# Columns that are deliberately left out of the defensive feature set.
excluded = ["PLAYER", "POS", "TEAM", "AGE", "GP", "W", "L", "MIN", "DEF RTG",
            "DREB", "%DREB", "DREB%"]

# Every remaining column of `data` is treated as a defensive feature.
defensive_cols = [col for col in data.columns if col not in excluded]

# Keep only rows that are complete in every feature column AND in 'POS'.
# 'POS' is carried along because it is the grouping key for the positional
# z-scores: pandas' groupby discards NaN keys by default, so a row without a
# position would come back from the z-score step as all-NaN and make the PCA
# fit fail. When 'POS' has no missing values this yields exactly the same
# frame as filtering on the feature columns alone ('POS' stays the last column).
df_def = data[defensive_cols + ['POS']].dropna().copy()

In [4]:
def _positional_zscore(values):
    """Z-score one group's values, returning zeros when the spread is unusable.

    Parameters
    ----------
    values : pandas.Series
        The values of a single column for a single 'POS' group.

    Returns
    -------
    pandas.Series
        ``(values - mean) / std`` on the same index, or all zeros when the
        group's standard deviation is NaN (a one-row group) or exactly 0
        (every row has the same value).
    """
    spread = values.std()
    # Dividing by a NaN or zero spread would produce NaN/inf z-scores, which
    # would then flow into the composite score and the later scaling/PCA.
    # With no measurable spread every row sits at the group mean, so 0 is the
    # natural z-score.
    if pd.isna(spread) or spread == 0:
        return pd.Series(0.0, index=values.index)
    return (values - values.mean()) / spread


# Replace each defensive column with its z-score computed within the player's
# position group. The grouping key is read once; it is not modified by the loop.
position_key = df_def['POS']
for col in defensive_cols:
    df_def[col] = df_def[col].groupby(position_key).transform(_positional_zscore)

# Create a composite measure using the sum of the positional z-scores
df_def['Total'] = df_def[defensive_cols].sum(axis=1)

In [5]:
# Learn per-column scaling parameters from the defensive feature columns, then
# apply that scaling to the same columns.
scaler = StandardScaler().fit(df_def[defensive_cols])
X_scaled = scaler.transform(df_def[defensive_cols])

In [6]:
# Fit a PCA on the scaled feature matrix and project the same rows onto the
# fitted components. No arguments are passed to PCA, so scikit-learn's
# defaults apply.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

In [7]:
# Regression target: the composite 'Total' column as a NumPy array.
y = df_def['Total'].to_numpy()

# Lasso with the penalty strength chosen by 5-fold cross-validation, fitted
# with the principal-component scores as predictors.
lasso = LassoCV(cv=5, random_state=42).fit(X_pca, y)

# Keep the fitted coefficients and the positions of the components whose
# coefficient is non-zero.
lasso_coef = lasso.coef_
selected_pcs = np.flatnonzero(lasso_coef)

In [8]:
# Raw score per row: the selected principal-component scores combined linearly,
# each weighted by its Lasso coefficient.
weighted_score = X_pca[:, selected_pcs] @ lasso_coef[selected_pcs]
min_score, max_score = weighted_score.min(), weighted_score.max()
score_range = max_score - min_score

if score_range == 0:
    # Every raw score is identical (for example, Lasso shrank every coefficient
    # to zero, or only one row is present). Min-max scaling would divide by
    # zero and fill the column with NaN, so report a flat score of 0 instead.
    normalized_score = np.zeros_like(weighted_score, dtype=float)
else:
    # Linear rescale: the lowest raw score maps to 0 and the highest to 100.
    normalized_score = 100 * (weighted_score - min_score) / score_range

In [9]:
# Pull the rows of `data` that are present in df_def (matched by index label)
# and attach the normalised score as a new column on that new frame.
df_result = data.loc[df_def.index].assign(
    **{'Defensive Impact Score': normalized_score}
)

In [10]:
# Order all rows by score, highest first, and keep the first five rows
# encountered for each 'POS' value.
top_5_by_pos = (
    df_result
    .sort_values('Defensive Impact Score', ascending=False)
    .groupby('POS')
    .head(5)
)

# Reduce to the reporting columns, order by score again and renumber from 0.
top_5_by_pos_display = (
    top_5_by_pos
    .loc[:, ['PLAYER', 'TEAM', 'POS', 'Defensive Impact Score']]
    .sort_values(by='Defensive Impact Score', ascending=False)
    .reset_index(drop=True)
)

# Write the table to disk without the index column.
top_5_by_pos_display.to_csv("Lasso_PCA_top5_by_pos.csv", index=False)

In [11]:
# Order all rows by score, highest first, and keep the first 25.
top_25_overall = (
    df_result
    .sort_values('Defensive Impact Score', ascending=False)
    .head(25)
)

# Reduce to the reporting columns, order by score again and renumber from 0.
top_25_overall_display = (
    top_25_overall
    .loc[:, ['PLAYER', 'TEAM', 'POS', 'Defensive Impact Score']]
    .sort_values(by='Defensive Impact Score', ascending=False)
    .reset_index(drop=True)
)

# Write the table to disk without the index column.
top_25_overall_display.to_csv("Lasso_PCA_top25_overall.csv", index=False)