Evaluating how much value a player gives relative to their salary 

Creating a Fantasy Value Score and Finding the Top 5 Best players each season based on it (Adjusted for Salary)

In [3]:
import pandas as pd
import numpy as np

# Load cleaned CSV
df = pd.read_csv("Final_NBA_Data.csv")

# Step 1: Fantasy score (modern position weights)
def fantasy_score(row):
    if row['G'] <= 55 or pd.isna(row['G']):
        return 0
    pts = row['PTS'] / row['G']
    ast = row['AST'] / row['G']
    reb = row['TRB'] / row['G']
    stl = row['STL'] / row['G']
    blk = row['BLK'] / row['G']
    tov = row['TOV'] / row['G']
    fg_pct = row['FG%'] if not pd.isna(row['FG%']) else 0
    three_pct = row['3P%'] if not pd.isna(row['3P%']) else 0
    pos = row['Pos']
    
    if 'PG' in pos or 'SG' in pos:
        score = 1.6 * pts + 1.2 * ast + 1.0 * reb + 1.5 * stl + 0.7 * blk - 0.5 * tov + 1.2 * fg_pct + 2.5 * three_pct
    elif 'SF' in pos or 'PF' in pos:
        score = 1.4 * pts + 1.1 * ast + 1.3 * reb + 1.3 * stl + 1.2 * blk - 0.5 * tov + 1.2 * fg_pct + 2.0 * three_pct
    elif 'C' in pos:
        score = 1.4 * pts + 1.0 * ast + 1.5 * reb + 1.0 * stl + 1.6 * blk - 0.5 * tov + 1.3 * fg_pct + 1.8 * three_pct
    else:
        score = pts + ast + reb + stl + blk - tov + fg_pct + three_pct
    return score

# Step 2: Apply fantasy score
df['FantasyScore'] = df.apply(fantasy_score, axis=1)

# Step 3: Filter players with valid salaries and games
filtered_df = df[(df['Salary (Millions)'] >= 1) & (df['G'] > 55)]

# Step 4: Create salary brackets
def bracket(sal):
    if sal < 5:
        return "<$5M"
    elif sal < 15:
        return "$5M–$15M"
    elif sal < 30:
        return "$15M–$30M"
    else:
        return ">$30M"
    
filtered_df['SalaryBracket'] = filtered_df['Salary (Millions)'].apply(bracket)

# Step 5: Compute z-score within each bracket and season
filtered_df['BracketZScore'] = filtered_df.groupby(['Season', 'SalaryBracket'])['FantasyScore'].transform(
    lambda x: (x - x.mean()) / x.std(ddof=0)
)

# Step 6: Sort and get top 5 per season
filtered_df = filtered_df.sort_values(['Season', 'BracketZScore'], ascending=[True, False])
top_5_per_year = filtered_df.groupby('Season').head(5)

# Step 7: Print results
for season in sorted(filtered_df['Season'].unique()):
    avg = round(filtered_df[filtered_df['Season'] == season]['BracketZScore'].mean(), 2)
    print(f"\n📈 The 5 best value contracts for {season} (Avg Z-Score: {avg}) are:\n")
    top_5 = top_5_per_year[top_5_per_year['Season'] == season]
    for _, row in top_5.iterrows():
        print(f"• {row['Player']} ({row['Pos']}) – Fantasy Score: {round(row['FantasyScore'], 2)}, Salary: ${round(row['Salary (Millions)'], 2)}M, Bracket: {row['SalaryBracket']} → Z: {round(row['BracketZScore'], 2)}")



📈 The 5 best value contracts for 2021 (Avg Z-Score: 0.0) are:

• Collin Sexton (SG) – Fantasy Score: 2.29, Salary: $4.99M, Bracket: <$5M → Z: 2.71
• Trae Young (PG) – Fantasy Score: 2.26, Salary: $6.57M, Bracket: $5M–$15M → Z: 2.22
• Michael Porter Jr. (SF) – Fantasy Score: 2.17, Salary: $3.55M, Bracket: <$5M → Z: 2.18
• Zion Williamson (PF) – Fantasy Score: 2.17, Salary: $10.25M, Bracket: $5M–$15M → Z: 1.85
• Jayson Tatum (SF) – Fantasy Score: 2.14, Salary: $9.9M, Bracket: $5M–$15M → Z: 1.71

📈 The 5 best value contracts for 2022 (Avg Z-Score: 0.0) are:

• Ja Morant (PG) – Fantasy Score: 2.47, Salary: $9.6M, Bracket: $5M–$15M → Z: 2.47
• Tyler Herro (SG) – Fantasy Score: 2.18, Salary: $4.0M, Bracket: <$5M → Z: 2.23
• Anfernee Simons (SG) – Fantasy Score: 2.15, Salary: $3.94M, Bracket: <$5M → Z: 2.12
• Desmond Bane (SG) – Fantasy Score: 2.14, Salary: $2.03M, Bracket: <$5M → Z: 2.08
• Tyrese Maxey (PG) – Fantasy Score: 2.14, Salary: $2.6M, Bracket: <$5M → Z: 2.08

📈 The 5 best value co

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['SalaryBracket'] = filtered_df['Salary (Millions)'].apply(bracket)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['BracketZScore'] = filtered_df.groupby(['Season', 'SalaryBracket'])['FantasyScore'].transform(


Creating a Fantasy Value Score and Finding the Top 5 WORST players each season based on it (Adjusted for Salary)

In [34]:
import pandas as pd
import numpy as np

# Step 1: Load your cleaned dataset
df = pd.read_csv("Final_NBA_Data.csv")

# Step 2: Modern fantasy score function (you've used this before)
def modern_fantasy_score(row):
    if row['G'] == 0 or pd.isna(row['G']):
        return 0

    pts = row['PTS'] / row['G']
    ast = row['AST'] / row['G']
    reb = row['TRB'] / row['G']
    stl = row['STL'] / row['G']
    blk = row['BLK'] / row['G']
    tov = row['TOV'] / row['G']
    fg_pct = row['FG%'] if not pd.isna(row['FG%']) else 0
    three_pct = row['3P%'] if not pd.isna(row['3P%']) else 0
    pos = row['Pos']

    if 'PG' in pos or 'SG' in pos:
        score = 1.6 * pts + 1.2 * ast + 1.0 * reb + 1.5 * stl + 0.7 * blk - 0.5 * tov + 1.2 * fg_pct + 2.5 * three_pct
    elif 'SF' in pos or 'PF' in pos:
        score = 1.4 * pts + 1.1 * ast + 1.3 * reb + 1.3 * stl + 1.2 * blk - 0.5 * tov + 1.2 * fg_pct + 2.0 * three_pct
    elif 'C' in pos:
        score = 1.4 * pts + 1.0 * ast + 1.5 * reb + 1.0 * stl + 1.6 * blk - 0.5 * tov + 1.3 * fg_pct + 1.8 * three_pct
    else:
        score = pts + ast + reb + stl + blk - tov + fg_pct + three_pct

    return score

# Step 3: Apply Fantasy Score
df['FantasyScore'] = df.apply(modern_fantasy_score, axis=1)

# Step 4: Filter players with Salary >= $1M
filtered_df = df[(df['Salary (Millions)'] >= 1) & (~df['FantasyScore'].isna())]

# Step 5: Adjusted Value formula (balanced score vs salary)
alpha = 0.75  # penalty tuning constant
filtered_df['AdjustedValue'] = filtered_df['FantasyScore'] - alpha * np.log(filtered_df['Salary (Millions)'])

# Step 6: Sort by Season and AdjustedValue ASC (worst at top)
filtered_df = filtered_df.sort_values(['Season', 'AdjustedValue'], ascending=[True, True])

# Step 7: Get bottom 5 per year
bottom_5_per_year = filtered_df.groupby('Season').head(5)

# Step 8: Print results
for season in sorted(filtered_df['Season'].unique()):
    avg_adj = round(filtered_df[filtered_df['Season'] == season]['AdjustedValue'].mean(), 2)
    print(f"\n📉 The 5 worst value contracts for {season} (Avg Adjusted Score: {avg_adj}) are:\n")

    bottom_5 = bottom_5_per_year[bottom_5_per_year['Season'] == season]
    for _, row in bottom_5.iterrows():
        name = row['Player']
        pos = row['Pos']
        score = round(row['FantasyScore'], 2)
        salary = round(row['Salary (Millions)'], 2)
        adjusted = round(row['AdjustedValue'], 2)
        print(f"• {name} ({pos}) – Fantasy Score: {score}, Salary: ${salary}M → Adjusted Value: {adjusted}")



📉 The 5 worst value contracts for 2021 (Avg Adjusted Score: 0.68) are:

• Steven Adams (C) – Fantasy Score: 1.27, Salary: $29.59M → Adjusted Value: -1.27
• Rudy Gobert (C) – Fantasy Score: 1.52, Salary: $27.53M → Adjusted Value: -0.97
• Draymond Green (PF) – Fantasy Score: 1.56, Salary: $22.25M → Adjusted Value: -0.77
• James Johnson (PF) – Fantasy Score: 1.41, Salary: $16.05M → Adjusted Value: -0.67
• DeMar DeRozan (PF) – Fantasy Score: 1.82, Salary: $27.74M → Adjusted Value: -0.67

📉 The 5 worst value contracts for 2022 (Avg Adjusted Score: 0.53) are:

• Steven Adams (C) – Fantasy Score: 1.1, Salary: $17.07M → Adjusted Value: -1.03
• Rudy Gobert (C) – Fantasy Score: 1.66, Salary: $35.34M → Adjusted Value: -1.02
• Bam Adebayo (C) – Fantasy Score: 1.56, Salary: $28.1M → Adjusted Value: -0.94
• Clint Capela (C) – Fantasy Score: 1.3, Salary: $18.6M → Adjusted Value: -0.89
• Tobias Harris (PF) – Fantasy Score: 1.83, Salary: $36.0M → Adjusted Value: -0.86

📉 The 5 worst value contracts fo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['AdjustedValue'] = filtered_df['FantasyScore'] - alpha * np.log(filtered_df['Salary (Millions)'])


## Top 10 Best vs Worst Contracts
This view compares the ten most efficient contracts against the ten costliest ones (based on fantasy value per $1M). The left chart highlights bargain players who deliver elite output relative to salary, while the right chart shows expensive deals that return the least impact for the money. Use the helper below to regenerate or customize the plot for any season.


In [None]:
import matplotlib.pyplot as pltimport seaborn as snsdef plot_best_vs_worst_contracts(season=2025, top_n=10):    data = pd.read_csv("Final_NBA_Data.csv")    data = data[(data['Salary (Millions)'] > 0)].copy()    data['FantasyScore'] = data.apply(fantasy_score, axis=1)    data = data[data['FantasyScore'] > 0]    data['ValuePerMillion'] = data['FantasyScore'] / data['Salary (Millions)']    season_df = data if season is None else data[data['Season'] == season]    best = season_df.sort_values('ValuePerMillion', ascending=False).head(top_n)    worst = season_df.sort_values('ValuePerMillion', ascending=True).head(top_n)    combined = pd.concat([best[['Team']], worst[['Team']]])    palette = sns.color_palette('viridis', n_colors=combined['Team'].nunique())    team_colors = dict(zip(combined['Team'].unique(), palette))    sns.set_theme(style='whitegrid')    fig, axes = plt.subplots(1, 2, figsize=(16, 8))    best_order = best.sort_values('ValuePerMillion', ascending=False)['Player']    sns.barplot(        ax=axes[0],        data=best,        x='ValuePerMillion',        y='Player',        order=best_order,        hue='Team',        palette=team_colors,        dodge=False,    )    axes[0].set_title('Top Best Value Contracts')    axes[0].set_xlabel('Fantasy Value per $1M')    axes[0].set_ylabel('Player')    axes[0].legend(fontsize=8, loc='lower right')    worst_order = worst.sort_values('ValuePerMillion', ascending=True)['Player']    sns.barplot(        ax=axes[1],        data=worst,        x='ValuePerMillion',        y='Player',        order=worst_order,        hue='Team',        palette=team_colors,        dodge=False,    )    axes[1].set_title('Top Worst Value Contracts')    axes[1].set_xlabel('Fantasy Value per $1M')    axes[1].set_ylabel('Player')    axes[1].legend(fontsize=8, loc='lower right')    season_label = season if season is not None else 'All Seasons'    fig.suptitle(f'Top {top_n} Best vs Worst Contracts — {season_label}', fontsize=16)    fig.tight_layout(rect=(0, 0, 1, 0.95))    plt.show()plot_best_vs_worst_contracts(season=2025)