In [10]:
import pandas as pd
import numpy as np

# Load data
# All paths are relative to the notebook's working directory.
u18_df = pd.read_csv('u18_updated.csv')
team_rankings = pd.read_csv('data.csv')
conference_net = pd.read_csv('net_conference_full2.csv')
# NOTE(review): the displayed frame still carries an 'Unnamed: 0' column
# (presumably a saved index); drop it here if it is not wanted downstream.

# Clean data
# 'D1 PPG' uses the placeholder string 'TBD' for players without D1 stats;
# map it to NaN so the column can be cast to float.
u18_df['D1 PPG'] = u18_df['D1 PPG'].replace('TBD', np.nan).astype(float)
u18_df.tail(5)

Unnamed: 0,Player,Synergy Role,Height,U18 Season,PTS/G,PPP,D1 Conference,D1 School(s),D1 Season,D1 PPG,Country
121,Lamprini Polymeni,Playmaking Wing,509.0,2021,10.0,0.548,Big Sky,Montana,21-22,0.4,Greece
122,Lova Lagerlid,Slashing Wing,600.0,2023,5.1,0.75,Big 12,Colorado,24-25,0.0,Sweden
123,Polyxeni (Zenia) Palaska,Post-Up Big,602.0,2023,5.0,0.681,American East,Vermont,23-24,0.0,Greece
124,Theoni Tsami,Rim Finishing Big,601.0,2023,5.0,0.56,Big West,UC Davis,24-25,0.0,Greece
125,Ioanna Stefanaki,Playmaking Wing,509.0,2023,1.0,0.28,Big Sky,Idaho St.,23-24,0.0,Greece


In [11]:

# Conference tiers
# Maps a conference name to its tier (1 = strongest). The names are grouped by
# tier below and flattened into a single lookup; 'Ivy' and 'Ivy League' are both
# listed so either spelling resolves to the same tier.
conference_tiers = {
    conference: tier
    for tier, members in (
        (1, ('SEC', 'Big Ten', 'ACC', 'Big 12', 'PAC12')),
        (2, ('Big East', 'The American', 'WCC', 'MW', 'Atlantic 10')),
        (3, ('MVC', 'MAC', 'Ivy', 'Ivy League', 'Sun Belt', 'CUSA', 'SoCon')),
        # All others would be tier 4
    )
    for conference in members
}

# School strength groups
def get_school_strength(rank):
    """Translate a team rank into a school-strength factor.

    The rank is compared against ascending upper bounds (inclusive); the first
    band that contains it decides the factor. Anything that matches no band,
    including ranks above 290, yields 0.25.
    """
    rank_bands = (
        (25, 1.0),
        (55, 0.9),
        (100, 0.8),
        (140, 0.7),
        (200, 0.6),
        (290, 0.4),
    )
    for upper_bound, strength in rank_bands:
        if rank <= upper_bound:
            return strength
    return 0.25


In [12]:

# Create composite score
def calculate_composite(row, u18_col='U18 PTS/G'):
    """Blend D1 production, school strength and conference strength into one score.

    The base score is ``0.6 * d1_perf + 0.1 * school_strength + 0.3 * conf_weight``.
    When the row carries a non-missing value in ``u18_col`` the result becomes
    ``0.9 * base + 0.1 * min(u18_ppg / 30, 1.0)``.

    Parameters
    ----------
    row : pandas.Series
        One player record. Reads 'D1 Conference', 'D1 School(s)', 'D1 PPG' and,
        when present, the column named by ``u18_col``.
    u18_col : str, optional
        Name of the column holding U18 points per game. The default keeps the
        original lookup name, so existing scores are unchanged.
        NOTE(review): the frame displayed after loading appears to label this
        statistic 'PTS/G', in which case the default never matches and the U18
        blend is skipped; pass ``u18_col='PTS/G'`` to include it.

    Returns
    -------
    float
        Composite score rounded to three decimals.
    """
    if row['D1 Conference'] in ['D2', 'Canada']:
        # Non-D1 destinations: fixed low conference weight and school strength.
        conf_weight = 0.1
        school_strength = 0.1
    else:
        # Conferences missing from the tier table are treated as tier 4.
        conf_tier = conference_tiers.get(row['D1 Conference'], 4)
        conf_weight = 1.0 if conf_tier == 1 else 0.75 if conf_tier == 2 else 0.5 if conf_tier == 3 else 0.25

        school_rank = team_rankings.loc[team_rankings['School'] == row['D1 School(s)'], 'Rank'].values
        if len(school_rank) > 0:
            # First matching ranking row wins when a school appears more than once.
            school_strength = get_school_strength(school_rank[0])
        else:
            # School not found in the rankings: fall back to a discounted
            # fraction of the conference weight.
            school_strength = conf_weight * 0.4

    if pd.isna(row['D1 PPG']):
        # No D1 stats yet (TBD): start from a 0.3 baseline and scale it by
        # school and conference strength; the result is capped at 1.0.
        estimated_performance = 0.3
        d1_perf = min(estimated_performance * school_strength * conf_weight * 1.5, 1.0)
    else:
        # Normalise PPG against 18 points per game; values above that may
        # exceed 1.0 but are capped at 1.2.
        d1_perf = min(row['D1 PPG'] / 18, 1.2)

    # Weights: 60% D1 performance, 10% school strength, 30% conference weight.
    composite = (0.6 * d1_perf) + (0.1 * school_strength) + (0.3 * conf_weight)

    # Optional 10% blend of U18 scoring, normalised against 30 points per game.
    if u18_col in row and not pd.isna(row[u18_col]):
        u18_normalized = min(row[u18_col] / 30, 1.0)
        composite = 0.90 * composite + 0.1 * u18_normalized

    return round(composite, 3)


In [14]:

# Apply to all players
u18_df['Composite Score'] = u18_df.apply(calculate_composite, axis=1)
# Rank players from the highest to the lowest composite score.
u18_df = u18_df.sort_values(by='Composite Score', ascending=False)
u18_df.to_csv("u18_composite.csv", index=False)
# Keep the preview as the last expression of the cell so the notebook renders it.
u18_df.head(20)



In [93]:
def calculate_composite_single(row, u18_col='U18 PTS/G'):
    """Compute the composite score for one player and expose its components.

    The base score is ``0.6 * d1_perf + 0.1 * school_strength + 0.3 * conf_weight``.
    When the row carries a non-missing value in ``u18_col`` the score becomes
    ``0.9 * base + 0.1 * min(u18_ppg / 30, 1.0)`` and the U18 contribution is
    printed.

    Parameters
    ----------
    row : pandas.Series
        One player record. Reads 'D1 Conference', 'D1 School(s)', 'D1 PPG' and,
        when present, the column named by ``u18_col``.
    u18_col : str, optional
        Name of the column holding U18 points per game. The default keeps the
        original lookup name, so existing results are unchanged.
        NOTE(review): the frame displayed after loading appears to label this
        statistic 'PTS/G', in which case the default never matches and the U18
        blend is skipped; pass ``u18_col='PTS/G'`` to include it.

    Returns
    -------
    tuple
        ``(d1_perf, school_strength, conf_weight, composite)`` where
        ``composite`` is rounded to three decimals.
    """
    if row['D1 Conference'] in ['D2', 'Canada']:
        # Non-D1 destinations: fixed low conference weight and school strength.
        conf_weight = 0.1
        school_strength = 0.1
    else:
        # Conferences missing from the tier table are treated as tier 4.
        conf_tier = conference_tiers.get(row['D1 Conference'], 4)
        conf_weight = 1.0 if conf_tier == 1 else 0.75 if conf_tier == 2 else 0.5 if conf_tier == 3 else 0.25

        school_rank = team_rankings.loc[team_rankings['School'] == row['D1 School(s)'], 'Rank'].values
        if len(school_rank) > 0:
            # First matching ranking row wins when a school appears more than once.
            school_strength = get_school_strength(school_rank[0])
        else:
            # School not found in the rankings: fall back to a discounted
            # fraction of the conference weight.
            school_strength = conf_weight * 0.4

    if pd.isna(row['D1 PPG']):
        # No D1 stats yet (TBD): start from a 0.3 baseline and scale it by
        # school and conference strength; the result is capped at 1.0.
        estimated_performance = 0.3
        d1_perf = min(estimated_performance * school_strength * conf_weight * 1.5, 1.0)
    else:
        # Normalise PPG against 18 points per game; values above that may
        # exceed 1.0 but are capped at 1.2.
        d1_perf = min(row['D1 PPG'] / 18, 1.2)

    # Weights: 60% D1 performance, 10% school strength, 30% conference weight.
    composite = (0.6 * d1_perf) + (0.1 * school_strength) + (0.3 * conf_weight)

    # Optional 10% blend of U18 scoring, normalised against 30 points per game.
    if u18_col in row and not pd.isna(row[u18_col]):
        u18_normalized = min(row[u18_col] / 30, 1.0)
        composite = 0.90 * composite + 0.1 * u18_normalized
        u18_factor = 0.1*u18_normalized
        print("u18_factor : ", u18_factor,'\n')

    return d1_perf, school_strength, conf_weight, round(composite, 3)
# Example players
# Positions refer to rows of the score-sorted frame; replace them with the
# positions of the players you want to analyze.
for report_number, player_position in enumerate((98, 95)):
    if report_number > 0:
        # Divider between consecutive player reports.
        print("--------------------------------------------------")
    example_player = u18_df.iloc[player_position]
    d1_perf, school_strength, conf_weight, composite_score = calculate_composite_single(example_player)
    print(f"Player: {example_player['Player']}")
    print(f"D1 School(s): {example_player['D1 School(s)']}")
    print(f"D1 Performance: {d1_perf}")
    print(f"School Strength: {school_strength}")
    print(f"Conference Weight: {conf_weight}")
    print(f"Composite Score: {composite_score}")



Player: Sara Deidda
D1 School(s): Wofford
D1 Performance: 0.05555555555555555
School Strength: 0.6
Conference Weight: 0.5
Composite Score: 0.243
--------------------------------------------------
Player: Rose Nelson
D1 School(s): Fordham
D1 Performance: 0.2222222222222222
School Strength: 0.6
Conference Weight: 0.25
Composite Score: 0.268


### Average Team Strength over the past 5 seasons