In [11]:
import numpy as np
import pandas as pd
import json

import plotly.io as pio
import sys

sys.path.append("..")

from utils.calcs import ovr_to_vorp
from utils.data import player_json_to_df

# Default theme for Plotly figures created after this point. A figure that sets
# its own `template=` in its layout (as the projection figure later in this
# notebook does) overrides this default.
pio.templates.default = "plotly_dark"
# Show figures by opening them in the system web browser instead of inline.
pio.renderers.default = "browser"

In [12]:
# Load a BBGM league export from disk and parse it as JSON.
# NOTE(review): the path is an absolute, machine-specific Windows location, so
# this cell cannot run on another machine without editing it.
# NOTE(review): JSON is normally UTF-8. Decoding as 'latin' never raises, but it
# would garble any non-ASCII characters — confirm the export's real encoding.
with open('C:/Users/jrnas/Downloads/BBGM_League_2_2024_preseason.json', encoding='latin') as f:
    r_json = json.load(f)

# Flatten the raw league JSON into a DataFrame with the project helper
# (utils.data). Later cells read columns such as pid, season, salary, ovr, age
# and tid from it — presumably one row per player-season; verify against the helper.
df = player_json_to_df(r_json)

In [6]:
# Define the file paths
local_file_path = '../data/df.csv'  # local path the CSV snapshot is written to
s3_file_path = 'df.csv'  # object key the file is stored under in the S3 bucket

# Save the DataFrame as a CSV file.
# NOTE: with the default arguments to_csv also writes the index as an unnamed
# first column.
df.to_csv(local_file_path)

# Get the bucket object.
# NOTE(review): `s3` is not defined in any visible cell — presumably a boto3 S3
# resource created in a cell that was since removed. On a fresh kernel this
# line raises NameError; confirm and restore the definition.
bucket_name = 'nastase-analytics-bbgm'  # name of the target S3 bucket
bucket = s3.Bucket(bucket_name)

# Upload the file
bucket.upload_file(local_file_path, s3_file_path)

print(f'Successfully uploaded {local_file_path} to {bucket_name}/{s3_file_path}')

Successfully uploaded ../data/df.csv to nastase-analytics-bbgm/df.csv


In [9]:
## Get object from S3
# The snapshot is written with DataFrame.to_csv's default index=True, so the
# first CSV column is the saved index. Read it back as the index so it does not
# surface as a spurious "Unnamed: 0" data column.
# NOTE(review): `s3` must already exist in the kernel (presumably a boto3 S3
# resource); it is not defined in any visible cell.
obj = s3.Object(bucket_name, s3_file_path)
df = pd.read_csv(obj.get()['Body'], index_col=0)

In [3]:
def compute_kde_percentile_fast(x_values, y_values, percentile):
    """Return the grid point at which a discretised density reaches a quantile.

    The weights are accumulated in the order given and normalised by their
    total, so ``y_values`` does not have to sum to exactly 1 (floating-point
    drift or unnormalised densities are both fine).

    Args:
        x_values: Grid positions, one per weight, in the order the cumulative
            sum should be taken (normally ascending).
        y_values: Non-negative weight / density value at each grid position.
        percentile: Target cumulative fraction, e.g. ``0.9`` for the 90th
            percentile.

    Returns:
        The element of ``x_values`` at the first index whose normalised
        cumulative weight is greater than or equal to ``percentile``. If
        ``percentile`` exceeds the total mass, the last grid point is returned.

    Raises:
        ValueError: If ``y_values`` is empty or its total is not positive.
    """
    weights = np.asarray(y_values, dtype=float)
    if weights.size == 0:
        raise ValueError('y_values must not be empty')

    # Calculate the cumulative sum of the weights
    cumulative_sum = np.cumsum(weights)
    total = cumulative_sum[-1]
    if not total > 0:  # also rejects NaN totals
        raise ValueError('y_values must have a positive total')

    # Normalise so that `percentile` is always a fraction of the total mass
    cumulative_sum = cumulative_sum / total

    # First index where the cumulative fraction is >= the requested percentile.
    # searchsorted returns len(array) when nothing qualifies, so clamp it to the
    # last valid position instead of raising IndexError.
    index = min(np.searchsorted(cumulative_sum, percentile), weights.size - 1)

    # Return the corresponding x_value at this index
    return x_values[index]

In [4]:
# Keep only the most recent season that has any salary data, with a single row
# per (pid, season), and renumber the rows from 0.
latest_salaried_season = df.loc[df.salary.notna(), 'season'].max()
in_latest_season = df.season == latest_salaried_season
df = (
    df[in_latest_season]
    .drop_duplicates(['pid', 'season'])
    .reset_index(drop=True)
)

In [5]:
# Convert each player's current overall rating into a predicted VORP
# (presumably "value over replacement player") by applying the project helper
# ovr_to_vorp (utils.calcs) element-wise.
df['vorp_pred'] = df['ovr'].apply(ovr_to_vorp)

In [6]:
# Load the progression table and rescale each horizon column y_1..y_9 so that
# its total over the whole table equals the number of distinct ages (an average
# mass of 1 per age).
progs = pd.read_parquet('../constants/progression.parquet')
n_ages = progs.age.nunique()
for i in range(1, 10):
    column = f'y_{i}'
    progs[column] = progs[column] / (progs[column].sum() / n_ages)

In [7]:
# Pull the age-20 slice of the progression table: the grid of rating changes
# (x) and the first-horizon weights over that grid (y_1).
age_20_rows = progs[progs.age == 20]
x_prog = age_20_rows['x'].values
y_1 = age_20_rows['y_1'].values

In [8]:
# Pair each grid point with its weight so the age-20 distribution can be
# inspected as a table.
test = pd.DataFrame(zip(x_prog, y_1), columns=['x', 'y'])

In [9]:
# Running total of the weights along the grid (a discrete CDF when y sums to 1).
test['y_cum'] = test.y.cumsum()

In [10]:
# Sanity check: show the rows whose cumulative weight exceeds 0.1.
test[test.y_cum > 0.1]

Unnamed: 0,x,y,y_cum
493,-1.301301,0.020882,0.105615
494,-1.101101,0.022511,0.128126
495,-0.900901,0.023342,0.151467
496,-0.700701,0.023343,0.174810
497,-0.500501,0.022609,0.197419
...,...,...,...
995,99.199199,0.000000,1.000000
996,99.399399,0.000000,1.000000
997,99.599600,0.000000,1.000000
998,99.799800,0.000000,1.000000


In [102]:
def calc_progs(ovr, age, q=0.9):
    """Project a player's rating and value for the current year and nine more.

    Reads the module-level ``progs`` table. For the given ``age`` it provides a
    grid of rating changes (column ``x``) and, for each horizon ``i`` in 1..9,
    a weight column ``y_i`` over that grid.

    Args:
        ovr: The player's current overall rating.
        age: The player's current age; must appear in ``progs.age``.
        q: Quantile used for the upper band; the lower band uses ``1 - q``.

    Returns:
        A dict with keys ``'rating'``, ``'rating_upper'``, ``'rating_lower'``,
        ``'vorp_added'`` and ``'cap_value'``. Each value is a dict mapping the
        horizon (0..9) to a number; horizon 0 is the player's current state.

    Raises:
        ValueError: If ``progs`` has no rows for ``age`` or a horizon's weights
            do not have a positive total.
    """
    # Filter the progression table once instead of on every loop iteration.
    age_rows = progs[progs.age == age]
    if age_rows.empty:
        raise ValueError(f'No progression data for age {age!r}')

    # Candidate future ratings and the value of each one.
    x_rating = age_rows['x'].values + ovr
    # Future VORP is floored at 0 before averaging.
    x_value = np.array([ovr_to_vorp(rating) for rating in x_rating]).clip(min=0)
    # NOTE(review): 30 / 433.58 turns VORP into a fraction of the cap (it is
    # displayed as a percentage downstream); confirm where the constants come from.
    x_cap_hit = 30 * x_value / 433.58

    # Horizon 0 is the player as of today.
    # NOTE(review): unlike the projected years, the horizon-0 VORP is not
    # floored at 0 — confirm this is intended.
    current_vorp = ovr_to_vorp(ovr)
    rating_dict = {0: ovr}
    rating_upper_dict = {0: ovr}
    rating_lower_dict = {0: ovr}
    vorp_added_dict = {0: current_vorp}
    cap_value_dict = {0: 30 * current_vorp / 433.58}

    for i in range(1, 10):
        y = age_rows[f'y_{i}'].values
        total = np.sum(y)
        if not total > 0:
            raise ValueError(f'Progression weights y_{i} for age {age!r} have no positive mass')

        # The means divide by the total mass, so the quantile bands are given
        # weights normalised the same way; otherwise `q` would be measured
        # against a total that is not exactly 1.
        y_normalised = y / total

        rating_dict[i] = np.dot(x_rating, y) / total
        rating_upper_dict[i] = compute_kde_percentile_fast(x_rating, y_normalised, q)
        rating_lower_dict[i] = compute_kde_percentile_fast(x_rating, y_normalised, 1 - q)
        vorp_added_dict[i] = np.dot(x_value, y) / total
        cap_value_dict[i] = np.dot(x_cap_hit, y) / total

    return {'rating': rating_dict,
            'rating_upper': rating_upper_dict,
            'rating_lower': rating_lower_dict,
            'vorp_added': vorp_added_dict,
            'cap_value': cap_value_dict}

In [103]:
# Run the projection once per player (q = 0.75 band) and keep the raw result dicts.
df['results'] = df.apply(lambda row: calc_progs(row['ovr'], row['age'], 0.75), axis=1)

# Fan each entry of the result dict out into its own "<key>_prog" column.
for result_key in ('rating', 'rating_upper', 'rating_lower', 'vorp_added', 'cap_value'):
    df[f'{result_key}_prog'] = df['results'].apply(lambda res, k=result_key: res[k])

In [104]:
# Translate each player's team id into the team's region name from the raw
# league JSON; a tid with no matching team entry maps to NaN.
region_by_tid = {team['tid']: team['region'] for team in r_json['teams']}
df['team'] = df['tid'].map(region_by_tid)

In [105]:
# Roster overview, highest overall rating first.
overview_columns = ['pid', 'firstName', 'lastName', 'season', 'team', 'age', 'ovr', 'pot']
df.sort_values('ovr', ascending=False)[overview_columns]

Unnamed: 0,pid,firstName,lastName,season,team,age,ovr,pot
54,57,Nikola,Jokic,2024,Denver,29,74,74
113,124,Giannis,Antetokounmpo,2024,Milwaukee,29,73,73
27,29,Shai,Gilgeous-Alexander,2024,Oklahoma City,25,72,75
85,90,Joel,Embiid,2024,Philadelphia,30,72,72
9,10,Luka,Doncic,2024,Dallas,25,71,74
...,...,...,...,...,...,...,...,...
131,142,Bronny,James,2024,,19,29,53
477,555,Pacome,Dadiet,2024,,19,29,57
590,701,Trevor,Keels,2024,,20,29,57
606,720,Kwame,Evans Jr.,2024,,19,29,59


In [116]:
import plotly.graph_objects as go

pid = 10  # replace with the pid you want to filter on

# Filter the dataframe for the specific pid
df_filtered = df[df.pid == pid]
if df_filtered.empty:
    # Fail with a readable message instead of the bare IndexError that
    # `.values[0]` would raise on an empty selection.
    raise ValueError(f'No player with pid={pid} found in df')

# Retrieve the player's first name and last name
first_name = df_filtered['firstName'].values[0]
last_name = df_filtered['lastName'].values[0]

# Create the title
title = f'{first_name} {last_name}'

# Extract the rating, value, and bounds data (each is a dict: year offset -> value)
rating_data = df_filtered['rating_prog'].values[0]
rating_upper_data = df_filtered['rating_upper_prog'].values[0]
rating_lower_data = df_filtered['rating_lower_prog'].values[0]
value_data = df_filtered['vorp_added_prog'].values[0]
cap_value_data = df_filtered['cap_value_prog'].values[0]

# Materialise the band values once; they are used for both the filled band and
# the hover text of the rating line.
rating_upper_values = list(rating_upper_data.values())
rating_lower_values = list(rating_lower_data.values())
value_values = list(value_data.values())
cap_value_values = list(cap_value_data.values())

# Create the plot for ratings
fig = go.Figure()

# Add the plot for upper and lower bounds as lines with the area between them filled.
# The upper bound is drawn first as an invisible line; the lower bound then
# fills up to it via fill='tonexty'.
fig.add_trace(
    go.Scatter(
        x=list(rating_upper_data.keys()),
        y=rating_upper_values,
        name='Upper Bound',
        mode='lines',
        line=dict(width=0),
        hoverinfo='skip',
        showlegend=False,
    )
)

fig.add_trace(
    go.Scatter(
        x=list(rating_lower_data.keys()),
        y=rating_lower_values,
        name='Lower Bound',
        mode='none',
        fill='tonexty',
        hoverinfo='skip',
        fillcolor='rgba(255, 90, 95, 0.2)',
        showlegend=False,
    )
)

# Add the plot for ratings as a line; the bounds travel along as customdata so
# the hover box can show them.
fig.add_trace(
    go.Scatter(
        x=list(rating_data.keys()),
        y=list(rating_data.values()),
        name='Rating',
        line=dict(
            color='rgb(255, 90, 95)'
        ),
        customdata=np.stack((rating_lower_values, rating_upper_values), axis=-1),
        hovertemplate=
        '<b>Year</b>: %{x}<br>' +
        '<b>Rating</b>: %{y:.1f}<br>' +  # Round to 1 decimal place
        '<b>Lower Bound</b>: %{customdata[0]:.1f}<br>' +  # Round to 1 decimal place
        '<b>Upper Bound</b>: %{customdata[1]:.1f}<br>',  # Round to 1 decimal place
    )
)

# Add the plot for values as bars. The two bar traces use different y-axes, so
# they are placed side by side by shifting x by -0.2 / +0.2 manually.
fig.add_trace(
    go.Bar(
        x=[x - 0.2 for x in list(value_data.keys())],
        y=value_values,
        name='Value',
        yaxis='y2',
        width=0.35,
        marker=dict(
            color='rgb(252,100,45)'
        ),
        text=[f'{val:.1f}' for val in value_values],
        textposition='outside',
        textfont=dict(
            color='rgb(252,100,45)',
            size=14,
        ),
    )
)

# Add the plot for cap values as bars (stored as fractions, labelled as percentages)
fig.add_trace(
    go.Bar(
        x=[x + 0.2 for x in list(cap_value_data.keys())],
        y=cap_value_values,
        name='Cap Value',
        yaxis='y3',
        width=0.35,
        marker=dict(
            color='rgb(0, 166, 153)'
        ),
        text=[f'{val*100:.1f}%' for val in cap_value_values],
        textposition='outside',
        textfont=dict(
            color='rgb(0, 166, 153)',
            size=14,
        ),
    )
)

# Configure the layout: title, template (this overrides the notebook-wide
# default template) and three overlaid y-axes — ratings on `y`, value bars on
# `y2`, cap-share bars on `y3`. Only the rating axis shows tick labels.
fig.update_layout(
    template='simple_white',
    title=title,
    barmode='group',
    yaxis=dict(
        range=[10, 90],
        showgrid=False,
        showticklabels=True,
    ),
    yaxis2=dict(
        range=[0, 20],
        overlaying='y',
        side='right',
        showgrid=False,
        showticklabels=False,
    ),
    yaxis3=dict(
        range=[0, 1],
        overlaying='y',
        side='left',
        showgrid=False,
        showticklabels=False,
    )
)
fig.show()