In [292]:
import os

os.chdir('..')

import numpy as np
import json

import plotly.io as pio

from apps.data import *

pio.templates.default = "plotly_dark"
pio.renderers.default = "browser"

In [293]:
UNDER_OVER = 56.064

In [294]:
import re


def get_salary_cap_events():
    ## Load json file from downloads
    with open('C:/Users/jrnas/Downloads/BBGM_League_1_2220_free_agency(1).json', encoding='latin') as f:
        r_json = json.load(f)

    return pd.DataFrame([(x['season'], x['text']) for x in r_json['events'] if 'An inflation rate of' in x['text']],
                        columns=['season', 'text'])


def extract_values(text):
    # Pattern for inflation rate
    inflation_pattern = r"(\d+(\.\d+)?)%"
    # Pattern for salary cap
    salary_cap_pattern = r"\$(\d+(\.\d+)?[MB]?)"

    # Search for the patterns
    inflation_match = re.search(inflation_pattern, text)
    salary_cap_match = re.search(salary_cap_pattern, text)

    # Extract the matched values
    inflation_rate = float(inflation_match.group(1)) if inflation_match else None
    salary_cap = salary_cap_match.group(1) if salary_cap_match else None

    return inflation_rate, salary_cap


def convert_salary_cap(salary_cap):
    if salary_cap.endswith('M'):
        return float(salary_cap[:-1])
    elif salary_cap.endswith('B'):
        return float(salary_cap[:-1]) * 1000
    else:
        return None


def get_salary_cap():
    inf_df = get_salary_cap_events()
    inf_df['inf_rate'], inf_df['salary_cap'] = zip(*inf_df['text'].apply(extract_values))
    inf_df['salary_cap'] = inf_df['salary_cap'].apply(convert_salary_cap)
    inf_df = inf_df[['season', 'inf_rate', 'salary_cap']]
    ## Add a row for 2023
    inf_df = pd.concat(
        [pd.DataFrame({'season': 2023, 'inf_rate': 0, 'salary_cap': 136}, index=[0]), inf_df]).reset_index(
        drop=True)
    ## Set up dictionary
    return inf_df.set_index('season').to_dict()['salary_cap']

In [295]:
salary_cap = get_salary_cap()

In [296]:
with open('C:/Users/jrnas/Downloads/BBGM_League_1_2220_free_agency.json', encoding='latin') as f:
    r_json = json.load(f)

In [319]:
df = player_json_to_df(r_json, keep=['ratings', 'stats','salaries'])

In [320]:
df['vorp_norm'] = (df['vorp'] / df['min']) * (32 * 82)
df['ewa_norm'] = (df['ewa'] / df['min']) * (32 * 82)

In [321]:
test_df = df[(df['min'] > 10) & (~df['vorp_norm'].isna())].reset_index(drop=True)
test_df['vorp_norm_wt'] = test_df['vorp_norm'] * test_df['min']
test_df['ewa_norm_wt'] = test_df['ewa_norm'] * test_df['min']
agg_df = test_df.groupby('ovr')[['vorp_norm_wt', 'ewa_norm_wt', 'min']].sum().reset_index()
agg_df['vorp_norm'] = agg_df['vorp_norm_wt'] / agg_df['min']
agg_df['ewa_norm'] = agg_df['ewa_norm_wt'] / agg_df['min']

In [322]:
model_df_under = agg_df[agg_df['ovr'] <= UNDER_OVER].reset_index(drop=True)
poly_under = np.polyfit(model_df_under['ovr'], model_df_under['vorp_norm'], 1)
ewa_under = np.polyfit(model_df_under['ovr'], model_df_under['ewa_norm'], 1)

model_df_over = agg_df[agg_df['ovr'] > UNDER_OVER].reset_index(drop=True)
poly_over = np.polyfit(model_df_over['ovr'], model_df_over['vorp_norm'], 1)
ewa_over = np.polyfit(model_df_over['ovr'], model_df_over['ewa_norm'], 1)

df['vorp_under'] = np.polyval(poly_under, df['ovr'])
df['vorp_over'] = np.polyval(poly_over, df['ovr'])
df['vorp_pred'] = np.where(
    df['ovr'] <= UNDER_OVER,
    df['vorp_under'],
    df['vorp_over']
)

df['ewa_under'] = np.polyval(ewa_under, df['ovr'])
df['ewa_over'] = np.polyval(ewa_over, df['ovr'])
df['ewa_pred'] = np.where(
    df['ovr'] <= UNDER_OVER,
    df['ewa_under'],
    df['ewa_over']
)

np.save('models/poly_under.npy', poly_under)
np.save('models/poly_over.npy', poly_over)

FileNotFoundError: [Errno 2] No such file or directory: 'models/poly_under.npy'

In [None]:
df['vorp_norm'] = (df['vorp'] / df['min']) * (32 * 82)
df['ewa_norm'] = (df['ewa'] / df['min']) * (32 * 82)

df['vorp_under'] = np.polyval(poly_under, df['ovr'])
df['vorp_over'] = np.polyval(poly_over, df['ovr'])
df['vorp_pred'] = np.where(
    df['ovr'] <= UNDER_OVER,
    df['vorp_under'],
    df['vorp_over']
)

df['ewa_under'] = np.polyval(ewa_under, df['ovr'])
df['ewa_over'] = np.polyval(ewa_over, df['ovr'])
df['ewa_pred'] = np.where(
    df['ovr'] <= UNDER_OVER,
    df['ewa_under'],
    df['ewa_over']
)

In [323]:
df['cvorp'] = df['vorp'].clip(0, )
df['vorp_pct'] = df['vorp_pred'].clip(0, ) / df[df.season.between(2024, 2220)].groupby('season').vorp.sum().mean()
df['vorp_pct_cap'] = df['vorp_pct'] * 30

In [324]:
def predict_minutes(ovr):
    # =6.6208*LN(A2) - 26.515
    pred_ini = 6.6208 * np.log(ovr) - 26.515
    pred_inverse = 1 / (1 + np.exp(-pred_ini))
    pred_final = 3000 * pred_inverse
    return pred_final

In [325]:
df['min_pred'] = df['ovr'].apply(predict_minutes)
df['vorp_total'] = df['vorp_pred'].clip(0, ) * df['min_pred'] / 82 / 32
df['vorp_total_pct'] = df['vorp_total'] / df[df.season.between(2024, 2220)].groupby('season').vorp.sum().mean()
df['vorp_total_pct_cap'] = df['vorp_total_pct'] * 30

In [326]:
df['player_rank'] = df[df.tid > 0].groupby('season').ovr.transform('rank', ascending=False, method='first').astype(
    'int64[pyarrow]')

In [327]:
player_ranks = [x for x in range(15, 15 * 30, 30)]

In [328]:
team_df = pd.DataFrame(columns=['ovr', 'vorp_pred', 'min_pred'])

In [329]:
df = df.drop_duplicates(['pid','season'], keep='last').reset_index(drop=True)

In [330]:
for i in player_ranks:
    step_df = df[(df.season.between(2024, 2200)) & (df.player_rank == i)][
        ['ovr', 'vorp_pred', 'min_pred']].mean().reset_index().T.tail(1).rename(
        columns={0: 'ovr', 1: 'vorp_pred', 2: 'min_pred'})
    team_df = pd.concat([team_df, step_df], ignore_index=True)

In [331]:
df['v1'] = np.where(df.ovr >= 55, 0.0059 * df['ovr'] ** 2 - 0.2886 * df['ovr'] - 2.4429, 0).clip(0, )
df['v1_cap'] = 30 * df['v1'] / df.groupby('season')['v1'].transform('sum')

df['v2'] = df['vorp_pred'].clip(0, )
df['v2_cap'] = 30 * df['v2'] / df.groupby('season')['v2'].transform('sum')

df['v3'] = df['vorp_total'].clip(0, )
df['v3_cap'] = 30 * df['v3'] / df.groupby('season')['v3'].transform('sum')

df['v3'] = df['vorp_total'].clip(0, )
df['v3_cap'] = 30 * df['v3'] / df.groupby('season')['v3'].transform('sum')
df['v3_cap_new'] = 30 * df['v3'] / df[df.season.between(2024, 2220)].groupby('season').vorp.sum().mean()

In [347]:
df[df.season.between(2050, 2220)].groupby('season').vorp.sum().mean()

308.74735028974266

In [349]:
df[df.season.between(2024, 2220)].groupby('season').v3.sum().mean()

309.00924055899424

In [332]:
df[(df.season.between(2024, 2220)) & (df.lastName == 'Lively II')].groupby('season').vorp.sum().mean()

1.9818266440900922

In [333]:
df[(df.season.between(2024, 2220)) & (df.lastName == 'Lively II')][
    ['firstName', 'lastName', 'tid', 'ovr', 'v1', 'v2', 'v3', 'v1_cap', 'v2_cap', 'v3_cap','v3_cap_new']].head(10)

Unnamed: 0,firstName,lastName,tid,ovr,v1,v2,v3,v1_cap,v2_cap,v3_cap,v3_cap_new
603,Dereck,Lively II,6,58,0.6659,1.395021,0.942704,0.045517,0.065394,0.055196,0.091555
604,Dereck,Lively II,6,63,2.7924,3.584332,2.926745,0.197833,0.176231,0.178995,0.284244
605,Dereck,Lively II,6,67,4.706,5.33578,4.817768,0.334361,0.26697,0.298494,0.467899
606,Dereck,Lively II,6,68,5.2139,5.773642,5.317578,0.385646,0.300565,0.342147,0.516441
607,Dereck,Lively II,6,69,5.7336,6.211505,5.825197,0.442754,0.337388,0.389918,0.56574
608,Dereck,Lively II,6,70,6.2651,6.649367,6.339361,0.467667,0.359501,0.417001,0.615676
609,Dereck,Lively II,6,70,6.2651,6.649367,6.339361,0.472917,0.365797,0.420556,0.615676
610,Dereck,Lively II,6,73,7.9304,7.962953,7.91046,0.612994,0.446887,0.535179,0.76826
611,Dereck,Lively II,6,69,5.7336,6.211505,5.825197,0.46566,0.363299,0.412795,0.56574
612,Dereck,Lively II,6,71,6.8084,7.087229,6.858939,0.546648,0.410204,0.479308,0.666137


In [336]:
df['cap_hit'] = df['salary'] / df['season'].map(salary_cap) / 1000

In [339]:
model_df = df[(df['min'] > 100)][['v3_cap_new','vorp_total']].dropna()
model = np.polyfit(model_df['v3_cap_new'], model_df['vorp_total'], 1)
df['vorp_exp'] = np.polyval(model, df['cap_hit'])

In [317]:
import plotly.express as px
px.scatter(
    df[(df.season.between(2024, 2220)) & (df.tid > 0)],
    x='v3_cap_new',
    y='vorp_total'
)