A weighted average of a players stats over the past three seasons can be used to predict next season's perfomance, whereby more recent seasons are weighted more heavily.

In [1]:
import os
import pandas as pd
from scipy.stats import zscore 
from constants import DATA_DIR
from tqdm import tqdm_notebook as tqdm

In [2]:
def add_player_value(df):
    #calculate zscore - blocks not included!!
    #consider punting more than one
    columns = ['G','FG%','FT%','3P','PTS','TRB','AST','STL','TOV']
    numeric_stats = df[columns]
    z_scores = pd.DataFrame(zscore(numeric_stats, axis=0, nan_policy='omit'),columns=columns)
    #turnovers need to be inverted maybe nerfed?
    z_scores['TOV'] = z_scores['TOV'] * -1
    value = pd.DataFrame(columns = ['VALUE'])
    value['VALUE'] = z_scores.sum(axis=1)
    result = pd.concat([df, value], axis=1)
    result.to_csv('asdf.csv')
    return result

In [3]:
def weigh_rows(row, weighting, old_row):
    if weighting == 'linear':
        weights = [i for i in range(1,4)]
    elif weighting == 'quad':
        weights = [i**2 for i in range(1,4)]
    elif weighting == 'sqrt':
        weights = [i**(1/2) for i in range(1,4)]
    elif weighting == 'base':
        weights = [1, 1, 1]
    columns = ['G','FG%','FT%','3P','PTS','TRB','AST','STL','TOV']
    new_row = pd.DataFrame(columns=columns)
    for column in columns:
        values = row[column].tolist()
        n = len(weights)
        weighted_sum = [weights[i] * values[i] for i in range(n)]
        weighted_mean = sum(weighted_sum) / sum(weights)
        old_row[column]=[weighted_mean]
    return old_row

def weigh_data(weighting, season, merged):
#for year (2018, 2019, 2020)
    #merge years e.g. merge(2015,2016,2017) -> 2018
    curr = merged.loc[merged['SEASON'] == season]
    weighted = pd.DataFrame()
    for i in tqdm(range(curr.shape[0])):
        #filter to just that player
        #ensure that there are three years worth of data
        #get dataframe of past years three years
        #define weightings
        date = curr.loc[i,'SEASON']
        name = curr.loc[i,'PLAYER']
        df_name = merged.loc[merged['PLAYER']==name].reset_index(drop=True)
        
        new = df_name.loc[df_name['SEASON'] < season]
        if len(new.index) >=3:
            # get bottome three
            row = new.iloc[-3:]
            old_row = df_name.loc[df_name['SEASON'] == season]
            new_row = weigh_rows(row, weighting, old_row)
            # add value to row
            weighted = weighted.append(new_row)
            #if name == 'James Harden':
                #print(new)
    return weighted

In [6]:
years = ['2013-14','2014-15','2015-16','2016-17','2017-18','2018-19','2019-20','2020-21']
merged = pd.DataFrame()
for year in years:
    df = pd.read_csv('~/Desktop/NBA-Fantasy-Draft/data/PlayerStats/'+year+'.csv')
    df = add_player_value(df)
    df.to_csv('~/Desktop/NBA-Fantasy-Draft/data/Value/'+year+'.csv')
    merged = merged.append(df)

In [7]:
weightings = ['base', 'linear', 'quad', 'sqrt']
seasons = ['2013-14','2014-15','2015-16','2016-17','2017-18','2018-19','2019-20','2020-21']

for weighting in weightings:
    for season in seasons:
        df = weigh_data(weighting, season, merged)
        merged.to_csv('merged.csv')
        df.to_csv(os.path.join(DATA_DIR,'Weighted',weighting,season+'.csv'), index=False)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(curr.shape[0])):


HBox(children=(FloatProgress(value=0.0, max=478.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=491.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=475.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=482.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_row[column]=[weighted_mean]





HBox(children=(FloatProgress(value=0.0, max=535.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=478.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=491.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=475.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=482.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=535.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=478.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=491.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=475.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=482.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=535.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=478.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=491.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=475.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=482.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=535.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=526.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=536.0), HTML(value='')))


