In [None]:
%matplotlib inline
# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd

# visualization
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read both inputs as UTF-8 CSV files from the current working directory.
glicko_path, dob_path = 'glicko.csv', 'dob.csv'
df = pd.read_csv(glicko_path, encoding='utf-8')
dob = pd.read_csv(dob_path, encoding='utf-8')

In [None]:
# Normalise the player ids: delete every run of non-word characters, then
# lower-case (presumably so they line up with the ids the ratings table is
# merged on -- confirm against glicko.csv).
# regex=True is passed explicitly: from pandas 2.0 onwards str.replace treats
# the pattern as a literal string by default, which would leave r'\W+'
# unmatched and the ids unstripped.
dob['id'] = dob['id'].str.replace(r'\W+', '', regex=True).str.lower()

In [None]:
# Show the first rows of the date-of-birth table.
dob.head()

In [None]:
# Left join: every row of df is kept and gains the columns of dob that share its id.
df = pd.merge(df, dob, on='id', how='left')

In [None]:
# Discard rows with any missing value, then keep the first remaining row for
# each (id, tour) pair.
df = (
    df.dropna(axis=0, how='any')
      .drop_duplicates(subset=['id', 'tour'], keep='first')
)

In [None]:
# Show the first rows of the merged, de-duplicated table.
df.head()

In [None]:
#convert day to datetime type
# start and end are parsed as day/month/year, dob as year/month/day.
for date_col, date_fmt in (('start', '%d/%m/%Y'),
                           ('end', '%d/%m/%Y'),
                           ('dob', '%Y/%m/%d')):
    df[date_col] = pd.to_datetime(df[date_col], format=date_fmt)

#age
# Age at the tournament start, expressed in years of 365.25 days.
one_year = pd.Timedelta(days=365.25)
df['age'] = (df['start'] - df['dob']) / one_year

#elo gain from each tournament
# End rating minus start rating for the row.
df['diffr'] = df['endglicko2rating'] - df['startglicko2rating']

#cumulative sum of delo
# 1) total diffr per (id, start); 2) running total of those sums within each id
# (the groupby result is ordered by id, then start).
diffr_per_start = df.groupby(['id', 'start'])[['diffr']].sum()
cumsums = diffr_per_start.groupby(level='id').cumsum()

# Attach the running total by index alignment on (id, start), then restore the
# two keys as ordinary columns.
df = df.set_index(['id', 'start'])
df['cumur'] = cumsums['diffr']
df = df.reset_index()

#drop tournaments with no prize
# Rows whose prizeusd is the literal string 'Nan' are removed; the remaining
# values are then converted to a numeric dtype.
has_prize = df['prizeusd'] != 'Nan'
df = df[has_prize].assign(prizeusd=lambda kept: pd.to_numeric(kept['prizeusd']))

#experience
# 'exp' is the zero-based position of a tournament among the distinct
# tournaments a player appears in, ordered by start date.
#
# The lookup table is rebuilt from scratch on every run. The earlier per-player
# loop created `dfid2` by catching NameError and otherwise appended to it, so
# re-running the cell in a live kernel stacked new rows on top of the stale
# table left over from the previous run. It also re-concatenated the whole
# table once per player and failed with NameError when df had no rows.
dfid2 = (
    df.sort_values('start', kind='mergesort')   # stable: ties keep row order
      .drop_duplicates(subset=['id', 'tour'])   # first appearance of each tour
      .loc[:, ['id', 'tour']]
      .assign(exp=lambda seen: seen.groupby('id').cumcount())
)

df = pd.merge(df, dfid2, how='left', on=['id', 'tour'])

#init is rating + initial mmr (assume first team = player's calibration)
# For every id take the startglicko2rating of its earliest-starting row that
# has one, and expose it as the column 'init'.
rated_rows = df[df['startglicko2rating'].notnull()].sort_values('start')
init = (
    rated_rows.groupby('id')['startglicko2rating']
              .first()
              .rename('init')
              .reset_index()
)
df = df.merge(init, on='id', how='left')

# rating = starting value plus the running total of rating changes.
df['rating'] = df['init'] + df['cumur']

# Keep one row per (id, tour) and drop rows that still contain missing values.
df = df.drop_duplicates(subset=['id', 'tour']).dropna()


In [None]:
# Display every row belonging to The International 2013.
df.loc[df['tour'].eq('The International 2013')]

In [None]:
# Label each row with a season based on its start date. Every season after
# 'ti3' is the window (previous cutoff, cutoff], i.e. open on the left and
# closed on the right.
# Rows starting on or before 2013-08-02 keep the default 'ti3'.
# NOTE(review): rows starting after 2017-08-02 match no window and therefore
# also keep 'ti3' -- confirm that this is intended.
season_cutoffs = [pd.to_datetime(day) for day in
                  ('2013-08-02', '2014-07-08', '2015-07-27', '2016-08-02', '2017-08-02')]

df['season'] = 'ti3'
for season_label, opened, closed in zip(('ti4', 'ti5', 'ti6', 'ti7'),
                                        season_cutoffs[:-1],
                                        season_cutoffs[1:]):
    in_window = (df['start'] > opened) & (df['start'] <= closed)
    df.loc[in_window, 'season'] = season_label

In [None]:
def _log_ti_prize(tour_name):
    """Return log(int(prizeusd)) taken from the first row of df whose tour is tour_name.

    Raises IndexError when df contains no row for that tour.
    """
    first_prize = df.loc[df['tour'] == tour_name, 'prizeusd'].iloc[0]
    return np.log(int(first_prize))


# Every row starts with the 2013 value; rows of the later seasons are then
# overwritten with the value of their own season's International.
df['tiprize'] = _log_ti_prize('The International 2013')
for season_label, tour_name in (('ti4', 'The International 2014'),
                                ('ti5', 'The International 2015'),
                                ('ti6', 'The International 2016'),
                                ('ti7', 'The International 2017')):
    df.loc[df['season'] == season_label, 'tiprize'] = _log_ti_prize(tour_name)

# Weighted rating change: diffr scaled by log(prize) relative to the season's
# log(TI prize).
log_prize = df['prizeusd'].apply(np.log)
df['wdiffr'] = log_prize.div(df['tiprize']).mul(df['diffr'])

#cumulative sum of wdiffr
# Total wdiffr per (id, start), then the running total of those sums within
# each id (the groupby result is ordered by id, then start).
wdiffr_per_start = df.groupby(['id', 'start'])[['wdiffr']].sum()
cumsums = wdiffr_per_start.groupby(level='id').cumsum()

# Attach by index alignment on (id, start), then restore the keys as columns.
df = df.set_index(['id', 'start'])
df['cumuwr'] = cumsums['wdiffr']
df = df.reset_index()

# Weighted rating = starting value plus the running weighted change.
df['wrating'] = df['init'] + df['cumuwr']

In [None]:
# Show the first rows of df.
df.head()

In [None]:
# Show the first rows of df.
# NOTE(review): this cell repeats the preview in the cell directly before it.
df.head()

In [None]:
def PanelRegression(df):
    """Unfinished stub -- presumably meant to fit a panel regression on ``df``.

    The body currently only imports ``PanelOLS`` from ``linearmodels``; ``df``
    is not used and the function returns ``None`` implicitly.
    TODO: implement or remove.
    """
    # Function-scope import: linearmodels is only required when this is called.
    from linearmodels import PanelOLS
    

In [None]:
player = 'arteezy'

# All rows of the chosen player, indexed and ordered by age.
career = df[df['id'] == player].set_index('age').sort_index()

# Line plot of wrating against age on a 10x6 inch figure.
fig, ax = plt.subplots(figsize=(10, 6))
career[['wrating']].plot(ax=ax)

# Table behind the plot; as the last expression of the cell it is displayed.
career[['wrating', 'tour', 'team', 'start']]