In [None]:
"""
The intent behind this notebook was to develop season summary statistics and to explore a longstanding question around SR changes: do some players consistently get rewarded more for wins, or punished more for losses, than other players?

"""

In [1]:

import pandas as pd
import time
import matplotlib.pyplot as plt
import os
import numpy as np
import statsmodels.api as sm

In [2]:
# Hero -> role lookup sets, used to bucket per-hero rows into Support / Tank / Damage.
# Membership is an exact string match against the 'hero' column, so a misspelled
# name silently drops that hero's games from the role totals.
# NOTE(review): the spelling used in the stats CSV is not visible here, so the
# original entries ('Brigette', 'Lucio', 'Reinhart', 'McGree', 'Torbjorn') are kept
# and the official spellings are added alongside them -- TODO confirm against the
# data and prune whichever variant never occurs.
support_heroes={
    'Ana',
    'Baptiste',
    'Brigette','Brigitte',
    'Lucio','Lúcio',
    'Mercy',
    'Moira',
    'Zenyatta',
}
tank_heroes={
    'D.Va',
    'Orisa',
    'Reinhart','Reinhardt',
    'Roadhog',
    'Sigma',
    'Winston',
    'Wrecking Ball',
    'Zarya',
}
damage_heroes={
    'Ashe',
    'Bastion',
    'Doomfist',
    'Echo',
    'Genji',
    'Hanzo',
    'Junkrat',
    'McGree','McCree',
    'Mei',
    'Pharah',
    'Reaper',
    'Soldier: 76',
    'Sombra',
    'Symmetra',
    'Torbjorn','Torbjörn',
    'Tracer',
    'Widowmaker',
}

In [4]:
# Directory that holds the stats CSV. Set the OW_LOG_PATH environment variable to
# point somewhere else; when it is unset the original hard-coded location is used.
_DEFAULT_LOG_PATH='C:\\Users\\Matt\\Documents\\GitHub\\random-projects\\overwatch\\'
log_path=os.environ.get('OW_LOG_PATH',_DEFAULT_LOG_PATH)
# The file name is appended by plain string concatenation below, so make sure an
# overridden path ends with a separator.
if not log_path.endswith(('\\','/')):
    log_path+=os.sep

# Read with (date, player, hero) as the index, then flatten back to ordinary columns.
pivoted_ow_df=pd.read_csv(log_path+'full_comp_stats_running.csv',index_col=['date','player','hero'])
new_df=pivoted_ow_df.reset_index()
# Dates are stored as text such as 2020_07_02; parse them into real timestamps.
new_df['date']=pd.to_datetime(new_df['date'],format='%Y_%m_%d')

In [7]:
# Plan: run this for the whole squad, then look at three cases per day and role:
# only wins, only losses, or an equal number of wins and losses.

# Count columns that are summed per role and then differenced between snapshots.
cols = ['Time Played', 'Games Lost', 'Games Tied', 'Games Won', 'Games Played']

# Role names; they appear as a suffix in 'Games Won <role>' and a prefix in '<role> SR'.
roles = ['Damage', 'Tank', 'Support']
class OW_player_analysis():
    """Day-over-day changes in one player's competitive stats.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of the stats table for a single player. The code reads the columns
        'date', 'player', 'hero', 'Tank SR', 'Support SR', 'Damage SR',
        'Games Played' and the count columns listed in the module-level ``cols``.
        Consecutive rows are differenced, so the counts are presumably running
        totals -- TODO confirm against the scraper.

    Attributes
    ----------
    df : the frame passed in.
    all_heroes : rows where hero == 'All Heroes', indexed and sorted by date.
    sr_diff : row-to-row difference of the three SR columns and 'Games Played'.

    ``check_role_changes`` adds further attributes; see its docstring.
    """

    # Sanity bounds on SR change per game. Values outside them are treated as
    # scrape artefacts and blanked to NaN.
    MIN_SR_PER_NET_GAME=12      # smallest plausible |SR change| per net win or loss
    MAX_SR_PER_NET_GAME=40      # largest plausible |SR change| per net win or loss
    MAX_SR_PER_WIN_WHEN_EVEN=15 # largest plausible |SR change| per win when wins == losses

    def __init__(self,df):
        # df is the full pivoted table for one player
        self.df=df
        self.all_heroes=df[df['hero']=='All Heroes'].set_index('date').sort_index()
        self.sr_diff=self.all_heroes[['Tank SR','Support SR','Damage SR','Games Played']].diff()

    def specific_hero_diff(self,hero):
        """Return ``(hero_df, hero_diff_df)`` for a single hero.

        ``hero_df`` holds this player's rows for ``hero``, indexed and sorted by
        date, with all-NaN columns and the 'player'/'hero' columns removed.
        ``hero_diff_df`` is its row-to-row difference.
        """
        hero_rows=self.df[self.df['hero']==hero]
        hero_df=(hero_rows.set_index('date')
                          .sort_index()
                          .dropna(axis=1,how='all')
                          .drop(['player','hero'],axis=1))
        hero_diff_df=hero_df.diff()
        return hero_df,hero_diff_df

    def _role_totals(self,heroes,role):
        """Sum the ``cols`` counts per date over this player's rows for ``heroes``.

        The mask is built from ``self.df`` itself, not from a notebook-level frame,
        so the result depends only on the data handed to the constructor.
        """
        in_role=self.df['hero'].isin(heroes)
        return self.df.loc[in_role,cols+['date']].groupby('date').sum().add_suffix(f' {role}')

    def check_role_changes(self):
        """Build per-role daily changes and the SR change per game.

        Sets these attributes:

        * ``support`` / ``damage`` / ``tank``: per-date sums of ``cols`` over the
          heroes of that role, with the role name appended to each column name.
        * ``support_dff`` / ``damage_dff`` / ``tank_dff``: their row-to-row differences.
        * ``all_roles`` / ``all_roles_dff``: the three role tables joined, and the
          difference of that join.
        * ``all_roles_dff_sr``: ``all_roles_dff`` joined with ``sr_diff``, plus for
          every role in ``roles``:

          - ``Net <role> Games``: games won minus games lost since the previous row.
          - ``Net + SR <role>``: SR change / net games, on rows with net > 0 and wins > 0.
          - ``Net - SR <role>``: SR change / net games, on rows with net < 0 and losses > 0.
          - ``Net 0 SR <role>``: SR change / games won, on rows with net == 0 and wins > 0.

          The first two are blanked outside
          [MIN_SR_PER_NET_GAME, MAX_SR_PER_NET_GAME]; the last is blanked when its
          absolute value exceeds MAX_SR_PER_WIN_WHEN_EVEN.

        Notes kept from the original analysis:

        * A +29 on Support for a net-zero day (5/15) is not feasible, but it is what
          the source HTML files contain rather than an error in this code. The
          Net - Tank SR for the same day also looks too low; consider dropping that day.
        * Berg's 4/29 shows -29 despite a new win -- possibly a dropped game.
        * These cases are why the sanity bounds exist.

        NOTE(review): the role tables are combined with pandas' default left join,
        so dates that have no Support rows are absent from ``all_roles``.
        """
        self.support=self._role_totals(support_heroes,'Support')
        self.support_dff=self.support.sort_index().diff()

        self.damage=self._role_totals(damage_heroes,'Damage')
        self.damage_dff=self.damage.sort_index().diff()

        self.tank=self._role_totals(tank_heroes,'Tank')
        self.tank_dff=self.tank.sort_index().diff()

        self.all_roles=self.support.join(self.damage).join(self.tank,rsuffix='tank')
        self.all_roles_dff=self.all_roles.sort_index().diff()
        self.all_roles_dff_sr=self.all_roles_dff.join(self.sr_diff)

        changes=self.all_roles_dff_sr  # same object; edits below land on the attribute
        for role in roles:
            won=changes[f"Games Won {role}"]
            lost=changes[f"Games Lost {role}"]
            sr_change=changes[f"{role} SR"]
            net=won-lost
            changes[f"Net {role} Games"]=net

            pos_col,neg_col,zero_col=f'Net + SR {role}',f'Net - SR {role}',f'Net 0 SR {role}'
            net_pos_mask=(net>0)&(won>0)
            net_neg_mask=(net<0)&(lost>0)
            net_zero_mask=(net==0)&(won>0)

            # (target column, rows it applies to, per-row divisor)
            per_game_specs=(
                (pos_col,net_pos_mask,net),
                (neg_col,net_neg_mask,net),
                (zero_col,net_zero_mask,won),
            )
            for column,mask,divisor in per_game_specs:
                # Create the column up front so it exists even when no row qualifies.
                changes[column]=np.nan
                changes.loc[mask,column]=sr_change[mask]/divisor[mask]

            # Sanity checks: blank values outside the plausible range. A net loss
            # gives a negative SR change over negative net games, so the expected
            # 'Net -' value is positive as well.
            for column in (pos_col,neg_col):
                implausible=((changes[column]>self.MAX_SR_PER_NET_GAME)
                             |(changes[column]<self.MIN_SR_PER_NET_GAME))
                changes.loc[implausible,column]=np.nan
            too_large=changes[zero_col].abs()>self.MAX_SR_PER_WIN_WHEN_EVEN
            changes.loc[too_large,zero_col]=np.nan

In [63]:
#Make this generic for all seasons, with a list of the season start dates


In [None]:
#March 5
#May 7
'2020-07-2'

In [17]:
#Season summary. Average SR across the season (weighted by games played), high, low.
new_df_date=new_df.set_index('date')
# pd.Timestamp no longer accepts `freq` (removed in pandas 2.0); a plain timestamp
# is all the comparisons below need.
season_start=pd.Timestamp('2020-07-2')
# Used as the season end when no second reset is found for a player.
#Clean this up once there's a standard list of season boundary dates
fallback_season_end=pd.Timestamp('2020-09-4')
squad=['Rthunder27','BIGoleICEBERG','Seraph341','LaCroixDaddy','YAS RIHANNA']

summary_rows=[]
for player in squad:
    # SR levels (not changes) for this player, on rows where all three roles have a value.
    sr_df_temp=new_df_date.loc[new_df_date['player']==player,['Tank SR','Support SR', 'Damage SR']].dropna()

    t=OW_player_analysis(new_df.loc[new_df['player']==player])
    t.check_role_changes()

    # Daily changes joined with SR levels; the level columns get the ' level' suffix.
    sr_df=t.all_roles_dff_sr.join(sr_df_temp,rsuffix=' level')

    # A negative day-over-day change in 'Games Won Support' marks a boundary --
    # presumably the counters reset at a season rollover (TODO confirm).
    reset_dates=sr_df.loc[(sr_df['Games Won Support']<0)&(sr_df.index>season_start)].index
    if len(reset_dates)==0:
        raise ValueError(f"No 'Games Won Support' reset found after {season_start.date()} for player {player!r}")
    actual_start=reset_dates[0]
    actual_end=reset_dates[1] if len(reset_dates)>1 else fallback_season_end
    season_df=sr_df.loc[(sr_df.index>actual_start)&(sr_df.index<actual_end)]

    player_means={}
    for role in roles:
        #Need to do this over the post placement period
        # Mean SR level weighted by the games played that day, over days with a known SR.
        rated_days=season_df.loc[season_df[f'{role} SR level'].notna()]
        games_that_day=rated_days[f'Games Played {role}']
        role_mean_sr=(rated_days[f'{role} SR level']*games_that_day).sum()/games_that_day.sum()
        player_means[f'Mean {role} SR']=role_mean_sr
    summary_rows.append(player_means)

# Build the table once instead of growing it one cell at a time.
season_summary_table=pd.DataFrame(summary_rows,index=squad)

"""
To-do: also include season highs/lows. Make a visual?
"""

In [18]:
season_summary_table
#pd.set_option('display.max_columns', None)

Unnamed: 0,Mean Damage SR,Mean Tank SR,Mean Support SR
Rthunder27,2354.108108,2466.733333,2770.222826
BIGoleICEBERG,2207.55102,2370.403226,2522.647059
Seraph341,2613.0,2452.546512,2652.887324
LaCroixDaddy,2229.478261,2321.863636,2793.914286
YAS RIHANNA,2217.441667,1962.473684,2144.591837


In [19]:
"""
Do this for all roles/players. Filter out when N is too low (less than 10-15?)
Columns: For each role, + coeff, -coeff,adj R2

Consider an option to do it by season, to see how the coeffs change with time.
"""
squad=['Rthunder27','BIGoleICEBERG','Seraph341','LaCroixDaddy','YAS RIHANNA','star4ker','ULove2SeeIt7915']
roles=['Damage','Tank','Support']
# Skip a player/role combination that has fewer usable days than this.
MIN_REGRESSION_ROWS=15

coeff_rows=[]
coeff_players=[]
for gamer_tag in squad:
    t=OW_player_analysis(new_df.loc[new_df['player']==gamer_tag])
    # Does not depend on the role, so compute it once per player.
    t.check_role_changes()

    player_coeffs={}
    for role in roles:
        won_col,lost_col,sr_col=f'Games Won {role}',f'Games Lost {role}',f'{role} SR'
        role_df=t.all_roles_dff_sr[[won_col,lost_col,sr_col]].dropna()
        # Drop days where wins, losses and SR change are all zero.
        role_df=role_df.loc[~(role_df==0).all(axis=1)]
        role_df_Y=role_df[[sr_col]]
        role_df_X=role_df.drop(columns=[sr_col])
        if role_df_X.shape[0]<MIN_REGRESSION_ROWS:
            continue
        # No constant term is added: SR change is regressed on wins and losses only.
        model = sm.OLS(role_df_Y, role_df_X)
        results = model.fit()

        # Look coefficients up by regressor name; positional [] on a labelled
        # Series is deprecated in pandas.
        player_coeffs[f'{role} +']=results.params[won_col]
        player_coeffs[f'{role} -']=results.params[lost_col]
        player_coeffs[f'{role} R^2']=results.rsquared_adj

    # Players with no qualifying role get no row.
    if player_coeffs:
        coeff_players.append(gamer_tag)
        coeff_rows.append(player_coeffs)

sr_coeffs_df=pd.DataFrame(coeff_rows,index=coeff_players)

In [20]:
sr_coeffs_df

Unnamed: 0,Damage +,Damage -,Damage R^2,Tank +,Tank -,Tank R^2,Support +,Support -,Support R^2
Rthunder27,19.736804,-17.932441,0.812065,22.793804,-22.785163,0.96858,26.175458,-23.070927,0.945256
BIGoleICEBERG,15.84194,-16.98396,0.710217,15.955133,-16.46479,0.672798,17.76659,-20.315497,0.762886
Seraph341,15.072892,-15.093677,0.667421,21.946443,-20.718857,0.883382,19.820977,-18.350117,0.798649
LaCroixDaddy,17.102436,-15.683164,0.730431,17.924136,-16.374609,0.704946,19.162347,-17.971943,0.752407
YAS RIHANNA,16.20268,-17.102123,0.721809,18.714935,-18.100385,0.764126,16.5247,-16.269642,0.713516
ULove2SeeIt7915,,,,13.55725,-10.605466,0.399933,27.998013,-24.01072,0.880678


In [23]:
results.summary()

0,1,2,3
Dep. Variable:,Support SR,R-squared (uncentered):,0.888
Model:,OLS,Adj. R-squared (uncentered):,0.881
Method:,Least Squares,F-statistic:,119.1
Date:,"Mon, 03 May 2021",Prob (F-statistic):,5.38e-15
Time:,17:35:13,Log-Likelihood:,-133.02
No. Observations:,32,AIC:,270.0
Df Residuals:,30,BIC:,273.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Games Won Support,27.9980,2.368,11.822,0.000,23.161,32.835
Games Lost Support,-24.0107,1.583,-15.172,0.000,-27.243,-20.779

0,1,2,3
Omnibus:,19.448,Durbin-Watson:,1.237
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27.373
Skew:,-1.543,Prob(JB):,1.14e-06
Kurtosis:,6.317,Cond. No.,4.16
