# User-player regression
Regressions with sentiment as the target, using covariates from both users and players. Each row is uniquely identified by a (user, player, year) triplet.

### Imports / load

In [1]:
# Data handling, numerics and modelling libraries used by the cells below.
import pandas as pd
# Raise pandas' display limits to 100 columns / 100 rows for rendered frames.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
import numpy as np
import statsmodels.api as sm
# Formula interface; every regression in this notebook is fit through smf.wls.
import statsmodels.formula.api as smf

In [2]:
# Load the NBA user-player sentiment table (tab-separated).
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
nba_df = pd.read_csv('c:/Users/map22/Google Drive/sentiment_nba/nba_user_player_sentiment.tsv', sep='\t')
# Drop rows without Race or PPG: some name matches come from years the player
# wasn't playing, or from coaches.
nba_df = nba_df.dropna(subset=['Race', 'PPG'])
# Flag rows with at most one year of experience.
nba_df['rookie'] = nba_df['experience'] <= 1
# Recode the race labels on the Race column only. Calling DataFrame.replace on
# the whole frame would also rewrite any cell equal to 'M' or 'L' in every
# other column; the NFL load cell likewise recodes just its race column.
nba_df['Race'] = nba_df['Race'].replace({'M': 'B', 'L': 'W'})
# Keep only rows whose (recoded) label is W or B.
nba_df = nba_df.query('Race == "W" or Race == "B"')

  interactivity=interactivity, compiler=compiler, result=result)


#### Get salary residuals for NBA

In [5]:
# Load the NBA player model data and flag rows with experience <= 4 as
# being on a rookie contract.
nba_cov_df = (
    pd.read_csv('modeling_data/nba_player_model_data.tsv', sep='\t')
    .assign(rookie_contract=lambda cov: cov['experience'] <= 4)
)

In [6]:
# Salary model: weighted least squares of standard_salary on performance
# covariates, with each row weighted by its G value.
# NOTE(review): the formula uses `rookie`, while `rookie_contract` is derived
# on nba_cov_df and the NFL salary model uses `rookie_contract` -- confirm
# which regressor was intended here.
salary_model = smf.wls(
    'standard_salary ~ rookie + MP + PPG + ThreePP + DWS + TRBP * height_dummies + AST + BLKP * height_dummies + STLP + TOVP',
    data=nba_cov_df,
    weights=nba_cov_df['G'],
).fit()

In [7]:
# Predicted standardized salary from the fitted salary model.
nba_cov_df['pred_std_salary'] = salary_model.predict(nba_cov_df)
# "Residual" here is predicted minus actual; rows with G < 20 are forced to 0.
# NOTE(review): this is the negative of the usual (actual - predicted)
# residual -- confirm the sign is intended when reading coefficients.
nba_cov_df['std_salary_resid'] = (
    nba_cov_df['pred_std_salary']
    .sub(nba_cov_df['standard_salary'])
    .mask(nba_cov_df['G'] < 20, 0)
)

In [8]:
# Attach the salary residual to each sentiment row by (Player, year).
# Left join: sentiment rows without a covariate match keep a missing residual.
nba_df = pd.merge(
    nba_df,
    nba_cov_df.loc[:, ['Player', 'year', 'std_salary_resid']],
    how='left',
    on=['Player', 'year'],
)

In [9]:
# Mean-impute missing values, column by column, for the listed covariates.
nba_df = nba_df.fillna({
    col: nba_df[col].mean()
    for col in ['FTr','TOVP', 'standard_salary', 'std_salary_resid', 'clinton_vote_lead', 'white_black_diff', 'total_population' ]
})

In [10]:
# Centre the covariates that enter interaction terms on their sample means.
nba_df = nba_df.assign(
    demean_PPG=lambda d: d['PPG'] - d['PPG'].mean(),
    demean_clinton=lambda d: d['clinton_vote_lead'] - d['clinton_vote_lead'].mean(),
    demean_race_diff=lambda d: d['white_black_diff'] - d['white_black_diff'].mean(),
)

#### NFL

In [9]:
# Load the NFL user-player sentiment table, drop rows without a race label
# (some name matches come from years players weren't playing, or from coaches),
# flag rookies (experience <= 1) and collapse the L/S/M labels into B.
# NOTE(review): 'L' is recoded to 'B' here but to 'W' in the NBA load cell, and
# unlike the NBA cell no filter to W/B is applied -- confirm both are intended.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
nfl_df = (
    pd.read_csv('c:/Users/map22/Google Drive/sentiment_nba/nfl_user_player_sentiment.tsv', sep='\t')
    .dropna(subset=['race'])
    .assign(
        rookie=lambda d: d['experience'] <= 1,
        race=lambda d: d['race'].replace({'L':'B', 'S':'B', 'M':'B'}),
    )
)

In [10]:
# Load the NFL model data and flag rows with experience <= 4 as being on a
# rookie contract.
nfl_cov_df = (
    pd.read_csv('modeling_data/nfl_model_data.tsv', sep='\t')
    .assign(rookie_contract=lambda cov: cov['experience'] <= 4)
)

In [11]:
# NFL salary model with equal weights on every row.
# Note: this rebinds `salary_model`, replacing the NBA salary fit, so the
# residual cells depend on being run in notebook order.
salary_model = smf.wls(
    'standard_salary ~ rookie_contract + position + z_DVOA',
    data=nfl_cov_df,
    weights=1,
).fit()

In [12]:
# Predicted standardized salary and the "residual" (predicted minus actual).
# NOTE(review): sign is the negative of the usual (actual - predicted) residual;
# unlike the NBA cell, no low-games rows are zeroed out here.
nfl_cov_df = nfl_cov_df.assign(
    pred_std_salary=salary_model.predict(nfl_cov_df),
).assign(
    std_salary_resid=lambda cov: cov['pred_std_salary'] - cov['standard_salary'],
)

In [13]:
# Attach the salary residual to each sentiment row by (Player, year).
# Left join: sentiment rows without a covariate match keep a missing residual.
nfl_df = pd.merge(
    nfl_df,
    nfl_cov_df.loc[:, ['Player', 'year', 'std_salary_resid']],
    how='left',
    on=['Player', 'year'],
)

In [14]:
# Mean-impute missing values, column by column, for the listed covariates.
nfl_df = nfl_df.fillna({
    col: nfl_df[col].mean()
    for col in [ 'std_salary_resid', 'clinton_vote_lead', 'white_black_diff','total_population' ]
})

In [15]:
# Centre the covariates that enter interaction terms on their sample means.
nfl_df = nfl_df.assign(
    demean_DVOA=lambda d: d['z_DVOA'] - d['z_DVOA'].mean(),
    demean_clinton=lambda d: d['clinton_vote_lead'] - d['clinton_vote_lead'].mean(),
    demean_race_diff=lambda d: d['white_black_diff'] - d['white_black_diff'].mean(),
)

## NBA
#### Points + MP only model - unclustered

In [11]:
# Baseline sentiment model on MP and PPG only.
# weights=1 weights every row equally; fit() without arguments uses the
# default (nonrobust) covariance, i.e. no clustering.
model = smf.wls(
    'compound_mean ~ MP + PPG',
    data=nba_df,
    weights=1,
).fit()

In [12]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,104.6
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,3.7799999999999995e-46
Time:,13:44:57,Log-Likelihood:,-269430.0
No. Observations:,848905,AIC:,538900.0
Df Residuals:,848902,BIC:,538900.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0699,0.001,73.825,0.000,0.068,0.072
MP,-1.036e-06,6.52e-07,-1.589,0.112,-2.31e-06,2.42e-07
PPG,0.0007,6.46e-05,11.085,0.000,0.001,0.001

0,1,2,3
Omnibus:,1034.625,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1158.714
Skew:,-0.044,Prob(JB):,2.4500000000000003e-252
Kurtosis:,3.158,Cond. No.,5480.0


#### Points + MP only model - clustered

In [39]:
# Same MP + PPG specification, but with standard errors clustered on Player.
model = smf.wls(
    'compound_mean ~ MP + PPG',
    data=nba_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [40]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,4.224
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,0.015
Time:,13:54:25,Log-Likelihood:,-269430.0
No. Observations:,848905,AIC:,538900.0
Df Residuals:,848902,BIC:,538900.0
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0699,0.003,21.002,0.000,0.063,0.076
MP,-1.036e-06,2.93e-06,-0.354,0.723,-6.77e-06,4.7e-06
PPG,0.0007,0.000,1.985,0.047,8.87e-06,0.001

0,1,2,3
Omnibus:,1034.625,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1158.714
Skew:,-0.044,Prob(JB):,2.4500000000000003e-252
Kurtosis:,3.158,Cond. No.,5480.0


#### Full performance

In [21]:
# Full set of performance covariates; equal weights, errors clustered on Player.
model = smf.wls(
    'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP',
    data=nba_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [22]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,2.866
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,0.00161
Time:,13:46:54,Log-Likelihood:,-269310.0
No. Observations:,848905,AIC:,538600.0
Df Residuals:,848894,BIC:,538800.0
Df Model:,10,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0732,0.009,8.554,0.000,0.056,0.090
MP,1.812e-07,3.26e-06,0.056,0.956,-6.21e-06,6.57e-06
PPG,0.0009,0.000,2.314,0.021,0.000,0.002
ThreePP,0.0059,0.015,0.385,0.701,-0.024,0.036
DWS,-0.0013,0.002,-0.516,0.606,-0.006,0.004
TRBP,-0.0004,0.000,-0.704,0.482,-0.001,0.001
FTr,-0.0169,0.009,-1.787,0.074,-0.035,0.002
ASTP,-0.0002,0.000,-1.412,0.158,-0.001,8.46e-05
BLKP,0.0021,0.001,1.502,0.133,-0.001,0.005

0,1,2,3
Omnibus:,1027.798,Durbin-Watson:,1.981
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1150.734
Skew:,-0.044,Prob(JB):,1.3199999999999998e-250
Kurtosis:,3.158,Cond. No.,21500.0


#### Full performance + height / age

In [13]:
# Performance covariates plus height category, rookie flag and the
# youth / oldness terms; equal weights, errors clustered on Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness'
    ),
    data=nba_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [14]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,8.965
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,1.48e-19
Time:,13:46:01,Log-Likelihood:,-268900.0
No. Observations:,848905,AIC:,537800.0
Df Residuals:,848889,BIC:,538000.0
Df Model:,15,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0592,0.007,8.034,0.000,0.045,0.074
C(height_dummies)[T.short],0.0069,0.005,1.404,0.160,-0.003,0.016
C(height_dummies)[T.tall],-0.0001,0.004,-0.032,0.974,-0.008,0.008
rookie[T.True],0.0199,0.004,5.409,0.000,0.013,0.027
MP,-2.629e-06,3e-06,-0.876,0.381,-8.51e-06,3.25e-06
PPG,0.0014,0.000,3.492,0.000,0.001,0.002
ThreePP,0.0102,0.013,0.791,0.429,-0.015,0.035
DWS,0.0008,0.002,0.383,0.702,-0.003,0.005
TRBP,-0.0004,0.000,-0.821,0.412,-0.001,0.000

0,1,2,3
Omnibus:,1019.358,Durbin-Watson:,1.982
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1143.163
Skew:,-0.043,Prob(JB):,5.829999999999999e-249
Kurtosis:,3.158,Cond. No.,21900.0


#### Full performance + height / age + salary (from this model on, NBA rows are weighted by sqrt(comment_count) instead of equally)

In [25]:
# Adds standardized salary and the salary residual. Unlike the models above,
# rows are weighted by sqrt(comment_count) rather than equally, so fit
# statistics are not directly comparable with the equal-weight fits.
# NOTE(review): statsmodels treats WLS weights as proportional to inverse
# variance; if compound_mean averages comment_count comments, comment_count
# itself (not its square root) may be the intended weight -- confirm.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid'
    ),
    data=nba_df,
    weights=np.sqrt(nba_df['comment_count']),
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [26]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,7.786
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,2.64e-18
Time:,13:48:57,Log-Likelihood:,-245340.0
No. Observations:,848905,AIC:,490700.0
Df Residuals:,848887,BIC:,490900.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0543,0.014,3.844,0.000,0.027,0.082
C(height_dummies)[T.short],0.0075,0.005,1.525,0.127,-0.002,0.017
C(height_dummies)[T.tall],-0.0007,0.004,-0.169,0.866,-0.009,0.008
rookie[T.True],0.0207,0.004,4.736,0.000,0.012,0.029
MP,-3.616e-06,3.11e-06,-1.161,0.246,-9.72e-06,2.49e-06
PPG,0.0013,0.001,2.552,0.011,0.000,0.002
ThreePP,0.0109,0.013,0.810,0.418,-0.015,0.037
DWS,0.0013,0.002,0.621,0.535,-0.003,0.005
TRBP,-0.0004,0.000,-0.826,0.409,-0.001,0.001

0,1,2,3
Omnibus:,265.066,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,266.889
Skew:,-0.039,Prob(JB):,1.11e-58
Kurtosis:,3.037,Cond. No.,29100.0


#### Full performance + height / age + salary + Team wins

In [27]:
# Previous specification plus Wins; rows weighted by sqrt(comment_count),
# errors clustered on Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins'
    ),
    data=nba_df,
    weights=np.sqrt(nba_df['comment_count']),
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [28]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,7.456
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,3.52e-18
Time:,13:50:05,Log-Likelihood:,-245340.0
No. Observations:,848905,AIC:,490700.0
Df Residuals:,848886,BIC:,490900.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0576,0.015,3.731,0.000,0.027,0.088
C(height_dummies)[T.short],0.0075,0.005,1.525,0.127,-0.002,0.017
C(height_dummies)[T.tall],-0.0008,0.004,-0.185,0.853,-0.009,0.008
rookie[T.True],0.0208,0.004,4.743,0.000,0.012,0.029
MP,-4.208e-06,3.22e-06,-1.307,0.191,-1.05e-05,2.1e-06
PPG,0.0013,0.001,2.495,0.013,0.000,0.002
ThreePP,0.0116,0.014,0.837,0.403,-0.016,0.039
DWS,0.0020,0.002,0.859,0.390,-0.003,0.006
TRBP,-0.0004,0.000,-0.905,0.365,-0.001,0.001

0,1,2,3
Omnibus:,264.924,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,266.737
Skew:,-0.039,Prob(JB):,1.2e-58
Kurtosis:,3.037,Cond. No.,30300.0


#### Full performance + height / age + salary + race of player

In [29]:
# Salary specification plus a categorical Race term (no Wins in this one);
# rows weighted by sqrt(comment_count), errors clustered on Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + C(Race)'
    ),
    data=nba_df,
    weights=np.sqrt(nba_df['comment_count']),
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [30]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,7.459
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,3.46e-18
Time:,13:51:04,Log-Likelihood:,-245340.0
No. Observations:,848905,AIC:,490700.0
Df Residuals:,848886,BIC:,490900.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0542,0.014,3.858,0.000,0.027,0.082
C(height_dummies)[T.short],0.0076,0.005,1.532,0.126,-0.002,0.017
C(height_dummies)[T.tall],-0.0008,0.004,-0.184,0.854,-0.009,0.007
rookie[T.True],0.0207,0.004,4.708,0.000,0.012,0.029
C(Race)[T.W],0.0004,0.005,0.080,0.936,-0.009,0.010
MP,-3.61e-06,3.11e-06,-1.160,0.246,-9.71e-06,2.49e-06
PPG,0.0014,0.001,2.523,0.012,0.000,0.002
ThreePP,0.0108,0.014,0.771,0.441,-0.017,0.038
DWS,0.0013,0.002,0.617,0.537,-0.003,0.006

0,1,2,3
Omnibus:,265.119,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,266.941
Skew:,-0.039,Prob(JB):,1.0800000000000001e-58
Kurtosis:,3.037,Cond. No.,29100.0


#### Full performance + height / age + salary + team wins + team match + city population + race of player × city vote + race of player × PPG

In [43]:
# Race interacted with the demeaned Clinton vote lead and with demeaned PPG,
# alongside Wins, team_match and total_population. PPG enters only through
# demean_PPG here. Rows weighted by sqrt(comment_count), errors clustered on Player.
# NOTE(review): the very large condition number reported for this fit is
# presumably driven by the unscaled total_population column -- confirm.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins + '
        'team_match + C(Race) * demean_clinton +total_population + C(Race) * demean_PPG'
    ),
    data=nba_df,
    weights=np.sqrt(nba_df['comment_count']),
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [44]:
model.summary()



0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,8.448
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,9.920000000000001e-26
Time:,14:01:24,Log-Likelihood:,-245290.0
No. Observations:,848905,AIC:,490600.0
Df Residuals:,848880,BIC:,490900.0
Df Model:,24,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0774,0.021,3.637,0.000,0.036,0.119
C(height_dummies)[T.short],0.0071,0.005,1.419,0.156,-0.003,0.017
C(height_dummies)[T.tall],-0.0003,0.004,-0.062,0.951,-0.008,0.008
rookie[T.True],0.0217,0.005,4.779,0.000,0.013,0.031
team_match[T.True],0.0024,0.002,1.493,0.136,-0.001,0.006
C(Race)[T.W],0.0081,0.005,1.640,0.101,-0.002,0.018
MP,-5.007e-06,3.31e-06,-1.511,0.131,-1.15e-05,1.49e-06
ThreePP,0.0111,0.014,0.801,0.423,-0.016,0.038
DWS,0.0019,0.002,0.830,0.406,-0.003,0.006

0,1,2,3
Omnibus:,260.874,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,262.629
Skew:,-0.039,Prob(JB):,9.350000000000001e-58
Kurtosis:,3.036,Cond. No.,131000000.0


#### Full performance + height / age + salary + team wins + team match + city population + race of player × city demographics + race of player × PPG

In [35]:
# Same as the city-vote specification, but Race is interacted with the
# demeaned white/black difference instead of the Clinton vote lead.
# Rows weighted by sqrt(comment_count), errors clustered on Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins + '
        'team_match + C(Race) * demean_race_diff +total_population + C(Race) * demean_PPG'
    ),
    data=nba_df,
    weights=np.sqrt(nba_df['comment_count']),
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [36]:
model.summary()



0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,8.0
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,4.1899999999999995e-24
Time:,13:52:12,Log-Likelihood:,-245300.0
No. Observations:,848905,AIC:,490600.0
Df Residuals:,848880,BIC:,490900.0
Df Model:,24,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0760,0.021,3.589,0.000,0.034,0.117
C(height_dummies)[T.short],0.0070,0.005,1.399,0.162,-0.003,0.017
C(height_dummies)[T.tall],-0.0003,0.004,-0.063,0.950,-0.008,0.008
rookie[T.True],0.0217,0.005,4.784,0.000,0.013,0.031
team_match[T.True],0.0024,0.002,1.513,0.130,-0.001,0.006
C(Race)[T.W],0.0080,0.005,1.619,0.105,-0.002,0.018
MP,-4.908e-06,3.32e-06,-1.479,0.139,-1.14e-05,1.6e-06
ThreePP,0.0111,0.014,0.805,0.421,-0.016,0.038
DWS,0.0018,0.002,0.799,0.425,-0.003,0.006

0,1,2,3
Omnibus:,260.009,Durbin-Watson:,1.98
Prob(Omnibus):,0.0,Jarque-Bera (JB):,261.758
Skew:,-0.039,Prob(JB):,1.45e-57
Kurtosis:,3.036,Cond. No.,130000000.0


#### Race and PPG

In [41]:
# Minimal race specification: MP plus Race interacted with demeaned PPG.
# Rows weighted by sqrt(comment_count), errors clustered on Player.
model = smf.wls(
    'compound_mean ~ MP + C(Race) * demean_PPG',
    data=nba_df,
    weights=np.sqrt(nba_df['comment_count']),
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nba_df['Player']},
)

In [42]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.519
Date:,"Sun, 10 Feb 2019",Prob (F-statistic):,0.000218
Time:,14:00:32,Log-Likelihood:,-245860.0
No. Observations:,848905,AIC:,491700.0
Df Residuals:,848900,BIC:,491800.0
Df Model:,4,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0853,0.006,14.902,0.000,0.074,0.096
C(Race)[T.W],0.0095,0.005,1.764,0.078,-0.001,0.020
MP,-2.518e-06,3e-06,-0.838,0.402,-8.41e-06,3.37e-06
demean_PPG,0.0007,0.000,1.915,0.056,-1.66e-05,0.001
C(Race)[T.W]:demean_PPG,0.0022,0.001,3.171,0.002,0.001,0.004

0,1,2,3
Omnibus:,282.469,Durbin-Watson:,1.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,284.246
Skew:,-0.041,Prob(JB):,1.89e-62
Kurtosis:,3.036,Cond. No.,8250.0


## NFL

### z_DVOA only

In [23]:
# NFL baseline: sentiment on z_DVOA only; equal weights, errors clustered on Player.
nfl_model = smf.wls(
    'compound_mean ~ z_DVOA',
    data=nfl_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nfl_df['Player']},
)

In [24]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,9.575
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,0.0021
Time:,08:34:59,Log-Likelihood:,-96293.0
No. Observations:,292531,AIC:,192600.0
Df Residuals:,292529,BIC:,192600.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0670,0.002,31.603,0.000,0.063,0.071
z_DVOA,0.0057,0.002,3.094,0.002,0.002,0.009

0,1,2,3
Omnibus:,237.333,Durbin-Watson:,1.957
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.144
Skew:,-0.031,Prob(JB):,7.230000000000001e-58
Kurtosis:,3.133,Cond. No.,1.46


### Performance + demo + position + salary

In [32]:
# z_DVOA plus age terms, rookie flag, position, height category, Wins and the
# two salary covariates; equal weights, errors clustered on Player.
nfl_model = smf.wls(
    'compound_mean ~ z_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid',
    data=nfl_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nfl_df['Player']},
)

In [33]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.489
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,3.16e-08
Time:,08:39:40,Log-Likelihood:,-96157.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292519,BIC:,192500.0
Df Model:,11,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0683,0.067,1.014,0.310,-0.064,0.200
rookie[T.True],0.0172,0.007,2.316,0.021,0.003,0.032
C(position)[T.rb],0.0077,0.006,1.273,0.203,-0.004,0.020
C(position)[T.te],-0.0060,0.005,-1.188,0.235,-0.016,0.004
C(position)[T.wr],-0.0063,0.006,-0.976,0.329,-0.019,0.006
C(height_dummies)[T.tall],0.0041,0.006,0.727,0.468,-0.007,0.015
z_DVOA,0.0074,0.002,3.963,0.000,0.004,0.011
youth,0.0009,0.002,0.542,0.588,-0.002,0.004
oldness,-0.0006,0.001,-0.764,0.445,-0.002,0.001

0,1,2,3
Omnibus:,239.797,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,265.519
Skew:,-0.032,Prob(JB):,2.2e-58
Kurtosis:,3.133,Cond. No.,879.0


### Performance + demo + position + salary + race

In [38]:
# Previous NFL specification plus a categorical race term;
# equal weights, errors clustered on Player.
nfl_model = smf.wls(
    formula=(
        'compound_mean ~ z_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'
        '+ C(race)'
    ),
    data=nfl_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nfl_df['Player']},
)

In [39]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.241
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,2.88e-08
Time:,08:42:56,Log-Likelihood:,-96152.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292518,BIC:,192500.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0667,0.067,0.995,0.320,-0.065,0.198
rookie[T.True],0.0172,0.007,2.310,0.021,0.003,0.032
C(position)[T.rb],0.0117,0.006,1.818,0.069,-0.001,0.024
C(position)[T.te],-0.0058,0.005,-1.120,0.263,-0.016,0.004
C(position)[T.wr],-0.0028,0.007,-0.403,0.687,-0.016,0.011
C(height_dummies)[T.tall],0.0037,0.006,0.649,0.517,-0.007,0.015
C(race)[T.W],0.0058,0.006,1.025,0.306,-0.005,0.017
z_DVOA,0.0072,0.002,3.891,0.000,0.004,0.011
youth,0.0011,0.002,0.659,0.510,-0.002,0.004

0,1,2,3
Omnibus:,239.514,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,265.232
Skew:,-0.032,Prob(JB):,2.54e-58
Kurtosis:,3.133,Cond. No.,880.0


### Performance + demo + position + salary + race + user + city vote

In [23]:
# Race interacted with demeaned DVOA and with the demeaned Clinton vote lead,
# plus team_match and total_population; equal weights, errors clustered on Player.
nfl_model = smf.wls(
    formula=(
        'compound_mean ~ C(race) * demean_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'
        '+ team_match + C(race) * demean_clinton + total_population'
    ),
    data=nfl_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nfl_df['Player']},
)

In [24]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,7.067
Date:,"Mon, 21 Jan 2019",Prob (F-statistic):,3.01e-15
Time:,18:36:25,Log-Likelihood:,-96140.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292513,BIC:,192500.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0715,0.066,1.086,0.278,-0.058,0.200
C(race)[T.W],0.0067,0.006,1.198,0.231,-0.004,0.018
rookie[T.True],0.0176,0.007,2.462,0.014,0.004,0.032
C(position)[T.rb],0.0120,0.006,1.865,0.062,-0.001,0.025
C(position)[T.te],-0.0070,0.005,-1.321,0.186,-0.017,0.003
C(position)[T.wr],-0.0024,0.007,-0.352,0.725,-0.015,0.011
C(height_dummies)[T.tall],0.0040,0.006,0.711,0.477,-0.007,0.015
team_match[T.True],0.0023,0.002,0.912,0.362,-0.003,0.007
demean_DVOA,0.0051,0.004,1.433,0.152,-0.002,0.012

0,1,2,3
Omnibus:,237.391,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.085
Skew:,-0.031,Prob(JB):,7.450000000000001e-58
Kurtosis:,3.133,Cond. No.,326000000.0


### Performance + demo + position + salary + race + user + city demo

In [29]:
# Same as the city-vote specification, but race is interacted with the
# demeaned white/black difference instead of the Clinton vote lead.
# Equal weights, errors clustered on Player.
nfl_model = smf.wls(
    formula=(
        'compound_mean ~ C(race) * demean_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'
        '+ team_match + C(race) * demean_race_diff + total_population'
    ),
    data=nfl_df,
    weights=1,
).fit(
    cov_type='cluster',
    cov_kwds={'groups': nfl_df['Player']},
)

In [30]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.194
Date:,"Mon, 21 Jan 2019",Prob (F-statistic):,1.86e-10
Time:,18:38:22,Log-Likelihood:,-96144.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292513,BIC:,192500.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0698,0.066,1.054,0.292,-0.060,0.200
C(race)[T.W],0.0067,0.006,1.181,0.238,-0.004,0.018
rookie[T.True],0.0175,0.007,2.446,0.014,0.003,0.032
C(position)[T.rb],0.0119,0.006,1.832,0.067,-0.001,0.025
C(position)[T.te],-0.0068,0.005,-1.279,0.201,-0.017,0.004
C(position)[T.wr],-0.0026,0.007,-0.389,0.698,-0.016,0.011
C(height_dummies)[T.tall],0.0036,0.006,0.642,0.521,-0.007,0.015
team_match[T.True],0.0020,0.003,0.789,0.430,-0.003,0.007
demean_DVOA,0.0051,0.004,1.439,0.150,-0.002,0.012

0,1,2,3
Omnibus:,237.438,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.086
Skew:,-0.031,Prob(JB):,7.44e-58
Kurtosis:,3.133,Cond. No.,326000000.0
