# User-player regression
Regressions targeting sentiment, with covariates from both users and players. Each row corresponds to one user-player-year triplet, which serves as the row key.

### Imports / load

In [1]:
import pandas as pd
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

  from pandas.core import datetools


In [2]:
# Load the NBA user-player-year sentiment table.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
nba_df = pd.read_csv('c:/Users/map22/Google Drive/sentiment_nba/nba_user_player_sentiment.tsv', sep='\t')
# Drop rows without Race or PPG (per the original note: name matches for years
# players weren't playing, or for coaches).
nba_df = nba_df.dropna(subset=['Race', 'PPG'])
# Flag rows with at most one year of experience.
nba_df['rookie'] = nba_df['experience'] <= 1
# Recode race labels on the Race column only. A frame-wide replace would also
# rewrite any 'M' / 'L' values that happen to occur in unrelated columns.
nba_df['Race'] = nba_df['Race'].replace({'M': 'B', 'L': 'W'})
# Keep only the two race categories used by the models below.
nba_df = nba_df.query('Race == "W" or Race == "B"')

  interactivity=interactivity, compiler=compiler, result=result)


#### Get salary residuals for NBA

In [3]:
# Covariate table used to fit the NBA salary model.
# `rookie_contract` flags rows with experience of four years or fewer.
nba_cov_df = (
    pd.read_csv('modeling_data/nba_model_data.tsv', sep='\t')
    .assign(rookie_contract=lambda cov: cov['experience'] <= 4)
)

In [4]:
# Weighted least squares of standardized salary on performance covariates,
# with each row weighted by its `G` value (presumably games played — confirm).
# NOTE(review): the formula uses `rookie`, whereas the previous cell creates
# `rookie_contract` (which the NFL salary model uses) — confirm this is intended.
salary_model = smf.wls(
    formula='standard_salary ~ rookie + MP + PPG + ThreePP + DWS + TRBP * height_dummies + AST + BLKP * height_dummies + STLP + TOVP',
    data=nba_cov_df,
    weights=nba_cov_df['G'],
).fit()

In [5]:
# Store the salary model's fitted values next to the covariates.
nba_cov_df['pred_std_salary'] = salary_model.predict(nba_cov_df)

# Residual is computed as predicted minus actual (note the sign: this is the
# opposite of the usual actual-minus-predicted convention).
predicted_minus_actual = nba_cov_df['pred_std_salary'] - nba_cov_df['standard_salary']

# Rows with G below 20 get a residual of exactly 0; all others keep their value.
nba_cov_df['std_salary_resid'] = predicted_minus_actual.mask(nba_cov_df['G'] < 20, 0)

In [6]:
# Attach the salary residual to every sentiment row by (Player, year).
# Left join: sentiment rows without a covariate match are kept with a missing residual.
nba_salary_resid = nba_cov_df[['Player', 'year', 'std_salary_resid']]
nba_df = pd.merge(nba_df, nba_salary_resid, how='left', on=['Player', 'year'])

In [7]:
# Mean-impute missing values in the covariates listed below, one column at a time.
nba_mean_imputed_columns = [
    'FTr',
    'TOVP',
    'standard_salary',
    'std_salary_resid',
    'clinton_vote_lead',
    'white_black_diff',
    'total_population',
]
for column in nba_mean_imputed_columns:
    column_mean = nba_df[column].mean()
    nba_df[column] = nba_df[column].fillna(column_mean)

#### NFL

In [8]:
# Load the NFL user-player-year sentiment table.
nfl_df = pd.read_csv('c:/Users/map22/Google Drive/sentiment_nba/nfl_user_player_sentiment.tsv', sep='\t')

# Keep only rows with a race label (per the original note: drops name matches
# for years players weren't playing, or for coaches).
nfl_df = nfl_df[nfl_df['race'].notna()]

# Add the rookie flag (experience of at most one year) and recode race labels.
# NOTE(review): 'L' is mapped to 'B' here but to 'W' in the NBA cell, and unlike
# the NBA cell there is no filter down to 'W' / 'B' — confirm both are intended.
nfl_df = nfl_df.assign(
    rookie=nfl_df['experience'] <= 1,
    race=nfl_df['race'].replace({'L': 'B', 'S': 'B', 'M': 'B'}),
)

In [9]:
# Covariate table used to fit the NFL salary model.
# `rookie_contract` flags rows with experience of four years or fewer.
nfl_cov_df = (
    pd.read_csv('modeling_data/nfl_model_data.tsv', sep='\t')
    .assign(rookie_contract=lambda cov: cov['experience'] <= 4)
)

In [10]:
# Standardized salary regressed on rookie-contract status, position and z_DVOA.
# Unit weights: every row counts equally.
# Note: this rebinds `salary_model`, replacing the NBA salary model fitted earlier.
salary_model = smf.wls(
    formula='standard_salary ~ rookie_contract + position + z_DVOA',
    data=nfl_cov_df,
    weights=1,
).fit()

In [14]:
# Store the salary model's fitted values, then the residual as predicted minus
# actual (same sign convention as the NBA residual).
nfl_cov_df['pred_std_salary'] = salary_model.predict(nfl_cov_df)
nfl_cov_df['std_salary_resid'] = nfl_cov_df['pred_std_salary'].sub(nfl_cov_df['standard_salary'])

In [15]:
# Attach the salary residual to every sentiment row by (Player, year).
# Left join: sentiment rows without a covariate match are kept with a missing residual.
nfl_salary_resid = nfl_cov_df[['Player', 'year', 'std_salary_resid']]
nfl_df = pd.merge(nfl_df, nfl_salary_resid, how='left', on=['Player', 'year'])

In [18]:
# Mean-impute missing values in the covariates listed below, one column at a time.
nfl_mean_imputed_columns = [
    'std_salary_resid',
    'clinton_vote_lead',
    'white_black_diff',
    'total_population',
]
for column in nfl_mean_imputed_columns:
    column_mean = nfl_df[column].mean()
    nfl_df[column] = nfl_df[column].fillna(column_mean)

## NBA
#### Performance only model

In [48]:
# Mean-center the covariates that enter the interaction model, writing each
# centered version to its own `demean_*` column.
nba_centering_map = [
    ('PPG', 'demean_PPG'),
    ('clinton_vote_lead', 'demean_clinton'),
    ('white_black_diff', 'demean_race_diff'),
]
for source_column, centered_column in nba_centering_map:
    nba_df[centered_column] = nba_df[source_column] - nba_df[source_column].mean()

In [18]:
# Baseline NBA model: mean compound sentiment on minutes played and points per game.
# Unit weights (every row counts equally) and the default covariance estimator.
model = smf.wls(
    formula='compound_mean ~ MP + PPG',
    data=nba_df,
    weights=1,
).fit()

In [19]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,99.48
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,6.33e-44
Time:,06:12:21,Log-Likelihood:,-282570.0
No. Observations:,886736,AIC:,565200.0
Df Residuals:,886733,BIC:,565200.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0702,0.001,76.323,0.000,0.068,0.072
MP,-7.649e-08,6.34e-07,-0.121,0.904,-1.32e-06,1.17e-06
PPG,0.0006,6.46e-05,9.796,0.000,0.001,0.001

0,1,2,3
Omnibus:,1095.622,Durbin-Watson:,1.477
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1225.433
Skew:,-0.045,Prob(JB):,7.95e-267
Kurtosis:,3.158,Cond. No.,5440.0


#### Full performance

In [20]:
# NBA model with the full set of on-court performance covariates.
# Unit weights (every row counts equally) and the default covariance estimator.
model = smf.wls(
    formula='compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP',
    data=nba_df,
    weights=1,
).fit()

In [21]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,55.53
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,7.03e-113
Time:,06:14:20,Log-Likelihood:,-282220.0
No. Observations:,886123,AIC:,564500.0
Df Residuals:,886112,BIC:,564600.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0803,0.002,33.625,0.000,0.076,0.085
MP,5.631e-07,8.74e-07,0.644,0.520,-1.15e-06,2.28e-06
PPG,0.0007,9.02e-05,7.922,0.000,0.001,0.001
ThreePP,0.0099,0.003,3.024,0.002,0.003,0.016
DWS,-0.0006,0.001,-1.147,0.252,-0.002,0.000
TRBP,-0.0012,0.000,-9.833,0.000,-0.001,-0.001
FTr,-0.0066,0.003,-2.409,0.016,-0.012,-0.001
ASTP,-0.0001,5.02e-05,-2.484,0.013,-0.000,-2.63e-05
BLKP,0.0028,0.000,8.628,0.000,0.002,0.003

0,1,2,3
Omnibus:,1082.997,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1210.575
Skew:,-0.045,Prob(JB):,1.3399999999999999e-263
Kurtosis:,3.158,Cond. No.,21400.0


#### Full performance + height / age

In [45]:
# NOTE(review): this cell operates on the NFL frame although it sits in the NBA
# section; the same three statements appear again at the start of the NFL section.
# Mean-center the NFL covariates, writing each centered version to a `demean_*` column.
nfl_centering_map = [
    ('z_DVOA', 'demean_DVOA'),
    ('clinton_vote_lead', 'demean_clinton'),
    ('white_black_diff', 'demean_race_diff'),
]
for source_column, centered_column in nfl_centering_map:
    nfl_df[centered_column] = nfl_df[source_column] - nfl_df[source_column].mean()

In [27]:
# Full performance covariates plus height category, rookie flag and the
# youth / oldness terms. Unit weights and the default covariance estimator.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness'
    ),
    data=nba_df,
    weights=1,
).fit()

In [28]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,81.82
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,3.3e-252
Time:,07:26:49,Log-Likelihood:,-281880.0
No. Observations:,886123,AIC:,563800.0
Df Residuals:,886107,BIC:,564000.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0660,0.002,26.496,0.000,0.061,0.071
C(height_dummies)[T.short],0.0108,0.002,5.055,0.000,0.007,0.015
C(height_dummies)[T.tall],-0.0020,0.001,-1.922,0.055,-0.004,3.84e-05
rookie[T.True],0.0148,0.001,10.022,0.000,0.012,0.018
MP,-1.574e-06,8.87e-07,-1.774,0.076,-3.31e-06,1.65e-07
PPG,0.0011,9.36e-05,12.094,0.000,0.001,0.001
ThreePP,0.0125,0.003,3.792,0.000,0.006,0.019
DWS,0.0009,0.001,1.831,0.067,-6.68e-05,0.002
TRBP,-0.0010,0.000,-7.878,0.000,-0.001,-0.001

0,1,2,3
Omnibus:,1072.458,Durbin-Watson:,1.479
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1200.485
Skew:,-0.044,Prob(JB):,2.08e-261
Kurtosis:,3.158,Cond. No.,21700.0


#### Full performance + height / age + salary

In [19]:
# Adds standardized salary and the salary residual to the performance +
# height / age specification. From this model on, standard errors are
# cluster-robust with one cluster per Player.
# NOTE(review): `groups` is taken from the full frame — presumably it must line up
# row-for-row with the observations actually used in the fit; confirm no rows are dropped.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid'
    ),
    data=nba_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nba_df['Player']})

In [20]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,6.232
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,6.68e-14
Time:,08:33:11,Log-Likelihood:,-279890.0
No. Observations:,880544,AIC:,559800.0
Df Residuals:,880526,BIC:,560000.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0632,0.013,4.918,0.000,0.038,0.088
C(height_dummies)[T.short],0.0115,0.010,1.137,0.256,-0.008,0.031
C(height_dummies)[T.tall],-0.0019,0.005,-0.373,0.709,-0.012,0.008
rookie[T.True],0.0146,0.005,2.975,0.003,0.005,0.024
MP,-1.185e-06,2.99e-06,-0.396,0.692,-7.05e-06,4.68e-06
PPG,0.0011,0.000,2.241,0.025,0.000,0.002
ThreePP,0.0124,0.014,0.875,0.382,-0.015,0.040
DWS,0.0007,0.002,0.375,0.707,-0.003,0.005
TRBP,-0.0010,0.001,-2.014,0.044,-0.002,-2.76e-05

0,1,2,3
Omnibus:,1065.698,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1193.334
Skew:,-0.044,Prob(JB):,7.43e-260
Kurtosis:,3.158,Cond. No.,27400.0


#### Full performance + height / age + salary + Team wins

In [74]:
# Salary specification plus team wins; cluster-robust standard errors by Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins'
    ),
    data=nba_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nba_df['Player']})

In [75]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.98
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,8.53e-14
Time:,08:02:03,Log-Likelihood:,-279890.0
No. Observations:,880544,AIC:,559800.0
Df Residuals:,880525,BIC:,560000.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0678,0.014,5.010,0.000,0.041,0.094
C(height_dummies)[T.short],0.0115,0.010,1.143,0.253,-0.008,0.031
C(height_dummies)[T.tall],-0.0020,0.005,-0.400,0.689,-0.012,0.008
rookie[T.True],0.0146,0.005,2.977,0.003,0.005,0.024
MP,-1.924e-06,3.22e-06,-0.597,0.550,-8.24e-06,4.39e-06
PPG,0.0011,0.001,2.164,0.030,0.000,0.002
ThreePP,0.0129,0.014,0.903,0.367,-0.015,0.041
DWS,0.0016,0.002,0.683,0.495,-0.003,0.006
TRBP,-0.0011,0.001,-2.075,0.038,-0.002,-6.08e-05

0,1,2,3
Omnibus:,1065.367,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1192.863
Skew:,-0.044,Prob(JB):,9.4e-260
Kurtosis:,3.158,Cond. No.,28900.0


#### Full performance + height / age + salary + race of player

In [71]:
# Salary specification plus the player's race as a categorical term
# (this formula does not include Wins); cluster-robust standard errors by Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + C(Race)'
    ),
    data=nba_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nba_df['Player']})

In [72]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,6.556
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,1.67e-15
Time:,07:56:00,Log-Likelihood:,-279880.0
No. Observations:,880544,AIC:,559800.0
Df Residuals:,880525,BIC:,560000.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0623,0.012,4.996,0.000,0.038,0.087
C(height_dummies)[T.short],0.0120,0.010,1.190,0.234,-0.008,0.032
C(height_dummies)[T.tall],-0.0026,0.005,-0.499,0.617,-0.013,0.008
rookie[T.True],0.0143,0.005,2.921,0.003,0.005,0.024
C(Race)[T.W],0.0054,0.007,0.722,0.471,-0.009,0.020
MP,-1.21e-06,2.96e-06,-0.409,0.683,-7.02e-06,4.6e-06
PPG,0.0012,0.001,2.330,0.020,0.000,0.002
ThreePP,0.0104,0.015,0.692,0.489,-0.019,0.040
DWS,0.0007,0.002,0.364,0.716,-0.003,0.005

0,1,2,3
Omnibus:,1067.67,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1195.438
Skew:,-0.044,Prob(JB):,2.5899999999999998e-260
Kurtosis:,3.158,Cond. No.,27400.0


#### Full performance + height / age + salary + team wins + race of player + user + interactions

In [50]:
# Largest NBA specification: performance, height / age, salary, Wins and
# team_match, plus race interacted with the mean-centered clinton_vote_lead,
# white_black_diff and PPG columns. PPG enters only through its centered
# version inside the C(Race) * demean_PPG term.
# Cluster-robust standard errors by Player.
model = smf.wls(
    formula=(
        'compound_mean ~ MP + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP'
        ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins + '
        'team_match + C(Race) * demean_clinton + C(Race) * demean_race_diff + total_population + C(Race) * demean_PPG'
    ),
    data=nba_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nba_df['Player']})

In [51]:
# Render the fit summary of `model` as the cell output.
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,5.78
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,1.25e-17
Time:,08:58:24,Log-Likelihood:,-279780.0
No. Observations:,880544,AIC:,559600.0
Df Residuals:,880517,BIC:,559900.0
Df Model:,26,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0810,0.018,4.430,0.000,0.045,0.117
C(height_dummies)[T.short],0.0113,0.010,1.127,0.260,-0.008,0.031
C(height_dummies)[T.tall],-0.0019,0.005,-0.383,0.702,-0.012,0.008
rookie[T.True],0.0161,0.005,3.007,0.003,0.006,0.027
team_match[T.True],0.0023,0.002,1.296,0.195,-0.001,0.006
C(Race)[T.W],0.0167,0.010,1.610,0.107,-0.004,0.037
MP,-2.957e-06,3.26e-06,-0.906,0.365,-9.35e-06,3.44e-06
ThreePP,0.0106,0.015,0.727,0.467,-0.018,0.039
DWS,0.0013,0.002,0.571,0.568,-0.003,0.006

0,1,2,3
Omnibus:,1066.833,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1194.273
Skew:,-0.044,Prob(JB):,4.64e-260
Kurtosis:,3.158,Cond. No.,127000000.0


## NFL

### z_DVOA only

In [45]:
# Mean-center the NFL covariates that enter the interaction model, writing each
# centered version to its own `demean_*` column.
nfl_centering_map = [
    ('z_DVOA', 'demean_DVOA'),
    ('clinton_vote_lead', 'demean_clinton'),
    ('white_black_diff', 'demean_race_diff'),
]
for source_column, centered_column in nfl_centering_map:
    nfl_df[centered_column] = nfl_df[source_column] - nfl_df[source_column].mean()

In [23]:
# Baseline NFL model: mean compound sentiment on z_DVOA alone.
# Unit weights; cluster-robust standard errors with one cluster per Player.
nfl_model = smf.wls(
    formula='compound_mean ~ z_DVOA',
    data=nfl_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nfl_df['Player']})

In [24]:
# Render the fit summary of `nfl_model` as the cell output.
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,9.575
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,0.0021
Time:,08:34:59,Log-Likelihood:,-96293.0
No. Observations:,292531,AIC:,192600.0
Df Residuals:,292529,BIC:,192600.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0670,0.002,31.603,0.000,0.063,0.071
z_DVOA,0.0057,0.002,3.094,0.002,0.002,0.009

0,1,2,3
Omnibus:,237.333,Durbin-Watson:,1.957
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.144
Skew:,-0.031,Prob(JB):,7.230000000000001e-58
Kurtosis:,3.133,Cond. No.,1.46


### Performance + demo + position + salary

In [32]:
# NFL model with performance, age terms, rookie flag, position, height
# category, team wins and the two salary covariates.
# Unit weights; cluster-robust standard errors by Player.
nfl_model = smf.wls(
    formula='compound_mean ~ z_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid',
    data=nfl_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nfl_df['Player']})

In [33]:
# Render the fit summary of `nfl_model` as the cell output.
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.489
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,3.16e-08
Time:,08:39:40,Log-Likelihood:,-96157.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292519,BIC:,192500.0
Df Model:,11,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0683,0.067,1.014,0.310,-0.064,0.200
rookie[T.True],0.0172,0.007,2.316,0.021,0.003,0.032
C(position)[T.rb],0.0077,0.006,1.273,0.203,-0.004,0.020
C(position)[T.te],-0.0060,0.005,-1.188,0.235,-0.016,0.004
C(position)[T.wr],-0.0063,0.006,-0.976,0.329,-0.019,0.006
C(height_dummies)[T.tall],0.0041,0.006,0.727,0.468,-0.007,0.015
z_DVOA,0.0074,0.002,3.963,0.000,0.004,0.011
youth,0.0009,0.002,0.542,0.588,-0.002,0.004
oldness,-0.0006,0.001,-0.764,0.445,-0.002,0.001

0,1,2,3
Omnibus:,239.797,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,265.519
Skew:,-0.032,Prob(JB):,2.2e-58
Kurtosis:,3.133,Cond. No.,879.0


### Performance + demo + position + salary + race

In [38]:
# Previous NFL specification plus the player's race as a categorical term.
# Unit weights; cluster-robust standard errors by Player.
nfl_model = smf.wls(
    formula=(
        'compound_mean ~ z_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'
        '+ C(race)'
    ),
    data=nfl_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nfl_df['Player']})

In [39]:
# Render the fit summary of `nfl_model` as the cell output.
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.241
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,2.88e-08
Time:,08:42:56,Log-Likelihood:,-96152.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292518,BIC:,192500.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0667,0.067,0.995,0.320,-0.065,0.198
rookie[T.True],0.0172,0.007,2.310,0.021,0.003,0.032
C(position)[T.rb],0.0117,0.006,1.818,0.069,-0.001,0.024
C(position)[T.te],-0.0058,0.005,-1.120,0.263,-0.016,0.004
C(position)[T.wr],-0.0028,0.007,-0.403,0.687,-0.016,0.011
C(height_dummies)[T.tall],0.0037,0.006,0.649,0.517,-0.007,0.015
C(race)[T.W],0.0058,0.006,1.025,0.306,-0.005,0.017
z_DVOA,0.0072,0.002,3.891,0.000,0.004,0.011
youth,0.0011,0.002,0.659,0.510,-0.002,0.004

0,1,2,3
Omnibus:,239.514,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,265.232
Skew:,-0.032,Prob(JB):,2.54e-58
Kurtosis:,3.133,Cond. No.,880.0


### Performance + demo + position + salary + race + user + interactions

In [46]:
# Largest NFL specification: race interacted with the mean-centered DVOA,
# white_black_diff and clinton_vote_lead columns, plus total_population and
# the player / team covariates from the previous model.
# Unit weights; cluster-robust standard errors by Player.
nfl_model = smf.wls(
    formula=(
        'compound_mean ~ C(race) * demean_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'
        '+ C(race) * demean_race_diff  + C(race) * demean_clinton + total_population'
    ),
    data=nfl_df,
    weights=1,
).fit(cov_type='cluster', cov_kwds={'groups': nfl_df['Player']})

In [47]:
# Render the fit summary of `nfl_model` as the cell output.
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,6.238
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,1.08e-13
Time:,08:49:06,Log-Likelihood:,-96137.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292512,BIC:,192500.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0724,0.066,1.094,0.274,-0.057,0.202
C(race)[T.W],0.0066,0.006,1.179,0.238,-0.004,0.018
rookie[T.True],0.0177,0.007,2.464,0.014,0.004,0.032
C(position)[T.rb],0.0119,0.006,1.834,0.067,-0.001,0.025
C(position)[T.te],-0.0069,0.005,-1.269,0.204,-0.017,0.004
C(position)[T.wr],-0.0025,0.007,-0.374,0.708,-0.016,0.011
C(height_dummies)[T.tall],0.0038,0.006,0.668,0.504,-0.007,0.015
demean_DVOA,0.0051,0.004,1.444,0.149,-0.002,0.012
C(race)[T.W]:demean_DVOA,0.0039,0.004,0.989,0.323,-0.004,0.012

0,1,2,3
Omnibus:,238.327,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,264.004
Skew:,-0.031,Prob(JB):,4.7e-58
Kurtosis:,3.133,Cond. No.,326000000.0
