# User-player regression
Regressions targeting sentiment with covariates from both users and players. Single rows are determined by user-player-year triplet key

### Imports / load

In [1]:
import pandas as pd
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
nba_df = pd.read_csv('c:/Users/map22/Google Drive/sentiment_nba/nba_user_player_sentiment.tsv', sep='\t')
nba_df = nba_df.dropna(subset=['Race', 'PPG']) # get some name matches for years players weren't playing / coaches
nba_df['rookie'] = nba_df['experience']<=1
nba_df = nba_df.replace({'M':'B', 'L':'W'}).query('Race == "W" or Race == "B"')

  interactivity=interactivity, compiler=compiler, result=result)


#### Get salary residuals for NBA

In [3]:
nba_cov_df = pd.read_csv('modeling_data/nba_model_data.tsv', sep='\t')
nba_cov_df['rookie_contract'] = nba_cov_df['experience'] <=4

In [4]:
salary_model = smf.wls( formula = 'standard_salary ~ rookie + MP + PPG + ThreePP + DWS + TRBP * height_dummies + AST + BLKP * height_dummies + STLP + TOVP',
                data = nba_cov_df, weights = nba_cov_df['G']).fit()

In [5]:
nba_cov_df['pred_std_salary'] = salary_model.predict(nba_cov_df)
nba_cov_df['std_salary_resid'] = nba_cov_df['pred_std_salary']- nba_cov_df['standard_salary']
nba_cov_df.loc[ nba_cov_df['G'] <20, 'std_salary_resid'] = 0

In [6]:
nba_df = nba_df.merge(nba_cov_df[['Player','year', 'std_salary_resid']],
                      on=['Player', 'year'], how = 'left')

In [7]:
for col in ['FTr','TOVP', 'standard_salary', 'std_salary_resid', 'clinton_vote_lead', 'white_black_diff', 'total_population' ]:
    nba_df[col] = nba_df[col].fillna(nba_df[col].mean())

In [8]:
nba_df['demean_PPG'] = nba_df['PPG'] - nba_df['PPG'].mean()
nba_df['demean_clinton'] = nba_df['clinton_vote_lead'] - nba_df['clinton_vote_lead'].mean()
nba_df['demean_race_diff'] = nba_df['white_black_diff'] - nba_df['white_black_diff'].mean()

#### NFL

In [9]:
nfl_df = pd.read_csv('c:/Users/map22/Google Drive/sentiment_nba/nfl_user_player_sentiment.tsv', sep='\t')
nfl_df = nfl_df.dropna(subset=['race']) # get some name matches for years players weren't playing / coaches
nfl_df['rookie'] = nfl_df['experience'] <=1
nfl_df['race'] = nfl_df['race'].replace({'L':'B', 'S':'B', 'M':'B'})

In [10]:
nfl_cov_df = pd.read_csv('modeling_data/nfl_model_data.tsv', sep='\t')
nfl_cov_df['rookie_contract'] = nfl_cov_df['experience'] <=4

In [11]:
salary_model = smf.wls( formula = 'standard_salary ~ rookie_contract + position + z_DVOA',
                data = nfl_cov_df, weights = 1).fit()

In [12]:
nfl_cov_df['pred_std_salary'] = salary_model.predict(nfl_cov_df)
nfl_cov_df['std_salary_resid'] = nfl_cov_df['pred_std_salary']- nfl_cov_df['standard_salary']

In [13]:
nfl_df = nfl_df.merge(nfl_cov_df[['Player','year', 'std_salary_resid']],
                      on=['Player', 'year'], how = 'left')

In [14]:
for col in [ 'std_salary_resid', 'clinton_vote_lead', 'white_black_diff','total_population' ]:
    nfl_df[col] = nfl_df[col].fillna(nfl_df[col].mean())

In [15]:
nfl_df['demean_DVOA'] = nfl_df['z_DVOA'] - nfl_df['z_DVOA'].mean()
nfl_df['demean_clinton'] = nfl_df['clinton_vote_lead'] - nfl_df['clinton_vote_lead'].mean()
nfl_df['demean_race_diff'] = nfl_df['white_black_diff'] - nfl_df['white_black_diff'].mean()

## NBA
#### Performance only model

In [28]:
model = smf.wls( formula = 'compound_mean ~ MP + PPG', \
#                       ' PPG +  + total_population+  * white_black_diff + C(Race) * clinton_vote_lead',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit()

In [29]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,97.58
Date:,"Sun, 27 Jan 2019",Prob (F-statistic):,4.2400000000000006e-43
Time:,18:07:45,Log-Likelihood:,-280410.0
No. Observations:,880544,AIC:,560800.0
Df Residuals:,880541,BIC:,560900.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0702,0.001,75.899,0.000,0.068,0.072
MP,5.359e-08,6.41e-07,0.084,0.933,-1.2e-06,1.31e-06
PPG,0.0006,6.5e-05,9.505,0.000,0.000,0.001

0,1,2,3
Omnibus:,1086.581,Durbin-Watson:,1.476
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1215.436
Skew:,-0.045,Prob(JB):,1.18e-264
Kurtosis:,3.158,Cond. No.,5460.0


#### Full performance

In [18]:
model = smf.wls( formula = 'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP', \
#                       ' PPG +  + total_population+  * white_black_diff + C(Race) * clinton_vote_lead',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit()

In [19]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,55.24
Date:,"Sun, 27 Jan 2019",Prob (F-statistic):,2.96e-112
Time:,17:52:31,Log-Likelihood:,-280230.0
No. Observations:,880544,AIC:,560500.0
Df Residuals:,880533,BIC:,560600.0
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0787,0.002,33.003,0.000,0.074,0.083
MP,1.021e-06,8.85e-07,1.154,0.249,-7.13e-07,2.76e-06
PPG,0.0007,9.04e-05,7.878,0.000,0.001,0.001
ThreePP,0.0100,0.003,3.051,0.002,0.004,0.016
DWS,-0.0009,0.001,-1.723,0.085,-0.002,0.000
TRBP,-0.0011,0.000,-9.593,0.000,-0.001,-0.001
FTr,-0.0067,0.003,-2.434,0.015,-0.012,-0.001
ASTP,-0.0001,5e-05,-2.780,0.005,-0.000,-4.1e-05
BLKP,0.0029,0.000,8.994,0.000,0.002,0.004

0,1,2,3
Omnibus:,1075.513,Durbin-Watson:,1.477
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1202.638
Skew:,-0.044,Prob(JB):,7.09e-262
Kurtosis:,3.158,Cond. No.,21400.0


#### Full performance + height / age

In [27]:
model = smf.wls( formula = 'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP' \
                       ' + C(height_dummies) + rookie + youth + oldness',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit()

In [28]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,81.82
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,3.3e-252
Time:,07:26:49,Log-Likelihood:,-281880.0
No. Observations:,886123,AIC:,563800.0
Df Residuals:,886107,BIC:,564000.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0660,0.002,26.496,0.000,0.061,0.071
C(height_dummies)[T.short],0.0108,0.002,5.055,0.000,0.007,0.015
C(height_dummies)[T.tall],-0.0020,0.001,-1.922,0.055,-0.004,3.84e-05
rookie[T.True],0.0148,0.001,10.022,0.000,0.012,0.018
MP,-1.574e-06,8.87e-07,-1.774,0.076,-3.31e-06,1.65e-07
PPG,0.0011,9.36e-05,12.094,0.000,0.001,0.001
ThreePP,0.0125,0.003,3.792,0.000,0.006,0.019
DWS,0.0009,0.001,1.831,0.067,-6.68e-05,0.002
TRBP,-0.0010,0.000,-7.878,0.000,-0.001,-0.001

0,1,2,3
Omnibus:,1072.458,Durbin-Watson:,1.479
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1200.485
Skew:,-0.044,Prob(JB):,2.08e-261
Kurtosis:,3.158,Cond. No.,21700.0


#### Full performance + height / age + salary

In [20]:
model = smf.wls( formula = 'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP' \
                       ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit(cov_type='cluster', cov_kwds={'groups' : nba_df['Player']})

In [21]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,6.232
Date:,"Sun, 27 Jan 2019",Prob (F-statistic):,6.68e-14
Time:,17:55:35,Log-Likelihood:,-279890.0
No. Observations:,880544,AIC:,559800.0
Df Residuals:,880526,BIC:,560000.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0632,0.013,4.918,0.000,0.038,0.088
C(height_dummies)[T.short],0.0115,0.010,1.137,0.256,-0.008,0.031
C(height_dummies)[T.tall],-0.0019,0.005,-0.373,0.709,-0.012,0.008
rookie[T.True],0.0146,0.005,2.975,0.003,0.005,0.024
MP,-1.185e-06,2.99e-06,-0.396,0.692,-7.05e-06,4.68e-06
PPG,0.0011,0.000,2.241,0.025,0.000,0.002
ThreePP,0.0124,0.014,0.875,0.382,-0.015,0.040
DWS,0.0007,0.002,0.375,0.707,-0.003,0.005
TRBP,-0.0010,0.001,-2.014,0.044,-0.002,-2.76e-05

0,1,2,3
Omnibus:,1065.698,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1193.334
Skew:,-0.044,Prob(JB):,7.43e-260
Kurtosis:,3.158,Cond. No.,27400.0


#### Full performance + height / age + salary + Team wins

In [74]:
model = smf.wls( formula = 'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP' \
                       ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit(cov_type='cluster', cov_kwds={'groups' : nba_df['Player']})

In [75]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.98
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,8.53e-14
Time:,08:02:03,Log-Likelihood:,-279890.0
No. Observations:,880544,AIC:,559800.0
Df Residuals:,880525,BIC:,560000.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0678,0.014,5.010,0.000,0.041,0.094
C(height_dummies)[T.short],0.0115,0.010,1.143,0.253,-0.008,0.031
C(height_dummies)[T.tall],-0.0020,0.005,-0.400,0.689,-0.012,0.008
rookie[T.True],0.0146,0.005,2.977,0.003,0.005,0.024
MP,-1.924e-06,3.22e-06,-0.597,0.550,-8.24e-06,4.39e-06
PPG,0.0011,0.001,2.164,0.030,0.000,0.002
ThreePP,0.0129,0.014,0.903,0.367,-0.015,0.041
DWS,0.0016,0.002,0.683,0.495,-0.003,0.006
TRBP,-0.0011,0.001,-2.075,0.038,-0.002,-6.08e-05

0,1,2,3
Omnibus:,1065.367,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1192.863
Skew:,-0.044,Prob(JB):,9.4e-260
Kurtosis:,3.158,Cond. No.,28900.0


#### Full performance + height / age + salary + race of player

In [22]:
model = smf.wls( formula = 'compound_mean ~ MP + PPG + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP' \
                       ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + C(Race)',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit(cov_type='cluster', cov_kwds={'groups' : nba_df['Player']})

In [23]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,6.556
Date:,"Sun, 27 Jan 2019",Prob (F-statistic):,1.67e-15
Time:,17:57:51,Log-Likelihood:,-279880.0
No. Observations:,880544,AIC:,559800.0
Df Residuals:,880525,BIC:,560000.0
Df Model:,18,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0623,0.012,4.996,0.000,0.038,0.087
C(height_dummies)[T.short],0.0120,0.010,1.190,0.234,-0.008,0.032
C(height_dummies)[T.tall],-0.0026,0.005,-0.499,0.617,-0.013,0.008
rookie[T.True],0.0143,0.005,2.921,0.003,0.005,0.024
C(Race)[T.W],0.0054,0.007,0.722,0.471,-0.009,0.020
MP,-1.21e-06,2.96e-06,-0.409,0.683,-7.02e-06,4.6e-06
PPG,0.0012,0.001,2.330,0.020,0.000,0.002
ThreePP,0.0104,0.015,0.692,0.489,-0.019,0.040
DWS,0.0007,0.002,0.364,0.716,-0.003,0.005

0,1,2,3
Omnibus:,1067.67,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1195.438
Skew:,-0.044,Prob(JB):,2.5899999999999998e-260
Kurtosis:,3.158,Cond. No.,27400.0


#### Full performance + height / age + salary + race of player + city vote

In [24]:
model = smf.wls( formula = 'compound_mean ~ MP + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP' \
                       ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins + ' \
                'team_match + C(Race) * demean_clinton +total_population + C(Race) * demean_PPG',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit(cov_type='cluster', cov_kwds={'groups' : nba_df['Player']})

In [25]:
model.summary()



0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,5.904
Date:,"Sun, 27 Jan 2019",Prob (F-statistic):,1.86e-16
Time:,18:01:52,Log-Likelihood:,-279780.0
No. Observations:,880544,AIC:,559600.0
Df Residuals:,880519,BIC:,559900.0
Df Model:,24,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0810,0.018,4.426,0.000,0.045,0.117
C(height_dummies)[T.short],0.0113,0.010,1.128,0.259,-0.008,0.031
C(height_dummies)[T.tall],-0.0019,0.005,-0.383,0.702,-0.012,0.008
rookie[T.True],0.0160,0.005,2.999,0.003,0.006,0.027
team_match[T.True],0.0024,0.002,1.314,0.189,-0.001,0.006
C(Race)[T.W],0.0167,0.010,1.614,0.107,-0.004,0.037
MP,-2.965e-06,3.26e-06,-0.909,0.363,-9.35e-06,3.43e-06
ThreePP,0.0106,0.015,0.725,0.468,-0.018,0.039
DWS,0.0013,0.002,0.571,0.568,-0.003,0.006

0,1,2,3
Omnibus:,1067.207,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1194.691
Skew:,-0.044,Prob(JB):,3.77e-260
Kurtosis:,3.158,Cond. No.,127000000.0


#### Full performance + height / age + salary + race of player + city demographics

In [18]:
model = smf.wls( formula = 'compound_mean ~ MP + ThreePP + DWS + TRBP + FTr + ASTP + BLKP + STLP + TOVP' \
                       ' + C(height_dummies) + rookie + youth + oldness + standard_salary + std_salary_resid + Wins + ' \
                'team_match + C(Race) * demean_race_diff +total_population + C(Race) * demean_PPG',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit(cov_type='cluster', cov_kwds={'groups' : nba_df['Player']})

In [19]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,5.953
Date:,"Mon, 21 Jan 2019",Prob (F-statistic):,3.35e-17
Time:,18:30:35,Log-Likelihood:,-279780.0
No. Observations:,880544,AIC:,559600.0
Df Residuals:,880519,BIC:,559900.0
Df Model:,24,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0802,0.018,4.402,0.000,0.045,0.116
C(height_dummies)[T.short],0.0113,0.010,1.124,0.261,-0.008,0.031
C(height_dummies)[T.tall],-0.0019,0.005,-0.383,0.702,-0.012,0.008
rookie[T.True],0.0161,0.005,3.003,0.003,0.006,0.027
team_match[T.True],0.0023,0.002,1.280,0.201,-0.001,0.006
C(Race)[T.W],0.0166,0.010,1.604,0.109,-0.004,0.037
MP,-2.924e-06,3.27e-06,-0.895,0.371,-9.33e-06,3.48e-06
ThreePP,0.0105,0.015,0.725,0.469,-0.018,0.039
DWS,0.0013,0.002,0.559,0.576,-0.003,0.006

0,1,2,3
Omnibus:,1065.907,Durbin-Watson:,1.478
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1193.235
Skew:,-0.044,Prob(JB):,7.8e-260
Kurtosis:,3.158,Cond. No.,127000000.0


#### Full performance + height / age + salary + race of player + city demographics

In [26]:
model = smf.wls( formula = 'compound_mean ~ MP + C(Race) * demean_PPG',
                data = nba_df,
               weights = 1,# / (nba_df['compound_mean_std'] / np.sqrt(fit_df['user_count'])),
#                missing='raise'   
               ).fit(cov_type='cluster', cov_kwds={'groups' : nba_df['Player']})

In [27]:
model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,3.813
Date:,"Sun, 27 Jan 2019",Prob (F-statistic):,0.00443
Time:,18:06:05,Log-Likelihood:,-280260.0
No. Observations:,880544,AIC:,560500.0
Df Residuals:,880539,BIC:,560600.0
Df Model:,4,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0826,0.005,15.122,0.000,0.072,0.093
C(Race)[T.W],0.0164,0.010,1.632,0.103,-0.003,0.036
MP,-1.533e-06,2.84e-06,-0.539,0.590,-7.11e-06,4.04e-06
demean_PPG,0.0005,0.000,1.564,0.118,-0.000,0.001
C(Race)[T.W]:demean_PPG,0.0029,0.001,2.591,0.010,0.001,0.005

0,1,2,3
Omnibus:,1094.056,Durbin-Watson:,1.477
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1222.681
Skew:,-0.045,Prob(JB):,3.15e-266
Kurtosis:,3.158,Cond. No.,7760.0


## NFL

### z_DVOA only

In [23]:
nfl_model = smf.wls( formula = 'compound_mean ~ z_DVOA',#\
                       #' + white_black_diff * C(race) + clinton_vote_lead',',#'
                data = nfl_df, weights = 1 
               ).fit(cov_type='cluster', cov_kwds={'groups' : nfl_df['Player']})

In [24]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,9.575
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,0.0021
Time:,08:34:59,Log-Likelihood:,-96293.0
No. Observations:,292531,AIC:,192600.0
Df Residuals:,292529,BIC:,192600.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0670,0.002,31.603,0.000,0.063,0.071
z_DVOA,0.0057,0.002,3.094,0.002,0.002,0.009

0,1,2,3
Omnibus:,237.333,Durbin-Watson:,1.957
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.144
Skew:,-0.031,Prob(JB):,7.230000000000001e-58
Kurtosis:,3.133,Cond. No.,1.46


### Performance + demo + position + salary

In [32]:
nfl_model = smf.wls( formula = 'compound_mean ~ z_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid',#\
                       #' + white_black_diff * C(race) + clinton_vote_lead',',#'
                data = nfl_df, weights = 1, 
               ).fit(cov_type='cluster', cov_kwds={'groups' : nfl_df['Player']})

In [33]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.489
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,3.16e-08
Time:,08:39:40,Log-Likelihood:,-96157.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292519,BIC:,192500.0
Df Model:,11,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0683,0.067,1.014,0.310,-0.064,0.200
rookie[T.True],0.0172,0.007,2.316,0.021,0.003,0.032
C(position)[T.rb],0.0077,0.006,1.273,0.203,-0.004,0.020
C(position)[T.te],-0.0060,0.005,-1.188,0.235,-0.016,0.004
C(position)[T.wr],-0.0063,0.006,-0.976,0.329,-0.019,0.006
C(height_dummies)[T.tall],0.0041,0.006,0.727,0.468,-0.007,0.015
z_DVOA,0.0074,0.002,3.963,0.000,0.004,0.011
youth,0.0009,0.002,0.542,0.588,-0.002,0.004
oldness,-0.0006,0.001,-0.764,0.445,-0.002,0.001

0,1,2,3
Omnibus:,239.797,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,265.519
Skew:,-0.032,Prob(JB):,2.2e-58
Kurtosis:,3.133,Cond. No.,879.0


### Performance + demo + position + salary + race

In [38]:
nfl_model = smf.wls( formula = 'compound_mean ~ z_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'\
                       '+ C(race)',#' + white_black_diff *  + clinton_vote_lead',',#'
                data = nfl_df, weights = 1, 
               ).fit(cov_type='cluster', cov_kwds={'groups' : nfl_df['Player']})

In [39]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.241
Date:,"Mon, 24 Dec 2018",Prob (F-statistic):,2.88e-08
Time:,08:42:56,Log-Likelihood:,-96152.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292518,BIC:,192500.0
Df Model:,12,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0667,0.067,0.995,0.320,-0.065,0.198
rookie[T.True],0.0172,0.007,2.310,0.021,0.003,0.032
C(position)[T.rb],0.0117,0.006,1.818,0.069,-0.001,0.024
C(position)[T.te],-0.0058,0.005,-1.120,0.263,-0.016,0.004
C(position)[T.wr],-0.0028,0.007,-0.403,0.687,-0.016,0.011
C(height_dummies)[T.tall],0.0037,0.006,0.649,0.517,-0.007,0.015
C(race)[T.W],0.0058,0.006,1.025,0.306,-0.005,0.017
z_DVOA,0.0072,0.002,3.891,0.000,0.004,0.011
youth,0.0011,0.002,0.659,0.510,-0.002,0.004

0,1,2,3
Omnibus:,239.514,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,265.232
Skew:,-0.032,Prob(JB):,2.54e-58
Kurtosis:,3.133,Cond. No.,880.0


### Performance + demo + position + salary + race + user + city vote

In [23]:
nfl_model = smf.wls( formula = 'compound_mean ~ C(race) * demean_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'\
                       '+ team_match + C(race) * demean_clinton + total_population',
                data = nfl_df, weights = 1, 
               ).fit(cov_type='cluster', cov_kwds={'groups' : nfl_df['Player']})

In [24]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,7.067
Date:,"Mon, 21 Jan 2019",Prob (F-statistic):,3.01e-15
Time:,18:36:25,Log-Likelihood:,-96140.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292513,BIC:,192500.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0715,0.066,1.086,0.278,-0.058,0.200
C(race)[T.W],0.0067,0.006,1.198,0.231,-0.004,0.018
rookie[T.True],0.0176,0.007,2.462,0.014,0.004,0.032
C(position)[T.rb],0.0120,0.006,1.865,0.062,-0.001,0.025
C(position)[T.te],-0.0070,0.005,-1.321,0.186,-0.017,0.003
C(position)[T.wr],-0.0024,0.007,-0.352,0.725,-0.015,0.011
C(height_dummies)[T.tall],0.0040,0.006,0.711,0.477,-0.007,0.015
team_match[T.True],0.0023,0.002,0.912,0.362,-0.003,0.007
demean_DVOA,0.0051,0.004,1.433,0.152,-0.002,0.012

0,1,2,3
Omnibus:,237.391,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.085
Skew:,-0.031,Prob(JB):,7.450000000000001e-58
Kurtosis:,3.133,Cond. No.,326000000.0


### Performance + demo + position + salary + race + user + city demo

In [29]:
nfl_model = smf.wls( formula = 'compound_mean ~ C(race) * demean_DVOA + youth + oldness + rookie + C(position) + C(height_dummies) + Wins + standard_salary + std_salary_resid'\
                       '+ team_match + C(race) * demean_race_diff + total_population',
                data = nfl_df, weights = 1, 
               ).fit(cov_type='cluster', cov_kwds={'groups' : nfl_df['Player']})

In [30]:
nfl_model.summary()

0,1,2,3
Dep. Variable:,compound_mean,R-squared:,0.001
Model:,WLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.194
Date:,"Mon, 21 Jan 2019",Prob (F-statistic):,1.86e-10
Time:,18:38:22,Log-Likelihood:,-96144.0
No. Observations:,292531,AIC:,192300.0
Df Residuals:,292513,BIC:,192500.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0698,0.066,1.054,0.292,-0.060,0.200
C(race)[T.W],0.0067,0.006,1.181,0.238,-0.004,0.018
rookie[T.True],0.0175,0.007,2.446,0.014,0.003,0.032
C(position)[T.rb],0.0119,0.006,1.832,0.067,-0.001,0.025
C(position)[T.te],-0.0068,0.005,-1.279,0.201,-0.017,0.004
C(position)[T.wr],-0.0026,0.007,-0.389,0.698,-0.016,0.011
C(height_dummies)[T.tall],0.0036,0.006,0.642,0.521,-0.007,0.015
team_match[T.True],0.0020,0.003,0.789,0.430,-0.003,0.007
demean_DVOA,0.0051,0.004,1.439,0.150,-0.002,0.012

0,1,2,3
Omnibus:,237.438,Durbin-Watson:,1.958
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.086
Skew:,-0.031,Prob(JB):,7.44e-58
Kurtosis:,3.133,Cond. No.,326000000.0
