In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import seaborn as sns
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Math Pass

In [2]:
math = pd.read_csv('mathpass_district_allstates.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['black_remote'] = math['black'] * math['remote']
math['his_remote'] = math['hispanic'] * math['remote']
math['low_remote'] = math['lowincome'] * math['remote']
math_school = math.drop(columns = ['districtcode', 'countycode', 'state'])
math_dummy = pd.get_dummies(math_school)

### School FE - No Interactions

In [3]:
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested', 'black_remote',
                                                                          'his_remote', 'low_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,13.335399,0.668986,19.933736,5.574506e-86,12.023977,14.646821
black,-0.130592,0.041014,-3.184104,0.00145855,-0.210991,-0.050192
hispanic,-0.128699,0.028078,-4.583664,4.650287e-06,-0.18374,-0.073658
white,-0.020981,0.029007,-0.723314,0.469512,-0.077843,0.035881
lowincome,-0.027123,0.013177,-2.058409,0.03958879,-0.052954,-0.001293


### School FE - Race Interactions

In [4]:
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested','his_remote', 
                                                                              'low_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,14.352339,0.681013,21.074975,1.416088e-95,13.01734,15.687337
black,-0.127712,0.040861,-3.125528,0.00178231,-0.207813,-0.047612
hispanic,-0.130148,0.027973,-4.652691,3.338083e-06,-0.184983,-0.075313
white,-0.018624,0.028899,-0.644429,0.5193186,-0.075275,0.038028
lowincome,-0.028435,0.013128,-2.165921,0.03035173,-0.054171,-0.002699
black_remote,-0.090421,0.012449,-7.263364,4.19467e-13,-0.114825,-0.066017


In [5]:
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested','black_remote', 
                                                                              'low_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,12.043108,0.866526,13.898154,2.51913e-43,10.344448,13.741768
black,-0.138106,0.041125,-3.358192,0.000788834,-0.218724,-0.057488
hispanic,-0.138001,0.028347,-4.868223,1.151364e-06,-0.193571,-0.082432
white,-0.030693,0.029291,-1.047842,0.2947486,-0.088113,0.026727
lowincome,-0.02779,0.013175,-2.109229,0.0349611,-0.053618,-0.001962
his_remote,0.016791,0.00716,2.345288,0.01904087,0.002756,0.030827


### School FE - Income Interaction

In [6]:
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested','black_remote', 
                                                                              'his_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'low_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,14.999931,0.881321,17.019832,1.1862249999999999e-63,13.272268,16.727594
black,-0.123965,0.041055,-3.019467,0.002541561,-0.204445,-0.043484
hispanic,-0.120154,0.028217,-4.258238,2.08807e-05,-0.175468,-0.06484
white,-0.007282,0.029374,-0.247926,0.8041992,-0.064864,0.050299
lowincome,-0.021114,0.013332,-1.583763,0.1132941,-0.047249,0.00502
low_remote,-0.022303,0.007693,-2.899036,0.003755041,-0.037384,-0.007222


### School FE - Charter Interaction

In [17]:
math = pd.read_csv('mathpass_district_allstates.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['char_remote'] = math['charter'] * math['remote']
math_school = math.drop(columns = ['districtcode', 'countycode', 'state'])
math_dummy = pd.get_dummies(math_school)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'charter', 'char_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,13.99628,0.676466,20.690298,2.740596e-92,12.670197,15.322364
black,-0.133989,0.040914,-3.274854,0.001062435,-0.214194,-0.053784
hispanic,-0.13044,0.028009,-4.657116,3.267377e-06,-0.185345,-0.075534
white,-0.020698,0.028934,-0.715368,0.4744063,-0.077418,0.036021
lowincome,-0.027424,0.013144,-2.086436,0.03697615,-0.053189,-0.001658
charter,-18.592442,0.757265,-24.55211,1.232766e-127,-20.076916,-17.107968
char_remote,-4.429147,0.743617,-5.956224,2.709558e-09,-5.886867,-2.971427


### School FE - District, Year Interaction

In [7]:
math = pd.read_csv('mathpass_district_allstates.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['district_year'] = math['districtcode'] + math['year']
math['county_year'] = math['countycode'] + math['year']
math['state_year'] = math['state'] + math['year']
math_school = math.drop(columns = ['districtcode', 'countycode', 'state', 'county_year', 'state_year'])
math_dummy = pd.get_dummies(math_school)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,-6.464024,2.664631,-2.42586,0.01531333,-11.688087,-1.239961
black,-0.230014,0.045055,-5.10521,3.448174e-07,-0.318344,-0.141683
hispanic,-0.168226,0.032696,-5.145162,2.792927e-07,-0.232327,-0.104125
white,-0.080129,0.035256,-2.272786,0.02308858,-0.149249,-0.011009
lowincome,-0.108513,0.020913,-5.188727,2.215669e-07,-0.149514,-0.067512


### School FE - County, Year Interaction

In [8]:
math = pd.read_csv('mathpass_district_allstates.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['district_year'] = math['districtcode'] + math['year']
math['county_year'] = math['countycode'] + math['year']
math['state_year'] = math['state'] + math['year']
math_school = math.drop(columns = ['districtcode', 'countycode', 'state', 'district_year', 'state_year'])
math_dummy = pd.get_dummies(math_school)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,-0.400842,1.130633,-0.354529,0.7229545,-2.617269,1.815585
black,-0.221577,0.039938,-5.548064,3.006423e-08,-0.299869,-0.143286
hispanic,-0.181644,0.027526,-6.599061,4.475952e-11,-0.235604,-0.127684
white,-0.058135,0.028847,-2.015332,0.0439124,-0.114684,-0.001586
lowincome,-0.070301,0.013206,-5.323403,1.053765e-07,-0.096189,-0.044413


### School FE - Sate, Year Interaction

In [9]:
math = pd.read_csv('mathpass_district_allstates.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['district_year'] = math['districtcode'] + math['year']
math['county_year'] = math['countycode'] + math['year']
math['state_year'] = math['state'] + math['year']
math_school = math.drop(columns = ['districtcode', 'countycode', 'state', 'district_year', 'county_year'])
math_dummy = pd.get_dummies(math_school)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,1.968711,0.74369,2.647219,0.00813433,0.510846,3.426576
black,-0.235325,0.038934,-6.044231,1.580341e-09,-0.311648,-0.159003
hispanic,-0.184929,0.026568,-6.960559,3.704977e-12,-0.23701,-0.132847
white,-0.038499,0.027366,-1.406822,0.1595258,-0.092144,0.015147
lowincome,-0.070864,0.01252,-5.660247,1.572912e-08,-0.095406,-0.046322


### District FE - No Interactions

In [10]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :13]
math_district = math_district.drop(columns = ['countycode', 'state'])
math_dummy = pd.get_dummies(math_district)
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'virtualper',
                                                                    'hybridper', 'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,9.640046,0.90799,10.616905,7.366424999999999e-26,7.859675,11.420417
black,-0.351805,0.038177,-9.21506,5.804599999999999e-20,-0.426662,-0.276948
hispanic,-0.336519,0.034129,-9.860273,1.3951720000000001e-22,-0.403438,-0.2696
white,-0.067156,0.035406,-1.896749,0.05796033,-0.13658,0.002267
lowincome,-0.097883,0.017524,-5.585679,2.543397e-08,-0.132244,-0.063522


### District FE - Race Interactions

In [11]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :13]
math_district['black_remote'] = math_district['black'] * math_district['remote']
math_district['his_remote'] = math_district['hispanic'] * math_district['remote']
math_district['low_remote'] = math_district['lowincome'] * math_district['remote']
math_district = math_district.drop(columns = ['countycode', 'state'])
math_dummy = pd.get_dummies(math_district)
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'virtualper',
                                                                    'hybridper', 'totaltested', 'his_remote',
                                                                             'low_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,10.739447,0.938764,11.439981,1.130931e-29,8.898734,12.580159
black,-0.35128,0.038056,-9.230516,5.046936e-20,-0.425901,-0.27666
hispanic,-0.336938,0.034021,-9.903905,9.15736e-23,-0.403646,-0.270231
white,-0.066768,0.035294,-1.891766,0.05862165,-0.135972,0.002436
lowincome,-0.096582,0.017471,-5.528193,3.522837e-08,-0.130839,-0.062326
black_remote,-0.104124,0.023592,-4.413555,1.053975e-05,-0.150382,-0.057865


In [12]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :13]
math_district['black_remote'] = math_district['black'] * math_district['remote']
math_district['his_remote'] = math_district['hispanic'] * math_district['remote']
math_district['low_remote'] = math_district['lowincome'] * math_district['remote']
math_district = math_district.drop(columns = ['countycode', 'state'])
math_dummy = pd.get_dummies(math_district)
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'virtualper',
                                                                    'hybridper', 'totaltested', 'black_remote',
                                                                             'low_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,6.499372,1.228246,5.29159,1.302934e-07,4.09105,8.907694
black,-0.362852,0.038201,-9.498424,4.301735e-21,-0.437757,-0.287948
hispanic,-0.350418,0.034248,-10.231828,3.655708e-24,-0.417571,-0.283266
white,-0.079376,0.035472,-2.237708,0.02531574,-0.148929,-0.009823
lowincome,-0.096788,0.017486,-5.535084,3.388503e-08,-0.131074,-0.062501
his_remote,0.044969,0.011876,3.786658,0.0001557825,0.021683,0.068255


### District FE - Income Interaction

In [13]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :13]
math_district['black_remote'] = math_district['black'] * math_district['remote']
math_district['his_remote'] = math_district['hispanic'] * math_district['remote']
math_district['low_remote'] = math_district['lowincome'] * math_district['remote']
math_district = math_district.drop(columns = ['countycode', 'state'])
math_dummy = pd.get_dummies(math_district)
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'virtualper',
                                                                    'hybridper', 'totaltested', 'his_remote',
                                                                             'black_remote']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'low_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,7.727434,1.220376,6.33201,2.794861e-10,5.334542,10.120326
black,-0.354261,0.038162,-9.283061,3.129741e-20,-0.429088,-0.279433
hispanic,-0.339521,0.034126,-9.948911,5.919145e-23,-0.406435,-0.272606
white,-0.075028,0.035538,-2.111218,0.03483894,-0.14471,-0.005346
lowincome,-0.105675,0.017823,-5.929043,3.40678e-09,-0.140623,-0.070728
low_remote,0.0302,0.012887,2.343376,0.01917729,0.004931,0.055469


### District FE - Charter Interaction

In [21]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :14]
math_district['char_remote'] = math_district['charter'] * math_district['remote']
math_district = math_district.drop(columns = ['countycode', 'state'])
math_dummy = pd.get_dummies(math_district)
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'virtualper',
                                                                    'hybridper', 'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome', 'charter', 'char_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,9.921134,0.925564,10.719015,2.560151e-26,8.106305,11.735963
black,-0.352552,0.038171,-9.236155,4.7952079999999996e-20,-0.427397,-0.277707
hispanic,-0.337289,0.034124,-9.88421,1.107793e-22,-0.404198,-0.270379
white,-0.067291,0.035398,-1.900994,0.05740176,-0.136697,0.002116
lowincome,-0.097841,0.01752,-5.584656,2.55833e-08,-0.132194,-0.063489
charter,8.691387,0.976913,8.896785,9.901199999999999e-19,6.775873,10.606901
char_remote,-2.222422,1.428128,-1.556179,0.1197747,-5.02267,0.577826


### District FE - County, Year Interaction

In [14]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :13]
math_district['county_year'] = math_district['countycode'] + math_district['year']
math_district['state_year'] = math_district['state'] + math_district['year']
math_district = math_district.drop(columns = ['countycode', 'state', 'state_year'])
math_dummy = pd.get_dummies(math_school)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,1.968711,0.74369,2.647219,0.00813433,0.510846,3.426576
black,-0.235325,0.038934,-6.044231,1.580341e-09,-0.311648,-0.159003
hispanic,-0.184929,0.026568,-6.960559,3.704977e-12,-0.23701,-0.132847
white,-0.038499,0.027366,-1.406822,0.1595258,-0.092144,0.015147
lowincome,-0.070864,0.01252,-5.660247,1.572912e-08,-0.095406,-0.046322


### District FE - State, Year Interaction

In [15]:
math_district = math.groupby(['districtcode', 'countycode', 'state', 'year']).mean().reset_index().iloc[:, :13]
math_district['county_year'] = math_district['countycode'] + math_district['year']
math_district['state_year'] = math_district['state'] + math_district['year']
math_district = math_district.drop(columns = ['countycode', 'state', 'county_year'])
math_dummy = pd.get_dummies(math_school)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)
wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['remote', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
remote,1.968711,0.74369,2.647219,0.00813433,0.510846,3.426576
black,-0.235325,0.038934,-6.044231,1.580341e-09,-0.311648,-0.159003
hispanic,-0.184929,0.026568,-6.960559,3.704977e-12,-0.23701,-0.132847
white,-0.038499,0.027366,-1.406822,0.1595258,-0.092144,0.015147
lowincome,-0.070864,0.01252,-5.660247,1.572912e-08,-0.095406,-0.046322
