# Load Data

In [56]:
require(readstata13)
require(foreign)
require(multiwayvcov)
require(mlogit)
require(xtable)
require(stargazer)
require(ggplot2)
require(car)
require(mgcv)
require(reshape2)
require(nnet)
library('Matching')

#Load in replication data from B&W
county.data <- read.dta13("BW JOP county replication data.dta")
indiv.data <- read.dta("BW JOP individual replication data.dta")
state.data <- read.dta("BW JOP state replication data.dta")

# Replicating Table

### No Year Fixed Effects

In [58]:
###############################################################################################  
#TABLE 4

state.data.t4 <- state.data[ which(state.data$presyear==0 
                                   & state.data$year >= '1980'
                                   & state.data$GubElection == 1), ]

#The regression they ran, without year fixed effects
table4.1 <- lm(vep ~ uerate + incparty + uerate*incparty + s_black + college + SenElection
               + factor(fips_state), data=state.data.t4)
summary(table4.1)



Call:
lm(formula = vep ~ uerate + incparty + uerate * incparty + s_black + 
    college + SenElection + factor(fips_state), data = state.data.t4)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.121557 -0.018952 -0.000036  0.018290  0.092344 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)           0.105935   0.082429   1.285 0.199957    
uerate                0.057951   0.159455   0.363 0.716600    
incpartyR            -0.043711   0.012992  -3.364 0.000891 ***
s_black               1.112003   0.346261   3.211 0.001499 ** 
college               0.043146   0.054991   0.785 0.433451    
SenElection           0.013849   0.004497   3.080 0.002311 ** 
factor(fips_state)2   0.367554   0.080955   4.540 8.85e-06 ***
factor(fips_state)4   0.215977   0.081569   2.648 0.008632 ** 
factor(fips_state)5   0.107105   0.037167   2.882 0.004309 ** 
factor(fips_state)6   0.221354   0.069422   3.189 0.001618 ** 
factor(fips_state)8   0.281948 

# With Year Fixed Effects

In [3]:
#Re-calculate Table 4 regression with year fixed effects
table4 <- lm(vep ~ uerate + incparty + uerate*incparty + s_black + college + SenElection
             + factor(fips_state) + factor(year), data=state.data.t4)
summary(table4)


Call:
lm(formula = vep ~ uerate + incparty + uerate * incparty + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.121760 -0.017739 -0.001529  0.015975  0.094655 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           3.824e-02  8.238e-02   0.464 0.642911    
uerate                4.475e-01  2.185e-01   2.048 0.041665 *  
incpartyR            -3.815e-02  1.249e-02  -3.055 0.002513 ** 
s_black               1.217e+00  3.318e-01   3.668 0.000302 ***
college               1.756e-01  1.249e-01   1.406 0.161145    
SenElection           1.553e-02  4.295e-03   3.617 0.000365 ***
factor(fips_state)2   3.792e-01  7.754e-02   4.890 1.86e-06 ***
factor(fips_state)4   2.354e-01  7.816e-02   3.012 0.002876 ** 
factor(fips_state)5   1.219e-01  3.577e-02   3.408 0.000769 ***
factor(fips_state)6   2.279e-01  6.686e-02   3.409 0.000768 ***


### Takeaways
They did not control for Year even though they stated that they did
Once you control for Year, every coefficient goes to 0 (minimal effect). 

### Extension: Gen Matching

In [59]:
names(state.data.t4)

In [108]:
state.data.t4$s_black

In [60]:
# get quantiles on enemployment
ue_q = quantile(state.data.t4$uerate,c(.25,.75))

# make new dataset w control  beng <= 25% unemployment control 
# and treat being >= 75% unemployment
state.data.t4.match = state.data.t4[state.data.t4$uerate <= ue_q[1] | state.data.t4$uerate >= ue_q[2],]
state.data.t4.match$treat = ifelse(state.data.t4.match$uerate >= ue_q[2], 1, 0)

In [67]:
# sanity check to ensure we took 50% of the data
dim(state.data.t4.match)/dim(state.data.t4)

In [87]:
# remove NAs
state.data.t4.match <- state.data.t4.match[complete.cases(state.data.t4.match), ]
which(is.na(state.data.t4.match) == TRUE)

In [92]:
# Gen Match
library('Matching')
library('rgenoud')

Tr = state.data.t4.match$treat

X = cbind(state.data.t4.match$incparty, 
          state.data.t4.match$s_black, state.data.t4.match$college, 
          state.data.t4.match$SenElection, factor(state.data.t4.match$fips_state),
          factor(state.data.t4.match$year))


# X = cbind(state.data.t4.match$year,state.data.t4.match$fips_state,
#       state.data.t4.match$s_black, state.data.t4.match$SenElection,
#       state.data.t4.match$presyear, state.data.t4.match$college,
#       state.data.t4.match$college_diff, state.data.t4.match$pci_0000,
#       state.data.t4.match$prior_ue, state.data.t4.match$partyspending,
#       state.data.t4.match$priorue_diff, state.data.t4.match$spending_diff,
#       state.data.t4.match$share_open, state.data.t4.match$share_qual_out2,
#       state.data.t4.match$open_all_share, state.data.t4.match$quality_share_new,
#       state.data.t4.match$outparty_spend, state.data.t4.match$share_open_diff,
#       state.data.t4.match$open_all_share_diff, state.data.t4.match$share_qual_out2_diff,
#       state.data.t4.match$quality_share_diff, state.data.t4.match$outparty_spend_diff,
#       state.data.t4.match$s_black_diff, state.data.t4.match$pci0000_diff,
#       state.data.t4.match$senelec_diff, state.data.t4.match$incparty,
#       state.data.t4.match$GubElection, state.data.t4.match$vep,
#       state.data.t4.match$uerate)

genout1 = GenMatch(Tr = Tr, X = X,pop.size=200,max.generations=25)
mout1 = Match(Tr = Tr, X = X, Weight.matrix=genout1)
summary(mout1)



Mon Apr 22 20:35:42 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



In [93]:
mb1  <- MatchBalance(Tr ~  incparty + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = mout1, nboots=500)



***** (V1) incpartyR *****
                       Before Matching 	 	 After Matching
mean treatment........    0.23529 	 	    0.23529 
mean control..........    0.62745 	 	    0.64706 
std mean diff.........     -89.69 	 	    -94.174 

mean raw eQQ diff.....    0.35294 	 	    0.41176 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........    0.19608 	 	    0.20588 
med  eCDF diff........    0.19608 	 	    0.20588 
max  eCDF diff........    0.39216 	 	    0.41176 

var ratio (Tr/Co).....    0.80181 	 	    0.78788 
T-test p-value........  0.0040663 	 	   0.012067 


***** (V2) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.086503 	 	   0.086503 
mean control..........   0.085878 	 	   0.052245 
std mean diff.........    0.87156 	 	     47.762 

mean raw eQQ diff.....   0.020856 	 	   0.037865 
med  raw eQQ diff.....      0.005 	 	   0.038812 
max  raw eQQ diff.....   

In [98]:
# checking effect

mout1y = Match(Tr = Tr, X = X, Y= state.data.t4.match$vep, Weight.matrix=genout1)
summary(mout1y)


Estimate...  0.025294 
AI SE......  0.032148 
T-stat.....  0.78679 
p.val......  0.4314 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



#### Trying to Improve GenMatch

In [113]:
dim(state.data.t4.match)

In [111]:
genout2 = GenMatch(Tr = Tr, X = X,pop.size=200,max.generations=25,caliper=1)
mout2 = Match(Tr = Tr, X = X, Weight.matrix=genout2,caliper=1)
summary(mout2)



Mon Apr 22 21:31:34 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not

In [112]:
mb2  <- MatchBalance(Tr ~  incparty + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = mout2, nboots=500)



***** (V1) incpartyR *****
                       Before Matching 	 	 After Matching
mean treatment........    0.23529 	 	    0.33333 
mean control..........    0.62745 	 	    0.33333 
std mean diff.........     -89.69 	 	          0 

mean raw eQQ diff.....    0.35294 	 	          0 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          0 

mean eCDF diff........    0.19608 	 	          0 
med  eCDF diff........    0.19608 	 	          0 
max  eCDF diff........    0.39216 	 	          0 

var ratio (Tr/Co).....    0.80181 	 	          1 
T-test p-value........  0.0040663 	 	          1 


***** (V2) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.086503 	 	   0.040257 
mean control..........   0.085878 	 	   0.036908 
std mean diff.........    0.87156 	 	     13.169 

mean raw eQQ diff.....   0.020856 	 	  0.0050211 
med  raw eQQ diff.....      0.005 	 	  0.0042818 
max  raw eQQ diff.....   

In [114]:
## AYY WAY BETTER

# checking effect

mout2y = Match(Tr = Tr, X = X, Y = state.data.t4.match$vep, Weight.matrix=genout2,caliper=1)
summary(mout2y)


Estimate...  0.051667 
AI SE......  0.016005 
T-stat.....  3.2282 
p.val......  0.0012457 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  6 
Matched number of observations  (unweighted).  6 

Caliper (SDs)........................................   1 1 1 1 1 1 
Number of obs dropped by 'exact' or 'caliper'  11 



In [127]:
length(state.data.t4.match$vep)

In [135]:
dim(state.data.t4)

In [132]:
length(which(Tr==1))

In [133]:
Tr

In [134]:
68-17

In [117]:
mout1y

$est
           [,1]
[1,] 0.02529412

$se
[1] 0.03214844

$est.noadj
[1] 0.02529412

$se.standard
[1] 0.02150268

$se.cond
[1] 0

$mdata
$mdata$Y
 [1] 0.43 0.51 0.41 0.46 0.58 0.40 0.37 0.40 0.40 0.40 0.55 0.35 0.32 0.50 0.38
[16] 0.47 0.51 0.41 0.42 0.42 0.42 0.33 0.42 0.43 0.43 0.42 0.33 0.52 0.43 0.43
[31] 0.43 0.44 0.31 0.42

$mdata$Tr
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

$mdata$X
      [,1]        [,2]      [,3] [,4] [,5] [,6]
 [1,]    1 0.255858153 0.1383178    1    1    1
 [2,]    1 0.034211684 0.1933020    1    2    1
 [3,]    1 0.163146988 0.1188192    1    4    1
 [4,]    1 0.035189021 0.2176259    1    6    1
 [5,]    1 0.002872020 0.1467181    1   10    1
 [6,]    2 0.146542192 0.1954217    1   11    1
 [7,]    1 0.129255772 0.1553283    0   16    1
 [8,]    1 0.017707502 0.1689029    0   21    1
 [9,]    1 0.099720217 0.1641143    1   23    1
[10,]    1 0.067699298 0.1469053    1   24    1
[11,]    2 0.014224271 0.2115010    1   25    1

In [115]:
mout2y

$est
           [,1]
[1,] 0.05166668

$se
[1] 0.01600473

$est.noadj
[1] 0.05166668

$se.standard
[1] 0.03261021

$se.cond
[1] 0

$mdata
$mdata$Y
 [1] 0.51 0.46 0.40 0.55 0.47 0.51 0.42 0.42 0.50 0.52 0.31 0.42

$mdata$Tr
 [1] 1 1 1 1 1 1 0 0 0 0 0 0

$mdata$X
      [,1]       [,2]      [,3] [,4] [,5] [,6]
 [1,]    1 0.03421168 0.1933020    1    2    1
 [2,]    1 0.03518902 0.2176259    1    6    1
 [3,]    1 0.06769930 0.1469053    1   24    1
 [4,]    2 0.01422427 0.2115010    1   25    1
 [5,]    2 0.07422041 0.2652225    1    5    3
 [6,]    1 0.01600000 0.2934708    1   25    5
 [7,]    1 0.06971382 0.2338004    1    7    1
 [8,]    1 0.06971382 0.2338004    1    7    1
 [9,]    1 0.00232278 0.2003854    1   30    1
[10,]    2 0.03054226 0.1931983    1   18    2
[11,]    2 0.03015474 0.2369099    1    3    4
[12,]    1 0.01900000 0.2795580    1   21    6

$mdata$orig.weighted.treated.nobs
[1] 17


$index.treated
[1]  2  4 13 14 25 50

$index.control
[1]  5  5 18 23 32 65

$index.d

**Dropped too many**

While we got the exact same coefficient on state unemployment rate, we've dropped so many variables that it is no longer generalizable to the whole population (left with 6 treatment obs, dropped 11).

Since match balance can only be achieved when dropping a significant ammount of variables, we can conclude that the treatment and observation groups are statistically significantly different and hence there is already inherent selection bias in the observations that is not controlled for. The incumbent party, college, and other factors significantly affect unemployment rate in a way that cannot be controlled for given the data we have. Thus, to isolate the effect of unemployment 