# Load Data

In [36]:
require(readstata13)
require(foreign)
require(multiwayvcov)
require(mlogit)
require(xtable)
require(stargazer)
require(ggplot2)
require(car)
require(mgcv)
require(reshape2)
require(nnet)
library('Matching')

#Load in replication data from B&W
county.data <- read.dta13("BW JOP county replication data.dta")
indiv.data <- read.dta("BW JOP individual replication data.dta")
state.data <- read.dta("BW JOP state replication data.dta")

# Replicating Table 4

### No Year Fixed Effects

In [37]:
###############################################################################################  
#TABLE 4

state.data.t4 <- state.data[ which(state.data$presyear==0 
                                   & state.data$year >= '1980'
                                   & state.data$GubElection == 1), ]

#The regression they ran, without year fixed effects
table4.1 <- lm(vep ~ uerate + incparty + uerate*incparty + s_black + college + SenElection
               + factor(fips_state), data=state.data.t4)
summary(table4.1)



Call:
lm(formula = vep ~ uerate + incparty + uerate * incparty + s_black + 
    college + SenElection + factor(fips_state), data = state.data.t4)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.121557 -0.018952 -0.000036  0.018290  0.092344 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)           0.105935   0.082429   1.285 0.199957    
uerate                0.057951   0.159455   0.363 0.716600    
incpartyR            -0.043711   0.012992  -3.364 0.000891 ***
s_black               1.112003   0.346261   3.211 0.001499 ** 
college               0.043146   0.054991   0.785 0.433451    
SenElection           0.013849   0.004497   3.080 0.002311 ** 
factor(fips_state)2   0.367554   0.080955   4.540 8.85e-06 ***
factor(fips_state)4   0.215977   0.081569   2.648 0.008632 ** 
factor(fips_state)5   0.107105   0.037167   2.882 0.004309 ** 
factor(fips_state)6   0.221354   0.069422   3.189 0.001618 ** 
factor(fips_state)8   0.281948 

### With Year Fixed Effects

In [38]:
#Re-calculate Table 4 regression with year fixed effects
table4 <- lm(vep ~ uerate + incparty + uerate*incparty + s_black + college + SenElection
             + factor(fips_state) + factor(year), data=state.data.t4)
summary(table4)


Call:
lm(formula = vep ~ uerate + incparty + uerate * incparty + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.121760 -0.017739 -0.001529  0.015975  0.094655 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           3.824e-02  8.238e-02   0.464 0.642911    
uerate                4.475e-01  2.185e-01   2.048 0.041665 *  
incpartyR            -3.815e-02  1.249e-02  -3.055 0.002513 ** 
s_black               1.217e+00  3.318e-01   3.668 0.000302 ***
college               1.756e-01  1.249e-01   1.406 0.161145    
SenElection           1.553e-02  4.295e-03   3.617 0.000365 ***
factor(fips_state)2   3.792e-01  7.754e-02   4.890 1.86e-06 ***
factor(fips_state)4   2.354e-01  7.816e-02   3.012 0.002876 ** 
factor(fips_state)5   1.219e-01  3.577e-02   3.408 0.000769 ***
factor(fips_state)6   2.279e-01  6.686e-02   3.409 0.000768 ***


### Takeaways
They did not control for Year even though they stated that they did
Once you control for Year, every coefficient goes to 0 (minimal effect). 

# Extension: Gen Matching

In [42]:
# get quantiles on enemployment
ue_q = quantile(state.data.t4$uerate,c(.25,.75))

# make new dataset w control  beng <= 25% unemployment control 
# and treat being >= 75% unemployment
state.data.t4.match = state.data.t4[state.data.t4$uerate <= ue_q[1] | state.data.t4$uerate >= ue_q[2],]
state.data.t4.match$treat = ifelse(state.data.t4.match$uerate >= ue_q[2], 1, 0)

In [43]:
# remove NAs
state.data.t4.match <- state.data.t4.match[complete.cases(state.data.t4.match), ]
which(is.na(state.data.t4.match) == TRUE)

In [17]:
# Gen Match
library('Matching')
library('rgenoud')

Tr = state.data.t4.match$treat

X = cbind(state.data.t4.match$incparty, 
          state.data.t4.match$s_black, state.data.t4.match$college, 
          state.data.t4.match$SenElection, factor(state.data.t4.match$fips_state),
          factor(state.data.t4.match$year))


# X = cbind(state.data.t4.match$year,state.data.t4.match$fips_state,
#       state.data.t4.match$s_black, state.data.t4.match$SenElection,
#       state.data.t4.match$presyear, state.data.t4.match$college,
#       state.data.t4.match$college_diff, state.data.t4.match$pci_0000,
#       state.data.t4.match$prior_ue, state.data.t4.match$partyspending,
#       state.data.t4.match$priorue_diff, state.data.t4.match$spending_diff,
#       state.data.t4.match$share_open, state.data.t4.match$share_qual_out2,
#       state.data.t4.match$open_all_share, state.data.t4.match$quality_share_new,
#       state.data.t4.match$outparty_spend, state.data.t4.match$share_open_diff,
#       state.data.t4.match$open_all_share_diff, state.data.t4.match$share_qual_out2_diff,
#       state.data.t4.match$quality_share_diff, state.data.t4.match$outparty_spend_diff,
#       state.data.t4.match$s_black_diff, state.data.t4.match$pci0000_diff,
#       state.data.t4.match$senelec_diff, state.data.t4.match$incparty,
#       state.data.t4.match$GubElection, state.data.t4.match$vep,
#       state.data.t4.match$uerate)

genout1 = GenMatch(Tr = Tr, X = X,pop.size=200,max.generations=25)
mout1 = Match(Tr = Tr, X = X, Weight.matrix=genout1)
summary(mout1)



Wed Apr 24 19:17:09 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



In [18]:
mb1  <- MatchBalance(Tr ~  incparty + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = mout1, nboots=500)



***** (V1) incpartyR *****
                       Before Matching 	 	 After Matching
mean treatment........    0.23529 	 	    0.23529 
mean control..........    0.62745 	 	    0.47059 
std mean diff.........     -89.69 	 	    -53.814 

mean raw eQQ diff.....    0.35294 	 	    0.23529 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........    0.19608 	 	    0.11765 
med  eCDF diff........    0.19608 	 	    0.11765 
max  eCDF diff........    0.39216 	 	    0.23529 

var ratio (Tr/Co).....    0.80181 	 	    0.72222 
T-test p-value........  0.0040663 	 	   0.036151 


***** (V2) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.086503 	 	   0.086503 
mean control..........   0.085878 	 	   0.039729 
std mean diff.........    0.87156 	 	     65.212 

mean raw eQQ diff.....   0.020856 	 	   0.048657 
med  raw eQQ diff.....      0.005 	 	   0.035694 
max  raw eQQ diff.....   

In [19]:
# checking effect

mout1y = Match(Tr = Tr, X = X, Y= state.data.t4.match$vep, Weight.matrix=genout1)
summary(mout1y)


Estimate...  0.038235 
AI SE......  0.03122 
T-stat.....  1.2247 
p.val......  0.22069 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



In [11]:
mout1y

$est
     [,1]
[1,] 0.03

$se
[1] 0.03323399

$est.noadj
[1] 0.03

$se.standard
[1] 0.02222876

$se.cond
[1] 4.656613e-10

$mdata
$mdata$Y
 [1] 0.43 0.51 0.41 0.46 0.58 0.40 0.37 0.40 0.40 0.40 0.55 0.35 0.32 0.50 0.38
[16] 0.47 0.51 0.42 0.42 0.42 0.42 0.42 0.42 0.39 0.43 0.33 0.33 0.33 0.43 0.43
[31] 0.39 0.52 0.42 0.41

$mdata$Tr
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

$mdata$X
      [,1]        [,2]      [,3] [,4] [,5] [,6]
 [1,]    1 0.255858153 0.1383178    1    1    1
 [2,]    1 0.034211684 0.1933020    1    2    1
 [3,]    1 0.163146988 0.1188192    1    4    1
 [4,]    1 0.035189021 0.2176259    1    6    1
 [5,]    1 0.002872020 0.1467181    1   10    1
 [6,]    2 0.146542192 0.1954217    1   11    1
 [7,]    1 0.129255772 0.1553283    0   16    1
 [8,]    1 0.017707502 0.1689029    0   21    1
 [9,]    1 0.099720217 0.1641143    1   23    1
[10,]    1 0.067699298 0.1469053    1   24    1
[11,]    2 0.014224271 0.2115010    1   25    1
[12,] 

### Trying to Improve GenMatch

In [13]:
genout2 = GenMatch(Tr = Tr, X = X,pop.size=200,max.generations=25,caliper=1)
mout2 = Match(Tr = Tr, X = X, Weight.matrix=genout2,caliper=1)
summary(mout2)



Wed Apr 24 19:16:33 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not

In [18]:
mb2  <- MatchBalance(Tr ~  incparty + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = mout2, nboots=500)



***** (V1) incpartyR *****
                       Before Matching 	 	 After Matching
mean treatment........    0.23529 	 	    0.33333 
mean control..........    0.62745 	 	    0.33333 
std mean diff.........     -89.69 	 	          0 

mean raw eQQ diff.....    0.35294 	 	          0 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          0 

mean eCDF diff........    0.19608 	 	          0 
med  eCDF diff........    0.19608 	 	          0 
max  eCDF diff........    0.39216 	 	          0 

var ratio (Tr/Co).....    0.80181 	 	          1 
T-test p-value........  0.0040663 	 	          1 


***** (V2) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.086503 	 	   0.040257 
mean control..........   0.085878 	 	   0.036908 
std mean diff.........    0.87156 	 	     13.169 

mean raw eQQ diff.....   0.020856 	 	  0.0050211 
med  raw eQQ diff.....      0.005 	 	  0.0042818 
max  raw eQQ diff.....   

In [19]:
mout2y = Match(Tr = Tr, X = X, Y = state.data.t4.match$vep, Weight.matrix=genout2,caliper=1)
summary(mout2y)


Estimate...  0.051667 
AI SE......  0.016005 
T-stat.....  3.2282 
p.val......  0.0012457 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  6 
Matched number of observations  (unweighted).  6 

Caliper (SDs)........................................   1 1 1 1 1 1 
Number of obs dropped by 'exact' or 'caliper'  11 



**Dropped too many**

While we got the exact same coefficient on state unemployment rate, we've dropped so many variables that it is no longer generalizable to the whole population (left with 6 treatment obs, dropped 11).

Since match balance can only be achieved when dropping a significant ammount of variables, we can conclude that the treatment and observation groups are statistically significantly different and hence there is already inherent selection bias in the observations that is not controlled for. The incumbent party, college, and other factors significantly affect unemployment rate in a way that cannot be controlled for given the data we have. Thus, to isolate the effect of unemployment 

### Gen Match for effect of Republican Incumbent

In [25]:
# Gen Match
library('Matching')
library('rgenoud')

Trr = ifelse(state.data.t4.match$incparty =='R', 1, 0)

Xr = cbind(state.daata.t4.match$uerate, 
          state.data.t4.match$s_black, state.data.t4.match$college, 
          state.data.t4.match$SenElection, factor(state.data.t4.match$fips_state),
          factor(state.data.t4.match$year))


# X = cbind(state.data.t4.match$year,state.data.t4.match$fips_state,
#       state.data.t4.match$s_black, state.data.t4.match$SenElection,
#       state.data.t4.match$presyear, state.data.t4.match$college,
#       state.data.t4.match$college_diff, state.data.t4.match$pci_0000,
#       state.data.t4.match$prior_ue, state.data.t4.match$partyspending,
#       state.data.t4.match$priorue_diff, state.data.t4.match$spending_diff,
#       state.data.t4.match$share_open, state.data.t4.match$share_qual_out2,
#       state.data.t4.match$open_all_share, state.data.t4.match$quality_share_new,
#       state.data.t4.match$outparty_spend, state.data.t4.match$share_open_diff,
#       state.data.t4.match$open_all_share_diff, state.data.t4.match$share_qual_out2_diff,
#       state.data.t4.match$quality_share_diff, state.data.t4.match$outparty_spend_diff,
#       state.data.t4.match$s_black_diff, state.data.t4.match$pci0000_diff,
#       state.data.t4.match$senelec_diff, state.data.t4.match$incparty,
#       state.data.t4.match$GubElection, state.data.t4.match$vep,
#       state.data.t4.match$uerate)

genoutr1 = GenMatch(Tr = Trr, X = Xr,pop.size=200,max.generations=25)
moutr1 = Match(Tr = Trr, X = Xr, Weight.matrix=genoutr1)
summary(mout1r)



Wed Apr 24 19:53:30 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not

ERROR: Error in summary(mout1r): object 'mout1r' not found


In [28]:
mb1r  <- MatchBalance(Trr ~  uerate + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = moutr1, nboots=500)


***** (V1) uerate *****
                       Before Matching 	 	 After Matching
mean treatment........   0.042917 	 	   0.042917 
mean control..........   0.058656 	 	   0.042833 
std mean diff.........    -103.31 	 	    0.54696 

mean raw eQQ diff.....   0.016625 	 	  0.0016389 
med  raw eQQ diff.....      0.007 	 	      0.001 
max  raw eQQ diff.....      0.044 	 	      0.007 

mean eCDF diff........    0.16381 	 	   0.046296 
med  eCDF diff........    0.18229 	 	   0.027778 
max  eCDF diff........     0.3125 	 	    0.16667 

var ratio (Tr/Co).....    0.39685 	 	    0.98853 
T-test p-value........  0.0026117 	 	    0.95508 
KS Bootstrap p-value..      0.026 	 	      0.568 
KS Naive p-value......   0.073115 	 	    0.69937 
KS Statistic..........     0.3125 	 	    0.16667 


***** (V2) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.089455 	 	   0.089455 
mean control..........   0.082185 	 	   0.087663 
std mean diff.........     8.

In [48]:
# checking effect

mout1ry = Match(Tr = Trr, X = Xr, Y= state.data.t4.match$vep, Weight.matrix=genoutr1)
summary(mout1y)


Estimate...  0.038235 
AI SE......  0.03122 
T-stat.....  1.2247 
p.val......  0.22069 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



# Trying to improve GM on Republican

In [50]:
# Gen Match


Trr = ifelse(state.data.t4.match$incparty =='R', 1, 0)

Xr = cbind(state.data.t4.match$uerate, 
          state.data.t4.match$s_black, state.data.t4.match$college, 
          state.data.t4.match$SenElection, factor(state.data.t4.match$fips_state),
          factor(state.data.t4.match$year))


# X = cbind(state.data.t4.match$year,state.data.t4.match$fips_state,
#       state.data.t4.match$s_black, state.data.t4.match$SenElection,
#       state.data.t4.match$presyear, state.data.t4.match$college,
#       state.data.t4.match$college_diff, state.data.t4.match$pci_0000,
#       state.data.t4.match$prior_ue, state.data.t4.match$partyspending,
#       state.data.t4.match$priorue_diff, state.data.t4.match$spending_diff,
#       state.data.t4.match$share_open, state.data.t4.match$share_qual_out2,
#       state.data.t4.match$open_all_share, state.data.t4.match$quality_share_new,
#       state.data.t4.match$outparty_spend, state.data.t4.match$share_open_diff,
#       state.data.t4.match$open_all_share_diff, state.data.t4.match$share_qual_out2_diff,
#       state.data.t4.match$quality_share_diff, state.data.t4.match$outparty_spend_diff,
#       state.data.t4.match$s_black_diff, state.data.t4.match$pci0000_diff,
#       state.data.t4.match$senelec_diff, state.data.t4.match$incparty,
#       state.data.t4.match$GubElection, state.data.t4.match$vep,
#       state.data.t4.match$uerate)

genoutr2 = GenMatch(Tr = Trr, X = Xr,pop.size=200,max.generations=25, caliper = 1)
moutr2 = Match(Tr = Trr, X = Xr, Weight.matrix=genoutr2, caliper = 1)
summary(moutr2)



Thu Apr 25 01:12:16 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not

In [52]:
mb2r  <- MatchBalance(Trr ~  uerate + s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = moutr2, nboots=500)


***** (V1) uerate *****
                       Before Matching 	 	 After Matching
mean treatment........   0.042917 	 	   0.045471 
mean control..........   0.058656 	 	   0.044647 
std mean diff.........    -103.31 	 	     4.5919 

mean raw eQQ diff.....   0.016625 	 	  0.0025882 
med  raw eQQ diff.....      0.007 	 	      0.002 
max  raw eQQ diff.....      0.044 	 	      0.007 

mean eCDF diff........    0.16381 	 	   0.069519 
med  eCDF diff........    0.18229 	 	   0.058824 
max  eCDF diff........     0.3125 	 	    0.17647 

var ratio (Tr/Co).....    0.39685 	 	    0.81507 
T-test p-value........  0.0026117 	 	      0.707 
KS Bootstrap p-value..      0.024 	 	        0.9 
KS Naive p-value......   0.073115 	 	    0.95391 
KS Statistic..........     0.3125 	 	    0.17647 


***** (V2) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.089455 	 	   0.075582 
mean control..........   0.082185 	 	   0.072351 
std mean diff.........     8.

In [51]:
# checking effect

moutr2y = Match(Tr = Trr, X = Xr, Y= state.data.t4.match$vep, Weight.matrix=genoutr2)
summary(mout1y)


Estimate...  0.038235 
AI SE......  0.03122 
T-stat.....  1.2247 
p.val......  0.22069 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



# Sensitivity Analysis on Incumbent Party

In [30]:
library('rbounds') 

psens(mout1ry, Gamma = 1.5, GammaInc = 0.05)

Gamma,Lower bound,Upper bound
1.0,0.4188,0.4188
1.05,0.3703,0.4685
1.1,0.3258,0.5164
1.15,0.2855,0.5619
1.2,0.2491,0.6048
1.25,0.2166,0.6448
1.3,0.1877,0.6818
1.35,0.1622,0.7159
1.4,0.1399,0.747
1.45,0.1203,0.7754


# GenMatch Rep Inc x Unemployment

In [103]:
# Gen Match
library('Matching')
library('rgenoud')

TrRe = ifelse(state.data.t4.match$incparty =='R', 1, 0)*state.data.t4.match$treat

XRe = cbind(state.data.t4.match$s_black, state.data.t4.match$college, 
          state.data.t4.match$SenElection, factor(state.data.t4.match$fips_state),
          factor(state.data.t4.match$year))


# X = cbind(state.data.t4.match$year,state.data.t4.match$fips_state,
#       state.data.t4.match$s_black, state.data.t4.match$SenElection,
#       state.data.t4.match$presyear, state.data.t4.match$college,
#       state.data.t4.match$college_diff, state.data.t4.match$pci_0000,
#       state.data.t4.match$prior_ue, state.data.t4.match$partyspending,
#       state.data.t4.match$priorue_diff, state.data.t4.match$spending_diff,
#       state.data.t4.match$share_open, state.data.t4.match$share_qual_out2,
#       state.data.t4.match$open_all_share, state.data.t4.match$quality_share_new,
#       state.data.t4.match$outparty_spend, state.data.t4.match$share_open_diff,
#       state.data.t4.match$open_all_share_diff, state.data.t4.match$share_qual_out2_diff,
#       state.data.t4.match$quality_share_diff, state.data.t4.match$outparty_spend_diff,
#       state.data.t4.match$s_black_diff, state.data.t4.match$pci0000_diff,
#       state.data.t4.match$senelec_diff, state.data.t4.match$incparty,
#       state.data.t4.match$GubElection, state.data.t4.match$vep,
#       state.data.t4.match$uerate)

genoutRe = GenMatch(Tr = TrRe, X = XRe,pop.size=200,max.generations=25)
moutRe = Match(Tr = TrRe, X = XRe, Weight.matrix=genoutRe)
summary(moutRe)



Fri Apr 26 14:46:15 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	22
	(2) Uniform Mutation.................. 	25
	(3) Boundary Mutation................. 	25
	(4) Non-Uniform Mutation.............. 	25
	(5) Polytope Crossover................ 	25
	(6) Simple Crossover.................. 	26
	(7) Whole Non-Uniform Mutation........ 	25
	(8) Heuristic Crossover............... 	26
	(9) Local-Minimum Crossover........... 	0

SOFT Maximum Number of Generations: 25
Maximum Nonchanging Generations: 4
Population size       : 200
Convergence Tolerance: 1.000000e-03

Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
Not Checking Gradients before Stopping.
Using Ou

In [104]:
mbRe  <- MatchBalance(TrRe ~ s_black + 
    college + SenElection + factor(fips_state) + factor(year), 
    data = state.data.t4.match, match.out = moutRe, nboots=500)


***** (V1) s_black *****
                       Before Matching 	 	 After Matching
mean treatment........   0.098215 	 	   0.098215 
mean control..........   0.085272 	 	   0.065704 
std mean diff.........     19.275 	 	     48.421 

mean raw eQQ diff.....   0.061956 	 	   0.032511 
med  raw eQQ diff.....   0.047775 	 	   0.035935 
max  raw eQQ diff.....    0.14037 	 	   0.046822 

mean eCDF diff........    0.13802 	 	     0.1875 
med  eCDF diff........    0.11719 	 	       0.25 
max  eCDF diff........    0.35938 	 	        0.5 

var ratio (Tr/Co).....    0.64596 	 	     1.5597 
T-test p-value........    0.73336 	 	    0.48137 
KS Bootstrap p-value..      0.564 	 	      0.654 
KS Naive p-value......    0.71574 	 	    0.77143 
KS Statistic..........    0.35938 	 	        0.5 


***** (V2) college *****
                       Before Matching 	 	 After Matching
mean treatment........    0.19698 	 	    0.19698 
mean control..........    0.24165 	 	    0.20125 
std mean diff.........    -7

In [105]:
# checking effect

moutRey = Match(Tr = TrRe, X = XRe, Y= state.data.t4.match$vep, Weight.matrix=genoutRe)
summary(mout1y)


Estimate...  0.038235 
AI SE......  0.03122 
T-stat.....  1.2247 
p.val......  0.22069 

Original number of observations..............  68 
Original number of treated obs...............  17 
Matched number of observations...............  17 
Matched number of observations  (unweighted).  17 



In [106]:
library('rbounds') 

psens(moutRey, Gamma = 1.5, GammaInc = 0.05)

Gamma,Lower bound,Upper bound
1.0,0.3575,0.3575
1.05,0.341,0.3742
1.1,0.3254,0.3903
1.15,0.3107,0.4058
1.2,0.2969,0.4207
1.25,0.2838,0.4351
1.3,0.2714,0.449
1.35,0.2597,0.4624
1.4,0.2486,0.4754
1.45,0.238,0.4879


#### Takeaways

Interanal balance but sensitive to hidden bias.

# Replicating Table 5

In [53]:
###################################################################################################
#TABLE 5 - correct coefficients and standard errors (except intercept)

#Convert educ and income variables to numeric
indiv.data1 <- indiv.data
indiv.data1$educ <- as.numeric(indiv.data1$educ)
indiv.data1$income <- as.numeric(indiv.data1$income)

indiv.data.ml <- mlogit.data(indiv.data1, choice="partyvote", shape="wide")

table5 <- mlogit(partyvote ~ 0 | uerate + totalspend_voter_inf + democrat + republican + black + hisp +
                   other + female + married + age + educ + income + incomedk + unemployed + factor(fips_state),
                 data=indiv.data.ml, reflevel="Abstain")
summary(table5)

#-----------------------------
#Calculate SE

#Remove NA's from dataset for SE
indiv.data2 <- na.omit(indiv.data)
nrow(indiv.data2)

cl.mlogit   <- function(fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- length(coefficients(fm))
  dfc <- (M/(M-1))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat.=crossprod(uj)/N)
  ses <- sqrt(diag(vcovCL))
  coeftest <- coeftest(fm, vcovCL) 
  return(newList <- list("summary" = coeftest, "vcovCL" = vcovCL, "ses" = ses))
}

ses.t51 <- cl.mlogit(table5, indiv.data2$fips_state)
ses.t51


Call:
mlogit(formula = partyvote ~ 0 | uerate + totalspend_voter_inf + 
    democrat + republican + black + hisp + other + female + married + 
    age + educ + income + incomedk + unemployed + factor(fips_state), 
    data = indiv.data.ml, reflevel = "Abstain", method = "nr")

Frequencies of alternatives:
   Abstain   Democrat Republican 
   0.42992    0.31050    0.25958 

nr method
6 iterations, 0h:0m:5s 
g'(-H)^-1g = 5.71E-06 
successive function values within tolerance limits 

Coefficients :
                                  Estimate Std. Error  z-value  Pr(>|z|)    
Democrat:(intercept)            -6.8614916  0.3282838 -20.9011 < 2.2e-16 ***
Republican:(intercept)          -7.6788257  0.3814682 -20.1297 < 2.2e-16 ***
Democrat:uerate                  6.6385869  1.6865461   3.9362 8.278e-05 ***
Republican:uerate                2.5052340  1.8842726   1.3295 0.1836667    
Democrat:totalspend_voter_inf    0.2294851  0.0477949   4.8015 1.575e-06 ***
Republican:totalspend_voter_inf  0.3

$summary

t test of coefficients:

                                  Estimate Std. Error  t value  Pr(>|t|)    
Democrat:(intercept)            -6.8614916  0.4364749 -15.7202 < 2.2e-16 ***
Republican:(intercept)          -7.6788257  0.4587937 -16.7370 < 2.2e-16 ***
Democrat:uerate                  6.6385869  3.2542649   2.0400 0.0413886 *  
Republican:uerate                2.5052340  4.8453653   0.5170 0.6051454    
Democrat:totalspend_voter_inf    0.2294851  0.0896137   2.5608 0.0104617 *  
Republican:totalspend_voter_inf  0.3350896  0.1199718   2.7931 0.0052342 ** 
Democrat:democrat                1.3775895  0.0745786  18.4717 < 2.2e-16 ***
Republican:democrat              0.3514796  0.1375939   2.5545 0.0106544 *  
Democrat:republican              0.2260218  0.1331379   1.6977 0.0896148 .  
Republican:republican            1.6878234  0.1207377  13.9793 < 2.2e-16 ***
Democrat:black                   0.3735682  0.1232908   3.0300 0.0024540 ** 
Republican:black                -0.626155

# Extension: Gen Match

In [85]:
########----------------Matching for table 5-----------------

mydata5 = indiv.data1

# deleting NAs
mydata5 = na.omit(mydata5)


#-------------------- changing the treatment variable to binary using 25 and 75 quantiles
quantile_75 =  quantile(mydata5$uerate)[4]
quantile_25 =  quantile(mydata5$uerate)[2]

mydata5$uerate[mydata5$uerate >= quantile_75] <- 1
mydata5$uerate[mydata5$uerate <= quantile_25] <- 0
mydata5$uerate[mydata5$uerate < quantile_75 & mydata5$uerate > quantile_25] <- NA
mydata5 = na.omit(mydata5)
mydata5$uerate
nrow(mydata5)

#------------------- editing the outcome variable To estimate the treatment effect on Democrats:
Democrat_Data =mydata5 
Democrat_Data$partyvote
Democrat_Data$partyvote =  as.character(Democrat_Data$partyvote)


Democrat_Data$partyvote[Democrat_Data$partyvote != "Democrat"] <- 0
Democrat_Data$partyvote[Democrat_Data$partyvote == "Democrat"] <- 1
Democrat_Data$partyvote =  as.numeric(Democrat_Data$partyvote)
Democrat_Data$partyvote
Democrat_Tr = Democrat_Data$uerate
Y = Democrat_Data$partyvote

#------ Doing the same but for Republican----

Republican_Data =mydata5 
Republican_Data$partyvote =  as.character(Republican_Data$partyvote)

#Changing the outcome variable to republican:
Republican_Data$partyvote[Republican_Data$partyvote != "Republican"] <- 0
Republican_Data$partyvote[Republican_Data$partyvote == "Republican"] <- 1
Republican_Data$partyvote =  as.numeric(Republican_Data$partyvote)
Republican_Data$partyvote


In [90]:
#------------------Matching to find the treatment effects-----------

#Variables used in the paper mode:
#uerate + totalspend_voter_inf + democrat + republican + black + hisp +
 # other + female + married + age + educ + income + incomedk + unemployed + factor(fips_state)



X = cbind(mydata5$totalspend_voter_inf, mydata5$democrat , mydata5$republican, mydata5$black , mydata5$hisp ,
          mydata5$other , mydata5$female , mydata5$married , mydata5$age , mydata5$educ, 
          mydata5$income,  mydata5$unemployed)

genoutI1 <- GenMatch(Tr=mydata5$uerate, X=X,
                   pop.size = 50,max.generations=25, wait.generations=1)



moutI1 <- Match(Tr=mydata5$uerate, X=X,  Weight.matrix=genoutI1)
summary(moutI1)

mbI1 <- MatchBalance(mydata5$uerate~mydata5$totalspend_voter_inf+mydata5$democrat +
                     mydata5$republican + mydata5$black + mydata5$hisp +
                     mydata5$other + mydata5$female + mydata5$married + mydata5$age + mydata5$educ+
                     mydata5$income+mydata5$unemployed 
                   ,match.out=moutI1, nboots=500)




Fri Apr 26 14:17:27 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 
 0.000000e+00   <=  X7   <=    1.000000e+03 
 0.000000e+00   <=  X8   <=    1.000000e+03 
 0.000000e+00   <=  X9   <=    1.000000e+03 
 0.000000e+00   <=  X10  <=    1.000000e+03 
 0.000000e+00   <=  X11  <=    1.000000e+03 
 0.000000e+00   <=  X12  <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	7
	(2) Uniform Mutation.................. 	6
	(3) Boundary Mutation................. 	6
	(4) Non-Uniform Mutation.............. 	6
	(5) Polytope Crossover................ 	6
	(6) Simple Crossover.................. 	6
	(7) Whole Non-Uniform Mutation........ 	6
	(8) Heuristic Crossover............... 	6
	(9) Local


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  3930 
Original number of treated obs...............  1913 
Matched number of observations...............  1913 
Matched number of observations  (unweighted).  1927 


***** (V1) mydata5$totalspend_voter_inf *****
                       Before Matching 	 	 After Matching
mean treatment........     1.5145 	 	     1.5145 
mean control..........     1.8219 	 	     1.5089 
std mean diff.........    -35.398 	 	    0.63768 

mean raw eQQ diff.....    0.34415 	 	   0.059473 
med  raw eQQ diff.....    0.32855 	 	   0.047279 
max  raw eQQ diff.....    0.81059 	 	     1.2327 

mean eCDF diff........    0.13177 	 	   0.018074 
med  eCDF diff........    0.13877 	 	   0.013752 
max  eCDF diff........     0.2516 	 	   0.061235 

var ratio (Tr/Co).....       1.21 	 	     1.0616 
T-test p-value........ < 2.22e-16 	 	    0.18856 
KS Bootstrap p-value.. < 2.22e-16 	 	      0.002 
KS Naive p

In [88]:
#--Treatment effect on Democrats:
Yd = Democrat_Data$partyvote

moutId1 <- Match(Y=Yd, Tr=mydata5$uerate, X=X,  Weight.matrix=genoutI1)
summary(moutId1)

#--Treatment effect on Republicans:
Yr = Republican_Data$partyvote

moutIr1 <- Match(Y=Yr, Tr=mydata5$uerate, X=X,  Weight.matrix=genoutI1)
summary(moutIr1)


Estimate...  0.028228 
AI SE......  0.01944 
T-stat.....  1.452 
p.val......  0.14649 

Original number of observations..............  3930 
Original number of treated obs...............  1913 
Matched number of observations...............  1913 
Matched number of observations  (unweighted).  1934 


Estimate...  0.02666 
AI SE......  0.017558 
T-stat.....  1.5184 
p.val......  0.12892 

Original number of observations..............  3930 
Original number of treated obs...............  1913 
Matched number of observations...............  1913 
Matched number of observations  (unweighted).  1934 



# Trying to improve Match Balance for Party Voting

In [99]:
genoutI2 <- GenMatch(Tr=mydata5$uerate, X=X,
                   pop.size = 16,max.generations=10, wait.generations=1, caliper =.2 )



moutI2 <- Match(Tr=mydata5$uerate, X=X,  Weight.matrix=genoutI2, caliper = .25)
summary(moutI2)

mbI2 <- MatchBalance(mydata5$uerate~mydata5$totalspend_voter_inf+mydata5$democrat +
                     mydata5$republican + mydata5$black + mydata5$hisp +
                     mydata5$other + mydata5$female + mydata5$married + mydata5$age + mydata5$educ+
                     mydata5$income+mydata5$unemployed 
                   ,match.out=moutI2, nboots=500)



Fri Apr 26 14:27:35 2019
Domains:
 0.000000e+00   <=  X1   <=    1.000000e+03 
 0.000000e+00   <=  X2   <=    1.000000e+03 
 0.000000e+00   <=  X3   <=    1.000000e+03 
 0.000000e+00   <=  X4   <=    1.000000e+03 
 0.000000e+00   <=  X5   <=    1.000000e+03 
 0.000000e+00   <=  X6   <=    1.000000e+03 
 0.000000e+00   <=  X7   <=    1.000000e+03 
 0.000000e+00   <=  X8   <=    1.000000e+03 
 0.000000e+00   <=  X9   <=    1.000000e+03 
 0.000000e+00   <=  X10  <=    1.000000e+03 
 0.000000e+00   <=  X11  <=    1.000000e+03 
 0.000000e+00   <=  X12  <=    1.000000e+03 

Data Type: Floating Point
Operators (code number, name, population) 
	(1) Cloning........................... 	1
	(2) Uniform Mutation.................. 	2
	(3) Boundary Mutation................. 	2
	(4) Non-Uniform Mutation.............. 	2
	(5) Polytope Crossover................ 	2
	(6) Simple Crossover.................. 	2
	(7) Whole Non-Uniform Mutation........ 	2
	(8) Heuristic Crossover............... 	2
	(9) Local

In [68]:
#--Treatment effect on Democrats:
Yd = Democrat_Data$partyvote

moutIyd2 <- Match(Y=Yd, Tr=mydata5$uerate, X=X,  Weight.matrix=genoutI2, caliper = .1)
summary(moutIyd2)

#--Treatment effect on Republicans:
Yr = Republican_Data$partyvote

moutIyr2 <- Match(Y=Yr, Tr=mydata5$uerate, X=X,  Weight.matrix=genoutI2, caliper = .1)
summary(moutIyr2)


Estimate...  -0.065789 
AI SE......  0.0034187 
T-stat.....  -19.244 
p.val......  < 2.22e-16 

Original number of observations..............  3930 
Original number of treated obs...............  1913 
Matched number of observations...............  76 
Matched number of observations  (unweighted).  77 

Caliper (SDs)........................................   0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 
Number of obs dropped by 'exact' or 'caliper'  1837 


Estimate...  0.13158 
AI SE......  0.0041285 
T-stat.....  31.871 
p.val......  < 2.22e-16 

Original number of observations..............  3930 
Original number of treated obs...............  1913 
Matched number of observations...............  76 
Matched number of observations  (unweighted).  77 

Caliper (SDs)........................................   0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 
Number of obs dropped by 'exact' or 'caliper'  1837 



#### Takeaways

Dropped too many, can't draw causal conclusion.