In [1]:
library(haven)
library(tidyverse)
# Read the Stata file with the city-by-year crime data and preview its
# first rows.
crimedata <- read_dta("crime2_forlecture.dta")
head(crimedata)

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.1     ✔ purrr   0.3.3
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   1.0.0     ✔ stringr 1.4.0
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


pop,crimes,unem,officers,west,nrtheast,south,year,city
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
229528,17136,8.2,326,1,0,0,82,1
246815,17306,3.7,321,1,0,0,87,1
814054,75654,8.1,1621,1,0,0,82,2
933177,83960,5.4,1803,1,0,0,87,2
374974,31352,9.0,633,1,0,0,82,3
406297,31364,5.9,685,1,0,0,87,3


In [None]:
# Cross-sectional OLS of crimes on unemployment, using only the rows
# where year == 82.
reg1 <- lm(crimes ~ unem, data = crimedata, subset = year == 82)
summary(reg1)

In [None]:
# One way to go from long to wide: split the data by year, then join the
# two halves back together.
# Step 1: one data set per year.
crimedata82 <- subset(crimedata, year == 82)
crimedata87 <- subset(crimedata, year == 87)
# Step 2: merge on the city id. Non-key columns present in both halves get
# a ".82" / ".87" suffix (e.g. crimes.82, crimes.87).
crimedatawide <- merge(
  crimedata82,
  crimedata87,
  by = "city",
  suffixes = c(".82", ".87")
)
head(crimedatawide)

In [None]:
# First differences (87 minus 82) of crimes and unemployment for each
# merged city row.
crimedatawide$dcrime <- with(crimedatawide, crimes.87 - crimes.82)
crimedatawide$dunem <- with(crimedatawide, unem.87 - unem.82)

# Regress the change in crimes on the change in unemployment.
reg2 <- lm(dcrime ~ dunem, data = crimedatawide)
summary(reg2)

In [None]:
# Turn the city id into a factor (so lm() expands it into one dummy per
# city) and add a logical flag marking the year-87 rows.
crimedata <- crimedata %>%
  mutate(
    city = as.factor(city),
    y87 = year == 87
  )
# Regression on the long data with city dummies and the year-87 flag.
reg3 <- lm(crimes ~ unem + city + y87, data = crimedata)
summary(reg3)

In [2]:
# Read the state-level traffic data from the Stata file and preview its
# first rows.
trafficdata <- read_dta("TRAFFIC1.DTA")
head(trafficdata)

state,admn90,admn85,open90,open85,dthrte90,dthrte85,speed90,speed85,cdthrte,cadmn,copen,cspeed
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
AL,0,0,0,0,2.6,2.9,1,0,-0.3000002,0,0,1
AK,1,1,1,0,2.1,3.2,0,0,-1.1000001,0,1,0
AZ,1,0,0,0,2.5,4.4,1,0,-1.9000001,1,0,1
AR,0,0,0,0,2.9,3.4,1,0,-0.5,0,0,1
CA,1,0,1,1,2.0,2.6,1,0,-0.5999999,1,0,1
CO,1,1,0,0,1.9,2.4,1,0,-0.5000001,0,0,1


In [3]:
# Describe law changes: total each law indicator by year.
# NOTE(review): the previewed rows show these indicators as 0/1, in which
# case each sum is a count of states -- confirm there are no other values.
print("states with open container laws in 1990")
with(trafficdata, sum(open90))
print("states with open container laws in 1985")
with(trafficdata, sum(open85))
print("states with admin per se laws in 1990")
with(trafficdata, sum(admn90))
print("states with admin per se laws in 1985")
with(trafficdata, sum(admn85))



[1] "states with open container laws in 1990"


[1] "states with open container laws in 1985"


[1] "states with admin per se laws in 1990"


[1] "states with admin per se laws in 1985"


In [4]:
# Difference-in-differences: build the 1990-minus-1985 change in the
# open-container indicator and in the death rate.
# NOTE(review): the file already carries copen / cdthrte columns that look
# like the same differences -- confirm before relying on either version.
trafficdata <- trafficdata %>%
  mutate(
    change_open = open90 - open85,
    change_death = dthrte90 - dthrte85
  )

# Mean comparison: average change in the death rate within each value of
# change_open.
trafficdata %>%
  group_by(change_open) %>%
  summarize(meandth = mean(change_death))

# The same comparison via regression: the intercept is the mean for
# change_open == 0 and the slope is the gap between the two group means.
# NOTE(review): this reuses the name reg3, which the city-dummy crime
# regression also assigns, so running this cell overwrites that model.
reg3 <- lm(change_death ~ change_open, data = trafficdata)
summary(reg3)

change_open,meandth
<dbl>,<dbl>
0,-0.51875
1,-0.9666667



Call:
lm(formula = change_death ~ change_open, data = trafficdata)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.38125 -0.10729  0.01875  0.21875  0.81875 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.51875    0.04991 -10.393 5.51e-14 ***
change_open -0.44792    0.20580  -2.176   0.0344 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3458 on 49 degrees of freedom
Multiple R-squared:  0.08815,	Adjusted R-squared:  0.06954 
F-statistic: 4.737 on 1 and 49 DF,  p-value: 0.03437


In [8]:
# Change (1990 minus 1985) in the admin per se law indicator.
trafficdata$change_admn <- with(trafficdata, admn90 - admn85)
# Add it as a second regressor alongside the open-container change.
reg4 <- lm(change_death ~ change_open + change_admn, data = trafficdata)
summary(reg4)


Call:
lm(formula = change_death ~ change_open + change_admn, data = trafficdata)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.25261 -0.14337 -0.00321  0.19679  0.79679 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.49679    0.05243  -9.476 1.43e-12 ***
change_open -0.41968    0.20559  -2.041   0.0467 *  
change_admn -0.15060    0.11682  -1.289   0.2035    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3435 on 48 degrees of freedom
Multiple R-squared:  0.1187,	Adjusted R-squared:  0.08194 
F-statistic: 3.231 on 2 and 48 DF,  p-value: 0.04824


In [7]:
# List the states whose open-container indicator went from 0 to 1.
# which() skips NA comparisons, so rows with a missing change are left out.
trafficdata$state[which(trafficdata$change_open == 1)]