forked from jpburgard/gravity
/
tetrads.R
302 lines (278 loc) · 11.3 KB
/
tetrads.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#' @title Tetrads
#'
#' @description \code{tetrads} estimates gravity models
#' by taking the ratio of the ratio of flows.
#'
#' @details \code{tetrads} is an estimation method for gravity models
#' developed by \insertCite{Head2010;textual}{gravity}.
#'
#' The function \code{tetrads} utilizes the multiplicative form of the
#' gravity equation. After choosing a reference exporter \code{A} and
#' importer \code{B} one can eliminate importer and exporter fixed effects
#' by taking the ratio of ratios.
#'
#' Only those exporters trading with the
#' reference importer and importers trading with the reference exporter will
#' remain for the estimation. Therefore, reference countries should
#' preferably be countries which trade with every other country in the dataset.
#'
#' After restricting the data in this way, \code{tetrads} estimates the gravity
#' equation in its additive form by OLS.
#'
#' By taking the ratio of ratios, all monadic effects diminish, hence no
#' unilateral variables such as GDP can be included as independent variables.
#'
#' \code{tetrads} estimation can be used for both, cross-sectional as well as
#' panel data. Nonetheless, the function is designed to be consistent with the
#' Stata code for cross-sectional data provided on the website
#' \href{https://sites.google.com/site/hiegravity/}{Gravity Equations: Workhorse, Toolkit, and Cookbook}
#' when choosing robust estimation.
#'
#' The function \code{tetrads} was therefore tested for cross-sectional data.
#'
#' If tetrads is used for panel data, the user may have to drop distance as an
#' independent variable as time-invariant effects drop.
#'
#' For applying \code{tetrads} to panel data see \insertCite{Head2010;textual}{gravity}.
#'
#' @param dependent_variable (Type: character) name of the dependent variable. This variable is logged and then used as
#' the dependent variable in the estimation.
#'
#' @param distance (Type: character) name of the distance variable that should be taken as the key independent variable
#' in the estimation. The distance is logged automatically when the function is executed.
#'
#' @param additional_regressors (Type: character) names of the additional regressors to include in the model (e.g. a dummy
#' variable to indicate contiguity). Unilateral metric variables such as GDP should be inserted via the arguments
#' \code{income_origin} and \code{income_destination}.
#'
#' Write this argument as \code{c(contiguity, common currency, ...)}. By default this is set to \code{NULL}.
#'
#' @param code_origin (Type: character) country of origin variable (e.g. ISO-3 country codes). The variables are grouped
#' using this parameter.
#'
#' @param code_destination (Type: character) country of destination variable (e.g. country ISO-3 codes). The variables are
#' grouped using this parameter.
#'
#' @param filter_origin (Type: character) Reference exporting country.
#'
#' @param filter_destination (Type: character) Reference importing country.
#'
#' @param multiway (Type: logical) in case \code{multiway = TRUE}, the
#' \code{\link[multiwayvcov]{cluster.vcov}} function is used for estimation following
#' \insertCite{Cameron2011;textual}{gravity} multi-way clustering of
#' variance-covariance matrices. The default value is set to \code{TRUE}.
#'
#' @param data (Type: data.frame) the dataset to be used.
#'
#' @param ... Additional arguments to be passed to the function.
#'
#' @references
#' For more information on gravity models, theoretical foundations and
#' estimation methods in general see
#'
#' \insertRef{Anderson1979}{gravity}
#'
#' \insertRef{Anderson2001}{gravity}
#'
#' \insertRef{Anderson2010}{gravity}
#'
#' \insertRef{Baier2009}{gravity}
#'
#' \insertRef{Baier2010}{gravity}
#'
#' \insertRef{Feenstra2002}{gravity}
#'
#' \insertRef{Head2010}{gravity}
#'
#' \insertRef{Head2014}{gravity}
#'
#' \insertRef{Santos2006}{gravity}
#'
#' and the citations therein.
#'
#' See \href{https://sites.google.com/site/hiegravity/}{Gravity Equations: Workhorse, Toolkit, and Cookbook} for gravity datasets and Stata code for estimating gravity models.
#'
#' For estimating gravity equations using panel data see
#'
#' \insertRef{Egger2003}{gravity}
#'
#' \insertRef{Gomez-Herrera2013}{gravity}
#'
#' and the references therein.
#'
#' @examples
#' # Example for CRAN checks:
#' # Executable in < 5 sec
#' library(dplyr)
#' data("gravity_no_zeros")
#'
#' # Choose 5 countries for testing
#' countries_chosen <- c("AUS", "CHN", "GBR", "BRA", "CAN")
#' grav_small <- filter(gravity_no_zeros, iso_o %in% countries_chosen)
#'
#' fit <- tetrads(
#' dependent_variable = "flow",
#' distance = "distw",
#' additional_regressors = "rta",
#' code_origin = "iso_o",
#' code_destination = "iso_d",
#' filter_origin = countries_chosen[1],
#' filter_destination = countries_chosen[2],
#' data = grav_small
#' )
#' @return
#' The function returns the summary of the estimated gravity model as an
#' \code{\link[stats]{lm}}-object.
#'
#' @seealso \code{\link[stats]{lm}}, \code{\link[lmtest]{coeftest}},
#'
#' @export
tetrads <- function(dependent_variable,
distance,
additional_regressors,
code_origin,
code_destination,
filter_origin = NULL,
filter_destination = NULL,
multiway = FALSE,
data, ...) {
# Checks ------------------------------------------------------------------
stopifnot(is.data.frame(data))
stopifnot(is.character(dependent_variable), dependent_variable %in% colnames(data), length(dependent_variable) == 1)
stopifnot(is.character(distance), distance %in% colnames(data), length(distance) == 1)
if (!is.null(additional_regressors)) {
stopifnot(is.character(additional_regressors), all(additional_regressors %in% colnames(data)))
}
stopifnot(is.character(code_origin), code_origin %in% colnames(data), length(code_origin) == 1)
stopifnot(is.character(code_destination), code_destination %in% colnames(data), length(code_destination) == 1)
valid_origin <- data %>%
select(code_origin) %>%
distinct() %>%
as_vector()
valid_destination <- data %>%
select(code_destination) %>%
distinct() %>%
as_vector()
stopifnot(is.character(filter_origin), filter_origin %in% valid_origin, length(filter_origin) == 1)
stopifnot(is.character(filter_destination), filter_destination %in% valid_destination, length(filter_destination) == 1)
# Discarding unusable observations -------------------------------------------
d <- discard_unusable(data, c(distance, dependent_variable))
# Transforming data, logging distances ---------------------------------------
d <- log_distance(d, distance)
# Transforming data, logging flows -------------------------------------------
d <- d %>%
mutate(
y_log_tetrads = log(!!sym(dependent_variable))
)
# Truncating dataset to reference importer and exporter partners -------------
d2_filter_d <- d %>%
filter_at(
vars(!!sym(code_destination)),
any_vars(. == filter_destination)
) %>%
select(!!sym(code_origin)) %>%
distinct() %>%
as_vector()
d2 <- d %>%
filter_at(vars(!!sym(code_origin)), any_vars(. %in% d2_filter_d))
d2_filter_o <- d2 %>%
filter_at(vars(!!sym(code_origin)), any_vars(. == filter_origin)) %>%
select(!!sym(code_destination)) %>%
distinct() %>%
as_vector()
d2 <- d2 %>%
filter_at(vars(!!sym(code_destination)), any_vars(. %in% d2_filter_o))
# Taking ratios, ratk --------------------------------------------------------
d2 <- left_join(
d2,
d2 %>%
filter_at(
vars(!!sym(code_destination)),
any_vars(. == filter_destination)
) %>%
select(
!!sym(code_origin),
y_log_tetrads_d = !!sym("y_log_tetrads"), dist_log_d = !!sym("dist_log")
),
by = code_origin
) %>%
mutate(
lxinratk = !!sym("y_log_tetrads") - !!sym("y_log_tetrads_d"),
ldistratk = !!sym("dist_log") - !!sym("dist_log_d")
) %>%
select(-!!sym("y_log_tetrads_d"), -!!sym("dist_log_d"))
# Taking ratios, ratk, for the other independent variables -------------------
d2 <- d2 %>%
select(c(!!sym(code_origin), !!sym(code_destination), !!sym(distance), !!!syms(additional_regressors))) %>%
gather(!!sym("key"), !!sym("value"), -!!sym(code_origin), -!!sym(code_destination)) %>%
left_join(
d2 %>%
filter_at(vars(!!sym(code_destination)), any_vars(. == filter_destination)) %>%
select(c(!!sym(code_origin), !!sym(distance), !!!syms(additional_regressors))) %>%
gather(!!sym("key"), !!sym("value"), -!!sym(code_origin)),
by = c(code_origin, "key")
) %>%
mutate(value = !!sym("value.x") - !!sym("value.y")) %>%
select(c(!!sym(code_origin), !!sym(code_destination), "key", "value")) %>%
spread(!!sym("key"), !!sym("value")) %>%
left_join(d2 %>% select(-one_of(distance, additional_regressors)), by = c(code_origin, code_destination))
# Taking the ratio of ratios, rat --------------------------------------------
d2 <- left_join(
d2,
d2 %>%
filter_at(vars(!!sym(code_origin)), any_vars(. == filter_origin)) %>%
select(!!sym(code_destination), lxinratk_o = !!sym("lxinratk"), ldistratk_o = !!sym("ldistratk")),
by = code_destination
) %>%
mutate(
y_log_rat = !!sym("lxinratk") - !!sym("lxinratk_o"),
dist_log_rat = !!sym("ldistratk") - !!sym("ldistratk_o")
) %>%
select(-!!sym("lxinratk_o"), -!!sym("ldistratk_o"))
# Taking the ratio of ratios, rat, for the other independent variables -------
d2 <- d2 %>%
select(c(!!sym(code_origin), !!sym(code_destination), !!sym(distance), !!!syms(additional_regressors))) %>%
gather(!!sym("key"), !!sym("value"), -!!sym(code_origin), -!!sym(code_destination)) %>%
left_join(
d2 %>%
filter_at(vars(!!sym(code_origin)), any_vars(. == filter_origin)) %>%
select(c(!!sym(code_destination), additional_regressors)) %>%
gather(!!sym("key"), !!sym("value"), -!!sym(code_destination)),
by = c(code_destination, "key")
) %>%
mutate(
key = paste0(!!sym("key"), "_rat"),
value = !!sym("value.x") - !!sym("value.y")
) %>%
select(c(!!sym(code_origin), !!sym(code_destination), "key", "value")) %>%
spread(!!sym("key"), !!sym("value")) %>%
left_join(d2 %>% select(-one_of(distance, additional_regressors)), by = c(code_origin, code_destination)) %>%
select(ends_with("_rat"), !!sym(code_origin), !!sym(code_destination))
# Model ----------------------------------------------------------------------
if (!is.null(additional_regressors)) {
additional_regressors <- paste0(additional_regressors, "_rat")
vars <- paste(c("dist_log_rat", additional_regressors), collapse = " + ")
} else {
vars <- "dist_log_rat"
}
form <- stats::as.formula(
sprintf("y_log_rat ~ %s", vars)
)
form2 <- as.formula(sprintf("~ %s + %s", code_origin, code_destination))
model_tetrads <- stats::lm(form, data = d2)
# Multiway Variance-Covariance -----------------------------------------------
if (multiway == TRUE) {
model_tetrads_multiway <- lmtest::coeftest(
model_tetrads,
vcov = sandwich::vcovHC(model_tetrads, "HC1")
)
}
# Return ---------------------------------------------------------------------
if (multiway == FALSE) {
model_tetrads$call <- form
return(model_tetrads)
} else {
# model_tetrads_multiway$call <- form
return(model_tetrads_multiway)
}
}