In [2]:
# Switch the session to the application folder, then read the gravity data
# set (a CSV file in that folder) into the data frame `tradedata`.
setwd("~/Dropbox/matheconcode/_applications/14-gravity/")
dataFile <- "1_TraditionalGravity_from_WTO_book.csv"
tradedata <- read.csv(file = dataFile)

In [4]:
library(multiwayvcov)
library(lmtest)

# (1) OLS

In [5]:
# Specification (1): least-squares fit of ln_trade on distance, contiguity,
# language, colony and the two size variables, using only the rows whose
# exporter differs from the importer.
m1 <- lm(
  formula = ln_trade ~ ln_DIST + CNTG + LANG + CLNY + ln_Y + ln_E,
  data = tradedata[which(tradedata$exporter != tradedata$importer), ]
)

In [6]:
# Covariance matrix of m1 clustered on pair_id, taken from the same rows
# (exporter different from importer) that were used to fit the model.
vcov_pairID <- cluster.vcov(
  m1,
  tradedata$pair_id[which(tradedata$exporter != tradedata$importer)]
)

In [7]:
# Coefficient table for m1 evaluated with the clustered covariance matrix.
coeftest(m1, vcov. = vcov_pairID)


t test of coefficients:

               Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept) -11.2830798   0.2958274 -38.1408 < 2.2e-16 ***
ln_DIST      -1.0016075   0.0273400 -36.6353 < 2.2e-16 ***
CNTG          0.5738051   0.1847098   3.1065  0.001895 ** 
LANG          0.8015481   0.0821017   9.7629 < 2.2e-16 ***
CLNY          0.7348535   0.1441929   5.0963 3.488e-07 ***
ln_Y          1.1902357   0.0094560 125.8716 < 2.2e-16 ***
ln_E          0.9075884   0.0099098  91.5846 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [8]:
# R-squared of specification (1).
summary(m1)[["r.squared"]]

# (2) OLS with remoteness terms (RM)

In [9]:
# Specification (2): the regressors of specification (1) plus ln_REM_EXP and
# ln_REM_IMP, fitted on the rows whose exporter differs from the importer.
m2 <- lm(
  formula = ln_trade ~ ln_DIST + CNTG + LANG + CLNY + ln_Y + ln_E +
    ln_REM_EXP + ln_REM_IMP,
  data = tradedata[which(tradedata$exporter != tradedata$importer), ]
)

In [10]:
# Covariance matrix of m2 clustered on pair_id (rows with exporter different
# from importer, matching the estimation sample).
vcov_pairID <- cluster.vcov(
  m2,
  tradedata$pair_id[which(tradedata$exporter != tradedata$importer)]
)

In [11]:
# Coefficient table for m2 evaluated with the clustered covariance matrix.
coeftest(m2, vcov. = vcov_pairID)


t test of coefficients:

               Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept) -35.2185434   1.9862343 -17.7313 < 2.2e-16 ***
ln_DIST      -1.1848116   0.0312683 -37.8917 < 2.2e-16 ***
CNTG          0.2465850   0.1769254   1.3937    0.1634    
LANG          0.7393638   0.0784147   9.4289 < 2.2e-16 ***
CLNY          0.8424972   0.1502508   5.6073 2.077e-08 ***
ln_Y          1.1642588   0.0094795 122.8186 < 2.2e-16 ***
ln_E          0.9026382   0.0099080  91.1020 < 2.2e-16 ***
ln_REM_EXP    0.9717723   0.0681920  14.2505 < 2.2e-16 ***
ln_REM_IMP    0.2736933   0.0597821   4.5782 4.712e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [12]:
# R-squared of specification (2).
summary(m2)[["r.squared"]]

# (3) OLS with fixed effects

In [13]:
# Specification (3): least squares with every column whose name contains
# "PORTER_TIME_FE" added as a regressor, followed by the four bilateral
# variables. The formula is assembled as text because the list of
# fixed-effect columns is taken from the data frame itself.
m3 <- lm(
  as.formula(paste0(
    "ln_trade ~ ",
    paste(
      names(tradedata)[grepl("PORTER_TIME_FE", names(tradedata))],
      collapse = " + "
    ),
    " + ln_DIST + CNTG + LANG + CLNY"
  )),
  data = tradedata[which(tradedata$exporter != tradedata$importer), ]
)

In [14]:
# Covariance matrix of m3 clustered on pair_id (rows with exporter different
# from importer, matching the estimation sample).
vcov_pairID <- cluster.vcov(
  m3,
  tradedata$pair_id[which(tradedata$exporter != tradedata$importer)]
)

In [15]:
# Coefficient table for m3 evaluated with the clustered covariance matrix.
coeftest(m3, vcov. = vcov_pairID)


t test of coefficients:

                       Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)          1.6960e+01  3.9978e-01  42.4233 < 2.2e-16 ***
EXPORTER_TIME_FE1   -3.9491e+00  3.8948e-01 -10.1394 < 2.2e-16 ***
EXPORTER_TIME_FE2   -3.3481e+00  3.7328e-01  -8.9693 < 2.2e-16 ***
EXPORTER_TIME_FE3   -1.7665e+00  2.5516e-01  -6.9232 4.524e-12 ***
EXPORTER_TIME_FE4   -1.1717e+00  2.0396e-01  -5.7445 9.325e-09 ***
EXPORTER_TIME_FE5   -1.1884e+00  2.3886e-01  -4.9752 6.562e-07 ***
EXPORTER_TIME_FE6   -1.3302e-02  1.6937e-01  -0.0785 0.9373982    
EXPORTER_TIME_FE7   -3.3871e+00  3.8991e-01  -8.6870 < 2.2e-16 ***
EXPORTER_TIME_FE8   -3.3290e+00  3.6803e-01  -9.0454 < 2.2e-16 ***
EXPORTER_TIME_FE9   -1.5381e+00  2.5273e-01  -6.0859 1.176e-09 ***
EXPORTER_TIME_FE10  -6.4300e-01  1.9811e-01  -3.2457 0.0011732 ** 
EXPORTER_TIME_FE11  -6.5455e-01  2.0622e-01  -3.1741 0.0015048 ** 
EXPORTER_TIME_FE12   3.3158e-01  1.5536e-01   2.1342 0.0328343 *  
EXPORTER_TIME_FE13  -3.7629e+00  3.5

In [16]:
# R-squared of specification (3).
summary(m3)[["r.squared"]]

# (4) PPML

In [17]:
# Specification (4): quasi-Poisson GLM of the trade level on all
# "PORTER_TIME_FE" columns plus the four bilateral variables, fitted on the
# rows whose exporter differs from the importer. The fit is timed with
# proc.time(); the elapsed time is the value displayed by the last line.
ptm <- proc.time()

m4 <- glm(
  as.formula(paste0(
    "trade ~ ",
    paste(
      names(tradedata)[grepl("PORTER_TIME_FE", names(tradedata))],
      collapse = " + "
    ),
    " + ln_DIST + CNTG + LANG + CLNY"
  )),
  family = quasipoisson,
  data = tradedata[which(tradedata$exporter != tradedata$importer), ]
)

proc.time() - ptm

   user  system elapsed 
121.586   4.024 129.110 

In [18]:
# Model summary of the quasi-Poisson fit (default, non-clustered errors).
summary(object = m4)


Call:
glm(formula = as.formula(paste("trade ~ ", paste(grep("PORTER_TIME_FE", 
    names(tradedata), value = TRUE), collapse = " + "), " + ln_DIST + CNTG + LANG + CLNY")), 
    family = quasipoisson, data = subset(tradedata, exporter != 
        importer))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-131.848    -4.518    -1.542     0.345   168.081  

Coefficients: (7 not defined because of singularities)
                     Estimate Std. Error  t value Pr(>|t|)    
(Intercept)         13.894774   0.107374  129.405  < 2e-16 ***
EXPORTER_TIME_FE1   -2.609858   0.304533   -8.570  < 2e-16 ***
EXPORTER_TIME_FE2   -1.844846   0.225349   -8.187 2.80e-16 ***
EXPORTER_TIME_FE3   -1.633100   0.202296   -8.073 7.15e-16 ***
EXPORTER_TIME_FE4   -1.277962   0.176649   -7.234 4.80e-13 ***
EXPORTER_TIME_FE5   -1.419334   0.178758   -7.940 2.10e-15 ***
EXPORTER_TIME_FE6   -0.461740   0.109868   -4.203 2.65e-05 ***
EXPORTER_TIME_FE7   -1.682877   0.248686   -6.767 1.34e-11

In [19]:
# Covariance matrix of m4 clustered on pair_id (rows with exporter different
# from importer, matching the estimation sample).
vcov_pairID <- cluster.vcov(
  m4,
  tradedata$pair_id[which(tradedata$exporter != tradedata$importer)]
)

In [20]:
# Coefficient table for m4 evaluated with the clustered covariance matrix.
coeftest(m4, vcov. = vcov_pairID)


z test of coefficients:

                      Estimate Std. Error  z value  Pr(>|z|)    
(Intercept)         13.8947739  0.4371118  31.7877 < 2.2e-16 ***
EXPORTER_TIME_FE1   -2.6098576  0.3021749  -8.6369 < 2.2e-16 ***
EXPORTER_TIME_FE2   -1.8448464  0.2712533  -6.8012 1.038e-11 ***
EXPORTER_TIME_FE3   -1.6330995  0.3118124  -5.2374 1.628e-07 ***
EXPORTER_TIME_FE4   -1.2779623  0.4125341  -3.0978 0.0019494 ** 
EXPORTER_TIME_FE5   -1.4193337  0.3392349  -4.1839 2.865e-05 ***
EXPORTER_TIME_FE6   -0.4617404  0.3293359  -1.4020 0.1609049    
EXPORTER_TIME_FE7   -1.6828772  0.3168295  -5.3116 1.087e-07 ***
EXPORTER_TIME_FE8   -1.0261283  0.2831216  -3.6243 0.0002897 ***
EXPORTER_TIME_FE9   -0.7638224  0.3104315  -2.4605 0.0138736 *  
EXPORTER_TIME_FE10  -0.6635405  0.2971536  -2.2330 0.0255497 *  
EXPORTER_TIME_FE11  -0.7179754  0.2875768  -2.4966 0.0125377 *  
EXPORTER_TIME_FE12   0.2147019  0.2928388   0.7332 0.4634521    
EXPORTER_TIME_FE13  -2.3449130  0.2293189 -10.2256 < 2.2e-16 ***

# Using the PPML function in the gravity package

In [22]:
library(gravity)

In [23]:
# Same specification as (4), estimated with PPML() from the gravity package:
# dependent variable "trade", distance column "DIST", and as additional
# regressors every "PORTER_TIME_FE" column followed by CNTG, LANG and CLNY.
# Rows with exporter equal to importer are excluded.
m5 <- PPML(
  "trade",
  "DIST",
  c(
    names(tradedata)[grepl("PORTER_TIME_FE", names(tradedata))],
    "CNTG", "LANG", "CLNY"
  ),
  vce_robust = FALSE,
  data = tradedata[which(tradedata$exporter != tradedata$importer), ]
)

In [24]:
# Display the object returned by the PPML() call that was stored in m5.
m5


Call:
y ~ dist_log + EXPORTER_TIME_FE1 + EXPORTER_TIME_FE2 + EXPORTER_TIME_FE3 + 
    EXPORTER_TIME_FE4 + EXPORTER_TIME_FE5 + EXPORTER_TIME_FE6 + 
    EXPORTER_TIME_FE7 + EXPORTER_TIME_FE8 + EXPORTER_TIME_FE9 + 
    EXPORTER_TIME_FE10 + EXPORTER_TIME_FE11 + EXPORTER_TIME_FE12 + 
    EXPORTER_TIME_FE13 + EXPORTER_TIME_FE14 + EXPORTER_TIME_FE15 + 
    EXPORTER_TIME_FE16 + EXPORTER_TIME_FE17 + EXPORTER_TIME_FE18 + 
    EXPORTER_TIME_FE19 + EXPORTER_TIME_FE20 + EXPORTER_TIME_FE21 + 
    EXPORTER_TIME_FE22 + EXPORTER_TIME_FE23 + EXPORTER_TIME_FE24 + 
    EXPORTER_TIME_FE25 + EXPORTER_TIME_FE26 + EXPORTER_TIME_FE27 + 
    EXPORTER_TIME_FE28 + EXPORTER_TIME_FE29 + EXPORTER_TIME_FE30 + 
    EXPORTER_TIME_FE31 + EXPORTER_TIME_FE32 + EXPORTER_TIME_FE33 + 
    EXPORTER_TIME_FE34 + EXPORTER_TIME_FE35 + EXPORTER_TIME_FE36 + 
    EXPORTER_TIME_FE37 + EXPORTER_TIME_FE38 + EXPORTER_TIME_FE39 + 
    EXPORTER_TIME_FE40 + EXPORTER_TIME_FE41 + EXPORTER_TIME_FE42 + 
    EXPORTER_TIME_FE43 + EXPORTER_TIME_

# Fixed_Effects function in the gravity package

In [120]:
# Fixed-effects gravity regression using Fixed_Effects() from the gravity
# package. The arguments are, in order: dependent variable, distance column,
# the columns entered as fixed effects, and the additional regressors.
# PPML() has no fixed-effects argument, so passing the same four positional
# arguments to it would treat the fixed-effect columns as the regressors and
# push CNTG/LANG/CLNY into `...`.
# Zero flows are removed together with own-country rows; the displayed model
# has y_log as its dependent variable, so zero trade presumably cannot be
# used.
# NOTE(review): signature assumed to be Fixed_Effects(y, dist, fe, x,
# vce_robust, data) as in the gravity release that provides PPML(y, dist, x,
# ...); confirm against the installed version. `year` is passed as written;
# if that column is numeric it will not behave like a set of dummies.
m6 <- Fixed_Effects(
  "trade",
  "DIST",
  c("exporter", "importer", "year"),
  c("CNTG", "LANG", "CLNY"),
  vce_robust = FALSE,
  data = subset(tradedata, (exporter != importer) & (trade != 0))
)

In [121]:
# Display the fitted object stored in m6.
m6


Call:
y_log ~ dist_log + CNTG + LANG + CLNY + exporter + importer

Residuals:
     Min       1Q   Median       3Q      Max 
-11.5786  -0.8287   0.1171   0.9712   6.7077 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 14.602609   0.216178  67.549  < 2e-16 ***
dist_log    -1.203989   0.018086 -66.572  < 2e-16 ***
CNTG         0.253501   0.074093   3.421 0.000624 ***
LANG         0.615900   0.037975  16.219  < 2e-16 ***
CLNY         0.683077   0.073517   9.291  < 2e-16 ***
exporterAUS  0.367799   0.115339   3.189 0.001430 ** 
exporterAUT -0.218959   0.116698  -1.876 0.060626 .  
exporterBEL  0.995160   0.116481   8.544  < 2e-16 ***
exporterBGR -2.888221   0.120009 -24.067  < 2e-16 ***
exporterBOL -4.940552   0.128317 -38.503  < 2e-16 ***
exporterBRA  1.311847   0.115250  11.383  < 2e-16 ***
exporterCAN  0.626452   0.115566   5.421 5.99e-08 ***
exporterCHE  0.708466   0.116427   6.085 1.18e-09 ***
exporterCHL -0.924969   0.116874  -7.914 2.59e-15 ***
expo

In [6]:
# Number of distinct values of pair_id in the data.
length(unique(tradedata[["pair_id"]]))

In [25]:
#############################################
############ DATA PREPARATION ###############
#############################################
# Reshape the long table `tradedata` into dense arrays indexed by
# (origin n, destination i, year t):
#   Dnikt[n, i, k, t]  value of regressor k for the flow from n to i in year t
#   Xhatnit[n, i, t]   observed trade flow from n to i in year t
# Own-country cells (n == i) stay at 0. Execution stops if one
# (year, exporter, importer) combination appears on several rows, or if any
# off-diagonal combination has no row at all.

countrylist <- sort(unique(tradedata$importer))
exportercountrylist <- sort(unique(tradedata$exporter))
if (!identical(countrylist, exportercountrylist)) {
  stop("exporter and importer country lists do not coincide")
}

# regressorsIndices = 4:13
regressorsIndices <- c("ln_DIST", "CNTG", "LANG", "CLNY")
yearslist <- c(1986, 1990, 1994, 1998, 2002, 2006)

regressors_raw <- tradedata[regressorsIndices]
regressorsNames <- names(regressors_raw)
flow_raw <- tradedata$trade

nbt <- length(yearslist) # number of years
nbk <- dim(regressors_raw)[2] # number of regressors
nbi <- length(countrylist) # number of countries
yearsIndices <- seq_len(nbt)

Dnikt <- array(0, dim = c(nbi, nbi, nbk, nbt)) # basis functions
Xhatnit <- array(0, dim = c(nbi, nbi, nbt)) # trade flows from n to i

# Translate every row of tradedata into array coordinates in a single pass
# instead of rescanning the whole table for each (year, dest, orig) cell.
countryNames <- as.character(countrylist)
rowOrig <- match(as.character(tradedata$exporter), countryNames)
rowDest <- match(as.character(tradedata$importer), countryNames)
rowYear <- match(tradedata$year, yearslist)

# Keep rows for a listed year with distinct origin and destination; which()
# also discards rows whose coordinates are NA.
usedRows <- which(rowOrig != rowDest & !is.na(rowYear))
cellOrig <- rowOrig[usedRows]
cellDest <- rowDest[usedRows]
cellYear <- rowYear[usedRows]

# Position of each row inside Xhatnit (column-major order). Computed in
# double precision so the product cannot overflow the integer range.
nbiNum <- as.numeric(nbi)
cellKey <- cellOrig + nbiNum * (cellDest - 1) + nbiNum * nbiNum * (cellYear - 1)

if (anyDuplicated(cellKey) > 0) {
  stop("Several lines with year, exporter and importer.")
}

# Fill the arrays by matrix / linear indexing.
Xhatnit[cellKey] <- flow_raw[usedRows]
for (k in seq_len(nbk)) {
  Dnikt[cbind(cellOrig, cellDest, rep(k, length(usedRows)), cellYear)] <-
    as.numeric(regressors_raw[[k]][usedRows])
}

# List the (origin, destination) pairs that have no row in some year, in the
# order year (slowest), destination, origin (fastest).
expectedCells <- expand.grid(
  orig = seq_len(nbi),
  dest = seq_len(nbi),
  year = seq_len(nbt)
)
expectedCells <- expectedCells[expectedCells$orig != expectedCells$dest, ]
expectedKey <- expectedCells$orig + nbiNum * (expectedCells$dest - 1) +
  nbiNum * nbiNum * (expectedCells$year - 1)
isAbsent <- !(expectedKey %in% cellKey)
missingObs <- cbind(
  countryNames[expectedCells$orig[isAbsent]],
  countryNames[expectedCells$dest[isAbsent]]
)

if (length(missingObs) > 0) {
  stop("Missing observations")
}

# Totals by origin and year (Xnt) and by destination and year (Yit).
Xnt <- apply(X = Xhatnit, MARGIN = c(1, 3), FUN = sum)
Yit <- apply(X = Xhatnit, MARGIN = c(2, 3), FUN = sum)


In [26]:
#############################################
########## AFFINITY ESTIMATION ##############
#############################################
# Estimates the coefficient vector beta_k by plain gradient descent. For each
# year an inner fixed-point loop (named "Ipfp" in the variables below) builds
# a predicted flow matrix pi_nit from the current beta_k; the gradient is the
# difference between predicted and observed flows projected on the
# (standardised) regressors. Reads nbi, nbk, nbt, Xnt, Yit, Xhatnit and Dnikt
# from the session and overwrites Dnikt with its standardised version.

# Tuning constants.
sigma = 1            # scale: Phi is divided by sigma, logs are multiplied by it
maxiterIpfp =1000    # cap on inner-loop iterations; reaching it aborts the run
lambda = 0.0         # weight of sum(abs(beta_k)) in theval; 0 here. The proximal step is commented out below
tolIpfp = 1E-12      # inner-loop tolerance on the margin error
tolDescent = 1E-12   # outer-loop tolerance on the change of theval

# totmass_t = apply(X = Xnt, MARGIN = 2, FUN = sum)
# Every year is normalised by the same mass: the overall total divided by the
# number of years (the per-year alternative is kept commented out above).
totmass_t = rep(sum(Xnt) / nbt,nbt)
# Column t of Xnt (resp. Yit) divided by totmass_t[t]; the double transpose
# makes the recycling run over years.
p_nt = t( t(Xnt) / totmass_t)
q_nt = t( t(Yit) / totmass_t)
# Vectors of ones used to replicate a vector across rows / columns.
IX=rep(1,nbi)
tIY=matrix(rep(1,nbi),nrow=1)

f_nit = array(0,dim=c(nbi,nbi,nbt))
g_nit = array(0,dim=c(nbi,nbi,nbt))
pihat_nit = array(0,dim=c(nbi,nbi,nbt))

sdD_k = rep(1,nbk)
meanD_k = rep(0,nbk)

# f_nit[,,t]: p_nt[,t] repeated in every column; g_nit[,,t]: q_nt[,t] repeated
# in every row; pihat_nit[,,t]: observed flows divided by the year's mass.
for (t in 1:nbt)
{
  f_nit[,,t] = p_nt[,t] %*% tIY
  g_nit[,,t] = IX %*% t(q_nt[,t])
  pihat_nit[,,t] = Xhatnit[,,t] / totmass_t[t]
}

# Standardise each regressor with the mean and standard deviation taken over
# all entries of Dnikt[,,k,]; the scales are kept to rescale beta_k at the end.
for (k in 1:nbk)
{
  meanD_k[k] = mean(Dnikt[,,k,])
  sdD_k[k] = sd(Dnikt[,,k,])
  Dnikt[,,k,] = (Dnikt[,,k,] - meanD_k[k]) / sdD_k[k]
}


# Starting values: v_it is carried over between outer iterations so each
# inner loop starts from the previous solution.
v_it = matrix(rep(0,nbi*nbt),nbi,nbt)
# beta_kt = matrix(rep(0,nbk*nbt),nbk,nbt)
beta_k = rep(0,nbk)

t_s = .03   # step size for the prox grad algorithm (or grad descent when lambda=0)
iterCount = 0

ptm <- proc.time()

# Outer loop: one gradient step per pass. There is no iteration cap; the loop
# ends only through the break on tolDescent (or the stop() on maxiterIpfp).
while (1)
{
  thegrad = rep(0,nbk )
  pi_nit  = array(0,dim=c(nbi,nbi,nbt)) 
  
  for (t in 1:nbt)
  {
    # Regressors of year t as an (nbi*nbi) x nbk matrix, and the resulting
    # nbi x nbi matrix Phi of linear indices D %*% beta_k.
    D_ij_k = matrix(Dnikt[,,,t],ncol = nbk)
    Phi = matrix( D_ij_k %*% matrix( beta_k , ncol=1) , nrow = nbi)
    contIpfp = TRUE
    iterIpfp = 0
    v = v_it[,t]
    f = f_nit[,,t]
    g = g_nit[,,t]
    K = exp(Phi / sigma)
    diag(K) = 0   # own-country cells get zero weight
    gK = g*K
    fK = f * K
    
    
    # Inner loop: u is updated from row sums given v, then v from column sums
    # given the new u. `error` is the largest deviation from 1 of the row
    # sums of gK * exp((-v - u) / sigma) evaluated at the new v.
    while(contIpfp)
    {
      iterIpfp = iterIpfp+1
      u = sigma*log(apply(gK * exp( ( - IX %*% t(v) ) / sigma ),1,sum))
      vnext = sigma*log(apply(fK * exp( ( - u %*% tIY ) / sigma ),2,sum))
      error = max(abs(apply(gK * exp( ( - IX %*% t(vnext) - u %*% tIY ) / sigma ),1,sum)-1))
      if( (error<tolIpfp) | (iterIpfp >= maxiterIpfp)) {contIpfp=FALSE}
      v=vnext
    }
    v_it[,t] = v
    # Predicted flows for year t at the current beta_k.
    pi_nit[,,t] = f * gK * exp( ( - IX %*% t(v) - u %*% tIY ) / sigma ) 
    # Note: this also fires when the tolerance is met exactly on the last
    # allowed iteration.
    if (iterIpfp >= maxiterIpfp ) {stop('maximum number of iterations reached')} 
    
    
    # Add year t's contribution: (predicted - observed) flows times regressors.
    thegrad = thegrad + c( c(pi_nit[,,t] - pihat_nit[,,t]) %*% D_ij_k)  
    
  }
  # take one gradient step
  beta_k = beta_k - t_s*thegrad 
  
  ###########################################################################
  ###########################################################################
  # if (lambda > 0)
  # {
  #   # compute the proximal operator
  #   beta_k = pmax(beta_k - lambda/nbt*t_s, 0.0) - pmax(-beta_k - lambda/nbt*t_s, 0.0)  
  #   # eqn (6.9) of the proximal methods paper
  # } # if lambda = 0 then we are just taking one step of gradient descent
  # 
  # theval is used only as the stopping statistic. It combines the gradient
  # computed before the step with the already updated beta_k, minus sigma
  # times sum(pi * log(pi)) over the strictly positive cells.
  if (lambda > 0)
  {
    theval = sum(thegrad * beta_k) - sigma * sum(pi_nit[pi_nit>0]*log(pi_nit[pi_nit>0])) + lambda/nbt * sum(abs(beta_k))
  } 
  else
  {
    theval = sum(thegrad * beta_k) - sigma * sum(pi_nit[pi_nit>0]*log(pi_nit[pi_nit>0]))
  }
  
  iterCount = iterCount + 1
  #  print(min(pi_nit))
#   print(theval)
 #print(c(sum(thegrad * beta_k),sigma * sum(pi_nit[pi_nit>0]*log(pi_nit[pi_nit>0]) )))
  
  # Stop once theval changes by less than tolDescent between two passes;
  # theval_old does not exist on the first pass, hence the iterCount guard.
  if (iterCount>1 && abs(theval - theval_old) < tolDescent) { break }
  
  theval_old = theval
  
}

# Undo the division by sdD_k applied to the regressors, so the coefficients
# refer to the regressors in their original units.
beta_k = beta_k / sdD_k

proc.time() - ptm
print(beta_k)

   user  system elapsed 
 43.084   3.787  47.897 

[1] -0.8409237  0.4374486  0.2474767 -0.2224904


# Make the comparison: PPML with exporter and importer fixed effects on the 2006 cross-section

In [27]:
# Distinct values of the year column.
unique(tradedata[["year"]])

In [28]:
# Quasi-Poisson fit on the 2006 rows only (exporter different from
# importer). The regressors are every sixth exporter and importer
# fixed-effect column, numbered 6, 12, ..., 408, followed by the four
# bilateral variables.
# NOTE(review): presumably each sixth column is the 2006 dummy of one
# country -- confirm against the column layout of the data.
m4 <- glm(
  as.formula(paste0(
    "trade ~ ",
    paste0("EXPORTER_TIME_FE", seq(6, 408, 6), collapse = " + "),
    " + ",
    paste0("IMPORTER_TIME_FE", seq(6, 408, 6), collapse = " + "),
    " + ln_DIST + CNTG + LANG + CLNY"
  )),
  family = quasipoisson,
  data = tradedata[
    which(tradedata$exporter != tradedata$importer & tradedata$year == 2006),
  ]
)

In [29]:
# Model summary of the 2006 quasi-Poisson fit.
summary(object = m4)


Call:
glm(formula = as.formula(paste("trade ~ ", paste(paste("EXPORTER_TIME_FE", 
    seq(6, 408, 6), sep = ""), collapse = " + "), " + ", paste(paste("IMPORTER_TIME_FE", 
    seq(6, 408, 6), sep = ""), collapse = " + "), " + ln_DIST + CNTG + LANG + CLNY")), 
    family = quasipoisson, data = subset(tradedata, (exporter != 
        importer) & (year == 2006)))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-125.408    -7.652    -2.583     0.369   161.255  

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         14.16819    0.18454  76.777  < 2e-16 ***
EXPORTER_TIME_FE6   -0.46737    0.15896  -2.940 0.003297 ** 
EXPORTER_TIME_FE12   0.21811    0.13892   1.570 0.116487    
EXPORTER_TIME_FE18  -0.42417    0.12145  -3.493 0.000483 ***
EXPORTER_TIME_FE24   0.27840    0.11047   2.520 0.011766 *  
EXPORTER_TIME_FE30  -2.23345    0.22323 -10.005  < 2e-16 ***
EXPORTER_TIME_FE36  -4.11371    0.71873  -5.724 1.11e-08 ***
EXPORTER_

In [6]:
# Same 2006 quasi-Poisson fit, with the fixed-effect columns extended to
# 6, 12, ..., 414 (one more exporter and one more importer column than the
# 408 variant). Rows with exporter equal to importer are excluded.
m4 <- glm(
  as.formula(paste0(
    "trade ~ ",
    paste0("EXPORTER_TIME_FE", seq(6, 414, 6), collapse = " + "),
    " + ",
    paste0("IMPORTER_TIME_FE", seq(6, 414, 6), collapse = " + "),
    " + ln_DIST + CNTG + LANG + CLNY"
  )),
  family = quasipoisson,
  data = tradedata[
    which(tradedata$exporter != tradedata$importer & tradedata$year == 2006),
  ]
)

In [7]:
# Model summary of the 2006 quasi-Poisson fit with the extended column list.
summary(object = m4)


Call:
glm(formula = as.formula(paste("trade ~ ", paste(paste("EXPORTER_TIME_FE", 
    seq(6, 414, 6), sep = ""), collapse = " + "), " + ", paste(paste("IMPORTER_TIME_FE", 
    seq(6, 414, 6), sep = ""), collapse = " + "), " + ln_DIST + CNTG + LANG + CLNY")), 
    family = quasipoisson, data = subset(tradedata, (exporter != 
        importer) & (year == 2006)))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-125.408    -7.652    -2.583     0.369   161.255  

Coefficients: (2 not defined because of singularities)
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         14.16819    0.18454  76.777  < 2e-16 ***
EXPORTER_TIME_FE6   -0.46737    0.15896  -2.940 0.003297 ** 
EXPORTER_TIME_FE12   0.21811    0.13892   1.570 0.116487    
EXPORTER_TIME_FE18  -0.42417    0.12145  -3.493 0.000483 ***
EXPORTER_TIME_FE24   0.27840    0.11047   2.520 0.011766 *  
EXPORTER_TIME_FE30  -2.23345    0.22323 -10.005  < 2e-16 ***
EXPORTER_TIME_FE36  -4.11371 