## Basic Specification:
$$ y_{ijt} = \alpha + \beta_1 GDP_i + \beta_2 GDP_j + \beta_3 Dist_{ij} + \theta_t \epsilon_{ijt} $$

## Modified Specification:
$$ y_{ijt} = \alpha + \beta_1 GDP_i + \beta_2 GDP_j + \beta_3 Dist_{ij} + \beta_4 PolityDist_{ijt} + \theta_t + \epsilon_{ijt} $$

In [1]:
import numpy as np
import pandas as pd
from linearmodels import PanelOLS

In [2]:
data = pd.read_csv("trade_polity.csv")
data.head(10)

Unnamed: 0,tradelink,year,iso_o,iso_d,gdp_o,gdp_d,flow,distw,polity_o,polity_d,polity_dist,log_gdp_o,log_gdp_d,log_flow,log_distw,log_polity_dist
0,AUS-ARG,1960,AUS,ARG,18790.14,11693.45,6.38,12044.573996,10,-1,11,9.841087,9.366784,1.04843,9.396453,2.484907
1,AUT-ARG,1960,AUT,ARG,6592.694,11693.45,8.58,11751.146582,10,-1,11,8.793717,9.366784,1.147223,9.371791,2.484907
2,BEL-ARG,1960,BEL,ARG,11658.723,11693.45,31.1,11305.28588,10,-1,11,9.36381,9.366784,1.490025,9.333114,2.484907
3,BOL-ARG,1960,BOL,ARG,668.973,11693.45,4.6,1865.839405,-3,-1,2,6.505743,9.366784,0.926659,7.532002,1.098612
4,BRA-ARG,1960,BRA,ARG,15165.57,11693.45,62.15,2391.846403,6,-1,7,9.626783,9.366784,1.635018,7.780239,2.079442
5,CAN-ARG,1960,CAN,ARG,40525.668,11693.45,22.0,9391.460934,10,-1,11,10.609691,9.366784,1.4088,9.147663,2.484907
6,CHE-ARG,1960,CHE,ARG,9281.987,11693.45,23.87,11232.932834,10,-1,11,9.135831,9.366784,1.428545,9.326694,2.484907
7,CHL-ARG,1960,CHL,ARG,4211.762,11693.45,19.69,1156.725891,5,-1,6,8.345636,9.366784,1.38131,7.054213,1.94591
8,CUB-ARG,1960,CUB,ARG,2678.0,11693.45,0.4,6477.504793,-8,-1,7,7.892826,9.366784,-2.480406,8.776245,2.079442
9,DNK-ARG,1960,DNK,ARG,6248.1333,11693.45,4.73,12006.052565,10,-1,11,8.740038,9.366784,0.937631,9.393249,2.484907


In [3]:
data = data.set_index(['tradelink', 'year'])

In [4]:
model_basic = PanelOLS(data.log_flow, data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3698
Estimator:,PanelOLS,R-squared (Between):,-1.6784
No. Observations:,301499,R-squared (Within):,0.1838
Date:,"Wed, Feb 05 2020",R-squared (Overall):,-0.3593
Time:,13:25:34,Log-likelihood,-3.434e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,5.897e+04
Entities:,14323,P-value,0.0000
Avg Obs:,21.050,Distribution:,"F(3,301449)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2492,0.0023,110.16,0.0000,0.2448,0.2537
log_gdp_d,0.2237,0.0022,101.21,0.0000,0.2194,0.2280
log_distw,-0.2868,0.0052,-55.654,0.0000,-0.2969,-0.2767


In [5]:
model_modified = PanelOLS(data.log_flow, data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3699
Estimator:,PanelOLS,R-squared (Between):,-1.6522
No. Observations:,301499,R-squared (Within):,0.1836
Date:,"Wed, Feb 05 2020",R-squared (Overall):,-0.3441
Time:,13:25:35,Log-likelihood,-3.434e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,4.424e+04
Entities:,14323,P-value,0.0000
Avg Obs:,21.050,Distribution:,"F(4,301448)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2489,0.0023,109.65,0.0000,0.2444,0.2533
log_gdp_d,0.2233,0.0022,100.39,0.0000,0.2190,0.2277
log_distw,-0.2857,0.0052,-54.984,0.0000,-0.2959,-0.2755
log_polity_dist,-0.0070,0.0035,-1.9630,0.0496,-0.0139,-1.086e-05


In [6]:
post_89_data = data.query("year >= 1989")

In [7]:
model_basic = PanelOLS(post_89_data.log_flow, post_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3877
Estimator:,PanelOLS,R-squared (Between):,-0.7922
No. Observations:,164061,R-squared (Within):,0.0537
Date:,"Wed, Feb 05 2020",R-squared (Overall):,-0.0874
Time:,13:25:36,Log-likelihood,-1.858e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,3.462e+04
Entities:,14323,P-value,0.0000
Avg Obs:,11.454,Distribution:,"F(3,164040)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2484,0.0024,104.22,0.0000,0.2437,0.2530
log_gdp_d,0.2157,0.0023,92.984,0.0000,0.2112,0.2203
log_distw,-0.3068,0.0056,-55.131,0.0000,-0.3177,-0.2959


In [8]:
model_modified = PanelOLS(post_89_data.log_flow, post_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3880
Estimator:,PanelOLS,R-squared (Between):,-0.7215
No. Observations:,164061,R-squared (Within):,0.0533
Date:,"Wed, Feb 05 2020",R-squared (Overall):,-0.0398
Time:,13:25:36,Log-likelihood,-1.858e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,2.6e+04
Entities:,14323,P-value,0.0000
Avg Obs:,11.454,Distribution:,"F(4,164039)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2471,0.0024,102.53,0.0000,0.2424,0.2519
log_gdp_d,0.2144,0.0024,91.204,0.0000,0.2098,0.2190
log_distw,-0.3043,0.0056,-54.476,0.0000,-0.3152,-0.2933
log_polity_dist,-0.0188,0.0042,-4.4814,0.0000,-0.0271,-0.0106


In [9]:
# World post-1989 exclude China
post_89_noChina_data = data.query("year >= 1989")
post_89_noChina_data = post_89_noChina_data[post_89_noChina_data["iso_o"] != ("CHN" or "USA")]
post_89_noChina_data = post_89_noChina_data[post_89_noChina_data["iso_d"] != ("CHN" or "USA")]

In [10]:
model_basic = PanelOLS(post_89_noChina_data.log_flow, post_89_noChina_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3839
Estimator:,PanelOLS,R-squared (Between):,-0.7553
No. Observations:,159498,R-squared (Within):,0.0499
Date:,"Wed, Feb 05 2020",R-squared (Overall):,-0.0575
Time:,13:25:37,Log-likelihood,-1.817e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,3.312e+04
Entities:,14323,P-value,0.0000
Avg Obs:,11.136,Distribution:,"F(3,159477)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2471,0.0024,101.53,0.0000,0.2423,0.2518
log_gdp_d,0.2163,0.0024,91.087,0.0000,0.2117,0.2210
log_distw,-0.3105,0.0056,-55.299,0.0000,-0.3215,-0.2995


In [11]:
model_modified = PanelOLS(post_89_noChina_data.log_flow, post_89_noChina_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3842
Estimator:,PanelOLS,R-squared (Between):,-0.6725
No. Observations:,159498,R-squared (Within):,0.0495
Date:,"Wed, Feb 05 2020",R-squared (Overall):,-0.0021
Time:,13:25:38,Log-likelihood,-1.816e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,2.488e+04
Entities:,14323,P-value,0.0000
Avg Obs:,11.136,Distribution:,"F(4,159476)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2456,0.0025,99.428,0.0000,0.2407,0.2504
log_gdp_d,0.2148,0.0024,88.793,0.0000,0.2100,0.2195
log_distw,-0.3079,0.0056,-54.688,0.0000,-0.3190,-0.2969
log_polity_dist,-0.0203,0.0043,-4.6806,0.0000,-0.0288,-0.0118


In [12]:
# Dataset of former origin as former Soviet countries from 1960-2006
FORMER_SOVIET_COUNTRIES = {
    "Estonia": "EST",
    "Latvia": "LVA",
    "Lithuania": "LTU",
    "Kazakhstan": "KAZ",
    "Kyrgyzstan": "KGZ",
    "Tajikistan": "TJK",
    "Turkmenistan": "TKM",
    "Uzbekistan": "UZB",
    "Belarus": "BLR",
    "Moldova": "MDA",
    "Ukraine": "UKR",
    "Russia": "RUS",
    "Armenia": "ARM",
    "Azerbaijan": "AZE",
    "Georgia": "GEO"
}
post_soviet_data = data
post_soviet_data = post_soviet_data[post_soviet_data['iso_o'].isin(list(FORMER_SOVIET_COUNTRIES.values()))]
post_soviet_data.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_o,iso_d,gdp_o,gdp_d,flow,distw,polity_o,polity_d,polity_dist,log_gdp_o,log_gdp_d,log_flow,log_distw,log_polity_dist
tradelink,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
BLR-ARG,1992,BLR,ARG,17022.18,228779.38,0.7964,12833.472873,7,7,0,9.742272,12.340513,-0.258322,9.45989,0.0
RUS-ARG,1992,RUS,ARG,460205.44,228779.38,51.48,14195.326754,5,7,2,13.039429,12.340513,1.597607,9.560739,1.098612
AZE-AUS,1992,AZE,AUS,4991.35,321708.28,3.29351,12862.659951,1,10,9,8.515462,12.6814,0.784793,9.462162,2.302585
BLR-AUS,1992,BLR,AUS,17022.18,321708.28,0.8844,14713.924563,7,10,3,9.742272,12.6814,-0.131073,9.596618,1.386294
RUS-AUS,1992,RUS,AUS,460205.44,321708.28,8.69,13424.896701,5,10,5,13.039429,12.6814,1.151259,9.504941,1.791759
AZE-AUT,1992,AZE,AUT,4991.35,194684.08,36.98838,2790.650554,1,10,9,8.515462,12.179133,1.528359,7.934388,2.302585
BLR-AUT,1992,BLR,AUT,17022.18,194684.08,121.6006,1101.093279,7,10,3,9.742272,12.179133,1.757986,7.004967,1.386294
EST-AUT,1992,EST,AUT,3986.6152,194684.08,2.482315,1421.983529,6,10,4,8.290698,12.179133,0.64668,7.260511,1.609438
GEO-AUT,1992,GEO,AUT,3691.1106,194684.08,1.502237,2333.705089,4,10,6,8.213683,12.179133,0.341428,7.755641,1.94591
KAZ-AUT,1992,KAZ,AUT,24906.94,194684.08,4.187183,4113.831491,-3,10,13,10.122902,12.179133,0.888726,8.322353,2.639057


In [13]:
model_basic = PanelOLS(post_soviet_data.log_flow, post_soviet_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3259
Estimator:,PanelOLS,R-squared (Between):,0.5165
No. Observations:,12821,R-squared (Within):,0.0534
Date:,"Wed, Feb 05 2020",R-squared (Overall):,0.5925
Time:,13:25:38,Log-likelihood,-1.509e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,2063.6
Entities:,13798,P-value,0.0000
Avg Obs:,0.9292,Distribution:,"F(3,12803)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2645,0.0089,29.735,0.0000,0.2470,0.2819
log_gdp_d,0.1735,0.0069,25.307,0.0000,0.1601,0.1870
log_distw,-0.4773,0.0158,-30.274,0.0000,-0.5082,-0.4464


In [14]:
model_modified = PanelOLS(post_soviet_data.log_flow, post_soviet_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3263
Estimator:,PanelOLS,R-squared (Between):,0.5224
No. Observations:,12821,R-squared (Within):,0.0530
Date:,"Wed, Feb 05 2020",R-squared (Overall):,0.6014
Time:,13:25:38,Log-likelihood,-1.508e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,1549.9
Entities:,13798,P-value,0.0000
Avg Obs:,0.9292,Distribution:,"F(4,12802)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2657,0.0089,29.862,0.0000,0.2482,0.2831
log_gdp_d,0.1738,0.0069,25.364,0.0000,0.1604,0.1873
log_distw,-0.4810,0.0162,-29.605,0.0000,-0.5129,-0.4492
log_polity_dist,0.0199,0.0152,1.3113,0.1898,-0.0098,0.0496


In [15]:
post_soviet_89_data = post_soviet_data.query("year >= 1989")
post_soviet_89_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_o,iso_d,gdp_o,gdp_d,flow,distw,polity_o,polity_d,polity_dist,log_gdp_o,log_gdp_d,log_flow,log_distw,log_polity_dist
tradelink,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
BLR-ARG,1992,BLR,ARG,17022.18,228779.38,0.7964,12833.472873,7,7,0,9.742272,12.340513,-0.258322,9.45989,0.0
RUS-ARG,1992,RUS,ARG,460205.44,228779.38,51.48,14195.326754,5,7,2,13.039429,12.340513,1.597607,9.560739,1.098612
AZE-AUS,1992,AZE,AUS,4991.35,321708.28,3.29351,12862.659951,1,10,9,8.515462,12.6814,0.784793,9.462162,2.302585
BLR-AUS,1992,BLR,AUS,17022.18,321708.28,0.8844,14713.924563,7,10,3,9.742272,12.6814,-0.131073,9.596618,1.386294
RUS-AUS,1992,RUS,AUS,460205.44,321708.28,8.69,13424.896701,5,10,5,13.039429,12.6814,1.151259,9.504941,1.791759


In [16]:
model_basic = PanelOLS(post_soviet_89_data.log_flow, post_soviet_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3259
Estimator:,PanelOLS,R-squared (Between):,0.5165
No. Observations:,12821,R-squared (Within):,0.0534
Date:,"Wed, Feb 05 2020",R-squared (Overall):,0.5925
Time:,13:25:38,Log-likelihood,-1.509e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,2063.6
Entities:,13798,P-value,0.0000
Avg Obs:,0.9292,Distribution:,"F(3,12803)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2645,0.0089,29.735,0.0000,0.2470,0.2819
log_gdp_d,0.1735,0.0069,25.307,0.0000,0.1601,0.1870
log_distw,-0.4773,0.0158,-30.274,0.0000,-0.5082,-0.4464


In [17]:
model_modified = PanelOLS(post_soviet_89_data.log_flow, post_soviet_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3263
Estimator:,PanelOLS,R-squared (Between):,0.5224
No. Observations:,12821,R-squared (Within):,0.0530
Date:,"Wed, Feb 05 2020",R-squared (Overall):,0.6014
Time:,13:25:39,Log-likelihood,-1.508e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,1549.9
Entities:,13798,P-value,0.0000
Avg Obs:,0.9292,Distribution:,"F(4,12802)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2657,0.0089,29.862,0.0000,0.2482,0.2831
log_gdp_d,0.1738,0.0069,25.364,0.0000,0.1604,0.1873
log_distw,-0.4810,0.0162,-29.605,0.0000,-0.5129,-0.4492
log_polity_dist,0.0199,0.0152,1.3113,0.1898,-0.0098,0.0496


In [18]:
post_soviet_89_noChina_data = post_soviet_89_data[post_soviet_89_data["iso_d"] != "CHN"]

In [19]:
model_modified = PanelOLS(post_soviet_89_noChina_data.log_flow, post_soviet_89_noChina_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3254
Estimator:,PanelOLS,R-squared (Between):,0.5113
No. Observations:,12608,R-squared (Within):,0.0521
Date:,"Wed, Feb 05 2020",R-squared (Overall):,0.5893
Time:,13:25:39,Log-likelihood,-1.486e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,1518.0
Entities:,13798,P-value,0.0000
Avg Obs:,0.9138,Distribution:,"F(4,12589)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2653,0.0090,29.496,0.0000,0.2476,0.2829
log_gdp_d,0.1720,0.0069,24.813,0.0000,0.1584,0.1856
log_distw,-0.4817,0.0163,-29.524,0.0000,-0.5137,-0.4497
log_polity_dist,0.0217,0.0154,1.4103,0.1585,-0.0085,0.0518
