# Gravity Model with PolityV Scores
---

### Basic Specification:
$$ y_{ijt} = \alpha + \beta_1 GDP_i + \beta_2 GDP_j + \beta_3 Dist_{ij} + \theta_t + \epsilon_{ijt} $$

### Modified Specification:
$$ y_{ijt} = \alpha + \beta_1 GDP_i + \beta_2 GDP_j + \beta_3 Dist_{ij} + \beta_4 PolityDist_{ijt} + \theta_t + \epsilon_{ijt} $$

In [1]:
import numpy as np
import math
import pandas as pd
from linearmodels import PanelOLS

## 1. Prepare data
---

In [2]:
data = pd.read_csv("../data/trade-polity.csv", index_col=0)
data

Unnamed: 0,year,iso_o,iso_d,contig,comlang_off,distw,pop_o,gdp_o,gdpcap_o,iso2_o,...,gsp_rec,flow,validmirror,family,country_o,country_d,tradelink,polity_o,polity_d,polity_dist
0,1990,ARG,AGO,0,0,7977.915823,32.580860,141352.3800,4338.51000,AR,...,0,24.747800,1,,Argentina,Angola,ARG-AGO,7.0,-7.0,14.0
1,1990,AUS,AGO,0,0,12954.905818,17.065100,319264.7000,18708.63300,AU,...,1,1.296240,1,,Australia,Angola,AUS-AGO,10.0,-7.0,17.0
2,1990,AUT,AGO,0,0,6432.058253,7.710882,164984.0300,21396.26000,AT,...,1,2.999282,1,,Austria,Angola,AUT-AGO,10.0,-7.0,17.0
3,1990,BDI,AGO,0,0,1905.419703,5.670250,1132.1012,199.65631,BI,...,0,0.000000,1,,Burundi,Angola,BDI-AGO,-6.0,-7.0,1.0
4,1990,BEL,AGO,0,0,6817.662686,9.967400,202690.9700,20335.39000,BE,...,1,76.786900,1,,Belgium,Angola,BEL-AGO,10.0,-7.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334437,2002,TWN,LUX,0,0,9609.759792,22.400000,294803.0000,13160.84900,TW,...,0,637.017600,0,,Taiwan,Luxembourg,TWN-LUX,9.0,10.0,1.0
334438,2003,TWN,LUX,0,0,9609.759792,22.490000,299785.0000,13329.70200,TW,...,0,775.699000,0,,Taiwan,Luxembourg,TWN-LUX,9.0,10.0,1.0
334439,2004,TWN,LUX,0,0,9609.759792,22.580000,322179.0000,14268.33500,TW,...,0,485.395600,0,,Taiwan,Luxembourg,TWN-LUX,10.0,10.0,0.0
334440,2005,TWN,LUX,0,0,9609.759792,22.650000,345862.0000,15269.84600,TW,...,0,248.613600,0,,Taiwan,Luxembourg,TWN-LUX,10.0,10.0,0.0


In [3]:
data.columns

Index(['year', 'iso_o', 'iso_d', 'contig', 'comlang_off', 'distw', 'pop_o',
       'gdp_o', 'gdpcap_o', 'iso2_o', 'pop_d', 'gdp_d', 'gdpcap_d', 'iso2_d',
       'heg_d', 'conflict', 'indepdate', 'heg_o', 'col_to', 'col_fr',
       'col_hist', 'col_cur', 'sever', 'sib_conflict', 'gatt_o', 'gatt_d',
       'rta', 'comleg', 'comcur', 'acp_to_eu', 'gsp', 'eu_to_acp', 'gsp_rec',
       'flow', 'validmirror', 'family', 'country_o', 'country_d', 'tradelink',
       'polity_o', 'polity_d', 'polity_dist'],
      dtype='object')

In [4]:
# Drop NaN in polity_dist column
data = data.dropna(subset=['polity_dist'])
data.shape

(325396, 42)

In [5]:
data.loc[:,'log_gdp_o'] = np.log(data.gdp_o)
data.loc[:,'log_gdp_d'] = np.log(data.gdp_d)
data.loc[:,'log_flow'] = np.log(data.flow)
data.loc[:,'log_flow'] = data.loc[:,'log_flow'].apply(lambda x: np.log(1+x))
data.loc[:,'log_distw'] = data.loc[:,'distw'].apply(lambda x: np.log(1+x))
data.loc[:,'log_polity_dist'] = data.loc[:,'polity_dist'].apply(lambda x: np.log(int(1+x)))
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,year,iso_o,iso_d,contig,comlang_off,distw,pop_o,gdp_o,gdpcap_o,iso2_o,...,country_d,tradelink,polity_o,polity_d,polity_dist,log_gdp_o,log_gdp_d,log_flow,log_distw,log_polity_dist
0,1990,ARG,AGO,0,0,7977.915823,32.580860,141352.3800,4338.51000,AR,...,Angola,ARG-AGO,7.0,-7.0,14.0,11.859011,9.236027,1.437163,8.984558,2.708050
1,1990,AUS,AGO,0,0,12954.905818,17.065100,319264.7000,18708.63300,AU,...,Angola,AUS-AGO,10.0,-7.0,17.0,12.673776,9.236027,0.230689,9.469307,2.890372
2,1990,AUT,AGO,0,0,6432.058253,7.710882,164984.0300,21396.26000,AT,...,Angola,AUT-AGO,10.0,-7.0,17.0,12.013604,9.236027,0.741162,8.769205,2.890372
3,1990,BDI,AGO,0,0,1905.419703,5.670250,1132.1012,199.65631,BI,...,Angola,BDI-AGO,-6.0,-7.0,1.0,7.031831,9.236027,,7.552982,0.693147
4,1990,BEL,AGO,0,0,6817.662686,9.967400,202690.9700,20335.39000,BE,...,Angola,BEL-AGO,10.0,-7.0,17.0,12.219438,9.236027,1.675419,8.827419,2.890372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334437,2002,TWN,LUX,0,0,9609.759792,22.400000,294803.0000,13160.84900,TW,...,Luxembourg,TWN-LUX,9.0,10.0,1.0,12.594063,10.026265,2.009126,9.170639,0.693147
334438,2003,TWN,LUX,0,0,9609.759792,22.490000,299785.0000,13329.70200,TW,...,Luxembourg,TWN-LUX,9.0,10.0,1.0,12.610821,10.274654,2.035198,9.170639,0.693147
334439,2004,TWN,LUX,0,0,9609.759792,22.580000,322179.0000,14268.33500,TW,...,Luxembourg,TWN-LUX,10.0,10.0,0.0,12.682863,10.422069,1.971991,9.170639,0.000000
334440,2005,TWN,LUX,0,0,9609.759792,22.650000,345862.0000,15269.84600,TW,...,Luxembourg,TWN-LUX,10.0,10.0,0.0,12.753795,10.504210,1.874245,9.170639,0.000000


In [6]:
# Set entity-time multi index

data = data.set_index(['tradelink', 'year'])

In [7]:
# Drop rows with NaN in ['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_flow']

data = data.dropna(subset=['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_flow'])

In [8]:
# Remaining countries after dropping NaN
print('Number of countries remaining', len(data['country_o'].unique()))
data['country_o'].unique()

Number of countries remaining 156


array(['Argentina', 'Australia', 'Austria', 'Belgium', 'Brazil', 'Canada',
       'Switzerland', 'China', 'Germany', 'Denmark', 'Spain', 'Finland',
       'France', 'United Kingdom', 'Greece', 'Hungary', 'Indonesia',
       'India', 'Ireland', 'Italy', 'Japan', 'Kenya', 'Korea South',
       'Mexico', 'Malaysia', 'Netherlands', 'Norway', 'Philippines',
       'Poland', 'Portugal', 'Sweden', 'Thailand', 'Tunisia', 'Turkey',
       'Taiwan', 'Uruguay', 'United States', 'Zambia', 'Zimbabwe',
       'Bangladesh', 'Bahrain', 'Chile', 'Cyprus', 'Egypt', 'Iran',
       'Iraq', 'Jordan', 'Libya', 'Sri Lanka', 'Morocco', 'Mozambique',
       'New Zealand', 'Pakistan', 'Romania', 'Saudi Arabia', 'Singapore',
       'Somalia', 'Tanzania', 'Yemen', 'Oman', 'Qatar', 'Syria',
       'Bulgaria', 'Cameroon', 'Colombia', 'Costa Rica', 'Cuba',
       'Algeria', 'Guatemala', 'Guyana', 'Israel', 'Jamaica', 'Mauritius',
       'Malawi', 'Peru', 'Papua New Guinea', 'El Salvador', 'Venezuela',
       'Vietna

In [9]:
# Remaining countries number of tradelinks after dropping NaN
data['country_o'].value_counts

<bound method IndexOpsMixin.value_counts of tradelink  year
ARG-AGO    1990    Argentina
AUS-AGO    1990    Australia
AUT-AGO    1990      Austria
BEL-AGO    1990      Belgium
BRA-AGO    1990       Brazil
                     ...    
TWN-LUX    2001       Taiwan
           2002       Taiwan
           2003       Taiwan
           2004       Taiwan
           2005       Taiwan
Name: country_o, Length: 171920, dtype: object>

## 2. Results
---

In [10]:
model_basic = PanelOLS(data.log_flow, data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

  if is_categorical(s):


0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3985
Estimator:,PanelOLS,R-squared (Between):,-0.9731
No. Observations:,171920,R-squared (Within):,0.0511
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.2312
Time:,00:54:12,Log-likelihood,-1.916e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,3.797e+04
Entities:,21360,P-value,0.0000
Avg Obs:,8.0487,Distribution:,"F(3,171900)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2520,0.0023,110.11,0.0000,0.2475,0.2565
log_gdp_d,0.2190,0.0022,98.940,0.0000,0.2146,0.2233
log_distw,-0.2993,0.0053,-56.755,0.0000,-0.3097,-0.2890


In [11]:
model_modified = PanelOLS(data.log_flow, data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3987
Estimator:,PanelOLS,R-squared (Between):,-0.9137
No. Observations:,171920,R-squared (Within):,0.0510
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.1903
Time:,00:54:13,Log-likelihood,-1.916e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,2.85e+04
Entities:,21360,P-value,0.0000
Avg Obs:,8.0487,Distribution:,"F(4,171899)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2510,0.0023,108.47,0.0000,0.2465,0.2556
log_gdp_d,0.2179,0.0022,97.139,0.0000,0.2135,0.2223
log_distw,-0.2974,0.0053,-56.080,0.0000,-0.3078,-0.2870
log_polity_dist,-0.0147,0.0041,-3.6169,0.0003,-0.0227,-0.0067


In [12]:
model_modified_control = PanelOLS(data.log_flow, data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist','col_hist']], time_effects=True)
result_modified_control = model_modified_control.fit(cov_type='clustered', cluster_entity=True)
result_modified_control.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.4013
Estimator:,PanelOLS,R-squared (Between):,-0.9070
No. Observations:,171920,R-squared (Within):,0.0511
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.1864
Time:,00:54:14,Log-likelihood,-1.913e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,2.304e+04
Entities:,21360,P-value,0.0000
Avg Obs:,8.0487,Distribution:,"F(5,171898)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2492,0.0023,108.22,0.0000,0.2447,0.2537
log_gdp_d,0.2160,0.0022,96.709,0.0000,0.2116,0.2204
log_distw,-0.2936,0.0053,-55.150,0.0000,-0.3040,-0.2831
log_polity_dist,-0.0157,0.0041,-3.8862,0.0001,-0.0237,-0.0078
col_hist,0.2908,0.0224,12.955,0.0000,0.2468,0.3348


In [13]:
post_89_data = data.query("year >= 1989")

In [14]:
model_basic = PanelOLS(post_89_data.log_flow, post_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

  if is_categorical(s):


0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3985
Estimator:,PanelOLS,R-squared (Between):,-0.9731
No. Observations:,171920,R-squared (Within):,0.0511
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.2312
Time:,00:54:15,Log-likelihood,-1.916e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,3.797e+04
Entities:,21360,P-value,0.0000
Avg Obs:,8.0487,Distribution:,"F(3,171900)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2520,0.0023,110.11,0.0000,0.2475,0.2565
log_gdp_d,0.2190,0.0022,98.940,0.0000,0.2146,0.2233
log_distw,-0.2993,0.0053,-56.755,0.0000,-0.3097,-0.2890


In [15]:
model_modified = PanelOLS(post_89_data.log_flow, post_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3987
Estimator:,PanelOLS,R-squared (Between):,-0.9137
No. Observations:,171920,R-squared (Within):,0.0510
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.1903
Time:,00:54:16,Log-likelihood,-1.916e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,2.85e+04
Entities:,21360,P-value,0.0000
Avg Obs:,8.0487,Distribution:,"F(4,171899)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2510,0.0023,108.47,0.0000,0.2465,0.2556
log_gdp_d,0.2179,0.0022,97.139,0.0000,0.2135,0.2223
log_distw,-0.2974,0.0053,-56.080,0.0000,-0.3078,-0.2870
log_polity_dist,-0.0147,0.0041,-3.6169,0.0003,-0.0227,-0.0067


In [16]:
# World post-1989 exclude China
post_89_noChina_data = data.query("year >= 1989")
post_89_noChina_data = post_89_noChina_data[post_89_noChina_data["iso_o"] != ("CHN" or "USA")]
post_89_noChina_data = post_89_noChina_data[post_89_noChina_data["iso_d"] != ("CHN" or "USA")]

In [17]:
model_basic = PanelOLS(post_89_noChina_data.log_flow, post_89_noChina_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

  if is_categorical(s):


0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3945
Estimator:,PanelOLS,R-squared (Between):,-0.9508
No. Observations:,167434,R-squared (Within):,0.0479
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.2102
Time:,00:54:18,Log-likelihood,-1.877e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,3.637e+04
Entities:,21360,P-value,0.0000
Avg Obs:,7.8387,Distribution:,"F(3,167414)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2511,0.0023,107.36,0.0000,0.2465,0.2557
log_gdp_d,0.2199,0.0023,97.079,0.0000,0.2154,0.2243
log_distw,-0.3028,0.0053,-56.865,0.0000,-0.3132,-0.2924


In [18]:
model_modified = PanelOLS(post_89_noChina_data.log_flow, post_89_noChina_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3948
Estimator:,PanelOLS,R-squared (Between):,-0.8839
No. Observations:,167434,R-squared (Within):,0.0478
Date:,"Thu, Nov 26 2020",R-squared (Overall):,-0.1643
Time:,00:54:19,Log-likelihood,-1.877e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,2.73e+04
Entities:,21360,P-value,0.0000
Avg Obs:,7.8387,Distribution:,"F(4,167413)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2499,0.0024,105.36,0.0000,0.2453,0.2546
log_gdp_d,0.2187,0.0023,94.818,0.0000,0.2142,0.2232
log_distw,-0.3009,0.0053,-56.249,0.0000,-0.3114,-0.2904
log_polity_dist,-0.0152,0.0042,-3.6204,0.0003,-0.0234,-0.0070


In [19]:
# Dataset of former Soviet countries as the origin from 1960-2006
FORMER_SOVIET_COUNTRIES = {
    "Estonia": "EST",
    "Latvia": "LVA",
    "Lithuania": "LTU",
    "Kazakhstan": "KAZ",
    "Kyrgyzstan": "KGZ",
    "Tajikistan": "TJK",
    "Turkmenistan": "TKM",
    "Uzbekistan": "UZB",
    "Belarus": "BLR",
    "Moldova": "MDA",
    "Ukraine": "UKR",
    "Russia": "RUS",
    "Armenia": "ARM",
    "Azerbaijan": "AZE",
    "Georgia": "GEO"
}
post_soviet_data = data
post_soviet_data = post_soviet_data[post_soviet_data['iso_o'].isin(list(FORMER_SOVIET_COUNTRIES.values()))]
post_soviet_data.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_o,iso_d,contig,comlang_off,distw,pop_o,gdp_o,gdpcap_o,iso2_o,pop_d,...,country_o,country_d,polity_o,polity_d,polity_dist,log_gdp_o,log_gdp_d,log_flow,log_distw,log_polity_dist
tradelink,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
RUS-AGO,1992,RUS,AGO,0,0,8103.861455,148.689,460205.44,3095.0874,RU,11.203767,...,Russia,Angola,5.0,0.0,5.0,13.039428,8.662054,1.470073,9.000219,1.791759
RUS-ARE,1992,RUS,ARE,0,0,3721.569046,148.689,460205.44,3095.0874,RU,1.983,...,Russia,UAE,5.0,-8.0,13.0,13.039428,10.474826,1.761459,8.222169,2.639057
AZE-AUS,1992,AZE,AUS,0,0,12862.659951,7.382,4991.35,676.1515,AZ,17.495,...,Azerbaijan,Australia,1.0,10.0,9.0,8.515462,12.6814,0.784793,9.462162,2.302585
RUS-AUS,1992,RUS,AUS,0,0,13424.896701,148.689,460205.44,3095.0874,RU,17.495,...,Russia,Australia,5.0,10.0,5.0,13.039428,12.6814,1.151259,9.504941,1.791759
BLR-AUS,1992,BLR,AUS,0,0,14713.924563,10.216,17022.18,1666.2275,BY,17.495,...,Belarus,Australia,6.0,10.0,4.0,9.742272,12.6814,-0.131073,9.596618,1.609438
AZE-AUT,1992,AZE,AUT,0,0,2790.650554,7.382,4991.35,676.1515,AZ,7.782519,...,Azerbaijan,Austria,1.0,10.0,9.0,8.515462,12.179133,1.528359,7.934388,2.302585
EST-AUT,1992,EST,AUT,0,0,1421.983529,1.533,3986.6152,2600.5317,EE,7.782519,...,Estonia,Austria,6.0,10.0,4.0,8.290698,12.179133,0.64668,7.260511,1.609438
MDA-AUT,1992,MDA,AUT,0,0,1026.710341,4.410836,2319.2434,525.80585,MD,7.782519,...,Moldova,Austria,5.0,10.0,5.0,7.748996,12.179133,0.09526,6.935089,1.791759
RUS-AUT,1992,RUS,AUT,0,0,2527.122805,148.689,460205.44,3095.0874,RU,7.782519,...,Russia,Austria,5.0,10.0,5.0,13.039428,12.179133,2.025908,7.835232,1.791759
BLR-AUT,1992,BLR,AUT,0,0,1101.093279,10.216,17022.18,1666.2275,BY,7.782519,...,Belarus,Austria,6.0,10.0,4.0,9.742272,12.179133,1.757986,7.004967,1.609438


In [20]:
model_basic = PanelOLS(post_soviet_data.log_flow, post_soviet_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

  if is_categorical(s):


0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3284
Estimator:,PanelOLS,R-squared (Between):,0.5470
No. Observations:,13576,R-squared (Within):,0.0548
Date:,"Thu, Nov 26 2020",R-squared (Overall):,0.6259
Time:,00:54:19,Log-likelihood,-1.581e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,2209.5
Entities:,20464,P-value,0.0000
Avg Obs:,0.6634,Distribution:,"F(3,13558)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2684,0.0085,31.441,0.0000,0.2517,0.2852
log_gdp_d,0.1711,0.0067,25.368,0.0000,0.1579,0.1844
log_distw,-0.4687,0.0154,-30.368,0.0000,-0.4989,-0.4384


In [21]:
model_modified = PanelOLS(post_soviet_data.log_flow, post_soviet_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3287
Estimator:,PanelOLS,R-squared (Between):,0.5508
No. Observations:,13576,R-squared (Within):,0.0542
Date:,"Thu, Nov 26 2020",R-squared (Overall):,0.6326
Time:,00:54:20,Log-likelihood,-1.58e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,1659.2
Entities:,20464,P-value,0.0000
Avg Obs:,0.6634,Distribution:,"F(4,13557)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2696,0.0085,31.550,0.0000,0.2529,0.2864
log_gdp_d,0.1713,0.0067,25.401,0.0000,0.1581,0.1846
log_distw,-0.4721,0.0158,-29.936,0.0000,-0.5030,-0.4412
log_polity_dist,0.0185,0.0146,1.2646,0.2060,-0.0102,0.0471


In [22]:
post_soviet_89_data = post_soviet_data.query("year >= 1989")
post_soviet_89_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_o,iso_d,contig,comlang_off,distw,pop_o,gdp_o,gdpcap_o,iso2_o,pop_d,...,country_o,country_d,polity_o,polity_d,polity_dist,log_gdp_o,log_gdp_d,log_flow,log_distw,log_polity_dist
tradelink,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
RUS-AGO,1992,RUS,AGO,0,0,8103.861455,148.689,460205.44,3095.0874,RU,11.203767,...,Russia,Angola,5.0,0.0,5.0,13.039428,8.662054,1.470073,9.000219,1.791759
RUS-ARE,1992,RUS,ARE,0,0,3721.569046,148.689,460205.44,3095.0874,RU,1.983,...,Russia,UAE,5.0,-8.0,13.0,13.039428,10.474826,1.761459,8.222169,2.639057
AZE-AUS,1992,AZE,AUS,0,0,12862.659951,7.382,4991.35,676.1515,AZ,17.495,...,Azerbaijan,Australia,1.0,10.0,9.0,8.515462,12.6814,0.784793,9.462162,2.302585
RUS-AUS,1992,RUS,AUS,0,0,13424.896701,148.689,460205.44,3095.0874,RU,17.495,...,Russia,Australia,5.0,10.0,5.0,13.039428,12.6814,1.151259,9.504941,1.791759
BLR-AUS,1992,BLR,AUS,0,0,14713.924563,10.216,17022.18,1666.2275,BY,17.495,...,Belarus,Australia,6.0,10.0,4.0,9.742272,12.6814,-0.131073,9.596618,1.609438


In [23]:
model_basic = PanelOLS(post_soviet_89_data.log_flow, post_soviet_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw']], time_effects=True)
result_basic = model_basic.fit(cov_type='clustered', cluster_entity=True)
result_basic.summary

  if is_categorical(s):


0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3284
Estimator:,PanelOLS,R-squared (Between):,0.5470
No. Observations:,13576,R-squared (Within):,0.0548
Date:,"Thu, Nov 26 2020",R-squared (Overall):,0.6259
Time:,00:54:20,Log-likelihood,-1.581e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,2209.5
Entities:,20464,P-value,0.0000
Avg Obs:,0.6634,Distribution:,"F(3,13558)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2684,0.0085,31.441,0.0000,0.2517,0.2852
log_gdp_d,0.1711,0.0067,25.368,0.0000,0.1579,0.1844
log_distw,-0.4687,0.0154,-30.368,0.0000,-0.4989,-0.4384


In [24]:
model_modified = PanelOLS(post_soviet_89_data.log_flow, post_soviet_89_data[['log_gdp_o', 'log_gdp_d', 'log_distw', 'log_polity_dist']], time_effects=True)
result_modified = model_modified.fit(cov_type='clustered', cluster_entity=True)
result_modified.summary

0,1,2,3
Dep. Variable:,log_flow,R-squared:,0.3287
Estimator:,PanelOLS,R-squared (Between):,0.5508
No. Observations:,13576,R-squared (Within):,0.0542
Date:,"Thu, Nov 26 2020",R-squared (Overall):,0.6326
Time:,00:54:20,Log-likelihood,-1.58e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,1659.2
Entities:,20464,P-value,0.0000
Avg Obs:,0.6634,Distribution:,"F(4,13557)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
log_gdp_o,0.2696,0.0085,31.550,0.0000,0.2529,0.2864
log_gdp_d,0.1713,0.0067,25.401,0.0000,0.1581,0.1846
log_distw,-0.4721,0.0158,-29.936,0.0000,-0.5030,-0.4412
log_polity_dist,0.0185,0.0146,1.2646,0.2060,-0.0102,0.0471
