## We require four subsets for the regression analysis
- All data
- Countries with above-average trade with the UK
- OECD countries
- EU countries

In [87]:
import pandas as pd

# Load the cleaned dataset; every subset below is derived from this frame.
df = pd.read_csv(filepath_or_buffer='cleaned.csv')

In [12]:
# Subset 1: all data (an independent deep copy of the full frame).
df1 = df.copy(deep=True)

In [13]:
# Subset 2: flows into the UK from its larger trading partners.
# Origin countries are ranked by their mean trade flow into the UK
# (descending) and the top half of that ranking is kept.
# NOTE(review): keeping the top half of the ranking is an above-median cut,
# not a strict above-mean cut -- confirm this is the intended meaning of
# "above average".
import math

to_uk = df['iso3_d'] == 'GBR'

# Select 'tradeflow' before aggregating so that only that column is averaged.
# Averaging the whole frame first fails on string columns such as 'iso3_d'
# in pandas >= 2.0, where numeric_only defaults to False.
tradeflows = df[to_uk].groupby('iso3_o')['tradeflow'].mean().sort_values(ascending=False)
above_average = list(tradeflows.index[:math.floor(len(tradeflows) / 2)])

df2 = df[to_uk & df['iso3_o'].isin(above_average)].copy()

In [14]:
#Creating subset 3, using a list of all OECD countries (with iso codes)
OECD = ['AUS', 'AUT', 'BEL', 'CAN','CHE', 'CHL', 'COL', 'CRI', 'CZE', 'DEU', 'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR', 'GRC', 'HUN', 'IRL', 'ISL', 'ISR', 'ITA', 'JPN', 'KOR', 'LTU', 'LUX', 'LVA', 'MEX', 'NLD', 'NOR', 'NZL', 'POL', 'PRT', 'SVK', 'SVN', 'SWE', 'TUR', 'USA']

df3 = df[df['iso3_o'].isin(OECD) & df['iso3_d'].isin(OECD)].copy()

In [15]:
#Creating subset 4
# Keep only the rows flagged both_eu == 1.
both_eu_mask = df['both_eu'].eq(1)
df4 = df.loc[both_eu_mask].copy()

## De-meaning for fixed effects regression
- In the fixed effects model, we include importer and exporter dummies for each time period
- Estimating these directly would require approximately 20,000 dummies, so we use double de-meaning instead
- We de-mean in the exporter (origin) dimension first, and then the importer (destination) dimension

In [110]:
# Columns loaded for the fixed-effects regression: the panel keys
# (year, origin, destination) together with the bilateral variables.
cols = ['year', 'iso3_o', 'iso3_d', 'distw', 'comlang_off', 'comlang_ethno', 'comcol', 'col45', 'col_dep_ever', 'sibling_ever',
       'rta', 'rta_coverage', 'rta_type', 'tradeflow', 'both_eu', 'one_eu']

# Every non-key column gets de-meaned. Filtering `cols` (rather than taking a
# set difference) keeps the column order stable from run to run.
key_cols = ('year', 'iso3_o', 'iso3_d')
regressors = [c for c in cols if c not in key_cols]

df_fe = pd.read_csv('cleaned_fe.csv')[cols]

# Exporter-year means. Select the regressors before aggregating: averaging the
# whole frame first would also try to average the string column 'iso3_d',
# which raises a TypeError in pandas >= 2.0.
o_means = df_fe.groupby(['iso3_o', 'year'])[regressors].mean()
o_means = o_means.reset_index()
# Left-merge back onto the row keys so o_means has one row per row of df_fe,
# in the same order, ready for element-wise subtraction.
o_means = pd.merge(df_fe[['year', 'iso3_o']], o_means, how='left', on=['year', 'iso3_o'])

In [111]:
# First de-meaning pass: subtract the exporter-year mean from every regressor.
# o_means has one row per row of df_fe, so the subtraction aligns on the index.
demeaned_o = df_fe.copy()
demeaned_o[regressors] = demeaned_o[regressors].sub(o_means[regressors])

In [112]:
# Importer-year means of the exporter-de-meaned data. Select the regressors
# before aggregating: averaging the whole frame first would also try to
# average the string column 'iso3_o', which raises a TypeError in
# pandas >= 2.0.
d_means = demeaned_o.groupby(['iso3_d', 'year'])[regressors].mean()
d_means = d_means.reset_index()
# Left-merge back onto the row keys so d_means has one row per row of
# demeaned_o, in the same order, ready for element-wise subtraction.
d_means = pd.merge(demeaned_o[['year', 'iso3_d']], d_means, how='left', on=['year', 'iso3_d'])

In [113]:
# Second de-meaning pass: subtract the importer-year mean from the
# exporter-de-meaned regressors.
demeaned_o_d = demeaned_o.copy()
demeaned_o_d[regressors] = demeaned_o_d[regressors].sub(d_means[regressors])