In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the bilateral trade-flow table from the Stata file and preview it.
trade_path = "tradeflows.dta"
trade_df = pd.read_stata(trade_path)
trade_df.head()

In [None]:
# Keep only the columns used downstream: year, origin/destination codes,
# both GDPs, the trade flow and the weighted distance.
trade_columns = ['year', 'iso_o', 'iso_d', 'gdp_o', 'gdp_d', 'flow', 'distw']
trade_df = trade_df.loc[:, trade_columns]
trade_df.head()

In [None]:
# Load the polity score table from the Excel workbook and preview it.
polity_path = "polity.xls"
polity_scores_df = pd.read_excel(polity_path)
polity_scores_df.head()

In [None]:
# Country codes that appear both as a trade origin and in the polity table.
# NOTE(review): this matches trade 'iso_o' codes against polity 'scode' values
# verbatim; if the two sources use different coding schemes, countries whose
# codes differ are silently excluded — check the printed difference below.
trade_codes = set(trade_df['iso_o'].unique())
polity_codes = set(polity_scores_df['scode'].unique())
same_countries = sorted(trade_codes.intersection(polity_codes))
print(same_countries)
# Polity codes that found no match among the trade origins.
print("Polity - Same =", polity_codes - set(same_countries))

In [None]:
# Keep only the identifier, country name, year and polity score columns.
polity_columns = ['scode', 'country', 'year', 'polity']
polity_scores_df = polity_scores_df.loc[:, polity_columns]
polity_scores_df.head()

In [None]:
# Restrict both tables to observations from 1950 onwards.
trade_from_1950 = trade_df['year'] >= 1950
polity_from_1950 = polity_scores_df['year'] >= 1950
trade_df = trade_df.loc[trade_from_1950]
polity_scores_df = polity_scores_df.loc[polity_from_1950]
trade_df.shape

In [None]:
# Keep only rows whose origin AND destination are both in same_countries,
# then renumber the rows from zero.
origin_known = trade_df['iso_o'].isin(same_countries)
destination_known = trade_df['iso_d'].isin(same_countries)
trade_df = trade_df[origin_known & destination_known].reset_index(drop=True)

In [None]:
# Create a trade-link identifier of the form "<iso_o>-<iso_d>" (origin first,
# destination second). Built with vectorized string concatenation rather than
# a row-wise apply: it is much faster and also works on an empty frame, where
# apply(axis=1) would return a DataFrame instead of a Series.
# astype(str) keeps the concatenation valid even if the code columns were
# loaded as categoricals.
trade_df["tradelink"] = trade_df['iso_o'].astype(str) + '-' + trade_df['iso_d'].astype(str)
trade_df = trade_df.reset_index(drop=True)

In [None]:
# Summary statistics of the numeric columns, as a sanity check after filtering.
trade_df.describe()

In [None]:
# Attach the polity score of the origin country and then of the destination
# country. Each pass is an inner join on (year, country code), so trade rows
# without a matching polity record for that year are dropped. The helper
# columns brought in by the join ('country', 'scode') are discarded and the
# score is renamed to polity_o / polity_d respectively.
for iso_col, score_col in (('iso_o', 'polity_o'), ('iso_d', 'polity_d')):
    trade_df = (
        pd.merge(
            left=trade_df,
            right=polity_scores_df,
            left_on=['year', iso_col],
            right_on=['year', 'scode'],
        )
        .drop(columns=['country', 'scode'])
        .rename(columns={'polity': score_col})
    )
trade_df.head()

In [None]:
# Absolute gap between the origin and destination polity scores.
# NOTE(review): if the 'polity' column contains special placeholder codes
# (e.g. large negative values for interruption/transition years), they would
# inflate this distance — confirm against the polity codebook.
score_gap = trade_df['polity_o'] - trade_df['polity_d']
trade_df['polity_dist'] = score_gap.abs()
trade_df.head(20)

In [None]:
import math

In [None]:
# Log-transform the model variables.
# GDPs use a plain natural log.
trade_df['log_gdp_o'] = np.log(trade_df['gdp_o'])
trade_df['log_gdp_d'] = np.log(trade_df['gdp_d'])
# Flow, distance and polity distance use log(1 + x) so that zeros map to 0
# instead of -inf. The transform is applied once, directly to the raw column:
# taking log(1 + log(flow)) would turn every zero flow into NaN, and those rows
# would then disappear in a later dropna.
trade_df['log_flow'] = np.log1p(trade_df['flow'])
trade_df['log_distw'] = np.log1p(trade_df['distw'])
# Vectorized log1p propagates NaN for missing polity distances instead of
# raising on an int() conversion of NaN.
trade_df['log_polity_dist'] = np.log1p(trade_df['polity_dist'])
trade_df.head(10)

In [None]:
# Index the table by (trade link, year).
panel_keys = ['tradelink', 'year']
trade_df = trade_df.set_index(keys=panel_keys)

In [None]:
# Drop every row that has a missing value in any column.
trade_df = trade_df.dropna(axis=0, how='any')

In [None]:
# Write the final table (including its index columns) to CSV.
output_path = "trade_polity.csv"
trade_df.to_csv(output_path)