## Visualising residuals
- We visualise the residuals from different regression specifications to highlight the kinds of errors each specification makes
- The UK (ISO3 code `GBR`) is used as the reference country

In [1]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns; sns.set(style="ticks", color_codes=True)
import geopandas as gpd
from geopandas import GeoDataFrame

In [2]:
# Load the 'naturalearth_lowres' dataset shipped with GeoPandas into `world`.
# animate() passes this frame to plotly as the `geojson` source and matches
# features on their `iso_a3` property.
# NOTE(review): `gpd.datasets` is deprecated in recent GeoPandas releases and
# reportedly removed in 1.0 — confirm the installed version, or read a local
# copy of the Natural Earth file instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

In [31]:
# Small hand-written prediction table with the columns animate() expects
# ('year', 'iso3_d', 'iso3_o', 'prediction'), built column by column.
test = pd.DataFrame(
    {
        'year': [2010, 2008, 2009, 2008],
        'iso3_d': ['GBR', 'GBR', 'GBR', 'ITA'],
        'iso3_o': ['GBR', 'ITA', 'ITA', 'GBR'],
        'prediction': [124, 1000, 9000, 1000],
    }
)
test

Unnamed: 0,year,iso3_d,iso3_o,prediction
0,2010,GBR,GBR,124
1,2008,GBR,ITA,1000
2,2009,GBR,ITA,9000
3,2008,ITA,GBR,1000


## Defining the visualisation function
- The `animate()` function takes the ISO3 code of a reference country (e.g. `GBR`) and a dataframe containing a column named `prediction` (the predicted log trade flow)
- It then uses plotly to generate a choropleth map animated over time

In [42]:
# Load the observed data from 'cleaned.csv' (path relative to the notebook).
# animate() reads its 'year', 'iso3_d', 'iso3_o' and 'tradeflow' columns; the
# regression cell additionally reads 'gdp_o', 'gdp_d' and 'distw'.
data = pd.read_csv('cleaned.csv')

def _prediction_errors(country, predicted, observed):
    """Build the per-year residual table for one reference country.

    Parameters
    ----------
    country : str
        ISO3 code compared against the ``iso3_d`` column of both frames.
    predicted : pandas.DataFrame
        Needs the columns ``year``, ``iso3_d``, ``iso3_o`` and ``prediction``.
        ``prediction`` is treated as being in log units, because it is compared
        with ``log(tradeflow)``.
    observed : pandas.DataFrame
        Needs the columns ``year``, ``iso3_d``, ``iso3_o`` and ``tradeflow``.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``iso3_o``, ``prediction``, ``tradeflow`` and
        ``error`` (= ``prediction - log(tradeflow)``), sorted by ``year``.
    """
    actual = observed.loc[observed['iso3_d'] == country, ['year', 'iso3_o', 'tradeflow']]
    predictions = predicted.loc[predicted['iso3_d'] == country, ['year', 'iso3_o', 'prediction']]

    merged = pd.merge(predictions, actual, on=['year', 'iso3_o'], how='left')

    # Keep only rows with a strictly positive observed flow. This drops
    # predictions without a matching observation (NaN after the left merge) and
    # flows whose log is not finite, which would otherwise put +/-inf into the
    # colour range.
    merged = merged[merged['tradeflow'] > 0].copy()
    merged['error'] = merged['prediction'] - np.log(merged['tradeflow'])

    # plotly builds animation frames in order of first appearance, so the rows
    # must be in chronological order for the slider to play forwards in time.
    # mergesort is stable, so the row order within a year is preserved.
    return merged.sort_values('year', kind='mergesort')


def animate(country, predicted):
    """Show an animated choropleth of prediction errors for one country.

    Takes the ISO code of a country and a dataframe containing predicted values
    under a column named 'prediction'. The observed values come from the
    module-level ``data`` frame and the country shapes from the module-level
    ``world`` frame, so both must be defined before this is called.

    Parameters
    ----------
    country : str
        ISO3 code of the reference country; rows are selected where
        ``iso3_d == country``.
    predicted : pandas.DataFrame
        Needs the columns ``year``, ``iso3_d``, ``iso3_o`` and ``prediction``
        (in log units, since it is compared with ``log(tradeflow)``).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` rather than returned.
    """
    merged = _prediction_errors(country, predicted, data)
    fig = px.choropleth_mapbox(merged,
        geojson=world,
        animation_frame='year',
        featureidkey='properties.iso_a3',
        locations='iso3_o',
        center={'lat':50, 'lon':0},
        color='error',
        # Fix the colour range over all years so frames are comparable.
        range_color=[merged['error'].min(), merged['error'].max()],
        width=950,
        height=600,
        color_continuous_scale=['darkred', 'lime'],
        mapbox_style='carto-positron',
        zoom=2)
    fig.show()


In [37]:
# pandas (pd) and numpy (np) are already imported in the notebook's first cell;
# only statsmodels is new here.
import statsmodels.api as sm

# Specification 1: OLS of log(tradeflow) on a constant and the logs of
# gdp_o, gdp_d and distw.
df = data.copy()
# Work on a separate copy so the 'prediction' column is not added to `df`.
spec1_df1 = df.copy()

# The regressors and the response are both built from `spec1_df1`. The original
# cell read them from `df1`, which is never defined in this notebook.
X = sm.add_constant(np.log(spec1_df1[['gdp_o', 'gdp_d', 'distw']]))
Y = np.log(spec1_df1['tradeflow'])

model = sm.OLS(Y, X)
results = model.fit()

# Fitted values are in log units (the response is log(tradeflow)), which is the
# scale animate() compares against. Assignment aligns on the shared row index.
predicted_values = results.fittedvalues
spec1_df1['prediction'] = predicted_values
