The following code is written in Python on [Google Collab](https://colab.research.google.com/drive/1qMEHTI8SMX9qTIdC0Tsnued3ALp-D5_t?usp=sharing) to create chloropleth and surprise maps in order to explore the relationship between GDP and energy consumption per capita. 

In [12]:
import pandas as pd
import plotly.graph_objects as go
from plotly.offline import iplot

In [13]:
gdp_df = pd.read_csv("https://raw.githubusercontent.com/lianahasan/CSC-474-Assignment-2/main/Data/gdp_per_capita.csv")
energy_df = pd.read_csv("https://raw.githubusercontent.com/lianahasan/CSC-474-Assignment-2/main/Data/energy_per_capita.csv")

In [14]:
# cleaning gdp data 
gdp_df['cleaning_countries'] = gdp_df['country'].str.split('(').str[0]
gdp_df['Country'] = gdp_df['cleaning_countries'].str.split('[').str[0]
gdp = gdp_df.drop(columns = ['id', 'country', 'my19', 'mypct', 'cleaning_countries'])

In [15]:
# cleaning energy consumption data 
energy_cleaning = energy_df.query('Year == 2019')
energy_cleaned = energy_cleaning.rename({'Entity': 'Country'}, axis='columns')
energy = energy_cleaned.drop(columns = ['Year', "Code"])

In [16]:
# combining data sets 
combined_df = pd.merge(gdp, energy, on='Country')
combined_df.head(80)

Unnamed: 0,gdppc,Country,Energy consumption per capita (kWh)
0,3.975,Algeria,16376.102
1,10.041,Argentina,21479.671
2,54.764,Australia,70644.033
3,49.701,Austria,46523.958
4,4.782,Azerbaijan,18127.566
...,...,...,...
71,41.855,United Kingdom,32249.735
72,65.134,United States,79897.151
73,1.756,Uzbekistan,15022.418
74,4.733,Venezuela,21691.926


In [17]:
# calculating expected value based on averages
expected = combined_df["gdppc"].astype(int).mean() / combined_df["Energy consumption per capita (kWh)"].astype(int).mean()
print(expected)

0.0005730577351214318


In [18]:
# calculating actual value 
actual = combined_df['gdppc'].astype(int) / combined_df['Energy consumption per capita (kWh)'].astype(int)

In [19]:
# chloropleth map with only gdp per capita 
fig_cases = go.Figure(data = go.Choropleth(locations = combined_df['Country'],
                                     z = (combined_df['gdppc'].astype(int)),
                                     locationmode = 'country names',
                                     colorscale = 'thermal',
                                     colorbar_title = "GDP"))

fig_cases.update_layout(title_text = 'GDP Per Capita',
                  geo = dict(showframe = False,
                           showcoastlines = False,
                           projection_type = 'equirectangular'))
iplot(fig_cases)

In [20]:
# chloropleth map with only energy consumption per capita 
fig_cases = go.Figure(data = go.Choropleth(locations = combined_df['Country'],
                                     z = (combined_df['Energy consumption per capita (kWh)'].astype(int)),
                                     locationmode = 'country names',
                                     colorscale = 'thermal',
                                     colorbar_title = "Energy"))

fig_cases.update_layout(title_text = 'Energy Consumption Per Capita',
                  geo = dict(showframe = False,
                           showcoastlines = False,
                           projection_type = 'equirectangular'))
iplot(fig_cases)

In [21]:
# chloropleth map for gdp and energy consumption per capita 
fig_cases = go.Figure(data = go.Choropleth(locations = combined_df['Country'],
                                     z = actual,
                                     locationmode = 'country names',
                                     colorscale = 'thermal',
                                     colorbar_title = "GDP/Energy"))

fig_cases.update_layout(title_text = 'GDP Per Capita In Relation To Energy Consumption',
                  geo = dict(showframe = False,
                           showcoastlines = True,
                           projection_type = 'equirectangular'))
iplot(fig_cases)

In [22]:
# Surprise Map: Surprise is calculated based off this formula (Expected - Actual) / Expected
fig_cases = go.Figure(data = go.Choropleth(locations = combined_df['Country'],
                                     z = (expected - actual)/expected,
                                     locationmode = 'country names',
                                     colorscale = 'thermal',
                                     colorbar_title = "Surprise"))

fig_cases.update_layout(title_text = 'Surprise Map',
                  geo = dict(showframe = False,
                           showcoastlines = False,
                           projection_type = 'equirectangular'))
iplot(fig_cases)