In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas
import os
import pandas_datareader.data as web
import us
import datetime
from pandas_datareader import wb
from ipywidgets import interact, interact_manual
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [2]:
# Configure Bokeh to render its output inline in the notebook.
# NOTE(review): no Bokeh plotting call is visible in this section — confirm this cell is still needed.
from bokeh.io import output_notebook
output_notebook()

In [3]:
# Global variables: project directory and Natural Earth states/provinces shapefile.
# Set the HW4_PATH environment variable to run the notebook from another machine or
# checkout; when it is unset the original project location is used unchanged.
PATH = os.environ.get(
    'HW4_PATH',
    r'D:\uchi\2021Fall\PPHA30536_Data and Programming for Public Policy II\homework-4-luotianling327',
)
NE_SHP = os.path.join(PATH, 'ne_50m_admin_1_states_provinces', 'ne_50m_admin_1_states_provinces.shp')

In [4]:
# function to load geo-information on continent level
def continent_geo(world, continent_name, countrycode):
    """Return the rows of ``world`` on one continent, with country codes attached.

    Parameters
    ----------
    world : DataFrame-like with 'continent' and 'iso_a3' columns.
    continent_name : value compared against ``world['continent']``.
    countrycode : DataFrame with 'name', 'iso2c' and 'iso3c' columns.

    Returns
    -------
    The continent rows left-merged with the code table on
    ``iso_a3 == iso3c``; rows without a match keep NaN codes. When ``world``
    also has a 'name' column, the merge suffixes the two name columns as
    'name_x' (from ``world``) and 'name_y' (from ``countrycode``).
    """
    code_table = countrycode[['name', 'iso2c', 'iso3c']]
    on_continent = world['continent'] == continent_name
    return world[on_continent].merge(
        code_table, how='left', left_on='iso_a3', right_on='iso3c'
    )

In [5]:
# add the variable that we are interested in to the continent geopandas dataframe
def continent_variable(continent, indicator, country_abbr, start, end, variable):
    """Attach the start-to-end growth rate of a World Bank indicator to each country.

    Parameters
    ----------
    continent : DataFrame-like with a 'name_y' column holding the country
        names used as the merge key.
    indicator : World Bank indicator code passed to ``wb.download``.
    country_abbr : country codes passed to ``wb.download``.
    start, end : first and last year (int or int-like) of the download; growth
        is computed between exactly these two years.
    variable : prefix for the new column, which is named ``variable + '_growth'``.

    Returns
    -------
    ``continent`` left-merged with a 'country' column and the
    ``variable + '_growth'`` column, computed as ``(end - start) / start``.
    Countries without data keep NaN.
    """
    downloaded = wb.download(indicator=indicator, country=country_abbr, start=start, end=end)

    # One row per country, one column per year. After unstacking, the columns are
    # (indicator, year) pairs; keep only the year and make it an int so the start
    # and end years can be selected by label. This also works when the download
    # spans more than two years, instead of assuming exactly two columns.
    by_year = downloaded.unstack()
    by_year.columns = by_year.columns.droplevel(0).astype(int)

    first, last = by_year[int(start)], by_year[int(end)]
    growth = ((last - first) / first).rename(variable + '_growth').reset_index()

    return continent.merge(growth, left_on='name_y', right_on='country', how='left')

In [6]:
# function to load geo-information on country level
def country_geo(country_name):
    """Read the shapefile at ``NE_SHP`` and keep the rows of one country.

    ``country_name`` is compared against the 'iso_a2' column, so it is the
    two-letter country code (the notebook passes 'US').
    """
    provinces = geopandas.read_file(NE_SHP)
    belongs_to_country = provinces['iso_a2'] == country_name
    return provinces[belongs_to_country]

In [7]:
# add the variable that we are interested in to the country geopandas dataframe
def country_variable(country, country_name, indicator, start_country, end_country, variable):
    """Attach the start-to-end growth rate of per-state FRED series to each state.

    Parameters
    ----------
    country : DataFrame-like with an 'iso_3166_2' column (merge key, matched
        against ``country_name + '-' + <state abbreviation>``).
    country_name : prefix used to build the merge key (the notebook passes 'US').
    indicator : list of FRED series IDs; the first two characters of each ID
        are taken as the state abbreviation.
    start_country, end_country : date-like values (e.g. '2019-01') passed to the
        FRED reader. They must coincide with observation dates of the series,
        because growth is computed between exactly these two observations.
    variable : prefix for the new column, which is named ``variable + '_growth'``.

    Returns
    -------
    ``country`` left-merged with a 'state' column and the
    ``variable + '_growth'`` column, computed as ``(end - start) / start``.

    Raises
    ------
    KeyError
        If the downloaded data has no observation at the start or end date.
    """
    series = web.DataReader(indicator, 'fred', start_country, end_country)
    series.columns = [series_id[:2] for series_id in series]

    # Derive the two observation dates from this function's own arguments rather
    # than from notebook-level globals, so the function does not depend on
    # variables defined by unrelated cells.
    start_ts = pd.Timestamp(start_country)
    end_ts = pd.Timestamp(end_country)
    for stamp in (start_ts, end_ts):
        if stamp not in series.index:
            raise KeyError(f'no observation dated {stamp.date()} in the downloaded series')

    growth = (series.loc[end_ts] - series.loc[start_ts]) / series.loc[start_ts]
    growth_by_state = pd.DataFrame({
        'state': [country_name + '-' + abbr for abbr in growth.index],
        variable + '_growth': growth.to_numpy(),
    })

    return country.merge(growth_by_state, left_on='iso_3166_2', right_on='state', how='left')

In [8]:
# function to plot a graph regarding a certain geographic level and a certain variable
def geo_plot(df, column_name, geographic, variables, start, end):
    """Draw a choropleth of ``column_name`` on a 12x12 figure with a titled map.

    Parameters
    ----------
    df : object whose ``plot`` method accepts ``ax``, ``column``, ``legend``
        and ``cax`` (the notebook passes GeoDataFrames).
    column_name : column whose values colour the map.
    geographic, variables : strings inserted into the title.
    start, end : period bounds, converted with ``str`` for the title.
    """
    _, map_ax = plt.subplots(figsize=(12, 12))
    # Reserve a narrow axis on the right-hand side to hold the legend colour bar.
    legend_ax = make_axes_locatable(map_ax).append_axes('right', size='5%', pad=0.1)
    map_ax = df.plot(ax=map_ax, column=column_name, legend=True, cax=legend_ax)
    map_ax.axis('off')
    title_parts = ['Change of ', variables, ' in ', geographic, ': ', str(start), ' - ', str(end)]
    map_ax.set_title(''.join(title_parts))

### At the continent level:

In [9]:
# Selections for the two map levels: the continent is matched against the
# 'continent' column of the world map, the country against a two-letter code.
continent_name, country_name = 'North America', 'US'

# World Bank country table (source of the iso2c/iso3c codes) and the
# low-resolution Natural Earth world map bundled with GeoPandas.
# NOTE(review): geopandas.datasets is deprecated in recent GeoPandas releases —
# confirm the installed version still ships 'naturalearth_lowres'.
countrycode = wb.get_countries()
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

continent = continent_geo(world, continent_name, countrycode)

In [10]:
# Country codes to request from the World Bank. The left merge in continent_geo
# leaves NaN in 'iso2c' for map rows without a match in the code table; drop
# those so that NaN is never sent as a country code.
country_abbr = continent['iso2c'].dropna().unique()
start = 2019
end = 2020

# GDP per capita of each country (World Bank indicator NY.GDP.PCAP.KD, constant US$)
# Note: GDP data for Greenland and Cuba is missing in the dataset
continent = continent_variable(continent, 'NY.GDP.PCAP.KD', country_abbr, start, end, 'gdp')

# Total population of each country (World Bank indicator SP.POP.TOTL)
continent = continent_variable(continent, 'SP.POP.TOTL', country_abbr, start, end, 'population')

### At the country level:

In [11]:
# State/province rows of the shapefile whose 'iso_a2' code equals country_name.
country = country_geo(country_name)

In [12]:
# Observation window requested from FRED
start_month, end_month = '2019-01', '2020-01'

# FRED series IDs are the state abbreviation followed by a fixed suffix:
#   <abbr>NGSP - Gross Domestic Product: All Industry Total (Annual; Not Seasonally Adjusted)
#   <abbr>POP  - Population in each state
state_gdp = [f'{state.abbr}NGSP' for state in us.STATES]
state_population = [f'{state.abbr}POP' for state in us.STATES]

# Add one growth column per variable to the state-level frame
country = country_variable(country, country_name, state_gdp, start_month, end_month, 'gdp')
country = country_variable(country, country_name, state_population, start_month, end_month, 'population')

### Interactive Plotting

In [13]:
# Dropdown choices and the growth column that each variable label maps to
option_geo = [continent_name, country_name]
option_variables = ['GDP Growth', 'Population Growth']
column_key = dict(zip(option_variables, ['gdp_growth', 'population_growth']))

@interact_manual(geographic=option_geo, variables=option_variables)
def multiple(geographic=option_geo[0], variables=option_variables[0]):
    """Plot the chosen variable for the chosen geographic level.

    The first entry of ``option_geo`` selects the continent frame and the
    second selects the country frame; any other value draws nothing.
    """
    if geographic == option_geo[0]:
        frame = continent
    elif geographic == option_geo[1]:
        frame = country
    else:
        return
    geo_plot(frame, column_key[variables], geographic, variables, start, end)

interactive(children=(Dropdown(description='geographic', options=('North America', 'US'), value='North America…

### Discussion: 
#### In this assignment, most of the logic is generalized into functions. We can easily change the continents, countries, and variables that we are interested in with minor changes and generate the desired output and an interactive choropleth.

#### The functions that are written for interactive plotting in HW3 can be modified and reused in this HW4. The code from HW4 can be further applied in the final project. 
#### First, I used the code to fetch data from FRED and the World Bank; the same approach can also be applied to other data sources such as the Fama-French data library.
#### Second, the functions and the plotting part can be applied in the final project to give a nice data visualization. It will be convenient because most parts are generalized and we don't need to make big changes.