In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import geopandas
from ipywidgets import interact, interact_manual  
import seaborn as sns

# Read in Data (Already cleaned in .py file)

In [2]:
# NOTE(review): machine-specific absolute path; pass `data_dir` to
# read_in_premade_data (or edit this value) when running elsewhere.
path = r'C:\Users\Ruairi\documents\github\final-project-final-project-ruairi-ocearuil'
def read_in_premade_data(no_outliers=False, data_dir=None):
    """Load one of the two pre-cleaned CSV exports as a pandas DataFrame.

    Parameters
    ----------
    no_outliers : bool, default False
        Truthy reads ``df_final.csv``; falsy reads ``df_final_reduced.csv``.
    data_dir : str or path-like, optional
        Directory that holds the CSV files. When omitted, the module-level
        ``path`` is used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the selected file.
    """
    # NOTE(review): the file names suggest the "reduced" export might be the
    # outlier-free one, which would make this mapping inverted. The original
    # mapping is kept as-is; confirm against the cleaning script.
    base_dir = path if data_dir is None else data_dir
    file_name = 'df_final.csv' if no_outliers else 'df_final_reduced.csv'
    return pd.read_csv(os.path.join(base_dir, file_name))

# Interactive Plot 1: Spatial Mapping (with helper functions)

In [3]:
# First 2 functions are helper functions taken from my .py file
def reconcile_world_country_names(this_country, df):
    """Map a country name onto the spelling used in ``df['Country']``.

    Only names beginning with 'Bosnia' or 'Czech' are ever rewritten: such a
    name is replaced by the entry of ``df['Country']`` that shares the same
    prefix. If several entries share it, the last one in ``unique()`` order
    wins. Every other name, or a name with no counterpart in ``df``, is
    returned unchanged.

    Parameters
    ----------
    this_country : str
        Name to reconcile.
    df : pandas.DataFrame
        Frame with a 'Country' column holding the target spellings.

    Returns
    -------
    str
        The reconciled name.
    """
    shared_prefixes = ('Bosnia', 'Czech')
    resolved = this_country
    for candidate in df['Country'].unique():
        # Both names must start with the same prefix for a match.
        if any(this_country.startswith(prefix) and candidate.startswith(prefix)
               for prefix in shared_prefixes):
            resolved = candidate
    return resolved

def world_pd_2_world_geo(df_final):
    """Attach country geometries to ``df_final`` and return a GeoDataFrame.

    The 'naturalearth_lowres' layer is read through geopandas, its ``name``
    column is reconciled with the spellings in ``df_final['Country']``, and
    the two tables are joined on country name. The merge uses the default
    join type, so rows of ``df_final`` without a matching country in the
    layer are dropped. The merge appends 'name' and 'geometry' as the last
    two columns.

    Parameters
    ----------
    df_final : pandas.DataFrame
        Frame with a 'Country' column.

    Returns
    -------
    geopandas.GeoDataFrame
        The merged frame with CRS EPSG:4326.
    """
    # NOTE(review): geopandas.datasets is deprecated in recent geopandas
    # releases - confirm the installed version still ships it.
    world_geo = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
    world_geo['name'] = world_geo['name'].apply(
        lambda x: reconcile_world_country_names(x, df_final))
    df_merged_pandas = df_final.merge(world_geo[['name', 'geometry']],
                                      left_on='Country', right_on='name')
    df_merged_geo = geopandas.GeoDataFrame(df_merged_pandas, geometry='geometry')
    # set_crs returns a new frame unless inplace=True; the original discarded
    # that return value, so the CRS assignment never took effect.
    df_merged_geo = df_merged_geo.set_crs(epsg=4326)
    return df_merged_geo

def plot_choropleth(df_final_geo, year, var, cmap='viridis'):
    """Draw a choropleth of one variable for a single year.

    Parameters
    ----------
    df_final_geo : geopandas.GeoDataFrame
        Frame with a 'Year' column and the column named by ``var``.
    year : int
        Only rows whose 'Year' equals this value are drawn.
    var : str
        Name of the column used to colour the polygons.
    cmap : str, default 'viridis'
        Colormap name forwarded to the plot call. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    matplotlib.figure.Figure
        A new 14x14 figure. It is not closed here, so callers that redraw
        repeatedly should show or close it.
    """
    # https://stackabuse.com/change-figure-size-in-matplotlib/
    # https://gis.stackexchange.com/questions/152920/changing-colours-in-geopandas
    this_yr_df = df_final_geo[df_final_geo['Year'] == year]
    fig, ax = plt.subplots(figsize=(14, 14))
    # Dedicated narrow axes to the right of the map for the legend colorbar.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.1)
    this_ax = this_yr_df.plot(ax=ax,
                              column=var,
                              edgecolor='grey',
                              legend=True,
                              cmap=cmap,
                              cax=cax)
    this_ax.set_title(f'{var} Levels in {year}')
    return fig

In [4]:
# Load the tabular data once, then derive the geometry-enriched copy used for mapping.
df_final_pd = read_in_premade_data(no_outliers=False)
df_final_geo = world_pd_2_world_geo(df_final_pd)

# A (min, max) tuple is rendered by `interact` as an integer slider.
years = (1980, 2019)
# The geo merge appends 'name' and 'geometry' as the final two columns, so
# [-11:-2] selects the nine columns that sit immediately before them.
var_list = ['Avg_EC', 'Count', 'Stat_ID_Count'] + df_final_geo.columns[-11:-2].tolist()

@interact
def final_plot(year=years, var=var_list):
    """Widget callback: redraw the choropleth for the selected year and variable."""
    plot_choropleth(df_final_geo, year, var)
    # The original `fig.show` (no parentheses) only looked the method up and
    # never called it. plt.show() displays the figure explicitly.
    plt.show()

interactive(children=(IntSlider(value=1999, description='year', max=2019, min=1980), Dropdown(description='var…

# Interactive Plot 2: Timeseries

In [5]:
def plot_timeseries_cntry_var(df_final_pd, country_list, var_list, water_type='River'):
    """Plot one yearly line chart per variable, with one line per country.

    Parameters
    ----------
    df_final_pd : pandas.DataFrame
        Frame with 'Country', 'Year' and 'Water_type' columns plus the
        columns named in ``var_list``.
    country_list : list of str
        Countries to keep (matched against 'Country').
    var_list : list of str
        One or two column names; each gets its own subplot, stacked vertically.
    water_type : str, default 'River'
        Applied as a filter on 'Water_type' only when 'Count' or 'Avg_EC' is
        among the requested variables.

    Returns
    -------
    matplotlib.figure.Figure
        A 7x7 figure holding ``len(var_list)`` subplots.

    Raises
    ------
    AssertionError
        If more than two variables are requested.
    ValueError
        If ``var_list`` is empty.
    """
    # https://medium.com/@andykashyap/top-5-tricks-to-make-plots-look-better-9f6e687c1e08
    assert len(var_list) <= 2, "Please input up to 2 variables of interest."
    if len(var_list) == 0:
        # plt.subplots(0, 1) would raise ValueError as well, but only after the
        # seaborn theme had been changed and an empty figure created.
        raise ValueError("Please input at least 1 variable of interest.")

    this_df_pd = df_final_pd[df_final_pd['Country'].isin(country_list)]
    if 'Count' in var_list or 'Avg_EC' in var_list:
        this_df_pd = this_df_pd[this_df_pd['Water_type'] == water_type]

    # Size the palette from the countries that survive the filters, not from
    # the request: a selected country may have no rows, and seaborn complains
    # (error or warning, depending on version) when a list palette's length
    # differs from the number of hue levels.
    n_hues = this_df_pd['Country'].nunique()

    sns.set()
    sns.set_style('darkgrid')
    sns.set_context('paper')
    try:
        # squeeze=False always yields a 2-D axes array, so one and two
        # variables are handled by the same indexing.
        fig_ts, axes_grid = plt.subplots(len(var_list), 1, figsize=(7, 7),
                                         squeeze=False)
        fig_ts.tight_layout(pad=3)
        # https://stackoverflow.com/questions/51762938/seaborn-color-palette-not-working-appropiate-with-lineplot
        line_palette = sns.color_palette('colorblind', n_colors=n_hues)
        for this_ax, var_name in zip(axes_grid[:, 0], var_list):
            sns.lineplot(x='Year',
                         y=var_name,
                         hue='Country',
                         palette=line_palette,
                         data=this_df_pd,
                         ax=this_ax)
            this_ax.set_title(f'Timeseries of {var_name}',
                              weight='bold')
            this_ax.set_xlabel('')  # No need to label year, it is clear in plot
            this_ax.set_ylabel(var_name.replace('_', ' '),
                               weight='bold')
            # Legend sits outside the axes, centred on the right edge.
            this_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
                           fancybox=True, shadow=True)
    finally:
        # Restore the pre-seaborn matplotlib settings even if plotting fails.
        sns.reset_orig()
    return fig_ts

In [6]:
def make_final_timeseries(df_final_pd, c1, c2, c3, c4, var1, var2, water_type='River'):
    """Collect the widget selections and draw the timeseries figure.

    Parameters
    ----------
    df_final_pd : pandas.DataFrame
        Data to plot; forwarded unchanged to ``plot_timeseries_cntry_var``.
    c1, c2, c3, c4 : str or None
        Up to four country selections; falsy entries are skipped.
    var1, var2 : str or None
        Up to two variable selections; falsy entries are skipped.
    water_type : str, default 'River'
        Forwarded to ``plot_timeseries_cntry_var``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure produced by ``plot_timeseries_cntry_var``.
    """
    country_list = [country for country in (c1, c2, c3, c4) if country]
    var_list = [var for var in (var1, var2) if var]
    # Use the frame the caller passed in. The original ignored this parameter
    # and read the notebook-level `df_final_geo` instead.
    return plot_timeseries_cntry_var(df_final_pd, country_list, var_list, water_type)

# Dropdown choices for the timeseries widget. A trailing None lets the user
# leave a country or variable slot unselected.
water_type_opts = ['River', 'Lake/Reservoir', 'Groundwater']
# NOTE: `var_list` is rebound here as well, replacing the list built for the
# choropleth cell; both names refer to the same list object.
var_list = df_final_pd.columns[-9:].tolist() + ['Avg_EC', 'Count', None]
var_opts = var_list
country_opts = [*df_final_pd['Country'].unique(), None]

@interact
def final_plot(c1=country_opts, c2=country_opts, c3=country_opts, c4=country_opts,
               var1=var_opts, var2=var_opts,
               water_type=water_type_opts):
    """Widget callback: redraw the timeseries for the selected countries and variables.

    Shares its name with the choropleth callback defined in an earlier cell,
    so this definition rebinds ``final_plot`` at notebook level.
    """
    make_final_timeseries(df_final_pd, c1, c2, c3, c4, var1, var2, water_type)
    # The original `fig.show` (no parentheses) only looked the method up and
    # never called it. plt.show() displays the figure explicitly.
    plt.show()

interactive(children=(Dropdown(description='c1', options=('Argentina', 'Australia', 'Austria', 'Bangladesh', '…