# Visualize Forecast Result

Generate aggregated maps and plots to visualize the forecast results. 

In [None]:
import os
import glob
import dotenv
import json

import pandas as pd
import numpy as np
import geopandas
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import math

In [None]:
# Move the working directory up one level so that the relative paths used
# below ('.env', 'config.json') resolve against the main repository.
# NOTE: re-running this cell moves up one more level each time.
os.chdir("..")

In [None]:
# Load the variables defined in the .env file of the working directory into
# the process environment, then pick up the paths this notebook needs.
# Each value is None when the corresponding variable is not set.
env_file = '.env'
dotenv.load_dotenv(env_file)

input_dir = os.environ.get('INPUT_PATH')
out_dir = os.environ.get('OUTPUT_PATH')
countries_path = os.environ.get('COUNTRIES_PATH')


In [None]:
# Read the simulation settings and derive the directory that holds the
# forecast outputs: <OUTPUT_PATH>/<sim_name>_forecast (no trailing separator).
with open("config.json") as config_file:
    config = json.load(config_file)

sim_name = config['sim_name']
run_name = "{}_forecast".format(sim_name)

results_dir = "{}/{}".format(out_dir, run_name)

In [None]:
# Configured list of native countries; their summary statistics are blanked
# out further down so they show up as "missing" on the maps.
native_countries_list = config['native_countries_list']

# Country of interest: the destination whose introduction sources are mapped.
coi = config['coi']


In [None]:
# Read the country geometries and prepare the per-country accumulator tables.

countries_geo = geopandas.read_file(countries_path)

# Select the country-name column by label instead of by position, so the
# notebook does not depend on where NAME sits in the file.
# (The original also called countries.set_index("NAME") and discarded the
# result, which had no effect; NAME is made the index after the run loop.)
countries = countries_geo[["NAME"]]

# One row per country; the run loop adds one column per forecast run.
countries_firstintro = countries.copy()
countries_reintros = countries.copy()

# Accumulates the origin/destination records of all runs.
org_dest_all = pd.DataFrame()

The plots below are a selection of those produced in results_plots.

In [None]:
# Walk over every run's origin_destination.csv and build:
#   * org_dest_all         - all origin/destination records of all runs,
#   * countries_firstintro - per country, one column per run holding the TS
#                            year of the first listed introduction,
#   * countries_reintros   - per country, one column per run holding the
#                            number of introduction records.
paths = glob.glob(f'{results_dir}/*/*/origin_destination.csv')
run_frames = []
for sample, path in enumerate(paths):
    # Columns 1-3 of the file; presumably Origin, Destination and TS
    # (those are the names used below) - TODO confirm against the model output.
    # .copy() so the TS assignment below works on an independent frame.
    org_dest = pd.read_csv(path).iloc[:, 1:4].copy()
    # Keep only the first four characters of TS (presumably the year).
    org_dest["TS"] = org_dest["TS"].astype(str).str[:4].astype(int)
    run_frames.append(org_dest)

    # Everything after the first column, with the TS column named after the
    # run index so each run contributes its own column.
    intros = org_dest.iloc[:, 1:4].rename(columns={"Destination": "NAME", "TS": sample})

    # drop_duplicates keeps the first row per country.
    # NOTE(review): this is the earliest year only if the file is ordered by
    # time - confirm.
    firstintro = intros.drop_duplicates(subset=["NAME"])
    countries_firstintro = pd.merge(countries_firstintro, firstintro, on="NAME", how="left")

    reintros = intros.groupby("NAME").count()
    countries_reintros = pd.merge(countries_reintros, reintros, on="NAME", how="left")

# DataFrame.append was removed in pandas 2.0; concatenate once instead of
# growing the frame inside the loop.
if run_frames:
    if not org_dest_all.empty:
        run_frames.insert(0, org_dest_all)
    org_dest_all = pd.concat(run_frames)

countries_firstintro = countries_firstintro.set_index("NAME")
countries_reintros = countries_reintros.set_index("NAME")

In [None]:
# Count, per origin country, the introduction records whose destination is
# the country of interest, and attach the count to the map as "COI source".
coi_intros = (
    org_dest_all.loc[org_dest_all["Destination"] == coi]
    .groupby("Origin")
    .count()[["Destination"]]
    .rename(columns={"Destination": "COI source"})
)

# Left join: countries that never acted as a source keep a missing value.
countries_geo = countries_geo.merge(
    coi_intros, how="left", left_on="NAME", right_on="Origin"
)

In [None]:
# Map of how often each country was the origin of an introduction into the
# country of interest.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
divider = make_axes_locatable(ax)
# Narrow axes to the right of the map that holds the colour bar.
cax = divider.append_axes("right", size="2%", pad=0.1)
ax.set_title("Introduction Sources for " + coi + "\n" + run_name, fontsize=18)
countries_geo.plot(
    column='COI source',
    ax=ax,
    legend=True,
    legend_kwds={'label': "intro source count"},
    missing_kwds={'color': 'lightgrey'},
    cax=cax,
)
# results_dir has no trailing separator, so plain string concatenation wrote
# the image next to the results directory under a fused name; join the file
# name onto the directory instead.
plt.savefig(os.path.join(results_dir, coi + "_intro_sources.png"))
plt.show()

In [None]:
# Summarise, per country, the first-introduction year across runs.
# At this point every column of countries_firstintro is one run; a missing
# value means that run recorded no introduction into the country.
arr_yr_mean_all = []
arr_yr_mode_all = []
arr_yr_min_all = []
arr_yr_max_all = []
arr_yr_range_all = []
intro_proportion_all = []

n_runs = len(countries_firstintro.columns)
for _, first_years in countries_firstintro.iterrows():
    # Share of runs with at least one introduction into this country.
    intro_proportion = 1 - (first_years.isnull().sum() / n_runs)
    intro_proportion_all.append(intro_proportion)

    if intro_proportion == 0:
        # No run reached this country: all year statistics are undefined.
        for stat_values in (
            arr_yr_min_all,
            arr_yr_max_all,
            arr_yr_mean_all,
            arr_yr_mode_all,
            arr_yr_range_all,
        ):
            stat_values.append(None)
        continue

    arr_yr_min = first_years.min()
    arr_yr_max = first_years.max()
    # Mean over the runs that had an introduction, rounded down to a year.
    arr_yr_mean = math.floor(np.nanmean(first_years))
    arr_yr_mode = first_years.mode()
    # With several equally frequent years, use the truncated mean of them.
    arr_yr_mode = int(arr_yr_mode.mean()) if len(arr_yr_mode) > 1 else arr_yr_mode[0]

    arr_yr_min_all.append(arr_yr_min)
    arr_yr_max_all.append(arr_yr_max)
    arr_yr_mean_all.append(arr_yr_mean)
    arr_yr_mode_all.append(arr_yr_mode)
    arr_yr_range_all.append(arr_yr_max - arr_yr_min)

# Attach the statistics as new columns (this order fixes the column order).
summary_columns = {
    "arr_yr_mean": arr_yr_mean_all,
    "arr_yr_mode": arr_yr_mode_all,
    "arr_yr_min": arr_yr_min_all,
    "arr_yr_max": arr_yr_max_all,
    "arr_yr_range": arr_yr_range_all,
    "intro_proportion": intro_proportion_all,
}
for column, values in summary_columns.items():
    countries_firstintro[column] = values

# Blank out the statistics of the native countries.
for column in summary_columns:
    countries_firstintro.loc[native_countries_list, column] = None

In [None]:
# Mean number of introduction records per country across runs; a run with no
# record for a country counts as 0.
countries_reintros = countries_reintros.fillna(0)
mean_reintros = countries_reintros.mean(axis=1).round()

# Blank out the native countries. `.at` only addresses a single scalar cell,
# so a list of labels has to go through `.loc`. The values stay floating
# point (whole numbers) because the column has to hold missing values anyway.
mean_reintros.loc[native_countries_list] = np.nan
countries_reintros["num_reintros_mean"] = mean_reintros

In [None]:
# Attach the per-country summary columns to the geometries, one merge per
# column on the country name (default inner join).
first_intro_columns = [
    "arr_yr_mean",
    "arr_yr_mode",
    "arr_yr_min",
    "arr_yr_max",
    "arr_yr_range",
    "intro_proportion",
]
# Year columns are cast to the nullable integer dtype so that missing values
# survive alongside whole-number years.
nullable_year_columns = {"arr_yr_mean", "arr_yr_mode", "arr_yr_min", "arr_yr_max"}

for column in first_intro_columns:
    countries_geo = countries_geo.merge(countries_firstintro[column], on='NAME')
    if column in nullable_year_columns:
        countries_geo[column] = countries_geo[column].astype("Int64")

countries_geo = countries_geo.merge(countries_reintros["num_reintros_mean"], on='NAME')

In [None]:
# Map of the mean number of introduction records per country.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
divider = make_axes_locatable(ax)
# Narrow axes to the right of the map that holds the colour bar.
cax = divider.append_axes("right", size="2%", pad=0.1)
ax.set_title("Number of Reintroductions (mean)\n" + run_name, fontsize=18)
countries_geo.plot(
    column='num_reintros_mean',
    ax=ax,
    legend=True,
    legend_kwds={'label': "reintroductions"},
    missing_kwds={'color': 'lightgrey'},
    cax=cax,
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(os.path.join(results_dir, "num_reintros.png"))
plt.show()

In [None]:
# Categorical map of the mean first-introduction year per country.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.set_title("Year of First Introduction (mean)\n" + run_name, fontsize=18)
countries_geo.plot(
    column='arr_yr_mean',
    categorical=True,
    cmap="viridis",
    legend=True,
    ax=ax,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'loc': 'lower left'},
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(os.path.join(results_dir, "first_intros_mean.png"))
plt.show()

In [None]:
# Categorical map of the earliest first-introduction year per country.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.set_title("Year of First Introduction (min)\n" + run_name, fontsize=18)
countries_geo.plot(
    column='arr_yr_min',
    categorical=True,
    cmap="viridis",
    legend=True,
    ax=ax,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'loc': 'lower left'},
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(os.path.join(results_dir, "first_intros_min.png"))
plt.show()

In [None]:
# Categorical map of the latest first-introduction year per country.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.set_title("Year of First Introduction (max)\n" + run_name, fontsize=18)
countries_geo.plot(
    column='arr_yr_max',
    categorical=True,
    cmap="viridis",
    legend=True,
    ax=ax,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'loc': 'lower left'},
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(os.path.join(results_dir, "first_intros_max.png"))
plt.show()

In [None]:
# Categorical map of the spread (max - min) of the first-introduction year.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.set_title("Year of First Introduction (range)\n" + run_name, fontsize=18)
countries_geo.plot(
    column='arr_yr_range',
    categorical=True,
    cmap="viridis",
    legend=True,
    ax=ax,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'loc': 'lower left'},
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(os.path.join(results_dir, "first_intros_range.png"))
plt.show()

In [None]:
# Map of the share of runs in which each country received an introduction.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
divider = make_axes_locatable(ax)
# Narrow axes to the right of the map that holds the colour bar.
cax = divider.append_axes("right", size="2%", pad=0.1)
ax.set_title("Proportion of Runs with Introductions\n" + run_name, fontsize=18)
countries_geo.plot(
    column='intro_proportion',
    ax=ax,
    legend=True,
    legend_kwds={'label': "proportion"},
    missing_kwds={'color': 'lightgrey'},
    cax=cax,
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(os.path.join(results_dir, "intro_proportion.png"))
plt.show()

In [None]:
# Categorical map of the earliest first-introduction year, restricted to
# countries reached in at least half of the runs.
# No separate colour-bar axes here: a categorical plot draws a legend inside
# the map, so an extra axes would only show up as an empty box.
fig, ax = plt.subplots(1, 1, figsize=(20, 20))
ax.set_title("Minimum Year of First Introduction (runs with intros >50%)\n" + run_name, fontsize=18)
# Hide countries with a proportion below 0.5. Series.mask keeps the column's
# nullable integer dtype, unlike np.where, which returns a plain array.
# NOTE(review): a proportion of exactly 0.5 is kept although the title says
# ">50%" - confirm which is intended.
# This overwrites arr_yr_min in place, so run this cell after the unfiltered
# "min" map.
countries_geo['arr_yr_min'] = countries_geo['arr_yr_min'].mask(
    countries_geo['intro_proportion'] < 0.5
)
countries_geo.plot(
    column='arr_yr_min',
    categorical=True,
    cmap="viridis",
    legend=True,
    ax=ax,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'loc': 'lower left'},
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(
    os.path.join(results_dir, "first_intro_min_more50pct.png"),
    bbox_inches='tight',
    pad_inches=0.01,
)
plt.show()


In [None]:
# Categorical map of the mean first-introduction year, restricted to
# countries reached in at least half of the runs.
# No separate colour-bar axes here: a categorical plot draws a legend inside
# the map, so an extra axes would only show up as an empty box.
fig, ax = plt.subplots(1, 1, figsize=(20, 20))
ax.set_title("Year of First Introduction (mean of runs with intros >50%)\n" + run_name, fontsize=18)
# Hide countries with a proportion below 0.5. Series.mask keeps the column's
# nullable integer dtype, unlike np.where, which returns a plain array.
# NOTE(review): a proportion of exactly 0.5 is kept although the title says
# ">50%" - confirm which is intended.
# This overwrites arr_yr_mean in place, so run this cell after the unfiltered
# "mean" map.
countries_geo['arr_yr_mean'] = countries_geo['arr_yr_mean'].mask(
    countries_geo['intro_proportion'] < 0.5
)
countries_geo.plot(
    column='arr_yr_mean',
    categorical=True,
    cmap="viridis",
    legend=True,
    ax=ax,
    missing_kwds={'color': 'lightgrey'},
    legend_kwds={'loc': 'lower left'},
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(
    os.path.join(results_dir, "first_intro_mean_more50pct.png"),
    bbox_inches='tight',
    pad_inches=0.01,
)
plt.show()


In [None]:
# Map of the share of runs with an introduction, restricted to countries
# reached in at least half of the runs.
fig, ax = plt.subplots(1, 1, figsize=(20, 20))
divider = make_axes_locatable(ax)
# Narrow axes to the right of the map that holds the colour bar.
cax = divider.append_axes("right", size="2%", pad=0.1)
ax.set_title("Runs with Introductions (>50%)\n" + run_name, fontsize=18)
# Hide proportions below 0.5.
# NOTE(review): a proportion of exactly 0.5 is kept although the title says
# ">50%" - confirm which is intended.
# This overwrites intro_proportion in place, so run this cell after every
# other cell that filters on the unmodified column.
countries_geo['intro_proportion'] = countries_geo['intro_proportion'].mask(
    countries_geo['intro_proportion'] < 0.5
)
countries_geo.plot(
    column='intro_proportion',
    ax=ax,
    legend=True,
    legend_kwds={'label': "proportion"},
    missing_kwds={'color': 'lightgrey'},
    cax=cax,
)
# results_dir has no trailing separator; join so the image is written inside
# the results directory rather than next to it under a fused name.
plt.savefig(
    os.path.join(results_dir, "intro_proportion_more50pct.png"),
    bbox_inches='tight',
    pad_inches=0.01,
)
plt.show()
