In [None]:
import os
import glob
import math
from pathlib import Path
import datetime
import pandas as pd
import numpy as np
import geopandas
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Root folder of the shared drive; every output path below is built from it.
dir_path = "H:/Shared drives/SLF Paper Outputs"
os.chdir(dir_path)
# Echo the working directory so a wrong or unmounted drive is noticed immediately.
working_dir = os.getcwd()
print(working_dir)

In [None]:
def q10(x):
    """Return the 10th percentile of ``x``.

    ``x`` is any object exposing a pandas-style ``quantile`` method (e.g. a
    ``pandas.Series``). The function is passed to ``groupby(...).agg`` further
    down, where its name (``q10``) becomes the label of the resulting column.
    """
    tenth_percentile = x.quantile(0.1)
    return tenth_percentile

In [None]:
# Load the country layer used by the model and keep only the country names.
countries_path = "H:/Shared drives/Pandemic Data/slf_model/inputs/noTWN/countries_slf_hiiMask16.gpkg"
countries_geo = geopandas.read_file(countries_path)
# Select the name column by label rather than by position (it was column 4), so
# a change in the layer's column order cannot silently pick another attribute.
# .copy() makes `countries` an independent frame rather than a slice of the layer.
countries = countries_geo[["NAME"]].copy()
# Display only: the result is not assigned, `countries` keeps its default index.
countries.set_index("NAME")

In [None]:
main_sim = "slf_scenarios_noTWN_wChinaVietnam"

# Entries of the output folder that are not scenario folders.
non_scenario_entries = ('header.csv', 'viz_data', 'summary_data')

# Every remaining sub-directory of the simulation's output folder is a scenario.
sim_names = []
for entry in glob.glob(dir_path + f'/outputs/{main_sim}/*'):
    entry_name = os.path.basename(entry)
    if entry_name in non_scenario_entries:
        continue
    if os.path.isdir(entry):
        sim_names.append(entry_name)
sim_names

## Skip the next cell and go to "Load summary CSVs" if the results are already saved

In [None]:
# For every scenario, pool the origin-destination records of all of its model
# runs and save the pooled tables under outputs/summary_stats/<main_sim>/<scenario>/.

# Country of interest for the per-origin summary below. It is also assigned in
# the configuration cell under "Load summary CSVs"; it is set here as well so
# this cell runs on a fresh kernel (it used to raise NameError unless that later
# cell had been executed first). Keep both values identical.
coi = "United States"


def _concat_frames(frames, **kwargs):
    """Concatenate ``frames`` with ``pd.concat``; return an empty DataFrame for an empty list."""
    return pd.concat(frames, **kwargs) if frames else pd.DataFrame()


for model_run_name in sim_names:
    print(f"\t{model_run_name}")
    results_dir = f"{dir_path}/outputs/{main_sim}/{model_run_name}/alpha0.2_lamda3.9_6801-6804/"

    # Per-run pieces are collected in lists and concatenated once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and copied the whole
    # accumulated frame on every iteration.
    org_dest_frames = []
    first_intro_frames = []
    first_export_frames = []
    coi_first_intro_frames = []
    # One integer counter per country (index keeps the name "NAME"), created
    # directly from zeros instead of filling an all-NaN object frame.
    country_count = pd.DataFrame(0, index=countries.iloc[:,0], columns=["count"])

    # glob is called without recursive=True, so "**" matches exactly one folder
    # level below results_dir (one folder per model run, presumably).
    paths = glob.glob(results_dir + '**/origin_destination.csv')
    for path in paths:
        # load origin-destination model output (columns 1-3 of the file)
        org_dest = pd.read_csv(path).iloc[:,1:4].copy()
        # keep the first four characters of the time step as an integer (the year)
        org_dest["TS"] = org_dest["TS"].astype(str).str[:4].astype(int)
        org_dest_frames.append(org_dest)

        # get list of countries in transmission network and add to count
        run_countries = set(org_dest.Origin) | set(org_dest.Destination)
        for country in run_countries:
            country_count.loc[country, "count"] += 1

        # identify first introductions to each country
        # (drop_duplicates keeps the first row per destination, in file order)
        first_intro_frames.append(org_dest.drop_duplicates(subset = ["Destination"]))

        # identify first export from each country
        first_export_frames.append(org_dest.drop_duplicates(subset = ["Origin"]))

        # COI first intros by origin
        coi_rows = org_dest[org_dest["Destination"] == coi]
        coi_first_intro_frames.append(coi_rows.drop_duplicates(subset = ["Origin"]))

    # org_dest_all keeps each run's own row index (as before); the other tables
    # are renumbered from 0.
    org_dest_all = _concat_frames(org_dest_frames)
    first_intros_all = _concat_frames(first_intro_frames, ignore_index=True)
    first_exports_all = _concat_frames(first_export_frames, ignore_index=True)
    coi_first_intros_by_origin = _concat_frames(coi_first_intro_frames, ignore_index=True)

    # Save all summaries
    model_stats_dir = f"{dir_path}/outputs/summary_stats/{main_sim}/{model_run_name}"
    os.makedirs(model_stats_dir, exist_ok=True)
    org_dest_all.to_csv(f"{model_stats_dir}/org_dest_all.csv")
    country_count.to_csv(f"{model_stats_dir}/country_count.csv")
    first_intros_all.to_csv(f"{model_stats_dir}/first_intros_all.csv")
    first_exports_all.to_csv(f"{model_stats_dir}/first_exports_all.csv")
    coi_first_intros_by_origin.to_csv(f"{model_stats_dir}/coi_first_intros_by_origin.csv")


## Load summary CSVs

In [None]:
# Analysis configuration shared by the cells below.
native_countries_list = ["China", "Viet Nam"]

# First and last simulated year (both inclusive) and the resulting year list.
start_year, end_year = 2006, 2029
sim_years = [year for year in range(start_year, end_year + 1)]

# Number of model runs per scenario; used to turn counts into proportions.
num_runs = 1000

# set country of interest (coi)
coi = "United States"

In [None]:
# Scenario whose saved summary tables are analysed in the following sections.
model_run_name = "BAU"
model_stats_dir = "/".join([dir_path, "outputs", "summary_stats", main_sim, model_run_name])

In [None]:
# Read the five summary tables previously written to model_stats_dir.
summary_tables = {
    stem: pd.read_csv(f"{model_stats_dir}/{stem}.csv")
    for stem in (
        "org_dest_all",
        "country_count",
        "first_intros_all",
        "first_exports_all",
        "coi_first_intros_by_origin",
    )
}
org_dest_all = summary_tables["org_dest_all"]
country_count = summary_tables["country_count"]
first_intros_all = summary_tables["first_intros_all"]
first_exports_all = summary_tables["first_exports_all"]
coi_first_intros_by_origin = summary_tables["coi_first_intros_by_origin"]

## Change country spellings

In [None]:
# Spelling in the model data -> spelling used in tables and figures.
name_changes = {
    "Korea, Republic of": "Rep. of Korea",
    "Viet Nam": "Vietnam",
}

In [None]:
# Apply the spelling changes to every table that holds country names.
# Each name is rebound to a new frame; the frames are not modified in place.
(
    org_dest_all,
    country_count,
    first_intros_all,
    first_exports_all,
    coi_first_intros_by_origin,
    countries,
) = (
    table.replace(name_changes)
    for table in (
        org_dest_all,
        country_count,
        first_intros_all,
        first_exports_all,
        coi_first_intros_by_origin,
        countries,
    )
)

## Plot first introduction year by country

In [None]:
# Destinations that received a first introduction in more than `min_intro_prop`
# of the model runs.
min_intro_prop = 0.5
# One pass over the table gives the number of first-introduction rows per country.
first_intro_counts = first_intros_all["Destination"].value_counts()
# Sorted so that the panel order of the figures below is the same in every
# kernel session (iterating a set of strings is not).
destinations_all = sorted(first_intro_counts.index, key=str)
destinations = [
    name for name in destinations_all
    if first_intro_counts[name] > num_runs * min_intro_prop
]
num_destinations = len(destinations)

In [None]:
# Plot histograms of first intros by destination
# Two rows of panels with shared axes, one panel per retained destination.
n_panel_cols = math.ceil(num_destinations/2)
fig, axs = plt.subplots(2, n_panel_cols, sharey=True, sharex=True, figsize=(12,6))
fig.subplots_adjust(hspace=0.35, wspace=0.15, top=0.82)
# Figure-level axis captions, since the panels share their axes.
fig.text(0.5, 0.04, 'year', ha='center', fontsize=16)
fig.text(0.08, 0.5, 'model runs', va='center', rotation='vertical', fontsize=18)
axs = axs.ravel()
for panel, destination in zip(axs, destinations):
    first_intro_years = first_intros_all.loc[first_intros_all["Destination"] == destination, "TS"]
    panel.hist(list(first_intro_years))
    panel.set_title(destination)
plt.suptitle(f'''{model_run_name} \n Year of First Introduction by Destination''', fontsize=18)
plt.savefig(f'{model_stats_dir}/first_intro_by_destination.png')


In [None]:
# Save separate plots for histograms of first intros by destination
year_ticks = [2005, 2010, 2015, 2020, 2025, 2030]
for destination in destinations:
    fig, ax = plt.subplots(1, figsize=(4, 3))
    fig.subplots_adjust(left=0.27, top=0.76, bottom=0.21)
    first_intro_years = first_intros_all.loc[first_intros_all["Destination"] == destination, "TS"]
    ax.hist(list(first_intro_years), color="#31688e")
    ax.set_title(f'''{destination}\nFirst Introduction Year''', fontsize=18, pad=14)
    ax.set_xlabel("year", fontsize=18)
    ax.set_ylabel("% model runs", fontsize=18)
    ax.set_xlim(left=2005,right=2030)
    # The y axis spans all model runs; num_runs replaces the former literal 1000.
    ax.set_ylim(top=num_runs)
    # Fix the tick positions before replacing their labels; otherwise the labels
    # are attached to whatever ticks the automatic locator happens to produce.
    ax.set_xticks(year_ticks)
    ax.set_xticklabels(["",2010,"",2020,"",2030], fontsize=16)
    y_limits = ax.get_ylim()
    y_vals = ax.get_yticks()
    ax.set_yticks(y_vals)
    # set_yticks may widen the view to show every tick; restore the chosen limits.
    ax.set_ylim(y_limits)
    # Counts are relabelled as a percentage of the model runs.
    ax.set_yticklabels(['{:3.0f}%'.format((y / num_runs) * 100) for y in y_vals], fontsize=16)
    fig.savefig(f'{model_stats_dir}/{destination}_first_intros.png')
    # Show, then release the figure so figures do not pile up during the loop.
    plt.show()
    plt.close(fig)

## Plot all introductions by country

In [None]:
# Destinations with more introduction records (over all runs) than
# `min_intro_prop` times the number of model runs.
min_intro_prop = 0.5
# One pass over the table gives the number of introduction rows per country.
intro_counts = org_dest_all["Destination"].value_counts()
# Sorted so that the panel order of the figures below is the same in every
# kernel session (iterating a set of strings is not).
destinations_all = sorted(intro_counts.index, key=str)
destinations = [
    name for name in destinations_all
    if intro_counts[name] > num_runs * min_intro_prop
]
num_destinations = len(destinations)

In [None]:
# Plot histograms of all intros by destination
# Two rows of panels with shared axes, one panel per retained destination.
n_panel_cols = math.ceil(num_destinations/2)
fig, axs = plt.subplots(2, n_panel_cols, sharey=True, sharex=True, figsize=(12,6))
fig.subplots_adjust(hspace=0.35, wspace=0.15, top=0.82)
# Figure-level axis captions, since the panels share their axes.
fig.text(0.5, 0.04, 'year', ha='center', fontsize=13)
fig.text(0.08, 0.5, 'model runs', va='center', rotation='vertical', fontsize=18)
axs = axs.ravel()
for panel, destination in zip(axs, destinations):
    intro_years = org_dest_all.loc[org_dest_all["Destination"] == destination, "TS"]
    panel.hist(list(intro_years))
    panel.set_title(destination)
plt.suptitle(f'''{model_run_name} \n Introductions by Destination''', fontsize=18)
plt.savefig(f'{model_stats_dir}/all_intros_by_destination.png')


In [None]:
# Save separate plots for histograms of all intros by destination
year_ticks = [2005, 2010, 2015, 2020, 2025, 2030]
for destination in destinations:
    fig, ax = plt.subplots(1, figsize=(4,3))
    fig.subplots_adjust(left=0.22, top=0.78, bottom=0.2)
    intro_years = org_dest_all.loc[org_dest_all["Destination"] == destination, "TS"]
    ax.hist(list(intro_years), color="#31688e")
    ax.set_title(f'''{destination}\nIntroduction Year''', fontsize=18, pad=14)
    ax.set_xlabel("year", fontsize=18)
    ax.set_ylabel("% model runs", fontsize=18)
    ax.set_xlim(left=2005,right=2030)
    # The y axis spans all model runs; num_runs replaces the former literal 1000.
    ax.set_ylim(top=num_runs)
    # Fix the tick positions before replacing their labels; otherwise the labels
    # are attached to whatever ticks the automatic locator happens to produce.
    ax.set_xticks(year_ticks)
    ax.set_xticklabels(["",2010,"",2020,"",2030], fontsize=16)
    y_limits = ax.get_ylim()
    y_vals = ax.get_yticks()
    ax.set_yticks(y_vals)
    # set_yticks may widen the view to show every tick; restore the chosen limits.
    ax.set_ylim(y_limits)
    # Counts are relabelled as a percentage of the model runs.
    ax.set_yticklabels(['{:3.0f}%'.format((y / num_runs) * 100) for y in y_vals], fontsize=16)
    fig.savefig(f'{model_stats_dir}/{destination}_all_intros.png')
    # Show, then release the figure so figures do not pile up during the loop.
    plt.show()
    plt.close(fig)

## Bridgehead populations

In [None]:
# Count origins for each timestep
# Country x year matrix of outgoing transmissions, summed over all model runs.
# It is created directly from integer zeros: filling an all-NaN object frame with
# 0 only becomes numeric through pandas' deprecated silent downcasting, and the
# heatmap built from this table needs numeric data.
origin_countries_by_ts = pd.DataFrame(0, index=countries.iloc[:,0], columns=sim_years)
# Number of records per (origin, year); after count() the "Destination" column
# holds the number of rows with a non-missing destination.
origins = (org_dest_all.groupby(["Origin", "TS"]).count()).reset_index().fillna(0)
for origin in origins.itertuples(index=False):
    # NOTE: .loc adds a new row/column if the origin or year is not in the matrix yet.
    origin_countries_by_ts.loc[origin.Origin, origin.TS] = origin.Destination


In [None]:
# Create heatmap of bridgehead introductions
# Keep only the countries that still export in the last simulated year
# (end_year replaces the former literal 2029).
origin_countries_by_ts_filtered = origin_countries_by_ts[origin_countries_by_ts.loc[:,end_year] > 0]
# The native-range countries are left out of the bridgehead figure. Their display
# names are derived from the configuration (native_countries_list translated by
# name_changes) instead of being repeated as literals, and errors="ignore" keeps
# the cell working when one of them was already removed by the filter above.
native_display_names = [name_changes.get(name, name) for name in native_countries_list]
bridgehead_exports = origin_countries_by_ts_filtered.drop(native_display_names, errors="ignore")

fig, ax = plt.subplots(figsize = (12, 8))
fig.subplots_adjust(left=0.22, right=1, top = .92)
res = sns.heatmap(
    bridgehead_exports,
    ax=ax,
    cmap = sns.color_palette("light:#31688e", as_cmap=True),
    linewidths = 0.30,
    annot = False,
    cbar_kws={'label':f'Total Outgoing Transmissions Over {num_runs} Model Runs'},
)
res.set_xticklabels(res.get_xmajorticklabels(), fontsize = 14)
res.set_yticklabels(res.get_ymajorticklabels(), fontsize = 14)
# The last axes of the figure is the colour bar; enlarge its label.
ax.figure.axes[-1].yaxis.label.set_size(14)
ax.set_title("Spotted Lanternfly Exports from Bridgehead Populations", fontsize=20, pad=15)
ax.set_ylabel("")
ax.set_xlabel("Year", fontsize = 14)
# Save before showing: the figure may be released once it has been shown.
fig.savefig(f"{model_stats_dir}/bridgehead_sources.png", dpi=600)
plt.show()

## COI introduction summaries

In [None]:
# All introduction records whose destination is the country of interest.
is_coi_destination = org_dest_all["Destination"] == coi
coi_all_intros = org_dest_all[is_coi_destination]

# Save COI intros summaries
# Per-origin statistics of the introduction year; the order of each list is the
# column order of the corresponding CSV.
all_intro_stats = ['count', q10, 'min', 'mean', 'median', 'max', 'std']
first_intro_stats = ['count', 'min', q10, 'mean', 'median', 'max', 'std']

coi_all_intros_by_origin_summary = coi_all_intros.groupby(["Origin"]).agg({'TS': all_intro_stats})
coi_all_intros_by_origin_summary.to_csv(f'{model_stats_dir}/all_intros_by_source_to_{coi}.csv')

coi_first_intros_by_origin_summary = coi_first_intros_by_origin.groupby(["Origin"]).agg({'TS': first_intro_stats})
coi_first_intros_by_origin_summary.to_csv(f'{model_stats_dir}/first_intro_by_source_to_{coi}.csv')

In [None]:
# Save separate plots for histograms of all COI first intros by origin
coi_origins = list(set(coi_first_intros_by_origin.Origin))
year_ticks = [2005, 2010, 2015, 2020, 2025, 2030]
for coi_origin in coi_origins:
    fig, ax = plt.subplots(1, figsize=(4,3))
    fig.subplots_adjust(left=0.25, top=0.75, bottom=0.21, right=0.85)
    first_export_years = coi_first_intros_by_origin.loc[coi_first_intros_by_origin["Origin"] == coi_origin, "TS"]
    ax.hist(list(first_export_years), color="#31688e")
    # Mark the 10th percentile of the first-export year and print it beside the line.
    q10_value = q10(first_export_years)
    ax.axvline(q10_value, color="red")
    ax.text(q10_value - 2.2,17.5,round(q10_value),rotation=90, fontsize=15)
    ax.set_title(f'''{coi_origin}\nFirst Export to {coi}''', fontsize=18, pad=18)
    ax.set_xlabel("year", fontsize=18)
    ax.set_ylabel("% model runs", fontsize=18)
    ax.set_xlim(left=2005,right=2030)
    ax.set_ylim(top=25)
    # Fix the tick positions before replacing their labels; otherwise the labels
    # are attached to whatever ticks the automatic locator happens to produce.
    ax.set_xticks(year_ticks)
    ax.set_xticklabels(["",2010,"",2020,"",2030], fontsize=16)
    y_limits = ax.get_ylim()
    y_vals = ax.get_yticks()
    ax.set_yticks(y_vals)
    # set_yticks may widen the view to show every tick; restore the chosen limits.
    ax.set_ylim(y_limits)
    # Counts are relabelled as a percentage of the model runs (num_runs replaces
    # the former literal 1000).
    ax.set_yticklabels(['{:3.1f}%'.format((y / num_runs) * 100) for y in y_vals], fontsize=16)
    fig.savefig(f'{model_stats_dir}/{coi_origin}_first_intros_to_{coi}.png', dpi=300)
    # Show, then release the figure so figures do not pile up during the loop.
    plt.show()
    plt.close(fig)

## Full network summary

In [None]:
# Save full network summaries for Vosviewer
# Columns 1-2 of the pooled table are used for both counts (presumably Origin and
# Destination, as the group keys below require).
origin_destination_pairs = org_dest_all.iloc[:,1:3]
# Rows per destination (introductions received) and per origin (exports made).
destinations = origin_destination_pairs.groupby(by=["Destination"]).count()
origins = origin_destination_pairs.groupby(by=["Origin"]).count()

In [None]:
# Map file with all node IDs (name), weight (total intros as destination, or total intros as origin), and score (intro proportion, export proportion, or intro/export proportion)
network_summary = countries.copy().set_index("NAME")
node_names = network_summary.index

# Rows per country in the first-introduction / first-export tables. Each run
# contributes at most one row per destination (resp. origin), so a row count is a
# number of runs.
intro_runs = first_intros_all["Destination"].value_counts()
# Fix: exports are counted by "Origin". The previous filter on "Destination"
# counted how often a country *received* another country's first export.
export_runs = first_exports_all["Origin"].value_counts()
# Number of runs in which the country appeared in the network at all.
network_runs = country_count.set_index("NAME")["count"]

# Whole columns are assigned at once (no chained `df[col].loc[row] = ...`
# assignment); countries without any record get a proportion of 0.
network_summary["intro_prop"] = intro_runs.reindex(node_names, fill_value=0).to_numpy() / num_runs
network_summary["export_prop"] = export_runs.reindex(node_names, fill_value=0).to_numpy() / num_runs
network_summary["prop"] = network_runs.reindex(node_names).to_numpy() / num_runs

# Attach the total counts by country name. Both tables are indexed by country, so
# a plain index join is used; passing left_on/right_on together with
# left_index/right_index to merge is contradictory.
network_summary = network_summary.join(destinations, how="left").join(origins, how="left")
# `destinations` carries its counts in a column named "Origin" and `origins` in a
# column named "Destination" (the non-key column left after each groupby).
network_summary = network_summary.rename(columns={"Origin":"intro_dest", "Destination":"intro_orig"})
network_summary = network_summary.fillna(0)
network_summary.to_csv(f"{model_stats_dir}/network_summary.csv")

In [None]:
# Network file with all links between nodes, and link strength (number of intros between nodes), will be summed by Vosviewer
intro_pairs_count = (
    org_dest_all.iloc[:,1:4]
    .groupby(by = ["Origin", "Destination"])
    .count()
    .reset_index()
    # after count(), the "TS" column holds the number of records per pair
    .rename(columns={"TS": "intro_count"})
)
intro_pairs_count.to_csv(f"{model_stats_dir}/intro_pairs_count.csv")

## Plot COI introductions by timestep

In [None]:
# Scenarios compared in the figure below, each with its own colour and marker.
sim_list = ['BAU', 'stopOrigins', 'stopBridgeheads']
markers = ["o", "^", "s"]
cols = list(sns.color_palette("tab10", len(sim_list)))
col_dict = {sim: col for sim, col in zip(sim_list, cols)}
marker_dict = {sim: marker for sim, marker in zip(sim_list, markers)}

# The comparison figure is stored alongside the BAU summaries.
out_dir = f"{dir_path}/outputs/summary_stats/{main_sim}/BAU"

In [None]:
# Introductions to the country of interest per year, one line per scenario.

# Legend text per scenario. Each line is labelled when it is drawn, so the legend
# no longer depends on the order in which the scenarios are plotted.
scenario_labels = {
    "BAU": "Business as usual",
    "stopOrigins": "Native range \nmanagement",
    "stopBridgeheads": "Simulation-informed \nmanagement",
}

fig, ax = plt.subplots(figsize = (10.5,7.5))
fig.subplots_adjust(left=0.1, right=0.96, bottom=0.1, top=0.88)
# NOTE: the loop rebinds model_run_name, model_stats_dir, org_dest_all and
# coi_all_intros; after this cell they refer to the last scenario in sim_list.
for model_run_name in sim_list:
    model_stats_dir = f"{dir_path}/outputs/summary_stats/{main_sim}/{model_run_name}"
    org_dest_all = pd.read_csv(f"{model_stats_dir}/org_dest_all.csv")
    # .copy(): the TS conversion below must write to an independent frame, not to
    # a filtered view of org_dest_all (avoids SettingWithCopyWarning).
    coi_all_intros = org_dest_all[org_dest_all["Destination"] == coi].copy()
    coi_all_intros['TS'] = coi_all_intros['TS'].astype(int)
    # Number of introduction records per year; the count lands in "Destination".
    coi_all_intros_by_ts = coi_all_intros.iloc[:,2:4].groupby(by = ["TS"]).count()
    coi_all_intros_by_ts = coi_all_intros_by_ts.reset_index(drop=False)
    coi_all_intros_by_ts = coi_all_intros_by_ts.rename(columns={"Destination":model_run_name})
    ax.plot(
        coi_all_intros_by_ts["TS"],
        coi_all_intros_by_ts[model_run_name],
        label=scenario_labels.get(model_run_name, model_run_name),
        marker=marker_dict[model_run_name],
        markerfacecolor=col_dict[model_run_name],
        markersize=8,
        color=col_dict[model_run_name],
        linewidth=2,
    )
ax.legend(fontsize=16, frameon=False, loc = "lower left")
ax.set_ylabel("Introductions", fontsize=18)
ax.set_xlabel("Year", fontsize=18)
ax.tick_params(axis="both", labelsize=16)
ax.set_title(f"Simulated Introductions to {coi}\n Over {num_runs:,} Model Runs", fontsize=20, pad=12)
# Save first, then show: the original `plt.show` lacked parentheses and did
# nothing, and a figure saved after a real plt.show() can come out empty.
fig.savefig(f'{out_dir}/scenarios_{coi}_intros_by_ts.png', dpi=600)
plt.show()