# Pandemic Network Model:
## Predicting the spread of plant pests or pathogens using trade, environment, and pest ecology open data

This notebook provides the workflow for running the Pandemic Network Model. To run this notebook, the following are assumed:
- Cloned the Pandemic GitHub repository (git clone https://github.com/ncsu-landscape-dynamics/Pandemic_Model.git)
- Notebook launched from the notebook folder of the cloned repo
- Already have the required data downloaded and formatted (see the data_acquisition_format notebook)

## Imports

In [1]:
import os
import sys
import json
import math
import numpy as np
import pandas as pd
import geopandas
from dotenv import load_dotenv

In [2]:
# Move the working directory up one level from where the notebook was launched
# (presumably so the `pandemic` package imported in the next cell is found
# from the repository root -- confirm against your checkout layout).
# The change is relative, so re-running this cell moves up one more level each
# time; run it only once per kernel session.
os.chdir('../')
print(os.getcwd())

C:\Users\cawalden\Documents\GitHub\Pandemic_Model


In [3]:
from pandemic.helpers import create_trades_list
from pandemic.model_equations import (
    pandemic_multiple_time_steps,
)
from pandemic.output_files import (
    create_model_dirs,
    save_model_output,
    aggregate_monthly_output_to_annual,
    write_model_metadata,
)

## Set Paths and Environment Variables

In [4]:
# Path to folder containing .env file ("<env_file_path>/.env" is what gets loaded)
# e.g., 'H:/Shared drives/APHIS  Projects/Pandemic/Data'
# NOTE: the value below is a machine-specific absolute path; edit it for your
# own setup, or swap in the str(input()) alternative to be prompted for it.
env_file_path = "C:/Users/cawalden/Documents/Projects/Pandemic" # str(input())

In [5]:
# Load the variables defined in the .env file into the process environment.
# The call is left as the last expression so the cell displays its return
# value (python-dotenv returns True when at least one variable was set).
dotenv_file = os.path.join(env_file_path, '.env')
load_dotenv(dotenv_file)

True

In [6]:
# Pull the four path settings out of the environment in one pass.
# os.getenv returns None for any variable that is not set.
data_dir, input_dir, out_dir, countries_path = (
    os.getenv(variable_name)
    for variable_name in ('DATA_PATH', 'INPUT_PATH', 'OUTPUT_PATH', 'COUNTRIES_PATH')
)

# Echo the countries file location as a quick check that the .env was picked up
print(countries_path)

H:/Shared drives/Pandemic Data/slf_model/inputs/countries_slf_hiiMask16_wTWN.gpkg


## Set Model Parameters
Model arguments can be set either with a configuration file (created in the
create_model_config notebook) or by entering the values in the cells below.

### With Configuration File

In [7]:
# Path to model configuration file
# e.g. input_dir + '/config_files/slf_gamma4-1_6801-6804_v2/config.json'
# path_to_config_json = str(input())

In [8]:
# OPTIONAL: uncomment the lines in this cell (and path_to_config_json in the
# cell above) to set the model arguments from a configuration file instead of
# entering them in the "In Current Session" cells.

# Read model arguments from configuration file
# with open(path_to_config_json) as json_file:
#     config = json.load(json_file)

# Columns to drop from model output dataframe
# NOTE: the run cell currently passes columns_to_drop=None to
# save_model_output, so cols_to_drop is not used unless that call is changed.
# cols_to_drop = config["columns_to_drop"]

# commodity_path = config["commodity_path"]
# commodity_forecast_path = config["commodity_forecast_path"]
# native_countries_list = config["native_countries_list"]
# season_dict = config["season_dict"]
# alpha = config["alpha"]
# beta = config["beta"]
# mu = config["mu"]
# lamda_c_list = config["lamda_c_list"]
# phi = config["phi"]
# w_phi = config["w_phi"]
# start_year = config["start_year"]
# stop_year = config["stop_year"]
# random_seed = config["random_seed"]
# time_infect_units = config["transmission_lag_unit"]
# transmission_lag_type = config["transmission_lag_type"]
# time_infect = config["time_to_infectivity"]
# gamma_shape = config["transmission_lag_shape"]
# gamma_scale = config["transmission_lag_scale"]
# save_entry = config['save_entry']
# save_estab = config['save_estab']
# save_intro = config["save_intro"]
# save_country_intros = config["save_country_intros"]

### In Current Session

In [9]:
# Folder holding the trade data for the commodity codes to model; the last
# path component is the code range (6801-6804 here).
# Alternative location without the comtrade_wTWN subfolder:
# commodity_path = f"{input_dir}/monthly_agg/6801-6804"
commodity_path = f"{input_dir}/comtrade_wTWN/monthly_agg/6801-6804"

# Forecast path can be set to None to exclude forecast from model run
commodity_forecast_path = f"{input_dir}/comtrade_wTWN/trade_forecast/monthly_agg/6801-6804"

# Countries where the pest is already present at time T0. Names are matched
# against the "NAME" column of the countries table in the run cell.
# Uncomment further entries to start with more countries.
native_countries_list = [
    "China",
    # "Viet Nam",
    # "India",
]

# Months (as two-digit strings) in which the pest can be transported, one list
# per season key (NH / SH presumably northern / southern hemisphere -- confirm).
season_dict = {
    "NH_season": ["09", "10", "11", "12", "01", "02", "03", "04"],
    "SH_season": ["04", "05", "06", "07", "08", "09", "10"],
}

# scenario_list = []

# for i in range(2010, 2030):
#     start_scenario = [2010, 'CHN', 'USA', 'decrease', 1]
#     new_scenario = start_scenario
#     new_scenario[0] = i
#     scenario_list .append(new_scenario)
    
# for i in range(2014, 2030):
#     start_scenario = [2014, 'JPN', 'USA', 'decrease', 0.8]
#     new_scenario = start_scenario
#     new_scenario[0] = i
#     scenario_list .append(new_scenario)
    
# for i in range(2014, 2030):
#     start_scenario = [2014, 'KOR', 'USA', 'decrease', 0.8]
#     new_scenario = start_scenario
#     new_scenario[0] = i
#     scenario_list .append(new_scenario)
    
# for i in range(2020, 2030):
#     start_scenario = [2020, 'ITA', 'USA', 'decrease', 0.8]
#     new_scenario = start_scenario
#     new_scenario[0] = i
#     scenario_list .append(new_scenario)
    
# for i in range(2020, 2030):
#     start_scenario = [2020, 'TUR', 'USA', 'decrease', 0.8]
#     new_scenario = start_scenario
#     new_scenario[0] = i
#     scenario_list .append(new_scenario)

In [10]:
# Model parameters; each is passed unchanged to pandemic_multiple_time_steps
# in the run cell.
alpha = 0.04
beta = 0.5
mu = 0
# One value per commodity array; the run cell skips a commodity whose value
# is not greater than 0.
lamda_c_list = [3]
phi = 1
w_phi = 1
# Year range handed to create_trades_list when loading the trade data;
# start_year also sets the initial infective date of the native countries.
start_year = 2006
stop_year = 2016
# Passed to np.random.seed() before each commodity run. With None the
# generator is seeded from system entropy, so results differ between runs;
# set an integer to make a run repeatable.
random_seed = None

# Transmission lag type to include (e.g., None, static, stochastic)
transmission_lag_type = "stochastic" 

# If transmission lag type is static, set time_infect as number of 
# years to delay country from becoming an origin. If lag type is
# none or stochastic, set to None
time_infect = None 
time_infect_units = "year"

# If transmission lag type is stochastic, set values to generate
# a gamma distribution. If lag type is none or static, set to None.
gamma_shape = 4
gamma_scale = 1

In [11]:
# Optional per-time-step outputs, all switched off here. When a flag is True
# the run cell asks the output helpers to write the matching n x n matrices
# (n = number of countries), whose values are the origin-destination
# probability of entry or probability of establishment.
save_entry = save_estab = save_intro = save_country_intros = False

## Load Model Input Data

In [12]:
# Countries table: read from the geopackage named in the .env file
countries = geopandas.read_file(countries_path, driver="GPKG")

# Distance and climate-similarity matrices: .npy files inside the input folder
distance_matrix_file = input_dir + '/distance_matrix_wTWN.npy'
climate_matrix_file = input_dir + '/climate_similarities_hiiMask16_wTWN.npy'
distances = np.load(distance_matrix_file)
climate_similarities = np.load(climate_matrix_file)

In [13]:
# Read & format trade data for the configured commodity path(s) and year range.
# How the returned values are used in the cells below:
#   trades_list           - one trade array per commodity (shapes are printed
#                           in a later cell); the model is run once per entry
#   file_list_filtered    - trade file paths; each time step is parsed from
#                           the end of the file name
#   code_list             - commodity code(s), used to name the run outputs
#   commodities_available - commodity path(s), recorded in the model metadata
trades_list, file_list_filtered, code_list, commodities_available = create_trades_list(
    commodity_path=commodity_path,
    commodity_forecast_path=commodity_forecast_path,
    start_year=start_year,
    stop_year=stop_year,
    distances=distances,
)

Loading and formatting trade data...
	 ['H:/Shared drives/Pandemic Data/slf_model/inputs//comtrade_wTWN/monthly_agg\\6801-6804']


In [14]:
# Sorted time-step labels taken from the trade file names: the label is the
# text after the last underscore of each file name, with the extension removed.
date_list = sorted(
    os.path.splitext(os.path.basename(trade_file))[0].split("_")[-1]
    for trade_file in file_list_filtered
)

# Year (first four characters) of the last time step in the simulation
end_sim_year = date_list[-1][:4]

In [15]:
# Example trade array for formatting outputs: the first trade file is read
# with its first row as the header and its first column as the index, and is
# later handed to save_model_output as example_trade_matrix.
traded = pd.read_csv(
    file_list_filtered[0], sep=",", header=0, index_col=0, encoding="latin1"
)

In [16]:
# Sanity check: how many commodity arrays were loaded, and the shape of each
print("Length of trades list: ", len(trades_list))
for commodity_array in trades_list:
    print("\tcommodity array shape: ", commodity_array.shape)

Length of trades list:  1
	commodity array shape:  (132, 234, 234)


## Run Model for Selected Time Steps and Commodities

In [17]:
# Labels used to build the output folder and file names for this run:
#   sim_name     - simulation name
#   add_descript - additional description (i.e., parameter value or scenario tested)
#   run_num      - run number identifying the stochastic run; the first run is 0
sim_name = 'slf_test'
add_descript = 'dynamic_trade_scenario'
run_num = 0

In [18]:
# Run the model once per commodity array in trades_list and write its outputs.
# Commodities with a positive lamda_c are the ones that get modelled.
print("Number of commodities: ", len([c for c in lamda_c_list if c > 0]))
print("Number of time steps: ", trades_list[0].shape[0])

# Time steps are treated as monthly when the date strings are longer than a
# 4-character year. This is a property of date_list as a whole, so it is
# checked once on the first entry rather than on an entry indexed by the
# commodity counter.
monthly_time_steps = len(date_list[0]) > 4

for i in range(len(trades_list)):
    if len(trades_list) > 1:
        code = code_list[i]
        print("\nRunning model for commodity: ", code)
    else:
        code = code_list[0]
        print(
            "\nRunning model for commodity: ",
            os.path.basename(commodities_available[0]),
        )
    trades = trades_list[i]
    # NOTE: locations is the same object as countries (not a copy), so the
    # "Presence" and "Infective" columns set below are written to countries too.
    locations = countries
    prob = np.zeros(len(countries.index))

    # Initial state: nothing present or infective except the native countries
    pres_ts0 = [False] * len(prob)
    infect_ts0 = np.empty(locations.shape[0], dtype="object")
    for country in native_countries_list:
        country_index = countries.index[countries["NAME"] == country][0]
        pres_ts0[country_index] = True
        # if time steps are monthly and time to infectivity is in years
        if monthly_time_steps:
            infect_ts0[country_index] = str(start_year) + "01"
        # else if time steps are annual and time to infectivity is in years
        else:
            infect_ts0[country_index] = str(start_year)
    locations["Presence"] = pres_ts0
    locations["Infective"] = infect_ts0

    # Indices of the strict upper triangle, so each country pair is counted once
    iu1 = np.triu_indices(climate_similarities.shape[0], 1)

    # revised equation values
    sigma_h = (1 - countries["Host Percent Area"]).std()
    sigma_kappa = np.std(1 - climate_similarities[iu1])

    # Re-seed before every commodity run (a seed of None gives an unseeded run)
    np.random.seed(random_seed)
    lamda_c = lamda_c_list[i]

    if lamda_c > 0:
        e = pandemic_multiple_time_steps(
            trades=trades,
            distances=distances,
            locations=locations,
            climate_similarities=climate_similarities,
            alpha=alpha,
            beta=beta,
            mu=mu,
            lamda_c=lamda_c,
            phi=phi,
            sigma_h=sigma_h,
            sigma_kappa=sigma_kappa,
            w_phi=w_phi,
            start_year=start_year,
            date_list=date_list,
            season_dict=season_dict,
            transmission_lag_type=transmission_lag_type,
            time_infect_units=time_infect_units,
            time_infect=time_infect,
            gamma_shape=gamma_shape,
            gamma_scale=gamma_scale,
            # scenario_list=scenario_list
        )

        run_prefix = f"{sim_name}_{add_descript}_{code}"

        arr_dict = {
            "prob_entry": "probability_of_entry",
            "prob_intro": "probability_of_introduction",
            "prob_est": "probability_of_establishment",
            "country_introduction": "country_introduction",
        }

        # Outputs go to <out_dir>/<sim_name>/<run_prefix>/run_<run_num>/
        outpath = out_dir + f"/{sim_name}/{run_prefix}/run_{run_num}/"
        create_model_dirs(
            outpath=outpath,
            output_dict=arr_dict,
            write_entry_probs=save_entry, 
            write_estab_probs=save_estab,
            write_intro_probs=save_intro,
            write_country_intros=save_country_intros,
        )
        print("saving model outputs: ", outpath)
        full_out_df = save_model_output(
            model_output_object=e,
            example_trade_matrix=traded,
            outpath=outpath,
            date_list=date_list,
            write_entry_probs=save_entry,
            write_estab_probs=save_estab,
            write_intro_probs=save_intro,
            write_country_intros=save_country_intros,
            columns_to_drop=None
        )

        # If time steps are monthly, aggregate predictions to
        # annual for dashboard display
        if monthly_time_steps:
            print("aggregating monthly predictions to annual time steps...")
            aggregate_monthly_output_to_annual(
                formatted_geojson=full_out_df, outpath=outpath
            )

        # Save model metadata to text file
        print("writing model metadata...")
        write_model_metadata(
            main_model_output=e[0],
            alpha=alpha,
            beta=beta,
            mu=mu,
            lamda_c_list=lamda_c_list,
            phi=phi,
            sigma_h=sigma_h,
            sigma_kappa=sigma_kappa,
            w_phi=w_phi,
            start_year=start_year,
            end_sim_year=end_sim_year,
            transmission_lag_type=transmission_lag_type,
            time_infect_units=time_infect_units,
            gamma_shape=gamma_shape,
            gamma_scale=gamma_scale,
            random_seed=random_seed,
            time_infect=time_infect,
            native_countries_list=native_countries_list,
            countries_path=countries_path,
            commodities_available=commodities_available[i], 
            commodity_forecast_path=commodity_forecast_path,
            phyto_weights=list(locations['Phytosanitary Capacity'].unique()),
            outpath=outpath, 
            run_num=run_num,
        )

    else:
        print("\tskipping as pest is not transported with this commodity")

Number of commodities:  1
Number of time steps:  132

Running model for commodity:  6801-6804
TIME STEP:  200601
TIME STEP:  200602
TIME STEP:  200603
TIME STEP:  200604
TIME STEP:  200605
TIME STEP:  200606
TIME STEP:  200607
TIME STEP:  200608
TIME STEP:  200609
		 China --> Netherlands
			first intro...
			time to infectious:  3
				Netherlands infective:  200909
TIME STEP:  200610
TIME STEP:  200611
TIME STEP:  200612
TIME STEP:  200701
		 China --> Korea, Republic of
			first intro...
			time to infectious:  4
				Korea, Republic of infective:  201101
TIME STEP:  200702
TIME STEP:  200703
TIME STEP:  200704
TIME STEP:  200705
TIME STEP:  200706
TIME STEP:  200707
TIME STEP:  200708
TIME STEP:  200709
TIME STEP:  200710
TIME STEP:  200711
TIME STEP:  200712
TIME STEP:  200801
TIME STEP:  200802
TIME STEP:  200803
TIME STEP:  200804
TIME STEP:  200805
TIME STEP:  200806
TIME STEP:  200807
TIME STEP:  200808
TIME STEP:  200809
TIME STEP:  200810
TIME STEP:  200811
TIME STEP:  200812
