In [None]:
import _path
from plot_results import *
from transition_probability_estimation import *
from Data_synthesize import *
from rw_data_processing import *
from scipy.optimize import leastsq
from matplotlib.patches import Rectangle
from pathlib import Path
import random
import seaborn
import queue
import scipy.stats as stats
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import powerlaw
%load_ext autoreload
%autoreload 2
plt.style.use("../rw_visualization.mplstyle")


In [None]:
# Silence FutureWarning messages so they do not clutter the cell outputs.
from warnings import simplefilter
# Applies to every FutureWarning raised after this point.
simplefilter(action='ignore', category=FutureWarning)

# Default seaborn colour palette; plot_firefly indexes into it for marker colours.
current_palette = seaborn.color_palette()
# Bare expression so the notebook displays the palette as this cell's output.
current_palette


Check the distances between the initial fireflies so that we can tune gamma

In [None]:
# Read the initial firefly population: one firefly per line, space-separated,
# with the cost stored in the last column.
from scipy.spatial import distance_matrix
folder = Path('../firefly_result/Firefly_result_pop_size_100_alpha_1_betamin_1_gamma_0.131_max_generations_200')
file_name = 'firefly_result_first_initial_guess.txt'
with open(folder/file_name, 'r') as f:
    initial_guess_results = np.array(
        [[float(token) for token in row.split(' ')] for row in f])

initial_costs = initial_guess_results[:, -1]
initial_fireflies = initial_guess_results[:, 0:-1]

# Read the parameter bounds: row 0 is used as the lower bound, row 1 as the upper bound.
file_name = 'bound.txt'
with open(folder/file_name, 'r') as f:
    bounds = np.array(
        [[float(token) for token in row.split(' ')] for row in f])
lower_bound, upper_bound = bounds[0, :], bounds[1, :]

# Rescale every parameter by its bound range. NaN entries (e.g. 0/0 when both
# bounds coincide) are replaced by 0.
normalized_initial_fireflies = (
    initial_fireflies - lower_bound)/(upper_bound - lower_bound)
normalized_initial_fireflies[np.isnan(normalized_initial_fireflies)] = 0

# Pairwise distances between all normalized initial fireflies.
dist_matrix = distance_matrix(
    normalized_initial_fireflies, normalized_initial_fireflies)


In [None]:
# How do we decide whether one firefly can "see" another? The attractiveness
# beta = exp(-gamma * distance) lies in (0, 1], so its middle value 0.5 is used
# as the visibility threshold.
# Target of the original analysis: each firefly should see 10%~20% of the others,
# taken here as 5%~10% of the pairs in the strict upper triangle.
# NOTE(review): the fraction of visible pairs in the upper triangle already equals
# the fraction over all ordered pairs, so the factor of 2 below may double the
# reported proportion -- confirm before reusing these numbers.

# Strict upper triangle: every unordered pair once, no self-distances. The size is
# taken from the data instead of a hard-coded population of 50, so the whole
# population is always covered.
n_fireflies = dist_matrix.shape[0]
pair_rows, pair_cols = np.triu_indices(n_fireflies, k=1)

gamma_array = np.arange(0.13, 0.15, 0.001)
for gamma in gamma_array:
    beta_matrix = np.exp(-gamma*dist_matrix)
    beta_upper_tri = beta_matrix[pair_rows, pair_cols]
    visible_fraction = np.count_nonzero(
        beta_upper_tri >= 0.5)/len(beta_upper_tri)
    if visible_fraction >= 0.05:
        print('Gamma: ', gamma)
        print('Proportion of visible fireflies: ', visible_fraction*2*100, '%')


In [None]:
# Distribution of pairwise attractiveness values for the chosen gamma.
gamma = 0.131
beta_matrix = np.exp(-gamma*dist_matrix)
# Strict upper triangle sized from the data (previously hard-coded to 50 fireflies),
# so every unordered pair of the population is included exactly once.
beta_upper_tri = beta_matrix[np.triu_indices(dist_matrix.shape[0], k=1)]

plt.hist(beta_upper_tri)


Load best firefly result

In [None]:
# Each line of firefly_best.txt is read as: iteration, parameters..., cost.
file_name = 'firefly_best.txt'
with open(folder/file_name, 'r') as f:
    best_results = np.array(
        [[float(token) for token in row.split(' ')] for row in f])

# Number of rows in the file; reused by the following cells and by plot_firefly.
firfly_number = best_results.shape[0]

best_iterations = best_results[:, 0]
best_fireflies = best_results[:, 1:-1]
best_costs = best_results[:, -1]

# Rescale by the bound range; NaN entries (e.g. 0/0 for coinciding bounds) become 0.
normalized_best_fireflies = (
    best_fireflies - lower_bound)/(upper_bound - lower_bound)
normalized_best_fireflies[np.isnan(normalized_best_fireflies)] = 0

# The row with the lowest cost is the reference point for all distance calculations.
best_cost_intex = np.argmin(best_costs)
best_cost = best_costs[best_cost_intex]
best_firefly = best_fireflies[best_cost_intex]
normalized_best_firefly = normalized_best_fireflies[best_cost_intex]

# Euclidean distance of every best-state firefly to that reference, in normalized space.
best_firefly_distances = np.array([
    np.linalg.norm(normalized_best_fireflies[row] - normalized_best_firefly)
    for row in range(firfly_number)])


Load worst result

In [None]:
# Each line of firefly_worst.txt is read as: iteration, parameters..., cost.
file_name = 'firefly_worst.txt'
with open(folder/file_name, 'r') as f:
    worst_results = np.array(
        [[float(token) for token in row.split(' ')] for row in f])

worst_iterations = worst_results[:, 0]
worst_fireflies = worst_results[:, 1:-1]
worst_costs = worst_results[:, -1]

# Rescale by the bound range; NaN entries (e.g. 0/0 for coinciding bounds) become 0.
normalized_worst_fireflies = (
    worst_fireflies - lower_bound)/(upper_bound - lower_bound)
normalized_worst_fireflies[np.isnan(normalized_worst_fireflies)] = 0

# NOTE(review): argmin selects the *lowest* of the worst costs; if the single worst
# firefly was intended this should probably be argmax -- confirm.
worst_cost_intex = np.argmin(worst_costs)
worst_cost = worst_costs[worst_cost_intex]
worst_firefly = worst_fireflies[worst_cost_intex]
normalized_worst_firefly = normalized_worst_fireflies[worst_cost_intex]

# Distance of every worst-state firefly to the overall best firefly, in normalized space.
worst_firefly_distances = np.array([
    np.linalg.norm(normalized_worst_fireflies[row] - normalized_best_firefly)
    for row in range(firfly_number)])


Load initial firefly guess

In [None]:
# Each line of the initial-guess file is read as: parameters..., cost.
file_name = 'firefly_result_first_initial_guess.txt'
with open(folder/file_name, 'r') as f:
    initial_guess_results = np.array(
        [[float(token) for token in row.split(' ')] for row in f])

initial_fireflies = initial_guess_results[:, 0:-1]
initial_costs = initial_guess_results[:, -1]

# Rescale by the bound range; NaN entries (e.g. 0/0 for coinciding bounds) become 0.
normalized_initial_fireflies = (
    initial_fireflies - lower_bound)/(upper_bound - lower_bound)
normalized_initial_fireflies[np.isnan(normalized_initial_fireflies)] = 0

# Distance of every initial firefly to the overall best firefly, in normalized space.
initial_distances = np.array([
    np.linalg.norm(normalized_initial_fireflies[row] - normalized_best_firefly)
    for row in range(firfly_number)])


Load final firefly result

In [None]:
# Each line of firefly_result.txt is read as: parameters..., cost.
file_name = 'firefly_result.txt'
with open(folder/file_name, 'r') as f:
    final_results = np.array(
        [[float(token) for token in row.split(' ')] for row in f])

final_fireflies = final_results[:, 0:-1]
final_costs = final_results[:, -1]

# Rescale by the bound range; NaN entries (e.g. 0/0 for coinciding bounds) become 0.
normalized_final_fireflies = (
    final_fireflies - lower_bound)/(upper_bound - lower_bound)
normalized_final_fireflies[np.isnan(normalized_final_fireflies)] = 0

# Lowest-cost firefly of the final population.
final_min_index = np.argmin(final_costs)
final_min_cost = final_costs[final_min_index]
final_min_firefly = final_fireflies[final_min_index, :]
normilized_final_min_firefly = normalized_final_fireflies[final_min_index, :]

# Distance of every final firefly to the overall best firefly, in normalized space.
final_distances = np.array([
    np.linalg.norm(normalized_final_fireflies[row] - normalized_best_firefly)
    for row in range(firfly_number)])


In [11]:
def _draw_firefly_path(ax, stops, **line_kwargs):
    """Join consecutive (distance, cost) points in ``stops`` with black line segments."""
    for (x, y), (x_final, y_final) in zip(stops, stops[1:]):
        ax.plot([x, x_final], [y, y_final], 'k', **line_kwargs)


def plot_firefly(ax, best_firefly_distances, best_costs, worst_firefly_distances,
                 worst_costs, initial_distances, initial_costs, final_distances, final_costs):
    """Plot cost against distance for the four recorded states of every firefly.

    Draws the initial, worst, best and final states as markers on ``ax`` and
    connects the four states of each firefly with faint lines in chronological
    order. The path of the overall best firefly is drawn again without
    transparency so that it stands out.

    Parameters
    ----------
    ax : object with a matplotlib-style ``plot`` method
        Axes to draw on.
    best_firefly_distances, best_costs : sequence
        x/y values of each firefly's best state.
    worst_firefly_distances, worst_costs : sequence
        x/y values of each firefly's worst state.
    initial_distances, initial_costs : sequence
        x/y values of each firefly's initial state.
    final_distances, final_costs : sequence
        x/y values of each firefly's final state.

    Notes
    -----
    Also reads the notebook globals ``firfly_number``, ``worst_iterations``,
    ``best_iterations``, ``best_cost_intex`` and ``current_palette``.
    Prints 'Iteration error' for a firefly whose worst and best iterations are
    equal, because the order of its states cannot be decided.
    """
    # Plot initial guess
    ax.plot(initial_distances, initial_costs, '.',
            color=current_palette[2], label='Initial guess', markersize=10)

    # Plot worst result
    ax.plot(worst_firefly_distances, worst_costs,
            'kX', label='Worst fireflies', markersize=8)

    # Plot best result
    ax.plot(best_firefly_distances, best_costs, 'k^',
            label='Best fireflies', markersize=8)

    # Plot final result
    ax.plot(final_distances, final_costs, '.',
            color=current_palette[0], label='Final fireflies', markersize=10)

    def stops_for(i):
        """Return firefly i's four states in chronological order, or None if undecidable."""
        start = (initial_distances[i], initial_costs[i])
        worst = (worst_firefly_distances[i], worst_costs[i])
        best = (best_firefly_distances[i], best_costs[i])
        end = (final_distances[i], final_costs[i])
        if worst_iterations[i] < best_iterations[i]:
            return [start, worst, best, end]
        if worst_iterations[i] > best_iterations[i]:
            return [start, best, worst, end]
        return None

    # Faint path for every firefly
    for i in range(firfly_number):
        stops = stops_for(i)
        if stops is None:
            print('Iteration error')
        else:
            _draw_firefly_path(ax, stops, alpha=0.2, linewidth=1)

    # Opaque path for the overall best firefly. Both chronological orders are
    # handled; previously nothing was drawn when its best state came before its
    # worst state.
    best_stops = stops_for(best_cost_intex)
    if best_stops is not None:
        _draw_firefly_path(ax, best_stops, linewidth=1)


In [None]:
# Display the cost column read from firefly_best.txt (one entry per row of that file).
best_costs

In [None]:
# Main panel: cost against distance from the best firefly.
firefly_series = (best_firefly_distances, best_costs, worst_firefly_distances,
                  worst_costs, initial_distances, initial_costs, final_distances, final_costs)
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(1, 1, 1)
plot_firefly(ax, *firefly_series)
ax.legend(loc='best', shadow=True, fancybox=True, numpoints=1)
ax.set_xlabel('Distance from the best firefly')
ax.set_ylabel('Cost')
# Red outline around the neighbourhood of the best firefly; the inset below
# zooms into roughly this region.
zoom_outline = Rectangle((-0.1, -1), 0.3, 2,
                         edgecolor='Red', facecolor='None', fill=True, lw=1)
ax.add_patch(zoom_outline)
ax.set_xlim([-1, 7])

# Inset panel: the same data restricted to the close neighbourhood of the best firefly.
ax2 = plt.axes([0.18, 0.45, 0.18, 0.1])  # left, bottom, width, height
plot_firefly(ax2, *firefly_series)
ax2.set_xlim([-0.05, 0.1])
ax2.set_xticks([0, 0.1])
ax2.set_ylim([-1, 1])
ax2.set_yticks([-1, 1])
ax2.minorticks_off()

# Colour the inset's ticks and frame red so it matches the outline in the main panel.
for tick_axis in ('x', 'y'):
    ax2.tick_params(axis=tick_axis, colors='red')
for side in ('left', 'top', 'right', 'bottom'):
    ax2.spines[side].set_color('red')

# plt.savefig('RW2022_cost_vs_distance.pdf')

Check how many parameters hit the boundary

In [14]:
# Re-read the parameter bounds so the extension in the next cell starts from the
# values stored in bound.txt (row 0 -> lower bound, row 1 -> upper bound).
file_name = 'bound.txt'
with open(folder/file_name, 'r') as f:
    bounds = np.array(
        [[float(token) for token in row.split(' ')] for row in f])
lower_bound, upper_bound = bounds[0, :], bounds[1, :]


In [15]:
# Load the course-of-disease parameters together with their lower/upper bounds.
parameter_path = Path('../variable')
course_parameters, course_parameters_lb, course_parameters_ub = (
    np.load(parameter_path/array_file)
    for array_file in ('course_parameters.npy',
                       'course_parameters_lb.npy',
                       'course_parameters_ub.npy'))

# Append the course-of-disease bounds to the firefly bounds.
# Re-running this cell without first re-running the bound-loading cell appends them again.
lower_bound = np.hstack((lower_bound, course_parameters_lb))
upper_bound = np.hstack((upper_bound, course_parameters_ub))


In [None]:
def _plot_bound_hits(hit_indices, bound_name):
    """Print and plot how often each parameter index sits on the given bound."""
    if not hit_indices:
        print('No hit')
        return
    print(f'Hit the {bound_name} bound: ', np.unique(hit_indices))
    plt.figure()
    # One bin per integer parameter index, centred on the index. The previous
    # edges (1..max) dropped index 0, merged the two largest indices into a
    # single bin and were invalid when the largest index was 0 or 1.
    bin_edges = np.arange(max(hit_indices) + 2) - 0.5
    plt.hist(hit_indices, bins=bin_edges)
    plt.xlabel('Parameter index')
    plt.ylabel(f'Hit frequency of {bound_name} bound')
    plt.minorticks_on()
    plt.tight_layout()
    plt.show()


# Collect, over every best firefly, the (0-based) parameter indices whose value
# is exactly equal to the lower or the upper bound.
lower_bound_hit_index = []
upper_bound_hit_index = []
for i in range(np.shape(best_fireflies)[0]):
    for j in range(np.shape(best_fireflies)[1]):
        if lower_bound[j] == best_fireflies[i, j]:
            lower_bound_hit_index.append(j)
        # A value equal to both bounds is counted as a lower-bound hit only.
        elif upper_bound[j] == best_fireflies[i, j]:
            upper_bound_hit_index.append(j)

# Plot the histogram for boundary hits
_plot_bound_hits(lower_bound_hit_index, 'lower')
_plot_bound_hits(upper_bound_hit_index, 'upper')

# Parameter names recorded by the original analysis.
# NOTE(review): confirm whether these indices are 0-based (as collected above) or 1-based.
# Index 1: Household consecutive daily contact probability
# Index 2: Household contact probability when health
# Index 3: Household contact probability when symptomatic
# Index 4: Household steepness
# Index 10: School contact probability when symptomatic
# Index 12: School phase relative to symptom-onset for symptomatic
# Index 23: Health care healthy p
# Index 30: municipality healthy_p
# Index 35: overdispersion_rate
# Index 37: latent_period_shape
# Index 38: latent period scale
# Index 42: incubation_period_scale
# Index 48: asymptomatic_to_recovered_loc
# Index 58: infection_to_death_shape
# Index 63:67: age_risk_ratios
# Index 68: vaccination_rate

Test tSNE

In [17]:
from sklearn import manifold, datasets


In [18]:
# Restore the bounds from bound.txt (row 0 -> lower bound, row 1 -> upper bound);
# an earlier cell extended lower_bound/upper_bound with the course-of-disease bounds.
file_name = 'bound.txt'
with open(folder/file_name, 'r') as f:
    bounds = np.array(
        [[float(token) for token in row.split(' ')] for row in f])
lower_bound, upper_bound = bounds[0, :], bounds[1, :]

In [None]:
# Re-read the final population (parameters..., cost per line) and normalize it
# with the bounds that were just reloaded.
file_name = 'firefly_result.txt'
with open(folder/file_name, 'r') as f:
    final_results = np.array(
        [[float(token) for token in row.split(' ')] for row in f])

final_fireflies = final_results[:, 0:-1]
# NaN entries (e.g. 0/0 for coinciding bounds) are replaced by 0.
normalized_final_fireflies = (
    final_fireflies - lower_bound)/(upper_bound - lower_bound)
normalized_final_fireflies[np.isnan(normalized_final_fireflies)] = 0


In [20]:
# Input for t-SNE and for the distance-matrix test below: the normalized final
# population. This is an alias of the array, not a copy.
X = normalized_final_fireflies


In [21]:
# NOTE(review): n_neighbors is assigned but not used anywhere in this cell.
n_neighbors = 10
n_components = 2
# 2-D t-SNE embedding with PCA initialisation; random_state=0 fixes the seed.
tsne = manifold.TSNE(n_components=n_components, init='pca', random_state=0)
# Y has one row per firefly in X and n_components columns.
Y = tsne.fit_transform(X)


In [None]:
# Scatter plot of the two t-SNE components, one point per final firefly.
plt.scatter(Y[:, 0], Y[:, 1])


Test distance matrix

In [23]:
# Pairwise distances between all rows of X (the normalized final fireflies).
from scipy.spatial import distance_matrix
dist_mat = distance_matrix(X, X)


In [24]:
bins = 100
# Row i holds the histogram counts (``bins`` bins) of the distances from firefly i
# to every firefly. Stored as floats; reshape keeps the (0, bins) shape for an
# empty distance matrix.
histogram_matrix = np.array(
    [np.histogram(distances_from_one, bins=bins)[0]
     for distances_from_one in dist_mat],
    dtype=float).reshape(-1, bins)


In [25]:
import seaborn as sns


Rank the distance matrix of final fireflies

In [26]:
from scipy.spatial.distance import squareform
from fastcluster import linkage


In [27]:
# Pairwise distances between the normalized final fireflies (overwrites dist_mat).
dist_mat = distance_matrix(
    normalized_final_fireflies, normalized_final_fireflies)

In [None]:
# Heatmap of the distance matrix in the original (unsorted) firefly order.
sns.heatmap(dist_mat)


In [29]:
def seriation(Z, N, cur_index):
    '''
        input:
            - Z is a hierarchical tree (dendrogram) in linkage-matrix form:
              row k holds in columns 0 and 1 the two children merged into
              cluster N + k
            - N is the number of points given to the clustering process
            - cur_index is the position in the tree where the traversal starts
        output:
            - order implied by the hierarchical tree Z (list of leaf indices,
              left subtree before right subtree)

        seriation computes the order implied by a hierarchical tree (dendrogram).

        The traversal uses an explicit stack instead of recursion, so deep
        (chain-like) trees no longer raise RecursionError.
    '''
    order = []
    pending = [cur_index]
    while pending:
        node = pending.pop()
        if node < N:
            # Indices below N are original points (leaves).
            order.append(node)
        else:
            # Push right first so that the left child is visited first.
            pending.append(int(Z[node - N, 1]))
            pending.append(int(Z[node - N, 0]))
    return order


In [30]:
def compute_serial_matrix(dist_mat, method="ward"):
    '''
        input:
            - dist_mat is a square distance matrix
            - method = ["ward","single","average","complete"], passed to linkage
        output:
            - seriated_dist is dist_mat with rows and columns re-ordered
              according to the seriation, i.e. the order implied by the
              hierarchical tree
            - res_order is the order implied by the hierarchical tree
            - res_linkage is the hierarchical tree (dendrogram)

        compute_serial_matrix clusters the points described by dist_mat and
        returns the distance matrix sorted in the leaf order of the resulting
        dendrogram.
    '''
    n_points = len(dist_mat)

    # linkage works on the condensed (flattened upper-triangle) form.
    condensed = squareform(dist_mat)
    res_linkage = linkage(condensed, method=method, preserve_input=True)

    # Start the traversal at index 2*N - 2, the cluster created by the last merge.
    res_order = seriation(res_linkage, n_points, 2 * n_points - 2)

    # Fill the strict upper triangle from the re-ordered distances, then mirror it;
    # the diagonal stays zero.
    order = np.asarray(res_order)
    rows, cols = np.triu_indices(n_points, k=1)
    seriated_dist = np.zeros((n_points, n_points))
    seriated_dist[rows, cols] = dist_mat[order[rows], order[cols]]
    seriated_dist[cols, rows] = seriated_dist[rows, cols]

    return seriated_dist, res_order, res_linkage


In [None]:
# fig = plt.figure(figsize=(14, 10))
# fig = plt.figure(figsize=(14*2, 10*2))
sorted_dist_mat, res_order, res_linkage = compute_serial_matrix(dist_mat)
# Label the Axes returned by sns.heatmap. Previously the labels were set on `ax`,
# which still pointed at the axes of the earlier cost-vs-distance figure.
ax = sns.heatmap(sorted_dist_mat, yticklabels=res_order, xticklabels=res_order, cbar_kws={'label': 'Euclidean distance'})
ax.set_xlabel('Firefly index')
ax.set_ylabel('Firefly index')
# plt.savefig('RW2022_Firefly_distance_matrix.pdf')


Heatmap of best fireflies

In [32]:
# Pairwise distances between the normalized best-state fireflies (overwrites dist_mat).
dist_mat = distance_matrix(
    normalized_best_fireflies, normalized_best_fireflies)

In [None]:
# fig = plt.figure(figsize=(14, 10))
sorted_dist_mat, res_order, res_linkage = compute_serial_matrix(dist_mat)
# Label the Axes returned by sns.heatmap. Previously a subplot was added to `fig`,
# a figure left over from an earlier cell, so the labels could land on a different
# figure than the heatmap.
ax = sns.heatmap(sorted_dist_mat, yticklabels=res_order, xticklabels=res_order, cbar_kws={'label': 'Euclidean distance'})
ax.set_xlabel('Firefly index')
ax.set_ylabel('Firefly index')
# plt.savefig('RW2022_Firefly_distance_matrix.pdf')

# Check each category's cost

In [34]:
import _path
from firefly_optimizer import *

In [35]:
# Demographic parameters passed to cost_function below.
# NOTE: pickle.load can execute arbitrary code; only load this file from a trusted source.
with open('../variable/demographic_parameters.pkl', 'rb') as f:
    demographic_parameters = pickle.load(f)

In [36]:
def cost_function(P, demographic_parameters, max_workers, nboot=100,
                  taiwan_data_path='../variable/Taiwan_data_matrix.npy'):
    """Run the simulations for parameter vector ``P`` and return the cost per category.

    One ``run_covid`` simulation is submitted per seed (100 seeds) to a process
    pool. For each of the layers 'Household', 'Health care' and 'Cheng others'
    the simulated contact numbers and attack rates are compared with the
    hard-coded Cheng reference tables; in addition an energy statistic between
    the Taiwan data matrix and the synthetic data is computed with ``estat``.

    ``run_covid``, ``generate_contact_result``,
    ``convert_synthetic_data_to_test_matrix`` and ``estat`` are defined outside
    this notebook cell.

    Parameters
    ----------
    P : parameter vector passed unchanged to ``run_covid``.
    demographic_parameters : passed unchanged to ``run_covid``.
    max_workers : int
        Number of worker processes.
    nboot : int, optional
        Forwarded to ``estat`` (default 100, the previously hard-coded value).
        Pass e.g. ``nboot=10000`` for a more careful p-value check instead of
        editing the function body.
    taiwan_data_path : str, optional
        ``.npy`` file with the Taiwan data matrix.

    Returns
    -------
    tuple
        ``(household_cost, health_care_cost, cheng_others_cost, energy_cost)``.

    The energy cost and its p-value are also printed.
    """
    source_case_number = 100
    repeat_number = 1
    case_number = source_case_number * repeat_number
    seeds = range(case_number)

    # One simulation per seed in worker processes. Leaving the ``with`` block waits
    # for all submitted simulations to finish. ``results`` holds the Future objects
    # returned by ``submit``; they are handed to the helpers below as they are.
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        P_copy = copy.deepcopy(P)
        demographic_parameters_copy = copy.deepcopy(demographic_parameters)
        results = [executor.submit(run_covid, seed, P_copy, demographic_parameters_copy, save_file=False)
                   for seed in seeds]

    # Reference contact numbers, one row per layer; normalized by the overall maximum.
    Cheng_contact_array = np.array([[100, 39, 6, 4, 2, 0],
                                    [236, 150, 38, 17, 110, 146],
                                    [399, 678, 172, 98, 337, 138]])
    max_Cheng_contact = np.max(Cheng_contact_array)
    norm_Cheng_contact_array = Cheng_contact_array/max_Cheng_contact

    # Reference attack rates (given in percent), one row per layer.
    Cheng_attack_rate = np.array([[4, 5.1, 16.7, 0, 0, 0],
                                  [0.8, 2, 2.6, 0, 0, 0],
                                  [0, 0, 0.6, 0, 0, 0]])/100
    max_Cheng_attack_rate = np.max(Cheng_attack_rate)
    norm_Cheng_attack_rate = Cheng_attack_rate/max_Cheng_attack_rate

    # Weights of the attack-rate residuals; they can be found in
    # plot_compare_previous_studies.ipynb
    norm_weights = np.array([[1, 0.53246753, 0.15355805, 0.16734694, 0.12480974, 0.082],
                             [0.92857143, 0.52, 0.2, 0.14130435, 0.78787879, 1],
                             [0.6, 1, 0.1875, 0.14285714, 0.54545455, 0.18181818]])
    # The health-care contact residuals get double weight on the last two entries.
    health_care_weights = np.array([1, 1, 1, 1, 2, 2])

    layers = ['Household', 'Health care', 'Cheng others']

    costs = []
    for i, layer in enumerate(layers):
        contact_array, infection_array = generate_contact_result(
            results, layer=layer, case_number=case_number)
        contact_array = contact_array.astype(float)
        norm_contact_array = contact_array/max_Cheng_contact
        infection_array = infection_array.astype(float)
        # Attack rate = infections / contacts; entries without contacts are set to 0.
        attack_rate = np.divide(infection_array, contact_array, out=np.zeros_like(
            infection_array), where=contact_array != 0)
        norm_attack_rate = attack_rate/max_Cheng_attack_rate
        norm_Cheng_data = norm_Cheng_contact_array[i]
        norm_Cheng_attack = norm_Cheng_attack_rate[i]

        # Contact cost: (weighted) sum of squared residuals.
        contact_weights = health_care_weights if layer == 'Health care' else 1
        contact_cost = np.sum(
            ((norm_contact_array / repeat_number - norm_Cheng_data) * contact_weights) ** 2)
        # Attack-rate cost: weighted sum of squared residuals, ignoring NaNs.
        attack_rate_cost = np.nansum(
            ((norm_attack_rate - norm_Cheng_attack) * norm_weights[i]) ** 2)
        costs.append(contact_cost + attack_rate_cost)

    # Energy distance between the Taiwan data and the synthetic data
    taiwan_data_matrix = np.load(taiwan_data_path)
    synthetic_data_matrix = convert_synthetic_data_to_test_matrix(
        results, taiwan_data_matrix, case_number)
    p_value, energy_cost, _ = estat(
        taiwan_data_matrix, synthetic_data_matrix, nboot=nboot)
    print('Energy cost: ', energy_cost)
    print('p value: ', p_value)

    return costs[0], costs[1], costs[2], energy_cost

In [None]:
# Check the energy distance and p value of the best firefly (4 = max_workers).
# For this check, manually set nboot=10000 in the estat(...) call inside cost_function.
# Remember to change it back to 100 for the following code.
cost_function(best_firefly, demographic_parameters, 4)

In [None]:
# Per-category costs of every initial firefly (one full cost_function evaluation each).
household_cost_init, health_care_cost_init = [], []
cheng_others_cost_init, energy_cost_init = [], []
for i in range(len(initial_fireflies)):
    household, health_care, cheng_others, energy = cost_function(
        initial_fireflies[i], demographic_parameters, 4)
    household_cost_init.append(household)
    health_care_cost_init.append(health_care)
    cheng_others_cost_init.append(cheng_others)
    energy_cost_init.append(energy)

In [None]:
# Per-category costs of every firefly in its worst state.
# The loop length is taken from the initial population, as in the other cost cells.
household_cost_worst, health_care_cost_worst = [], []
cheng_others_cost_worst, energy_cost_worst = [], []
for i in range(len(initial_fireflies)):
    household, health_care, cheng_others, energy = cost_function(
        worst_fireflies[i], demographic_parameters, 4)
    household_cost_worst.append(household)
    health_care_cost_worst.append(health_care)
    cheng_others_cost_worst.append(cheng_others)
    energy_cost_worst.append(energy)

In [None]:
# Per-category costs of every firefly in its best state.
# The loop length is taken from the initial population, as in the other cost cells.
household_cost_best, health_care_cost_best = [], []
cheng_others_cost_best, energy_cost_best = [], []
for i in range(len(initial_fireflies)):
    household, health_care, cheng_others, energy = cost_function(
        best_fireflies[i], demographic_parameters, 4)
    household_cost_best.append(household)
    health_care_cost_best.append(health_care)
    cheng_others_cost_best.append(cheng_others)
    energy_cost_best.append(energy)

In [None]:
# Per-category costs of every firefly of the final population.
# The loop length is taken from the initial population, as in the other cost cells.
household_cost_final, health_care_cost_final = [], []
cheng_others_cost_final, energy_cost_final = [], []
for i in range(len(initial_fireflies)):
    household, health_care, cheng_others, energy = cost_function(
        final_fireflies[i], demographic_parameters, 4)
    household_cost_final.append(household)
    health_care_cost_final.append(health_care)
    cheng_others_cost_final.append(cheng_others)
    energy_cost_final.append(energy)

In [None]:
# Household cost of every firefly across the four stages (dashed lines),
# plus the mean over all fireflies (black triangles).
for i in range(len(initial_fireflies)):
    plt.plot(range(4), [household_cost_init[i], household_cost_worst[i], household_cost_best[i], household_cost_final[i]], '*--')

plt.plot(range(4), [np.mean(household_cost_init), np.mean(household_cost_worst), np.mean(household_cost_best), np.mean(household_cost_final)], 'k^-', markersize=10)
plt.xticks(range(4), ['Initial', 'Worst', 'Best', 'Final'])
# Axis label typo fixed ('Househod' -> 'Household'); it ends up in the saved PDF.
plt.ylabel('Household cost')
plt.yscale('log')

plt.savefig('RW2024_household_cost.pdf', bbox_inches='tight')

In [None]:
# Health care cost of every firefly across the four stages (dashed lines),
# plus the mean over all fireflies (black triangles).
stage_positions = range(4)
health_care_stage_costs = [health_care_cost_init, health_care_cost_worst,
                           health_care_cost_best, health_care_cost_final]
for i in range(len(initial_fireflies)):
    plt.plot(stage_positions, [stage[i] for stage in health_care_stage_costs], '*--')

plt.plot(stage_positions, [np.mean(stage) for stage in health_care_stage_costs],
         'k^-', markersize=10)
plt.xticks(stage_positions, ['Initial', 'Worst', 'Best', 'Final'])
plt.ylabel('Health care cost')
plt.yscale('log')

plt.savefig('RW2024_health_care_cost.pdf', bbox_inches='tight')

In [None]:
# 'Cheng others' cost of every firefly across the four stages (dashed lines),
# plus the mean over all fireflies (black triangles).
stage_positions = range(4)
cheng_others_stage_costs = [cheng_others_cost_init, cheng_others_cost_worst,
                            cheng_others_cost_best, cheng_others_cost_final]
for i in range(len(initial_fireflies)):
    plt.plot(stage_positions, [stage[i] for stage in cheng_others_stage_costs], '*--')

plt.plot(stage_positions, [np.mean(stage) for stage in cheng_others_stage_costs],
         'k^-', markersize=10)
plt.xticks(stage_positions, ['Initial', 'Worst', 'Best', 'Final'])
plt.ylabel('Others cost')
plt.yscale('log')

plt.savefig('RW2024_other_cost.pdf', bbox_inches='tight')

In [None]:
# Energy cost of every firefly across the four stages (dashed lines),
# plus the mean over all fireflies (black triangles). Linear y-axis.
stage_positions = range(4)
energy_stage_costs = [energy_cost_init, energy_cost_worst,
                      energy_cost_best, energy_cost_final]
for i in range(len(initial_fireflies)):
    plt.plot(stage_positions, [stage[i] for stage in energy_stage_costs], '*--')

plt.plot(stage_positions, [np.mean(stage) for stage in energy_stage_costs],
         'k^-', markersize=10)

plt.xticks(stage_positions, ['Initial', 'Worst', 'Best', 'Final'])
plt.ylabel('Energy cost')
# plt.yscale('log')
plt.savefig('RW2024_energy_cost.pdf', bbox_inches='tight')

In [46]:
# Total cost per firefly and stage: element-wise sum of the four cost categories.
all_cost_init = (np.asarray(household_cost_init) + np.asarray(health_care_cost_init)
                 + np.asarray(cheng_others_cost_init) + np.asarray(energy_cost_init))
all_cost_worst = (np.asarray(household_cost_worst) + np.asarray(health_care_cost_worst)
                  + np.asarray(cheng_others_cost_worst) + np.asarray(energy_cost_worst))
all_cost_best = (np.asarray(household_cost_best) + np.asarray(health_care_cost_best)
                 + np.asarray(cheng_others_cost_best) + np.asarray(energy_cost_best))
all_cost_final = (np.asarray(household_cost_final) + np.asarray(health_care_cost_final)
                  + np.asarray(cheng_others_cost_final) + np.asarray(energy_cost_final))

In [None]:
# Total cost of every firefly across the four stages (dashed lines),
# plus the mean over all fireflies (black triangles). Linear y-axis.
stage_positions = range(4)
all_stage_costs = [all_cost_init, all_cost_worst, all_cost_best, all_cost_final]
for i in range(len(initial_fireflies)):
    plt.plot(stage_positions, [stage[i] for stage in all_stage_costs], '*--')

plt.plot(stage_positions, [np.mean(stage) for stage in all_stage_costs],
         'k^-', markersize=10)
plt.xticks(stage_positions, ['Initial', 'Worst', 'Best', 'Final'])
plt.ylabel('All cost')
# plt.yscale('log')

plt.savefig('RW2024_all_cost.pdf', bbox_inches='tight')

## Also test with the full Taiwan data matrix

In [48]:
def cost_function(P, demographic_parameters, max_workers, nboot=100,
                  taiwan_data_path='../variable/Taiwan_data_matrix_full.npy'):
    """Run the simulations for parameter vector ``P`` and return the cost per category.

    This definition replaces the ``cost_function`` defined in an earlier cell;
    by default it computes the energy statistic against the *full* Taiwan data
    matrix (``Taiwan_data_matrix_full.npy``).

    One ``run_covid`` simulation is submitted per seed (100 seeds) to a process
    pool. For each of the layers 'Household', 'Health care' and 'Cheng others'
    the simulated contact numbers and attack rates are compared with the
    hard-coded Cheng reference tables; in addition an energy statistic between
    the Taiwan data matrix and the synthetic data is computed with ``estat``.

    ``run_covid``, ``generate_contact_result``,
    ``convert_synthetic_data_to_test_matrix`` and ``estat`` are defined outside
    this notebook cell.

    Parameters
    ----------
    P : parameter vector passed unchanged to ``run_covid``.
    demographic_parameters : passed unchanged to ``run_covid``.
    max_workers : int
        Number of worker processes.
    nboot : int, optional
        Forwarded to ``estat`` (default 100, the previously hard-coded value).
        Pass e.g. ``nboot=10000`` for a more careful p-value check instead of
        editing the function body.
    taiwan_data_path : str, optional
        ``.npy`` file with the Taiwan data matrix.

    Returns
    -------
    tuple
        ``(household_cost, health_care_cost, cheng_others_cost, energy_cost)``.

    The energy cost and its p-value are also printed.
    """
    source_case_number = 100
    repeat_number = 1
    case_number = source_case_number * repeat_number
    seeds = range(case_number)

    # One simulation per seed in worker processes. Leaving the ``with`` block waits
    # for all submitted simulations to finish. ``results`` holds the Future objects
    # returned by ``submit``; they are handed to the helpers below as they are.
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        P_copy = copy.deepcopy(P)
        demographic_parameters_copy = copy.deepcopy(demographic_parameters)
        results = [executor.submit(run_covid, seed, P_copy, demographic_parameters_copy, save_file=False)
                   for seed in seeds]

    # Reference contact numbers, one row per layer; normalized by the overall maximum.
    Cheng_contact_array = np.array([[100, 39, 6, 4, 2, 0],
                                    [236, 150, 38, 17, 110, 146],
                                    [399, 678, 172, 98, 337, 138]])
    max_Cheng_contact = np.max(Cheng_contact_array)
    norm_Cheng_contact_array = Cheng_contact_array/max_Cheng_contact

    # Reference attack rates (given in percent), one row per layer.
    Cheng_attack_rate = np.array([[4, 5.1, 16.7, 0, 0, 0],
                                  [0.8, 2, 2.6, 0, 0, 0],
                                  [0, 0, 0.6, 0, 0, 0]])/100
    max_Cheng_attack_rate = np.max(Cheng_attack_rate)
    norm_Cheng_attack_rate = Cheng_attack_rate/max_Cheng_attack_rate

    # Weights of the attack-rate residuals; they can be found in
    # plot_compare_previous_studies.ipynb
    norm_weights = np.array([[1, 0.53246753, 0.15355805, 0.16734694, 0.12480974, 0.082],
                             [0.92857143, 0.52, 0.2, 0.14130435, 0.78787879, 1],
                             [0.6, 1, 0.1875, 0.14285714, 0.54545455, 0.18181818]])
    # The health-care contact residuals get double weight on the last two entries.
    health_care_weights = np.array([1, 1, 1, 1, 2, 2])

    layers = ['Household', 'Health care', 'Cheng others']

    costs = []
    for i, layer in enumerate(layers):
        contact_array, infection_array = generate_contact_result(
            results, layer=layer, case_number=case_number)
        contact_array = contact_array.astype(float)
        norm_contact_array = contact_array/max_Cheng_contact
        infection_array = infection_array.astype(float)
        # Attack rate = infections / contacts; entries without contacts are set to 0.
        attack_rate = np.divide(infection_array, contact_array, out=np.zeros_like(
            infection_array), where=contact_array != 0)
        norm_attack_rate = attack_rate/max_Cheng_attack_rate
        norm_Cheng_data = norm_Cheng_contact_array[i]
        norm_Cheng_attack = norm_Cheng_attack_rate[i]

        # Contact cost: (weighted) sum of squared residuals.
        contact_weights = health_care_weights if layer == 'Health care' else 1
        contact_cost = np.sum(
            ((norm_contact_array / repeat_number - norm_Cheng_data) * contact_weights) ** 2)
        # Attack-rate cost: weighted sum of squared residuals, ignoring NaNs.
        attack_rate_cost = np.nansum(
            ((norm_attack_rate - norm_Cheng_attack) * norm_weights[i]) ** 2)
        costs.append(contact_cost + attack_rate_cost)

    # Energy distance between the Taiwan data and the synthetic data
    taiwan_data_matrix = np.load(taiwan_data_path)
    synthetic_data_matrix = convert_synthetic_data_to_test_matrix(
        results, taiwan_data_matrix, case_number)
    p_value, energy_cost, _ = estat(
        taiwan_data_matrix, synthetic_data_matrix, nboot=nboot)
    print('Energy cost: ', energy_cost)
    print('p value: ', p_value)

    return costs[0], costs[1], costs[2], energy_cost

In [None]:
# Check the energy distance and p value of the best firefly against the full
# Taiwan data matrix (4 = max_workers).
# For this check, manually set nboot=10000 in the estat(...) call inside cost_function.
# Remember to change it back to 100 for the following code.
cost_function(best_firefly, demographic_parameters, 4)

In [None]:
# Re-evaluate only the energy cost (4th return value of cost_function) for each
# of the four populations. Every loop runs over the size of the initial population.
energy_cost_init = []
for i in range(len(initial_fireflies)):
    energy_cost_init.append(
        cost_function(initial_fireflies[i], demographic_parameters, 4)[3])

energy_cost_worst = []
for i in range(len(initial_fireflies)):
    energy_cost_worst.append(
        cost_function(worst_fireflies[i], demographic_parameters, 4)[3])

energy_cost_best = []
for i in range(len(initial_fireflies)):
    energy_cost_best.append(
        cost_function(best_fireflies[i], demographic_parameters, 4)[3])

energy_cost_final = []
for i in range(len(initial_fireflies)):
    energy_cost_final.append(
        cost_function(final_fireflies[i], demographic_parameters, 4)[3])

In [None]:
# Energy cost (full Taiwan data) of every firefly across the four stages (dashed
# lines), plus the mean over all fireflies (black triangles). Linear y-axis.
stage_positions = range(4)
energy_stage_costs = [energy_cost_init, energy_cost_worst,
                      energy_cost_best, energy_cost_final]
for i in range(len(initial_fireflies)):
    plt.plot(stage_positions, [stage[i] for stage in energy_stage_costs], '*--')

plt.plot(stage_positions, [np.mean(stage) for stage in energy_stage_costs],
         'k^-', markersize=10)

plt.xticks(stage_positions, ['Initial', 'Worst', 'Best', 'Final'])
plt.ylabel('Energy cost (full Taiwan data)')
# plt.yscale('log')
plt.savefig('RW2024_energy_cost_all.pdf', bbox_inches='tight')