From 3392b29bfb34f8012d602f1a75c3f25346012c55 Mon Sep 17 00:00:00 2001 From: Akshay Patel Date: Mon, 14 Sep 2020 08:36:21 -0400 Subject: [PATCH] plotting quebec and multiple sims --- src/covid19sim/configs/simulation/config.yaml | 0 .../plotting/compare_real_and_sim.py | 83 ++++++++++--------- 2 files changed, 44 insertions(+), 39 deletions(-) mode change 100755 => 100644 src/covid19sim/configs/simulation/config.yaml diff --git a/src/covid19sim/configs/simulation/config.yaml b/src/covid19sim/configs/simulation/config.yaml old mode 100755 new mode 100644 diff --git a/src/covid19sim/plotting/compare_real_and_sim.py b/src/covid19sim/plotting/compare_real_and_sim.py index 78f7c1c25..f971bfeff 100644 --- a/src/covid19sim/plotting/compare_real_and_sim.py +++ b/src/covid19sim/plotting/compare_real_and_sim.py @@ -8,23 +8,13 @@ # Constants quebec_population = 8485000 -csv_path = "COVID19Tracker.ca Data - QC.csv" -# sim_dir_path = "output/sim_v2_people-20000_days-120_init-0.005_uptake--1.0_seed-0_20200909-143542_235853" -sim_dir_path = "output/sim_v2_people-30000_days-120_init-0.005_uptake--1.0_seed-0_20200909-175946_676450" -sim_priors_path = os.path.join(sim_dir_path, "train_priors.pkl") -# sim_tracker_path = os.path.join(sim_dir_path, "tracker_data_n_100_seed_0_20200716-101538_.pkl") -# ./output/sim_v2_people-5000_days-30_init-0.05_uptake--1.0_seed-0_20200831-105018_963000 -sim_tracker_path = os.path.join(sim_dir_path, "tracker_data_n_30000_seed_0_20200909-232724.pkl") - -plot_real = False +csv_path = "path/to/csv" +sims_dir_path = "path/to/simulations/" # Load data qc_data = pd.read_csv(csv_path) -sim_tracker_data = pickle.load(open(sim_tracker_path, "rb")) -sim_prior_data = pickle.load(open(sim_priors_path, "rb")) # Utility Functions - def parse_tracker(sim_tracker_data): sim_pop = sim_tracker_data['n_humans'] dates = [] @@ -57,23 +47,44 @@ def parse_tracker(sim_tracker_data): return dates, daily_deaths_prop, tests, cases -# Parse data -sim_dates, sim_deaths, sim_tests, sim_cases = parse_tracker(sim_tracker_data) -sim_hospitalizations = [float(x)*100/sim_tracker_data['n_humans'] for x in sim_prior_data['hospitalization_per_day']] - +all_sim_cases = [] +all_sim_hospitalizations = [] +all_sim_deaths = [] + +for sim in os.listdir(sims_dir_path): + # Get the paths + sim_path = os.path.join(sims_dir_path, sim) + sim_priors_path = os.path.join(sim_path, "train_priors.pkl") + sim_tracker_name = [str(f_name) for f_name in os.listdir(sim_path) if f_name.startswith("tracker_data")][0] + sim_tracker_path = os.path.join(sim_path, sim_tracker_name) + # Load the data + sim_tracker_data = pickle.load(open(sim_tracker_path, "rb")) + sim_prior_data = pickle.load(open(sim_priors_path, "rb")) + # Parse data + sim_dates, sim_deaths, sim_tests, sim_cases = parse_tracker(sim_tracker_data) + sim_hospitalizations = [float(x)*100/sim_tracker_data['n_humans'] for x in sim_prior_data['hospitalization_per_day']] + # change key above in sim_prior_data['hospital_usage_per_day'] + + all_sim_cases.append(sim_cases) + all_sim_hospitalizations.append(sim_hospitalizations) + all_sim_deaths.append(sim_deaths) + +avg_sim_cases = [sum(elem)/len(elem) for elem in zip(*all_sim_cases)] +avg_sim_hospitalizations = [sum(elem)/len(elem) for elem in zip(*all_sim_hospitalizations)] +avg_sim_deaths = [sum(elem)/len(elem) for elem in zip(*all_sim_deaths)] # Plot cases +last_index = 63 # index of last day of Quebec data, use 63 for 30 days fig, ax = plt.subplots(figsize=(7.5, 7.5)) -real_dates = qc_data.loc[34:133, 'dates'].to_numpy() -real_cases = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:133, 'change_cases']] -real_hospitalizations = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:133, 'total_hospitalizations']] -real_deaths = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:133,'change_fatalities']] -real_tests = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:133, 'change_tests']] +real_dates = qc_data.loc[34:last_index, 'dates'].to_numpy() +real_cases = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:last_index, 'change_cases']] +# change key to 'total_hospitalizations' if needed +real_hospitalizations = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:last_index, 'change_hospitalizations']] +real_deaths = [100 * float(x if str(x) != "nan" else 0) / quebec_population for x in qc_data.loc[34:last_index,'change_fatalities']] ax.plot(real_dates, real_cases, label="Diagnoses per day") -ax.plot(real_dates, real_hospitalizations, label="Hospital utilization") +ax.plot(real_dates, real_hospitalizations, label="Hospital admissions") ax.plot(real_dates, real_deaths, label="Mortalities per day") -# ax.plot(real_dates, real_tests, label="QC Tests (per Day)") ax.legend() plt.ylabel("Percentage of Population") @@ -85,23 +96,19 @@ def parse_tracker(sim_tracker_data): # Plot deaths and hospitalizations fig, ax = plt.subplots(figsize=(7.5, 7.5)) -ax.set_ylim([0.,0.03]) -sim_cases = np.array(sim_cases[1:])*.1 # adjust for unknown cases - -ax.plot(sim_dates, sim_cases, label="Diagnoses per day") -ax.plot(sim_dates, sim_hospitalizations[1:], label="Hospital admissions per day") -ax.plot(sim_dates, sim_deaths, label="Mortalities per day") +# ax.set_ylim([0.,0.03]) +avg_sim_cases = np.array(sim_cases[1:])*.1 # adjust for unknown cases -# ax.plot(list(sim_tests.keys()), list(sim_tests.values()), label="Simulated Tests (per Day)") - -# Goodness of Fit ''' -deaths_fit = stats.chisquare(sim_deaths, real_deaths[1:]) -hospitalizations_fit = stats.chisquare(sim_hospitalizations, real_hospitalizations) -cases_fit = stats.chisquare(sim_cases, real_cases) -tests_fit = stats.chisquare([sim_tests[x] for x in sim_tests], real_tests[2:]) +ax.errorbar(np.array(sim_dates), avg_sim_cases.mean(axis=0), yerr=avg_sim_cases.std(axis=0), label="Diagnoses per day") +# change caption below if using 'total_hospitalizations' +ax.errorbar(sim_dates, avg_sim_hospitalizations.mean(axis=0)[1:], yerr=avg_sim_hospitalizations.std(axis=0)[1:], label="Hospital admissions") +ax.errorbar(sim_dates, avg_sim_deaths.mean(axis=0), yerr=avg_sim_deaths.std(axis=0), label="Mortalities per day") ''' -# fit_caption = "Chi-Square Goodness of Fit Test Results\nMortalities: {}\nHospitalizations: {}\nCases: {}\nTests: {}".format(deaths_fit, hospitalizations_fit, cases_fit, tests_fit) + +ax.plot(sim_dates, avg_sim_cases, label="Diagnoses per day") +ax.plot(sim_dates, avg_sim_hospitalizations[1:], label="Hospital admissions") +ax.plot(sim_dates, avg_sim_deaths, label="Mortalities per day") ax.legend() plt.ylabel("Percentage of Population") @@ -109,6 +116,4 @@ def parse_tracker(sim_tracker_data): plt.yticks(plt.yticks()[0], [str(round(x, 3)) + "%" for x in plt.yticks()[0]]) plt.xticks([x for i, x in enumerate(real_dates) if i % 10 == 0], rotation=45) plt.title("Simulated Quebec COVID Statistics") -# plt.figtext(0.5, 0.01, fit_caption, wrap=True, horizontalalignment='center', fontsize=12) plt.savefig("sim_stats.png") -