# The University of Hong Kong
## DASC7600 Data Science Project 2024
## Discrete Compartmental Model

# Import Modules and Settings

In [1]:
import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
from scipy.signal import find_peaks, peak_widths

import covid_module
from discrete_compartmental_model import *

# Settings
# NOTE(review): called without a category or module filter, this hides every
# warning raised for the rest of the session, not only the noisy ones.
warnings.filterwarnings('ignore')

# Load Data

In [2]:
# Load the standardised Hong Kong case-count table from its CSV file
covid_hk_case_cnt_path = './data/std_data/hk/covid_hk_case_count_std.csv'
covid_hk_case_cnt_std = pd.read_csv(covid_hk_case_cnt_path)

# Datetime Data Type and Index

In [3]:
# Parse the report date (read with the %Y%m%d format) into datetime values
report_date_parsed = pd.to_datetime(covid_hk_case_cnt_std['report_date'], format='%Y%m%d')

# Store the parsed dates and use them as the index; drop=False keeps
# 'report_date' available as a regular column as well
covid_hk_case_cnt_std = (
    covid_hk_case_cnt_std
    .assign(report_date=report_date_parsed)
    .set_index('report_date', drop=False)
)

# New Case Counts

In [4]:
# Daily increments derived from the cumulative series. The first row has no
# predecessor, so its difference is NaN and is replaced by 0.
for new_cnt_col, cuml_cnt_col in (
    ('new_case_cnt', 'cuml_case_cnt'),
    ('new_dischg_cnt', 'cuml_dischg_cnt'),
    ('new_death_cnt', 'cuml_death_cnt'),
):
    covid_hk_case_cnt_std[new_cnt_col] = covid_hk_case_cnt_std[cuml_cnt_col].diff().fillna(0)

# "Recovered" here is the sum of the daily discharges and the daily deaths
covid_hk_case_cnt_std['new_recover_cnt'] = (
    covid_hk_case_cnt_std['new_dischg_cnt'] + covid_hk_case_cnt_std['new_death_cnt']
)

# Fitting Models - First Wave

In [5]:
# New case / recovery counts for the 1st wave, taken as the positional row
# window 66..99 of the table (NOTE(review): hand-picked window — confirm dates).
first_wave_rows = slice(66, 100)
first_wave_cnt_df = covid_hk_case_cnt_std[['new_case_cnt', 'new_recover_cnt']].iloc[first_wave_rows]
# Transposed so that row 0 holds the new cases and row 1 the new recoveries
first_wave_case_cnt = np.array(first_wave_cnt_df.values).T

In [6]:
# # SIR Model
# Discrete_SIR_model_first_wave = Discrete_SIR_model(first_wave_case_cnt,
#                                                    [sum(first_wave_case_cnt[0][1:]),
#                                                     first_wave_case_cnt[0][0],
#                                                     first_wave_case_cnt[1][0]])
# plot_compartmental_model_result(Discrete_SIR_model_first_wave.fit(), first_wave_case_cnt, Discrete_SIR_model_first_wave.classes)

In [7]:
# # SIS Model
# Discrete_SIS_model_first_wave = Discrete_SIS_model(first_wave_case_cnt,
#                                                    [sum(first_wave_case_cnt[0][1:]),
#                                                     first_wave_case_cnt[0][0]])
# plot_compartmental_model_result(Discrete_SIS_model_first_wave.fit(), first_wave_case_cnt, Discrete_SIS_model_first_wave.classes)

In [8]:
# # SIRS Model
# ## Fit with true_I_plus counts only due to a lack of information about stage changing from R to S
# Discrete_SIRS_model_first_wave = Discrete_SIRS_model(first_wave_case_cnt[[0]],
#                                                      [sum(first_wave_case_cnt[0][1:]),
#                                                       first_wave_case_cnt[0][0],
#                                                       first_wave_case_cnt[1][0]])
# plot_compartmental_model_result(Discrete_SIRS_model_first_wave.fit(), first_wave_case_cnt, Discrete_SIRS_model_first_wave.classes)

In [9]:
# # SEIR Model
# ## Fit with true_I_plus counts only due to a lack of information about stage E
# Discrete_SEIR_model_first_wave = Discrete_SEIR_model(first_wave_case_cnt[[0]],
#                                                      [sum(first_wave_case_cnt[0][1:]),
#                                                       first_wave_case_cnt[0][0]/2,
#                                                       first_wave_case_cnt[0][0]/2,
#                                                       first_wave_case_cnt[1][0]])
# plot_compartmental_model_result(Discrete_SEIR_model_first_wave.fit(), first_wave_case_cnt, Discrete_SEIR_model_first_wave.classes)

# Waves Automated Detection and SEIR Model Fitting (1st Wave to 4th Wave)

In [10]:
# Counts of first 4 waves: every report dated on or before 15 Jan 2022
first_4_wave_cutoff = datetime.datetime(2022, 1, 15)
first_4_wave_mask = covid_hk_case_cnt_std['report_date'] <= first_4_wave_cutoff
first_4_wave_cnt = covid_hk_case_cnt_std.loc[first_4_wave_mask, ['new_case_cnt', 'new_recover_cnt']]

# Peaks and Widths
## find_peaks returns (peaks, properties); width=5 keeps only peaks at least 5 samples wide
first_4_wave_peaks_index, _ = find_peaks(first_4_wave_cnt['new_case_cnt'], width=5)
## peak_widths returns (widths, width_heights, left_ips, right_ips); the four arrays
## are stacked into one array and truncated to integers
first_4_wave_widths = np.array(
    peak_widths(first_4_wave_cnt['new_case_cnt'], first_4_wave_peaks_index, rel_height=1),
    dtype='int',
)

# Rows 2 and 3 hold the left / right edge positions of every detected wave
first_4_wave_left_pos, first_4_wave_right_pos = first_4_wave_widths[2], first_4_wave_widths[3]
## A wave starts one day after its left edge and ends on its right edge
first_4_wave_start_dt_index = first_4_wave_cnt.index[first_4_wave_left_pos] + datetime.timedelta(days=1)
first_4_wave_end_dt_index = first_4_wave_cnt.index[first_4_wave_right_pos]

In [11]:
# # Plot the new case counts with different waves identified
# plt.subplots(figsize=(15, 6))
# ## New Case counts
# plt.plot(first_4_wave_cnt.index, first_4_wave_cnt['new_case_cnt'])
# ## x-axis
# plt.plot(first_4_wave_cnt.index, np.zeros_like(first_4_wave_cnt['new_case_cnt']), '--', color='gray')
# ## Peak of each wave
# plt.plot(first_4_wave_cnt.index[first_4_wave_peaks_index], first_4_wave_cnt['new_case_cnt'][first_4_wave_peaks_index], 'x', color='g')
# ## Period of each wave
# plt.plot(first_4_wave_start_dt_index, first_4_wave_widths[1], '|', color='g')
# plt.plot(first_4_wave_end_dt_index, first_4_wave_widths[1], '|', color='g')
# plt.hlines(first_4_wave_widths[1], first_4_wave_start_dt_index, first_4_wave_end_dt_index, color='g')
# ## Title, x-axis label, y-axis label
# plt.title('First 4 waves of Covid-19 identified (Hong Kong)')
# plt.xlabel('Date')
# plt.ylabel('Number of New Cov-19 Cases')
# plt.show()

In [12]:
# Fit one discrete SEIR model per detected wave and keep, for every wave,
# its date index together with the fitted series returned by the model.
nbr_of_wave = len(first_4_wave_peaks_index)

first_4_wave_index_list = []
first_4_wave_predicted_cnt_list = []
for wave_start_dt, wave_end_dt in zip(first_4_wave_start_dt_index, first_4_wave_end_dt_index):
    # Label-based slice: both the start and the end date are included
    wave_cnt_df = first_4_wave_cnt.loc[wave_start_dt:wave_end_dt]
    wave_index = wave_cnt_df.index
    # Row 0: new cases, row 1: new recoveries
    wave_new_case_cnt = wave_cnt_df.values.T
    new_cases, new_recoveries = wave_new_case_cnt[0], wave_new_case_cnt[1]

    # Initial compartment sizes (presumably [S, E, I, R] — confirm against
    # Discrete_SEIR_model): all later cases of the wave, the first day's
    # cases split evenly in two, and the first day's recoveries
    initial_state = [
        sum(new_cases[1:]),
        new_cases[0] / 2,
        new_cases[0] / 2,
        new_recoveries[0],
    ]
    # Only the new-case row is handed to the model as observations
    Discrete_SEIR_model_hk = Discrete_SEIR_model(wave_new_case_cnt[[0]], initial_state)
    # NOTE(review): [0][1] is assumed to select the predicted new-case series — verify
    wave_predicted_cnt = Discrete_SEIR_model_hk.fit()[0][1]

    first_4_wave_index_list.append(wave_index)
    first_4_wave_predicted_cnt_list.append(wave_predicted_cnt)

The optimal parameters are: [4.45930358e-04 8.19068103e-01 1.09482751e-01]
The optimal parameters are: [8.42221671e-05 7.20361529e-01 1.41394342e-01]
The optimal parameters are: [0.00062308 0.012801   0.14927166]
The optimal parameters are: [9.02054561e-04 9.74305578e-01 3.60284624e-02]


In [13]:
# # Plot the new case counts with different waves identified and their predicted counts
# plt.subplots(figsize=(15, 6))
# ## New Case counts
# plt.plot(first_4_wave_cnt.index, first_4_wave_cnt['new_case_cnt'])
# ## x-axis
# plt.plot(first_4_wave_cnt.index, np.zeros_like(first_4_wave_cnt['new_case_cnt']), '--', color='gray')
# ## Peak of each wave
# plt.plot(first_4_wave_cnt.index[first_4_wave_peaks_index], first_4_wave_cnt['new_case_cnt'][first_4_wave_peaks_index], 'x', color='g')
# ## Period of each wave
# plt.plot(first_4_wave_start_dt_index, first_4_wave_widths[1], '|', color='g')
# plt.plot(first_4_wave_end_dt_index, first_4_wave_widths[1], '|', color='g')
# plt.hlines(first_4_wave_widths[1], first_4_wave_start_dt_index, first_4_wave_end_dt_index, color='g')
# ## Title
# plt.title('First 4 waves of Covid-19 identified (Hong Kong)')
# plt.xlabel('Date')
# plt.ylabel('Number of New Cov-19 Cases')
# # Predicted new case counts
# for i in range(nbr_of_wave):
#     plt.plot(first_4_wave_index_list[i], first_4_wave_predicted_cnt_list[i], color='red')
# plt.show()

# Waves Automated Detection and SEIR Model Fitting (5th Wave)

In [14]:
# Counts of the fifth wave: every report dated after 15 Jan 2022
fifth_wave_cutoff = datetime.datetime(2022, 1, 15)
fifth_wave_mask = covid_hk_case_cnt_std['report_date'] > fifth_wave_cutoff
fifth_wave_cnt = covid_hk_case_cnt_std.loc[fifth_wave_mask, ['new_case_cnt', 'new_recover_cnt']]

# Peaks and Widths
## find_peaks returns (peaks, properties); a peak must be at least 5 samples wide
## and have a prominence of at least 10000 to be kept
fifth_wave_peaks_index, _ = find_peaks(fifth_wave_cnt['new_case_cnt'], width=5, prominence=10000)
## peak_widths returns (widths, width_heights, left_ips, right_ips); the four arrays
## are stacked into one array and truncated to integers
fifth_wave_widths = np.array(
    peak_widths(fifth_wave_cnt['new_case_cnt'], fifth_wave_peaks_index, rel_height=1),
    dtype='int',
)

# Rows 2 and 3 hold the left / right edge positions of every detected wave
fifth_wave_left_pos, fifth_wave_right_pos = fifth_wave_widths[2], fifth_wave_widths[3]
## A wave starts one day after its left edge and ends on its right edge
fifth_wave_start_dt_index = fifth_wave_cnt.index[fifth_wave_left_pos] + datetime.timedelta(days=1)
fifth_wave_end_dt_index = fifth_wave_cnt.index[fifth_wave_right_pos]

In [15]:
# # Plot the new case counts with different waves identified
# plt.subplots(figsize=(15, 6))
# ## New Case counts
# plt.plot(fifth_wave_cnt.index, fifth_wave_cnt['new_case_cnt'])
# ## x-axis
# plt.plot(fifth_wave_cnt.index, np.zeros_like(fifth_wave_cnt['new_case_cnt']), '--', color='gray')
# ## Peak of each wave
# plt.plot(fifth_wave_cnt.index[fifth_wave_peaks_index], fifth_wave_cnt['new_case_cnt'][fifth_wave_peaks_index], 'x', color='g')
# ## Period of each wave
# plt.plot(fifth_wave_start_dt_index, fifth_wave_widths[1], '|', color='g')
# plt.plot(fifth_wave_end_dt_index, fifth_wave_widths[1], '|', color='g')
# plt.hlines(fifth_wave_widths[1], fifth_wave_start_dt_index, fifth_wave_end_dt_index, color='g')
# ## Title, x-axis label, y-axis label
# plt.title('Fifth waves of Covid-19 identified (Hong Kong)')
# plt.xlabel('Date')
# plt.ylabel('Number of New Cov-19 Cases')
# plt.show()

In [16]:
# Fit one discrete SEIR model per wave detected in the fifth-wave period and
# keep, for every wave, its date index together with the fitted series.
nbr_of_wave = len(fifth_wave_peaks_index)

fifth_wave_index_list = []
fifth_wave_predicted_cnt_list = []
for wave_start_dt, wave_end_dt in zip(fifth_wave_start_dt_index, fifth_wave_end_dt_index):
    # Label-based slice: both the start and the end date are included
    wave_cnt_df = fifth_wave_cnt.loc[wave_start_dt:wave_end_dt]
    wave_index = wave_cnt_df.index
    # Row 0: new cases, row 1: new recoveries
    wave_new_case_cnt = wave_cnt_df.values.T
    new_cases, new_recoveries = wave_new_case_cnt[0], wave_new_case_cnt[1]

    # Initial compartment sizes (presumably [S, E, I, R] — confirm against
    # Discrete_SEIR_model): all later cases of the wave, the first day's
    # cases split evenly in two, and the first day's recoveries
    initial_state = [
        sum(new_cases[1:]),
        new_cases[0] / 2,
        new_cases[0] / 2,
        new_recoveries[0],
    ]
    # Only the new-case row is handed to the model as observations
    Discrete_SEIR_model_hk = Discrete_SEIR_model(wave_new_case_cnt[[0]], initial_state)
    # NOTE(review): [0][1] is assumed to select the predicted new-case series — verify
    wave_predicted_cnt = Discrete_SEIR_model_hk.fit()[0][1]

    fifth_wave_index_list.append(wave_index)
    fifth_wave_predicted_cnt_list.append(wave_predicted_cnt)

The optimal parameters are: [2.63204123e-06 1.16274412e-01 8.73092521e-02]


In [17]:
# # Plot the new case counts with different waves identified and their predicted counts
# plt.subplots(figsize=(15, 6))
# ## New Case counts
# plt.plot(fifth_wave_cnt.index, fifth_wave_cnt['new_case_cnt'])
# ## x-axis
# plt.plot(fifth_wave_cnt.index, np.zeros_like(fifth_wave_cnt['new_case_cnt']), '--', color='gray')
# ## Peak of each wave
# plt.plot(fifth_wave_cnt.index[fifth_wave_peaks_index], fifth_wave_cnt['new_case_cnt'][fifth_wave_peaks_index], 'x', color='g')
# ## Period of each wave
# plt.plot(fifth_wave_start_dt_index, fifth_wave_widths[1], '|', color='g')
# plt.plot(fifth_wave_end_dt_index, fifth_wave_widths[1], '|', color='g')
# plt.hlines(fifth_wave_widths[1], fifth_wave_start_dt_index, fifth_wave_end_dt_index, color='g')
# ## Title
# plt.title('Fifth wave of Covid-19 identified (Hong Kong)')
# plt.xlabel('Date')
# plt.ylabel('Number of New Cov-19 Cases')
# # Predicted new case counts
# for i in range(nbr_of_wave):
#     plt.plot(fifth_wave_index_list[i], fifth_wave_predicted_cnt_list[i], color='red')
# plt.show()