In [1]:
import pandas as pd
import io
import requests
import numpy as np
url="http://www.chp.gov.hk/files/misc/enhanced_sur_covid_19_eng.csv"
s=requests.get(url).content
df=pd.read_csv(io.StringIO(s.decode('utf-8')))


df_delay_prep = df[~df['Date of onset'].isin(['Asymptomatic','Pending',"Unknown","January","Mid-March","Mid- July","Mid-July","October"])]

#Convert date of onset and reported date to datetime format
s = pd.to_datetime(df_delay_prep["Report date"],format='%d/%m/%Y').dt.date
df_delay_prep = df_delay_prep.assign(e=pd.Series(s).values)
df_delay_prep = df_delay_prep.drop(["Report date"], axis=1)
df_delay_prep.rename(columns={'e': 'Report date'}, inplace=True)

s = pd.to_datetime(df_delay_prep["Date of onset"],format='%d/%m/%Y').dt.date
df_delay_prep = df_delay_prep.assign(e=pd.Series(s).values)
df_delay_prep = df_delay_prep.drop(["Date of onset"], axis=1)
df_delay_prep.rename(columns={'e': 'Date of onset'}, inplace=True)

In [2]:
#df_delay_prep.head()

In [3]:
df_age = df_delay_prep.copy()

#Calculate delay
df_age["Delay"]=df_age["Report date"].sub(df_age["Date of onset"],axis=0)

#Convert delay to int
df_age["Delay"]=df_age["Delay"].dt.days

df_age = df_age[(df_age["Case classification*"]=="Local case")|(df_age["Case classification*"]=="Epidemiologically linked with local case")]
df_age = df_age[df_age["Delay"]>0] #Remove no delay cases 

In [4]:
#df_age['delay_cat'] = np.where( df_age.Delay > 0, 1, 0) 
#df_age.delay_cat.value_counts()

In [5]:
age_bucket = [0,18,34,44,54,64,75,100] 
df_age["age_bucketized"]=pd.cut(df_age["Age"],age_bucket)

df_age= df_age.groupby(["age_bucketized","Case classification*"]).count()
df_age = df_age.drop(columns={"Case no.",'Gender', 'Age', 'Name of hospital admitted',
       'Hospitalised/Discharged/Deceased', 'HK/Non-HK resident',
        'Confirmed/probable', 'Report date',
       'Date of onset'})
df_age = df_age.unstack(level=-1)
df_age["local"] = df_age['Delay']["Epidemiologically linked with local case"] + df_age['Delay']["Local case"]

In [6]:
df_age

Unnamed: 0_level_0,Delay,Delay,local
Case classification*,Epidemiologically linked with local case,Local case,Unnamed: 3_level_1
age_bucketized,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
"(0, 18]",340,67,407
"(18, 34]",853,495,1348
"(34, 44]",599,400,999
"(44, 54]",656,504,1160
"(54, 64]",767,539,1306
"(64, 75]",481,378,859
"(75, 100]",258,193,451


In [7]:
#Do the same for total cases 
url="http://www.chp.gov.hk/files/misc/enhanced_sur_covid_19_eng.csv"
s=requests.get(url).content
df=pd.read_csv(io.StringIO(s.decode('utf-8')))
df_copy_1 = df.copy()
df_copy_1 = df_copy_1[(df_copy_1["Case classification*"]=="Local case")|(df_copy_1["Case classification*"]=="Epidemiologically linked with local case")]
df_copy_1["age_bucketized"]=pd.cut(df_copy_1["Age"],age_bucket)
df_copy_1 = df_copy_1.groupby(["age_bucketized","Case classification*"]).count()
df_copy_1 = df_copy_1.drop(columns={'Gender', 'Age', 'Name of hospital admitted',
       'Hospitalised/Discharged/Deceased', 'HK/Non-HK resident',
        'Confirmed/probable', 'Report date',
       'Date of onset'})

df_copy_1 = df_copy_1.unstack(level=-1)
df_copy_1["local"] = df_copy_1['Case no.']["Epidemiologically linked with local case"] + df_copy_1['Case no.']["Local case"]

In [8]:
df_copy_1

Unnamed: 0_level_0,Case no.,Case no.,local
Case classification*,Epidemiologically linked with local case,Local case,Unnamed: 3_level_1
age_bucketized,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
"(0, 18]",543,74,617
"(18, 34]",1098,550,1648
"(34, 44]",839,460,1299
"(44, 54]",879,569,1448
"(54, 64]",1011,601,1612
"(64, 75]",695,448,1143
"(75, 100]",367,215,582


In [9]:
df_age["Age"] = [1,2,3,4,5,6,7]

In [10]:
df_age

Unnamed: 0_level_0,Delay,Delay,local,Age
Case classification*,Epidemiologically linked with local case,Local case,Unnamed: 3_level_1,Unnamed: 4_level_1
age_bucketized,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
"(0, 18]",340,67,407,1
"(18, 34]",853,495,1348,2
"(34, 44]",599,400,999,3
"(44, 54]",656,504,1160,4
"(54, 64]",767,539,1306,5
"(64, 75]",481,378,859,6
"(75, 100]",258,193,451,7


In [11]:
df_age.info()

<class 'pandas.core.frame.DataFrame'>
CategoricalIndex: 7 entries, (0, 18] to (75, 100]
Data columns (total 4 columns):
 #   Column                                             Non-Null Count  Dtype
---  ------                                             --------------  -----
 0   (Delay, Epidemiologically linked with local case)  7 non-null      int64
 1   (Delay, Local case)                                7 non-null      int64
 2   (local, )                                          7 non-null      int64
 3   (Age, )                                            7 non-null      int64
dtypes: int64(4)
memory usage: 643.0 bytes


In [12]:
df_copy_1.info()

<class 'pandas.core.frame.DataFrame'>
CategoricalIndex: 7 entries, (0, 18] to (75, 100]
Data columns (total 3 columns):
 #   Column                                                Non-Null Count  Dtype
---  ------                                                --------------  -----
 0   (Case no., Epidemiologically linked with local case)  7 non-null      int64
 1   (Case no., Local case)                                7 non-null      int64
 2   (local, )                                             7 non-null      int64
dtypes: int64(3)
memory usage: 587.0 bytes


In [13]:
df_age["cases with delay % - Local with Epi-link"] =(df_age['Delay']["Epidemiologically linked with local case"]/df_copy_1["Case no."]["Epidemiologically linked with local case"])*100
df_age["cases with delay % - Local without Epi-link"] =(df_age['Delay']["Local case"]/df_copy_1["Case no."]["Local case"])*100
df_age["cases with delay % - Local"] =(df_age['local']/df_copy_1["local"])*100
df_age

Unnamed: 0_level_0,Delay,Delay,local,Age,cases with delay % - Local with Epi-link,cases with delay % - Local without Epi-link,cases with delay % - Local
Case classification*,Epidemiologically linked with local case,Local case,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
age_bucketized,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
"(0, 18]",340,67,407,1,62.615101,90.540541,65.964344
"(18, 34]",853,495,1348,2,77.686703,90.0,81.796117
"(34, 44]",599,400,999,3,71.394517,86.956522,76.905312
"(44, 54]",656,504,1160,4,74.630262,88.57645,80.110497
"(54, 64]",767,539,1306,5,75.86548,89.68386,81.01737
"(64, 75]",481,378,859,6,69.208633,84.375,75.153106
"(75, 100]",258,193,451,7,70.299728,89.767442,77.491409


In [14]:
df_age = df_age.set_index("Age")

In [15]:
df_age.index

Int64Index([1, 2, 3, 4, 5, 6, 7], dtype='int64', name='Age')

In [16]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px
import os 
import pandas as pd
import calculation  #The file which contains the calculations 
import plotly.graph_objects as go
import dash_table as dt
import geo_calculation

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_lln_v5["Delay"]=df_lln_v5["Report date"].sub(df_lln_v5["Date of onset"],axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_llp_v5["Delay"]=df_llp_v5["Report date"].sub(df_llp_v5["Date of onset"],axis=0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_lln_v5["Delay"]=df_lln_v5["Delay"].d

In [19]:

fig = px.bar(df_age, x = df_age.index, y = ["cases with delay % - Local with Epi-link","cases with delay % - Local without Epi-link","cases with delay % - Local"],template="plotly_dark",barmode='group')
fig.show()

In [18]:
dfada = pd.DataFrame.to_json(df_age)
dfada.index

<function str.index>