# Imports

In [None]:
import json
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import plotly
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable inline plotly rendering in the notebook; connected=True loads plotly.js
# from the online CDN instead of embedding the library in the notebook.
init_notebook_mode(connected=True)

In [None]:
import plotly.io as pio
# Evaluate `pio.templates` to list the available themes;
# the library default is "plotly".
# Every figure created after this line uses the white-background theme.
pio.templates.default = "plotly_white"

In [None]:
# Make pandas' `.plot()` accessor draw with plotly.
pd.options.plotting.backend = "plotly"

# All data

## Preprocessing

In [None]:
# Nationwide daily figures (the file name suggests 27 Mar 2020 - 15 Apr 2021).
all_data_csv = "../input/malaysia-covid19-data-apr-2021/all_2020-03-27_2021-04-15.csv"
df = pd.read_csv(all_data_csv)
df.head()

In [None]:
# Column dtypes and non-null counts of the raw frame, before any cleaning.
df.info()

In [None]:
def preprocess_df(df):
    """Return a cleaned copy of the raw nationwide frame, indexed by date.

    The input frame is left untouched, so the raw data stays available and a
    failure part-way through cannot leave it half-processed.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing at least the columns 'URL', 'Date',
        'Cumulative Case', 'Cumulative Recovered' and 'Cumulative Death'.
        'Date' must hold strings in day-month-2-digit-year form ('%d-%m-%y').

    Returns
    -------
    pandas.DataFrame
        A new frame without 'URL', with an added 'Active Case' column
        (cumulative cases minus cumulative recoveries and deaths) and a
        DatetimeIndex named 'Date'.

    Raises
    ------
    KeyError
        If a required column is missing.
    ValueError
        If a 'Date' value does not match the expected format.
    """
    # drop() without inplace returns a new frame, so later assignments never
    # touch the caller's object.
    cleaned = df.drop(columns='URL')
    # The explicit format avoids day/month ambiguity when parsing.
    cleaned['Date'] = pd.to_datetime(cleaned['Date'], format='%d-%m-%y')
    cleaned['Active Case'] = (
        cleaned['Cumulative Case']
        - cleaned['Cumulative Recovered']
        - cleaned['Cumulative Death']
    )
    return cleaned.set_index('Date')

In [None]:
# Replace the raw frame with its cleaned, date-indexed version.
# NOTE: because `df` is rebound, this cell cannot be run twice in a row: the
# second call receives a frame that no longer has the 'URL' and 'Date' columns.
# Re-run the CSV loading cell first.
df = preprocess_df(df)
df.head()

In [None]:
# Smoothing columns used by the charts below.
rolling_day = 7
new_cases = df['New Case']

# Simple moving averages; min_periods=1 keeps the first rows from being NaN.
df['SMA_new'] = new_cases.rolling(window=rolling_day, min_periods=1).mean().round(2)
df['SMA_death'] = df['Death'].rolling(window=rolling_day, min_periods=1).mean().round(2)

# Exponential moving averages of new cases: a larger alpha reacts faster.
for alpha in (0.1, 0.3):
    df[f'EMA_{alpha}'] = new_cases.ewm(alpha=alpha).mean().round(2)

## Visualizations

In [None]:
# Daily confirmed / recovered / death counts on one shared time axis.
daily_traces = [
    # (column, line colour, legend label)
    ('New Case', 'rebeccapurple', 'Confirmed'),
    ('Recovered', 'royalblue', 'Recovered'),
    ('Death', 'firebrick', 'Death'),
]

fig = go.Figure()
for column, colour, label in daily_traces:
    fig.add_trace(go.Scatter(x=df.index, y=df[column],
                             line=dict(color=colour),
                             name=label))

fig.update_layout(
    title='COVID-19 Malaysia: Daily Cases',
    height=600,
    xaxis_title=None,
    yaxis_title=None,
    legend=dict(yanchor="top", y=1.05, xanchor="left", x=0.01),
)

# Hover picks up the points at the cursor's x position and draws a dotted
# vertical spike line across the plot.
fig.update_layout(
    hovermode="x",
    hoverdistance=100,   # distance within which a point's hover label is shown
    spikedistance=1000,  # distance within which the spike line is shown
    xaxis=dict(
        showspikes=True,
        spikethickness=2,
        spikedash="dot",
        spikecolor="#999999",
        spikemode="across",
    ),
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Row(s) on which the 7-day average of new cases reached its maximum.
peak_sma = df['SMA_new'].max()
max_row = df[df['SMA_new'] == peak_sma]
max_row

In [None]:
# Most recent observation and its date formatted for chart titles.
last_row = df.iloc[-1]
last_date = f"{last_row.name:%b %d, %Y}"
last_date

In [None]:
# Latest 7-day average expressed as a percentage of the peak 7-day average.
peak_value = max_row['SMA_new'].values[0]
latest_value = last_row['SMA_new']
pct_vs_peak = round(latest_value / peak_value * 100, 1)
pct_vs_peak

In [None]:
# 7-day average of new cases, with call-outs for the peak and the latest value.
max_row = df.loc[df['SMA_new'] == df['SMA_new'].max()]
peak_avg = int(max_row['SMA_new'].values[0])
latest_avg = int(last_row['SMA_new'])

# Appearance shared by both call-out boxes; only position and text differ.
callout_style = dict(
    xref="x",
    yref="y",
    showarrow=True,
    font=dict(family="Courier New, monospace", size=16, color="#ffffff"),
    align="center",
    arrowhead=1,
    arrowsize=1,
    arrowwidth=2,
    arrowcolor="#636363",
    bordercolor="#c7c7c7",
    borderwidth=2,
    borderpad=4,
    bgcolor="brown",
    standoff=2,
    opacity=0.8,
)

fig = px.line(
    df, x=df.index, y='SMA_new',
    title='COVID-19 Malaysia: 7-day Rolling Average of New Cases',
    labels={'SMA_new': 'Average'},
)
fig.update_layout(xaxis_title=None, yaxis_title=None)

# Peak: box placed up and to the right of the point.
fig.add_annotation(
    x=str(max_row.index.values[0]),
    y=peak_avg,
    text=f"Highest average on {max_row.index.date[0]}: {peak_avg}",
    ax=30,
    ay=-35,
    **callout_style,
)
# Latest: box anchored on its right edge so it stays inside the plot area.
fig.add_annotation(
    x=last_row.name,
    y=latest_avg,
    text=f"Latest: {latest_avg}; {pct_vs_peak}% of the peak average",
    ax=-25,
    ay=-20,
    xanchor='right',
    **callout_style,
)
fig.show()

In [None]:
# Raw daily new cases overlaid with their 7-day average, annotated with the
# peak average and the latest average.
peak_avg = int(max_row['SMA_new'].values[0])
latest_avg = int(last_row['SMA_new'])

# Appearance shared by both call-out boxes; only position and text differ.
callout_style = dict(
    xref="x",
    yref="y",
    showarrow=True,
    font=dict(family="Courier New, monospace", size=16, color="#ffffff"),
    align="center",
    arrowhead=1,
    arrowsize=1,
    arrowwidth=2,
    arrowcolor="#636363",
    bordercolor="#c7c7c7",
    borderwidth=2,
    borderpad=4,
    bgcolor="brown",
    standoff=2,
    opacity=0.8,
)

fig = go.Figure()
for column, colour, label in [('New Case', 'royalblue', 'New Case'),
                              ('SMA_new', 'coral', 'Average')]:
    fig.add_trace(go.Scatter(x=df.index, y=df[column],
                             line=dict(color=colour),
                             name=label))

fig.update_layout(
    title='COVID-19 Malaysia: New Case VS 7-day Moving Average',
    xaxis_title=None,
    yaxis_title=None,
    hovermode="x unified",
    height=600,
    legend=dict(yanchor="top", y=1.0, xanchor="left", x=0.01),
)

fig.add_annotation(
    x=str(max_row.index.values[0]),
    y=peak_avg,
    text=f"Highest average on {max_row.index.date[0]}: {peak_avg}",
    ax=30,
    ay=-35,
    **callout_style,
)
fig.add_annotation(
    x=last_row.name,
    y=latest_avg,
    text=f"Latest: {latest_avg}; {pct_vs_peak}% of the peak average",
    ax=-25,
    ay=-20,
    xanchor='right',
    **callout_style,
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Cumulative confirmed / recovered / death counts on a logarithmic y-axis.
cumulative_traces = [
    # (column, line colour, legend label)
    ('Cumulative Case', 'rebeccapurple', 'Confirmed'),
    ('Cumulative Recovered', 'royalblue', 'Recovered'),
    ('Cumulative Death', 'firebrick', 'Death'),
]

fig = go.Figure()
for column, colour, label in cumulative_traces:
    fig.add_trace(go.Scatter(x=df.index, y=df[column],
                             line=dict(color=colour),
                             name=label))

fig.update_layout(
    title='COVID-19 Malaysia: Cumulative Cases',
    height=600,
    hovermode="x unified",
    xaxis_title=None,
    yaxis_title=None,
    legend=dict(yanchor="top", y=1.05, xanchor="left", x=0.01),
)
fig.update_yaxes(type='log')
fig.show()

In [None]:
# Daily deaths against their 7-day moving average.
fig = go.Figure()
for column, colour, label in [('Death', 'royalblue', 'Death'),
                              ('SMA_death', 'coral', 'Average')]:
    fig.add_trace(go.Scatter(x=df.index, y=df[column],
                             line=dict(color=colour),
                             name=label))

fig.update_layout(
    title='COVID-19 Malaysia: Daily Death VS 7-day Moving Average',
    hovermode="x unified",
    legend=dict(yanchor="top", y=1.0, xanchor="left", x=0.01),
)
fig.show()

In [None]:
# Compare the 7-day simple average with two exponential moving averages.
smoothing_traces = [
    # (column, line style, legend label)
    ('SMA_new', dict(color='royalblue'), 'Average'),
    ('EMA_0.1', dict(color='green', dash='dashdot'), 'EMA - alpha=0.1'),
    ('EMA_0.3', dict(color='firebrick', dash='dashdot'), 'EMA - alpha=0.3'),
]

fig = go.Figure()
for column, line_style, label in smoothing_traces:
    fig.add_trace(go.Scatter(x=df.index, y=df[column],
                             line=line_style,
                             name=label))

fig.update_layout(
    title='COVID-19 Malaysia: 7-day Rolling Average & Exponential Moving Average',
    height=500,
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)
fig.show()

# Monthly data

In [None]:
# Monthly totals of the daily columns, re-labelled to the first day of each
# month so bars line up with the monthly axis ticks.
# Note: pandas 2.2+ deprecates the "M" alias in favour of "ME".
# NOTE(review): 'ICU' and 'Ventilator' are summed like the other columns -
# confirm a monthly sum is meaningful for them.
monthly_cols = ['Recovered', 'New Case', 'Death', 'ICU', 'Ventilator']

df_m = df.resample("M").sum().reset_index()
df_m['Date'] = df_m['Date'].apply(lambda dt: dt.replace(day=1))
df_m = df_m[['Date'] + monthly_cols].set_index('Date')
df_m.head()

In [None]:
def style_df(df, axis=0):
    df_copy = df.copy()
    df_copy.index = df_copy.index.strftime('%b %Y')
    df_copy = df_copy.style.background_gradient(cmap='Blues', axis=axis)
    return df_copy

In [None]:
# Monthly totals as a shaded table; with the default axis=0 the colour scale
# is computed separately for each column.
df_m_style = style_df(df_m)
df_m_style

In [None]:
def plot_bar(y):
    """Show a bar chart of one monthly column of the global `df_m`.

    Parameters
    ----------
    y : str
        Name of the `df_m` column to plot. It drives bar height, bar label
        and bar colour, and is interpolated into the title.

    Returns
    -------
    None
        The figure is displayed, not returned.
    """
    chart_title = f'COVID-19 Malaysia: Monthly {y}'
    fig = px.bar(
        df_m,
        x=df_m.index,
        y=y,
        text=y,
        color=y,
        title=chart_title,
        color_continuous_scale='teal',
    )
    # Thousands separators on the bar labels.
    fig.update_traces(texttemplate='%{text:,}')
    # One tick per month, month and year on separate lines.
    fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
    fig.update_layout(
        xaxis_title=None,
        yaxis_title=None,
        uniformtext_minsize=8,
        uniformtext_mode='hide',   # hide labels that do not fit at min size
        coloraxis_showscale=False,
    )
    fig.show()

In [None]:
# Monthly totals of newly confirmed cases.
plot_bar('New Case')

In [None]:
# Monthly totals of recoveries.
plot_bar('Recovered')

In [None]:
# Monthly totals of deaths.
plot_bar('Death')

In [None]:
# Long-format monthly table: one row per (month, case type).
case_columns = ['New Case', 'Recovered', 'Death']
long_df_m = (
    df_m[case_columns]
    .melt(var_name='Case', value_name='Number', ignore_index=False)
    .reset_index()
)
# log10 of the counts; a monthly count of 0 would give -inf here.
long_df_m['log_number'] = np.log10(long_df_m['Number']).round(4)
long_df_m.head()

In [None]:
# Grouped monthly bars (new / recovered / death) on a log-scaled y-axis.
case_palette = ['rebeccapurple', 'teal', 'firebrick']

fig = px.bar(
    long_df_m,
    x='Date',
    y='Number',
    color='Case',
    text='Number',
    color_discrete_sequence=case_palette,
)
# Bar labels and hover text both show the count with thousands separators.
fig.update_traces(
    texttemplate='%{text:,}',
    hovertemplate='<b>%{text:,}</b>',
)
fig.update_layout(
    title_text='COVID-19 Malaysia: Monthly Cases',
    xaxis_title=None,
    yaxis_title='Log Scale',
    uniformtext_minsize=10,
    barmode='group',
    legend=dict(yanchor="top", y=1.01, xanchor="left", x=0.01),
)
fig.update_yaxes(type='log')
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
fig.show(config={"displayModeBar": False, "showTips": False})

# State data

In [None]:
# Per-state daily counts and per-state cumulative counts, both indexed by date.
state_daily_csv = '../input/malaysia-covid19-data-apr-2021/state_all.csv'
state_cumu_csv = '../input/malaysia-covid19-data-apr-2021/state_cumu.csv'

dfState = (
    pd.read_csv(state_daily_csv)
    .assign(Date=lambda frame: pd.to_datetime(frame.Date))
    .set_index('Date')
)
dfStateCumu = (
    pd.read_csv(state_cumu_csv)
    .assign(Date=lambda frame: pd.to_datetime(frame.Date))
    .set_index('Date')
)

In [None]:
# Date label used in the state chart titles.
# NOTE(review): this is the last date of the nationwide frame `df`, while the
# state totals come from `dfStateCumu` - confirm both end on the same day.
last_date = df.index[-1].strftime('%b %d, %Y')

In [None]:
# Latest cumulative count per state: the last row of the cumulative table,
# transposed so each state becomes a row.
latest_totals = dfStateCumu.iloc[[-1]].T.reset_index()
latest_totals.columns = ['State', 'Confirmed']
# 'State_spaced' adds trailing spaces, used as padded y-axis labels in the bar chart.
df_state_total = latest_totals.assign(
    State_spaced=latest_totals['State'] + '  '
)
df_state_total

In [None]:
# One line per state; the legend acts as the state selector.
state_lines = [
    go.Scatter(x=dfState.index, y=dfState[state_name],
               name=state_name, visible=True)
    for state_name in dfState.columns
]
fig = go.Figure(data=state_lines)
fig.update_layout(title='COVID-19 Malaysia: Daily Cases by State', height=600)

# Usage hint placed above the top-right corner, in paper coordinates.
legend_tip = dict(
    xref='paper',
    yref='paper',
    x=1,
    y=1.09,
    showarrow=False,
    font=dict(size=12, color="royalblue"),
    text='Tip: Double click a legend to isolate only the state',
)
fig.add_annotation(**legend_tip)
fig.show()

In [None]:
# Horizontal ranking of states by total confirmed cases (largest at the top).
states_ranked = df_state_total.sort_values('Confirmed')

fig = px.bar(
    states_ranked,
    x='Confirmed',
    y='State_spaced',
    text='Confirmed',
    color='Confirmed',
    color_continuous_scale='teal',
    hover_name='State',
    # The hover box shows only the state name.
    hover_data={'State_spaced': False, 'Confirmed': False},
)
fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    title=f'COVID-19 Malaysia: Total Cases by {last_date}',
    width=700,
    height=800,
    xaxis_title=None,
    yaxis_title=None,
    showlegend=False,
    coloraxis_showscale=False,
)
fig.update_traces(texttemplate='%{text:,}')
fig.show(config={"displayModeBar": False, "showTips": False})

In [None]:
# Share of total confirmed cases contributed by each state.
pie_title = f'COVID-19 Malaysia: Proportion of Confirmed Cases by {last_date}'

fig = px.pie(
    df_state_total,
    values='Confirmed',
    names='State',
    height=600,
    hover_data={'State': False},
)
# Percentage and state name are written inside each slice.
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title=pie_title, title_x=0.28)
fig.show()

## Monthly State Data

In [None]:
# Aggregate the daily state counts into monthly totals (index labelled with
# each month's end) so the later charts have far fewer points to draw.
# Note: pandas 2.2+ deprecates the 'M' alias in favour of 'ME'.
df_mState = dfState.resample('M').sum().astype(int)
df_mState.head()

In [None]:
# Shaded month-by-state table. style_df works on its own copy of the frame,
# so df_mState is not modified; axis=1 is forwarded to the gradient.
df_longStyle = style_df(df_mState, axis=1)
df_longStyle

In [None]:
# Long-format monthly state table: one row per (month, state), with dates
# moved to the first of the month and rows ordered by month then state.
df_longState = (
    df_mState
    .melt(var_name='State', value_name='Monthly Case', ignore_index=False)
    .reset_index()
    .assign(Date=lambda frame: frame.Date.apply(lambda ts: ts.replace(day=1)))
    .sort_values(['Date', 'State'], ignore_index=True)
)
df_longState.head()

In [None]:
# Monthly cases per state up to and including July 2020. Dates sit on the
# first of the month, so '<= 2020-07' keeps July.
up_to_jul_2020 = df_longState[df_longState['Date'] <= '2020-07']

fig = px.bar(
    up_to_jul_2020,
    x='Date',
    y='Monthly Case',
    color='State',
    color_discrete_sequence=px.colors.qualitative.Dark24,
)
fig.update_traces(hovertemplate='<b>%{y:,}</b>')
fig.update_layout(
    title_text='COVID-19 Malaysia: Monthly Cases Mar-Jul 2020',
    xaxis_title=None,
    yaxis_title='Log Scale',
    legend_title=None,
    height=600,
    barmode='group',
)
fig.update_yaxes(type='log')
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
fig.show(config={"displayModeBar": False, "showTips": False})

In [None]:
# Monthly cases per state, August to December 2020. Dates sit on the first of
# the month, so '> 2020-07' excludes July and '<= 2020-12' keeps December.
after_jul = df_longState['Date'] > '2020-07'
through_dec = df_longState['Date'] <= '2020-12'
aug_to_dec_2020 = df_longState[after_jul & through_dec]

fig = px.bar(
    aug_to_dec_2020,
    x='Date',
    y='Monthly Case',
    color='State',
    color_discrete_sequence=px.colors.qualitative.Dark24,
)
fig.update_traces(hovertemplate='<b>%{y:,}</b>')
fig.update_layout(
    title_text='COVID-19 Malaysia: Monthly Cases Aug-Dec 2020',
    xaxis_title=None,
    yaxis_title='Log Scale',
    legend_title=None,
    height=600,
    barmode='group',
)
fig.update_yaxes(type='log')
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
fig.show(config={"displayModeBar": False, "showTips": False})

In [None]:
# Monthly cases per state for every month after December 2020.
from_2021 = df_longState[df_longState['Date'] > '2020-12']

fig = px.bar(
    from_2021,
    x='Date',
    y='Monthly Case',
    color='State',
    color_discrete_sequence=px.colors.qualitative.Dark24,
)
fig.update_traces(hovertemplate='<b>%{y:,}</b>')
fig.update_layout(
    title_text='COVID-19 Malaysia: Monthly Cases 2021',
    xaxis_title=None,
    yaxis_title='Log Scale',
    legend_title=None,
    height=600,
    barmode='group',
)
fig.update_yaxes(type='log')
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
fig.show(config={"displayModeBar": False, "showTips": False})

# Choropleth Map

In [None]:
# State boundary polygons. Source:
# https://www.igismap.com/download-malaysia-shapefile-area-map-free-country-boundary-state-polygon/
geojson_path = '../input/malaysia-covid19-data-apr-2021/malaysia_state_province_boundary.geojson'
with open(geojson_path) as geojson_file:
    msia_geojson = json.load(geojson_file)

In [None]:
# Give every geojson feature a numeric id (its position in the list) and
# build a lookup from the feature's 'locname' property to that id.
features = msia_geojson['features']
for idx, feature in enumerate(features):
    feature['id'] = idx

state_id_dict = {
    feature['properties']['locname']: feature['id'] for feature in features
}
# If a state is keyed under a different spelling, rename the key here, e.g.
# state_id_dict['PULAU PINANG'] = state_id_dict.pop('PENANG')
print(state_id_dict)

In [None]:
# Map each state name in the data to a geojson feature id. The 'WP ' prefix is
# stripped and the match is a case-insensitive substring test against the
# geojson names; if several names match, the last one wins. States with no
# match are simply absent from the mapping.
correct_state_id = {
    stateName: int(feature_id)
    for stateName in df_state_total.State
    for geo_name, feature_id in state_id_dict.items()
    if stateName.replace('WP ', '').lower() in geo_name.lower()
}

In [None]:
# Attach the geojson feature id to each state row; a state missing from the
# mapping gets NaN.
state_ids = df_state_total['State'].map(correct_state_id)
df_state_total['id'] = state_ids
df_state_total

In [None]:
# Log is not helping the color representation in the figures
# df_state_total['log_confirmed'] = np.log10(df_state_total['Confirmed'])

# # Get the maximum value to cap displayed values
# max_log = df_state_total['log_confirmed'].max()
# max_val = int(max_log) + 1

# # Prepare the range of the colorbar
# values = [i for i in range(max_val)]
# ticks = [10**i for i in values]

In [None]:
def plot_choropleth(df):
    """Show a choropleth of confirmed cases on the global `msia_geojson` shapes.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'id' (matched against the geojson feature ids),
        'Confirmed' (drives the colour) and 'State' (hover title).

    Returns
    -------
    None
        The figure is displayed, not returned.
    """
    # No frame, coastlines or base map: only the state polygons are drawn,
    # and the view is fitted to them.
    geo_options = dict(
        showframe=False,
        showcoastlines=False,
        fitbounds="locations",
        visible=False,
    )
    fig = px.choropleth(
        df,
        geojson=msia_geojson,
        locations="id",
        color="Confirmed",
        color_continuous_scale="YlOrRd",
        hover_name="State",
        hover_data={"id": False, "Confirmed": True},
    )
    fig.update_layout(
        geo=geo_options,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        height=600,
    )
    fig.show()

In [None]:
# Static choropleth of the latest cumulative confirmed cases per state.
plot_choropleth(df_state_total)

In [None]:
# # Using logarithmic number
# fig = px.choropleth(
#     df_state_total,
#     geojson=msia_geojson,
#     locations='id',
#     color='log_confirmed',
#     hover_name="State",
#     hover_data={"id": False, "log_confirmed": False, "Confirmed": True},
#     range_color=(0, max_log),
#     color_continuous_scale="teal",
# )

# # Define layout specificities
# fig.update_layout(
#     margin={'r':0,'t':0,'l':0,'b':0},
#     coloraxis_colorbar={
#         'title': 'Confirmed',
#         'tickvals': values,
#         'ticktext': ticks        
#     },
#     geo=dict(
#         showframe = False,
#         # showcoastlines = True,
#         # projection_type = 'equirectangular',
#         fitbounds="locations", 
#         visible=False
#     )
# )

# # Display figure
# fig.show()

In [None]:
# The same per-state totals drawn semi-transparently over OpenStreetMap tiles.
map_centre = {'lat': 4.1, 'lon': 109.4}

fig = px.choropleth_mapbox(
    df_state_total,
    geojson=msia_geojson,
    locations="id",
    color="Confirmed",
    color_continuous_scale="YlOrRd",
    hover_name="State",
    hover_data={"id": False, "Confirmed": True},
    mapbox_style='open-street-map',
    center=map_centre,
    zoom=4.5,
    opacity=0.6,
)
# Remove the margins so the map fills the whole figure.
fig.update_layout(
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    height=600,
)
fig.show()