In [6]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go

from glob import glob
from collections import Counter
from plotly.offline import iplot
from scipy.optimize import curve_fit

from helper import timeSeriesPlot, daily_feature

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [7]:
df_th = pd.read_csv('./data/00_clean.csv', parse_dates=['date'], index_col='date')
df_th.head()

Unnamed: 0_level_0,confirm,deaths,recovered
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-22,2,0,0
2020-01-23,3,0,0
2020-01-24,5,0,0
2020-01-25,7,0,0
2020-01-26,8,0,2


In [8]:
timeSeriesPlot(df_th, title='Cumulative')

In [9]:
df_daily = daily_feature(df_th)
timeSeriesPlot(df_daily, title='Change', mode='markers')

In [10]:
df_concat = pd.concat([df_th, daily_feature(df_th)], axis=1)
timeSeriesPlot(df_concat[['confirm', 'confirm_daily_change']], title='confirm case', mode='markers')

# Hypothesis

We can see that line of cumulative number of appear case was stop exponential growth at 22 Mar, then that line has becoming to **Sigmoid function (figure below)**.

![](./image/sigmoid.png)

the **logistic distribution** is a continuous probability distribution. Its **cumulative distribution function is the logistic function**.

[Wikipedia](https://en.wikipedia.org/wiki/Logistic_distribution)

$$
    f(x) = \frac{e^{-x}}{(1+e^{-x})^2} 
$$

This equation is a **logistic distribution**.

## Exponential growth estimate

In [11]:
from_ = '2020-01-22'
to_   = '2020-03-22'

df_filter = df_daily[from_: to_]

y = df_filter['confirm_daily_change']
X = list(range(y.shape[0]))

In [12]:
def exponential_equation(x, a, b, c):
    return a * np.exp(b * (x - c))

coef = curve_fit(exponential_equation, X, y)[0]
y_hat = np.vectorize(lambda x: exponential_equation(x, *coef))

coef

array([2.97912423e-08, 4.10523037e-01, 5.28637681e+00])

In [13]:
data = [
    go.Scatter(x=y.index, y=y, name='Actual', mode='markers'),
    go.Scatter(x=y.index, y=y_hat(X), name='Fitting-line')
]

lay = go.Layout(title=f'Exponential estimate ({from_} to {to_})')

fig = go.Figure(data=data, layout=lay)

iplot(fig)

## Sigmoid pattern

In [14]:
from_ = '2020-01-22'
to_   = None

df_filter = df_th[from_: to_]

y = df_filter['confirm']
X = list(range(y.shape[0]))

In [15]:
def sigmoid_equation(x, a, b, c):
    return a/(1+(np.exp(-b * (x - c))))

coef = curve_fit(sigmoid_equation, X, y, maxfev=100)[0]
y_hat = np.vectorize(lambda x: sigmoid_equation(x, *coef))

coef


overflow encountered in exp



array([2.64739240e+03, 2.05110118e-01, 6.65016552e+01])

In [16]:
data = [
    go.Scatter(x=y.index, y=y, name='Actual', mode='markers'),
    go.Scatter(x=y.index, y=y_hat(X), name='Fitting-line')
]

lay = go.Layout(title=f'Sigmoid estimate ({from_} to {to_})')

fig = go.Figure(data=data, layout=lay)

iplot(fig)

## Logistic distribution

In [17]:
from_ = '2020-01-22'
to_   = None

df_filter = df_daily[from_: to_]

y = df_filter['confirm_daily_change']
X = list(range(y.shape[0]))

In [18]:
def logistic_equation(x, a, b, c):
    return (a * (np.exp(-b * (x - c))))/(1+(np.exp(-b * (x - c))))**2

coef = curve_fit(logistic_equation, X, y)[0]
y_hat = np.vectorize(lambda x: logistic_equation(x, *coef))

coef


overflow encountered in square



array([ 5.23810068e+02, -1.90108473e-01,  6.68337801e+01])

In [19]:
data = [
    go.Scatter(x=y.index, y=y, name='Actual', mode='markers'),
    go.Scatter(x=y.index, y=y_hat(X), name='Fitting-line')
]

lay = go.Layout(title=f'Logistic distribution estimate ({from_} to {to_})')

fig = go.Figure(data=data, layout=lay)

iplot(fig)

In [20]:
y_hat(100)

array(0.95341923)