In [9]:
# Numerical and tabular stack used throughout the notebook.
import numpy as np
import pandas as pd

# Directory that holds the input data; '.' is the notebook's working directory.
from pathlib import Path
data_dir = Path('.')

# List the directory contents. This is not the last expression of the cell,
# so its return value is not displayed.
import os
os.listdir(data_dir)

# Plotly: low-level figure objects, the express API, and global I/O settings.
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Use the dark template as the default for figures.
pio.templates.default = "plotly_dark"

# Default renderer for fig.show(). Swap to the commented-out "png" renderer
# below when the graphs need to be viewable in the GitHub repo.
pio.renderers.default = "notebook_connected"
#pio.renderers.default = "png"

# NOTE(review): make_subplots is not used in the cells visible here.
from plotly.subplots import make_subplots

# Non-linear least-squares fitting, used for the logistic curve fit.
from scipy.optimize import curve_fit


In [10]:
# Resolve the CSV against data_dir (defined in the setup cell) instead of a
# bare relative string, so the data location is configured in one place.
world_data = pd.read_csv(data_dir / 'covid_19_clean_complete.csv', parse_dates=['Date'])
world_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0.0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0.0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0.0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0.0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0.0


In [11]:
# Shorten the two slash-separated column names. Re-assigning the result
# (rather than inplace=True) keeps the step explicit and safe to re-run.
world_data = world_data.rename(columns={
    'Province/State': 'State',
    'Country/Region': 'Country',
})

# Use a single label for China.
world_data['Country'] = world_data['Country'].replace('Mainland China', 'China')

# Rows without a province/state get an empty string instead of NaN.
world_data['State'] = world_data['State'].fillna('')

# Fill missing counts BEFORE deriving Active. If Active is computed first, a
# NaN in any input propagates into Active and is then filled with 0, which
# reports zero active cases even when Confirmed - Deaths is positive.
count_cols = ['Confirmed', 'Deaths', 'Recovered']
world_data[count_cols] = world_data[count_cols].fillna(0)

# Active cases = confirmed cases that are neither deaths nor recoveries.
world_data['Active'] = world_data['Confirmed'] - world_data['Deaths'] - world_data['Recovered']

In [12]:
# Preview the first rows of world_data after the cleaning cell.
world_data.head()

Unnamed: 0,State,Country,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0.0,0.0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0.0,0.0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0.0,0.0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0.0,0.0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0.0,0.0


In [13]:
def plot_data(country):
    """Show line charts of confirmed cases and deaths over time for a country.

    Rows of the notebook-level ``world_data`` frame whose ``Country`` equals
    ``country`` are summed per ``Date`` (collapsing multiple rows that share
    a date), then one Plotly line chart is shown for ``Confirmed`` and one
    for ``Deaths``.

    Parameters
    ----------
    country : str
        Value matched exactly against ``world_data['Country']``.

    Returns
    -------
    None
        Both figures are rendered with ``fig.show()``.
    """
    country_rows = world_data[world_data['Country'] == country]

    # Select the summed columns with a list: tuple-style selection such as
    # ``groupby(...)['a', 'b']`` is rejected by pandas >= 2.0. 'Date' is the
    # grouping key, so reset_index() restores it as a column and it must not
    # be listed among the columns being summed.
    totals_by_date = (
        country_rows
        .groupby('Date')[['Confirmed', 'Deaths']]
        .sum()
        .reset_index()
    )

    # One chart per metric; only the y column and the title prefix differ.
    for column, label in (('Confirmed', 'Confirmed Cases'), ('Deaths', 'Deaths')):
        fig = px.line(totals_by_date, x="Date", y=column,
                      title=f"{label} in {country} Over Time",
                      color_discrete_sequence=['#F61067'],
                      height=500)
        fig.show()

In [14]:
# Show the confirmed-cases and deaths charts for India.
plot_data('India')

In [15]:
def log_curve(x, k, x_0, ymax):
    """Evaluate the logistic function ``ymax / (1 + exp(-k * (x - x_0)))``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the curve. Lists, ranges and NumPy
        arrays are all accepted.
    k : float
        Steepness of the curve.
    x_0 : float
        Midpoint: the ``x`` at which the curve equals ``ymax / 2``.
    ymax : float
        Upper asymptote of the curve.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Curve value(s), one per element of ``x``.
    """
    # Coerce to a float array so that lists and ranges work even when the
    # parameters are plain Python floats (``list - float`` raises TypeError).
    x = np.asarray(x, dtype=float)
    return ymax / (1 + np.exp(-k * (x - x_0)))

def fit_logistic(country, horizon=100):
    """Fit a logistic curve to a country's confirmed counts and plot the fit.

    Rows of the notebook-level ``world_data`` frame whose ``Country`` equals
    ``country`` are summed per ``Date``. The per-date ``Confirmed`` totals are
    fitted against their 0-based position in date order using ``log_curve``
    with all three parameters bounded to be non-negative. The fitted
    parameters are printed, and the actual series is plotted together with
    the fitted curve evaluated on day indices ``0 .. horizon - 1``.

    Parameters
    ----------
    country : str
        Value matched exactly against ``world_data['Country']``.
    horizon : int, default 100
        Number of day indices over which the fitted curve is evaluated.

    Returns
    -------
    None
        The parameters are printed and the figure is shown via ``fig.show()``.

    Raises
    ------
    ValueError
        If ``world_data`` has no rows for ``country``.
    """
    country_rows = world_data[world_data['Country'] == country]
    if country_rows.empty:
        # Fail early with a clear message instead of letting curve_fit raise
        # an opaque error on empty input.
        raise ValueError(f"No rows found in world_data for country {country!r}")

    # Only 'Confirmed' is fitted. Selecting a single column also avoids the
    # tuple-style multi-column GroupBy selection that pandas >= 2.0 rejects.
    confirmed = country_rows.groupby('Date')['Confirmed'].sum().to_numpy()
    day_index = np.arange(len(confirmed))

    # The covariance estimate is not used. Bounds keep k, x_0 and ymax >= 0.
    popt, _ = curve_fit(log_curve, day_index, confirmed,
                        bounds=([0, 0, 0], np.inf), maxfev=5000)
    k, x_0, ymax = popt
    print(k, x_0, ymax)

    # Evaluate the fitted curve over the requested horizon.
    projection_days = np.arange(horizon)
    y_fitted = log_curve(projection_days, k, x_0, ymax)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=day_index, y=confirmed,
                             mode='lines',
                             name='Actual'))
    fig.add_trace(go.Scatter(x=projection_days, y=y_fitted,
                             mode='lines+markers',
                             name='Predicted'))
    # Label the figure so it can be read on its own.
    fig.update_layout(
        title=f"Logistic Fit of Confirmed Cases in {country}",
        xaxis_title="Day index (0 = first date in the data)",
        yaxis_title="Confirmed",
    )
    fig.show()
    

In [16]:
# Fit and plot the logistic curve for India; the printed values are k, x_0 and ymax.
fit_logistic('India')

0.19919002883161258 69.88545496749137 3164.675280326663
