In [None]:
# Setup libraries
import pandas as pd
import numpy as np
import datetime
from scipy.optimize import curve_fit

import plotly
import plotly.graph_objects as go
plotly.io.renderers.default = 'colab'

!pip install plotly>=4.0.0
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
!chmod +x /usr/local/bin/orca
!apt-get install xvfb libgtk2.0-0 libgconf-2-4
!pip install -U kaleido

In [2]:
# Read the COVID-19 CSV into memory and preview its first three rows
dataframe = pd.read_csv(filepath_or_buffer='/content/owid-covid-data.csv')
dataframe.head(n=3)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


In [3]:
# Slice the full dataset into one frame per country of interest,
# matching on the 'location' column.
location_column = dataframe['location']

vietnam = dataframe.loc[location_column == 'Vietnam']
us = dataframe.loc[location_column == 'United States']
india = dataframe.loc[location_column == 'India']
brazil = dataframe.loc[location_column == 'Brazil']
thailand = dataframe.loc[location_column == 'Thailand']
japan = dataframe.loc[location_column == 'Japan']
singapore = dataframe.loc[location_column == 'Singapore']
china = dataframe.loc[location_column == 'China']
philippines = dataframe.loc[location_column == 'Philippines']
malaysia = dataframe.loc[location_column == 'Malaysia']

# Order here is the order in which traces are added to the comparison plots
countries = [
    vietnam,
    us,
    india,
    brazil,
    thailand,
    japan,
    singapore,
    china,
    philippines,
    malaysia,
]

In [40]:
# Scatter plot of daily new cases, one marker trace per country
fig = go.Figure()
fig.update_layout(title='New Cases', yaxis_title='Cases')

for country in countries:
    # Every row of a per-country frame carries the same location, so take the first
    country_name = country.iloc[0]['location']
    new_case_markers = go.Scatter(
        x=country['date'],
        y=country['new_cases'],
        name=country_name,
        mode='markers',
        marker_symbol='circle-open',
    )
    fig.add_trace(new_case_markers)

# Save a standalone interactive copy, then render inline
fig.write_html('/content/new_cases.html')

fig.show()

In [7]:
# Scatter plot of cumulative cases, one marker trace per country
fig = go.Figure()
fig.update_layout(title='Total Cases', yaxis_title='Cases')

for country in countries:
    # Every row of a per-country frame carries the same location, so take the first
    country_name = country.iloc[0]['location']
    total_case_markers = go.Scatter(
        x=country['date'],
        y=country['total_cases'],
        name=country_name,
        mode='markers',
        marker_symbol='circle-open',
    )
    fig.add_trace(total_case_markers)

# Save a standalone interactive copy, then render inline
fig.write_html('/content/total_cases.html')

fig.show()

In [None]:
# Define gaussian function
def g(x, a, b, c):
    """Evaluate a Gaussian bell curve at ``x``.

    Parameters
    ----------
    x : scalar or array-like supporting numpy arithmetic
        Point(s) at which to evaluate the curve.
    a : float
        Height of the curve at its centre.
    b : float
        Position of the centre.
    c : float
        Width (standard deviation); must be non-zero.

    Returns
    -------
    Same shape as ``x``: ``a * exp(-(x - b)^2 / (2 c^2))``.
    """
    offset = x - b
    exponent = -(offset ** 2) / 2 / c ** 2
    return a * np.exp(exponent)

In [36]:
# Fit a Gaussian to the smoothed daily new cases of one country.
# Outputs used by later cells: country, start_date, x, y, gaus, popt.
start_date = datetime.date(2020, 1, 1)

country = dataframe[dataframe['location'] == 'Singapore']

# Only fit to data from start_date onwards. Filtering on the parsed date
# (rather than taking the last N rows) stays correct when the series begins
# after start_date or is missing days.
row_dates = pd.to_datetime(country['date'])
country = country[row_dates >= pd.Timestamp(start_date)]
if country.empty:
    raise ValueError(f'No rows on or after {start_date} for the selected location')
row_dates = row_dates.loc[country.index]

# Get last date in data
last_date = row_dates.max().date()

# Put new cases on a gap-free daily calendar that starts at start_date, so
# array index i always means "start_date + i days". Days without a report
# (or with a NaN value, which curve_fit rejects) count as 0 cases.
calendar = pd.date_range(start_date, last_date, freq='D')
daily_new_cases = (
    pd.Series(country['new_cases'].to_numpy(dtype=float),
              index=pd.DatetimeIndex(row_dates))
    .reindex(calendar)
    .fillna(0.0)
)

# 3-day moving average; computed once and used for both peak search and fit
y = np.convolve(daily_new_cases.to_numpy(), np.ones(3)/3., mode='same')
x = np.arange(len(y))

# Find peak
peak_index = int(np.argmax(y))
peak_date = start_date + datetime.timedelta(days=peak_index)

# If not peak yet, then estimate peak after 14 days
if peak_date + datetime.timedelta(days=4) > last_date:
    peak = (last_date - start_date).days + 14
else:
    peak = peak_index

# Initial width guess: case-weighted RMS distance (in days) from the peak
sigma = np.sqrt(np.sum(y * (x - peak)**2) / np.sum(y))

def gaus(x, a, x0, sigma):
    """Gaussian with amplitude ``a``, centre ``x0`` and width ``sigma``."""
    return a*np.exp(-(x-x0)**2 / (2*sigma**2))

# Fit a gaussian to data. The amplitude guess is the observed maximum, not a
# constant 1, so the optimiser starts on the scale of the data.
popt, pcov = curve_fit(gaus, x, y, p0=[y.max(), peak, sigma])

In [39]:
# Plot actual and predicted new cases

# Generate date from time.
# Defined before its first use so the cell runs on a fresh kernel.
def gen(peak):
    """Yield 'YYYY-MM-DD' strings for consecutive days starting at start_date.

    ``peak`` is the number of days to generate (the name is historical): the
    generator yields start_date, start_date + 1 day, ... for ``peak`` days.
    """
    for i in range(peak):
        date = start_date + datetime.timedelta(days=i)
        yield date.strftime('%Y-%m-%d')

# Number of days past the last observation to extend the fitted curve
forecast_days = 14
location_name = country.iloc[0].location

fig = go.Figure()

# Observed (smoothed) new cases, one marker per day
fig.add_trace(go.Scatter(x=list(gen(len(x))), y=y, mode='markers',
                         name=f'Actual: {location_name}', marker_symbol='circle-open'))

# Fitted Gaussian evaluated over the observed range plus the forecast window
fig.add_trace(go.Scatter(x=list(gen(len(x) + forecast_days)),
                         y=gaus(np.arange(len(x) + forecast_days), *popt),
                         mode='lines', name=f'Predict: {location_name}'))

fig.update_layout(yaxis_title='New Cases',
                  title=f'Actual New Case & Prediction {location_name}')

fig.write_html(f'/content/prediction_{location_name}.html')

fig.show()