In [3]:
import json
import os
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
import tqdm
from matplotlib import pyplot as plt
from sklearn.linear_model import HuberRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

# NOTE(review): this suppresses every warning for the rest of the session,
# including any raised by the libraries used below; narrow the filter if
# those warnings matter.
warnings.simplefilter('ignore')

In [5]:
# Location of the combined COVID-19 time-series CSV.
# Set the COVID_DATA_CSV environment variable to point at a local copy; when it
# is unset the original author's absolute path is used, so existing setups keep
# working unchanged.
DATA_CSV = os.environ.get(
    "COVID_DATA_CSV",
    "/Users/sonu/work/aiml/src/datasets/covid-19/time-series-19-covid-combined.csv",
)

df = pd.read_csv(DATA_CSV)
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Recovered,Deaths
0,,Thailand,15.0,101.0,2020-01-22,2,0,0
1,,Thailand,15.0,101.0,2020-01-23,3,0,0
2,,Thailand,15.0,101.0,2020-01-24,5,0,0
3,,Thailand,15.0,101.0,2020-01-25,7,0,0
4,,Thailand,15.0,101.0,2020-01-26,8,2,0


In [47]:
# Minimum confirmed count a row must have to be kept.
k = 10000

df_filtered = df[df['Confirmed'] >= k]

# Parse the dates once for the whole frame rather than re-parsing a country's
# full date list for every single row.
dates = pd.to_datetime(df_filtered['Date'])

# Earliest retained date per country (across all of that country's rows),
# broadcast back onto each row.
first_dates = dates.groupby(df_filtered['Country/Region']).transform('min')

# 1-based day offset from the country's first retained date. Kept as a plain
# list of ints, in row order, because it is later attached to df_filtered.
days_from_k_cases = ((dates - first_dates).dt.days + 1).tolist()

In [48]:
# Attach the day offsets as a new 'days_from_k_case' column; assign() returns a
# new frame, which replaces df_filtered.
df_filtered = df_filtered.assign(days_from_k_case=days_from_k_cases)

In [49]:
# Flatten the per-country groups into two parallel lists (day offset and
# confirmed count), visiting countries in groupby order.
country_groups = df_filtered.groupby(by='Country/Region')
X_raw = [
    day
    for _, df_country in country_groups
    for day in df_country['days_from_k_case'].values
]
y_raw = [
    confirmed
    for _, df_country in country_groups
    for confirmed in df_country['Confirmed'].values
]

# scikit-learn expects a 2-D feature matrix (one column here); targets stay 1-D.
X = np.array(X_raw).reshape(-1, 1)
y = np.array(y_raw)

In [50]:
# Fit a degree-2 polynomial to log(confirmed) against the day offset, using
# the Huber regressor as the final estimator.
model = make_pipeline(PolynomialFeatures(degree=2), HuberRegressor())
model.fit(X, np.log(y))

# Predict for days 0..99 and map the result back out of log-space.
# X_pred stays 1-D for plotting; the model is handed a column view of it.
X_pred = np.arange(0, 100)
y_pred_log = model.predict(X_pred[:, np.newaxis])
y_pred = np.exp(y_pred_log)

In [51]:
# Transparent backgrounds as before, plus a title and axis labels so the chart
# can be understood without reading the code that produced it.
fig = go.Figure(
    layout=go.Layout(
        title=dict(text='Confirmed cases by days since reaching the case threshold'),
        xaxis=dict(title=dict(text='Days since threshold (days_from_k_case)')),
        yaxis=dict(title=dict(text='Confirmed cases')),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
)

# One line per country: confirmed count against the day offset.
for country, df_country in df_filtered.groupby(by='Country/Region'):
    fig.add_trace(
        go.Scatter(
            x=df_country['days_from_k_case'],
            y=df_country['Confirmed'],
            mode='lines',
            name=country
        )
    )

# Overlay the fitted curve as markers. This call stays the last expression in
# the cell so its return value (the figure) is what the notebook displays.
fig.add_trace(
    go.Scatter(
        x=X_pred,
        y=y_pred,
        mode='markers',
        name='Prediction'
    )
)