### Estimate the confirmed COVID-19 cases in India, based on global (country/region-wise) developments of confirmation rates

Updated on 20-03-08

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Load the case report, parse the 'Date' column into timestamps, and order the
# rows by country and then chronologically, with a fresh 0..n-1 index.
df = (
    pd.read_csv('../input/corona-virus-report/covid_19_clean_complete.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(by=['Country/Region', 'Date'])
    .reset_index(drop=True)
)

In [None]:
# Replace every missing value in the frame with 0.
df = df.fillna(value=0)
# Preview: the previous row's 'Confirmed' value within each
# (country, province) group. The result is only displayed, not stored.
df.groupby(by=['Country/Region', 'Province/State'])['Confirmed'].shift(periods=1)

We use a 5-day lag window (the confirmed counts of the five preceding days) as features to estimate the future development.

In [None]:
# Sorted array of the distinct country/region names present in the data.
np.unique(df['Country/Region'].to_numpy())

In [None]:
# Number of rows recorded for India.
df[df['Country/Region'].eq('India')].shape[0]

In [None]:
# Display the column labels currently present in the frame.
df.columns

In [None]:
# Build the lag features: Confirmed_Lag_d holds the 'Confirmed' value from
# d+1 rows earlier within the same (country, province) series, so
# Confirmed_Lag_0 is the previous row and Confirmed_Lag_4 is five rows back.
# A grouped shift computes each lag for all groups in one vectorised call
# instead of writing into the frame group by group.
n_lags = 5
confirmed_by_series = df.groupby(['Country/Region', 'Province/State'])['Confirmed']
for d in range(n_lags):
    df[f'Confirmed_Lag_{d}'] = confirmed_by_series.shift(d + 1)
# The first rows of every series have no history; treat the missing lags as 0.
df = df.fillna(0)

In [None]:
# Names of all columns whose label contains 'Lag_' (the model inputs).
X_mask = df.filter(like='Lag_').columns.tolist()
# Display the feature matrix.
df[X_mask]

In [None]:
from lightgbm import LGBMRegressor

# Gradient-boosted regressor with default hyper-parameters, trained on every
# row of the frame: inputs are the lag columns, target is 'Confirmed'.
model = LGBMRegressor()
model.fit(df[X_mask], df['Confirmed'])

### India Forecast

In [None]:
from datetime import timedelta
pred_steps = 23  # number of future days to forecast

# The most recent India row seeds the recursive forecast.
history = df.loc[df['Country/Region']=='India']
history0 = history.iloc[-1]
# A single row of a mixed-dtype frame is an object Series; cast the lag values
# to float so the rolling window stays a plain numeric array.
pred_init = history0[X_mask].to_numpy(dtype=float)
pred_init_confirmed = history0['Confirmed']

# Advance the window by one day: shift every lag one slot back and put the
# latest observed count in front (slot 0 = most recent day).
print('History 0: ', pred_init)
pred_init = np.roll(pred_init, 1)
pred_init[0] = pred_init_confirmed
print('Pred 0: ', pred_init)

# Recursive multi-step forecast: each prediction is fed back in as the newest
# lag for the following step.
pred = np.zeros(pred_steps)
for d in range(pred_steps):
    # predict() is given a single row; take its one element explicitly rather
    # than assigning a 1-element array to a scalar slot (deprecated in NumPy).
    y = float(model.predict(pred_init.reshape(1, -1))[0])
    pred_init = np.roll(pred_init, 1)
    pred_init[0] = y
    pred[d] = y

# One forecast date per step, starting the day after the last observation.
dt_rng = pd.date_range(start=history0['Date'] + timedelta(days=1), periods=pred_steps, freq='D').values
preds = pd.Series(data=pred, index=dt_rng)

In [None]:
from matplotlib import pyplot as plt

# Draw the observed India history and the forecast on one shared axis.
fig, ax = plt.subplots(figsize=(16, 5))
for series in (history.set_index('Date')['Confirmed'], preds):
    series.plot(ax=ax, marker='o')

fig.tight_layout()