In [None]:
# IMPORTS
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math

In [None]:
%%time
# LOAD TRAIN DATA
# Read the competition's training CSV from the Kaggle input mount into `train`.
train = pd.read_csv(
    filepath_or_buffer='/kaggle/input/covid19-global-forecasting-week-2/train.csv',
)

In [None]:
# Display the last rows of the freshly loaded training frame.
train.tail()

In [None]:
# BUILD REGION LABELS
# One label per row of `train`: "<Province_State> - <Country_Region>" when a
# province is present, otherwise just the country name.
# pd.isna() replaces an identity test against np.nan: `x is not np.nan` only
# recognises the np.nan singleton and would treat None, pd.NA or any other NaN
# float object as a real province (and then fail on the string concatenation).
# zip() pairs the two columns by position, so the result does not depend on the
# frame carrying a default 0..n-1 index.
c = [
    country if pd.isna(province) else province + ' - ' + country
    for province, country in zip(train['Province_State'], train['Country_Region'])
]

print(len(c))

In [None]:
# SCRUB DATA
junk =['Id','Date','Province_State']
train.drop(junk, axis=1, inplace=True)

In [None]:
# Overwrite Country_Region with the per-row labels held in `c`.
train['Country_Region'] = c

In [None]:
# Store both count columns as integers.
for count_col in ('ConfirmedCases', 'Fatalities'):
    train[count_col] = train[count_col].astype(int)

In [None]:
# Display the first rows of `train` after the preceding column changes.
train.head()

In [None]:
# LIST THE DISTINCT REGIONS
# Keep the first occurrence of every region label, in file order (original row
# index preserved). The previous `[0::70]` stride produced the same Series only
# when every region owned exactly 70 contiguous rows; with any other layout it
# silently skipped or repeated regions.
country_list = train['Country_Region'].drop_duplicates()
print(len(country_list))
print(country_list)

In [None]:
def prep_data(train):
    """Split one region's rows into the "outbreak started" subset and the full set.

    Parameters
    ----------
    train : pandas.DataFrame
        Rows to process; must expose a ``ConfirmedCases`` column.

    Returns
    -------
    tuple
        ``(X_train, train)`` where ``X_train`` holds only the rows with
        ``ConfirmedCases > 0`` and ``train`` holds every row. Both frames are
        re-indexed 0..n-1.

    Notes
    -----
    The frame passed in is left untouched: ``reset_index(drop=True)`` returns
    new frames instead of re-indexing the caller's object (or a filtered slice
    of it) in place.
    """
    # PREP TRAIN DATA
    X_train = train[train.ConfirmedCases > 0].reset_index(drop=True)
    train = train.reset_index(drop=True)

    return (X_train, train)

In [None]:
def Calculate_Table ( X_train ):
    """Build first/second differences and fatality ratios row by row.

    Each row of ``X_train.values`` is read positionally: index 1 is used as the
    confirmed-case count and index 2 as the fatality count (this matches a
    frame laid out as label, confirmed, fatalities -- verify the column order
    if the schema changes).

    Parameters
    ----------
    X_train : pandas.DataFrame
        Rows in chronological order. May be empty.

    Returns
    -------
    tuple
        ``(diff_conf, conf_old, diff_fat, fat_old, dd_conf, dc_old, dd_fat,
        df_old, ratios, ratio)`` where

        * ``diff_conf`` / ``diff_fat`` -- per-row first differences (the first
          row is differenced against 0),
        * ``dd_conf`` / ``dd_fat`` -- per-row second differences (again seeded
          with 0),
        * ``ratios`` -- per-row fatalities / confirmed,
        * ``conf_old``, ``fat_old``, ``dc_old``, ``df_old``, ``ratio`` -- the
          values left over from the last row (all 0 for empty input).
    """
    # CALCULATE EXPANSION TABLE
    diff_conf, conf_old = [], 0
    diff_fat, fat_old = [], 0
    dd_conf, dc_old = [], 0
    dd_fat, df_old = [], 0
    ratios = []
    # Defined up front so that an empty frame returns 0.0 instead of raising
    # UnboundLocalError at the return statement.
    ratio = 0.0
    for row in X_train.values:
        diff_conf.append(row[1] - conf_old)
        conf_old = row[1]
        diff_fat.append(row[2] - fat_old)
        fat_old = row[2]
        dd_conf.append(diff_conf[-1] - dc_old)
        dc_old = diff_conf[-1]
        dd_fat.append(diff_fat[-1] - df_old)
        df_old = diff_fat[-1]
        # A row with zero confirmed cases has no meaningful ratio; report 0.0
        # rather than dividing by zero.
        ratio = fat_old / conf_old if conf_old else 0.0
        ratios.append(ratio)

    return diff_conf, conf_old, diff_fat, fat_old, dd_conf, dc_old, dd_fat, df_old, ratios, ratio

In [None]:
def populate_df_features(X_train,diff_conf, diff_fat, dd_conf, dd_fat, ratios):
    """Attach the difference and ratio series to ``X_train`` as new columns.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Frame to extend; it is modified in place.
    diff_conf, diff_fat, dd_conf, dd_fat, ratios : sequence
        One value per row of ``X_train``; stored as ``diff_confirmed``,
        ``diff_fatalities``, ``dd_confirmed``, ``dd_fatalities`` and ``ratios``.

    Returns
    -------
    pandas.DataFrame
        The same ``X_train`` object, now carrying the five extra columns.
    """
    # POPULATE DATAFRAME FEATURES
    new_columns = {
        'diff_confirmed': diff_conf,
        'diff_fatalities': diff_fat,
        'dd_confirmed': dd_conf,
        'dd_fatalities': dd_fat,
        'ratios': ratios,
    }
    # X_train may be a filtered slice of a larger frame. The chained-assignment
    # warning is silenced only for these assignments; the option is restored on
    # exit instead of being switched off for the rest of the session.
    with pd.option_context('mode.chained_assignment', None):
        for column_name, values in new_columns.items():
            X_train[column_name] = values
    return X_train

In [None]:
def fill_nan ( variable):
    """Return 0 when ``variable`` is NaN; otherwise hand ``variable`` back unchanged."""
    return 0 if math.isnan(variable) else variable

In [None]:
def Cal_Series_Avg(X_train,ratio):
    """Average the non-zero entries of each derived column.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Must carry ``diff_confirmed``, ``dd_confirmed``, ``diff_fatalities``,
        ``dd_fatalities`` and ``ratios`` columns.
    ratio : number
        Lower bound for the returned rate.

    Returns
    -------
    tuple
        ``(d_c, dd_c, d_f, dd_f, rate)``: the mean of the non-zero values of
        each column (0 when that mean is NaN), with ``rate`` being the larger
        of the ``ratios`` mean and ``ratio``.
    """
    # CALCULATE SERIES AVERAGES
    def _nonzero_mean(column):
        # Zeros are excluded before averaging; a NaN mean is mapped to 0 by fill_nan.
        return fill_nan(column[column != 0].mean())

    d_c = _nonzero_mean(X_train.diff_confirmed)
    dd_c = _nonzero_mean(X_train.dd_confirmed)
    d_f = _nonzero_mean(X_train.diff_fatalities)
    dd_f = _nonzero_mean(X_train.dd_fatalities)
    rate = max(_nonzero_mean(X_train.ratios), ratio)
    return d_c, dd_c, d_f, dd_f, rate

In [None]:
def apply_taylor(train, d_c, dd_c, d_f, dd_f, rate, first_row=57, last_row=69, horizon=30):
    """Extend the observed series with a second-order Taylor projection.

    The output starts with the observed values for index labels
    ``first_row..last_row`` (inclusive) and is followed by ``horizon``
    projected steps anchored at ``last_row``::

        confirmed(i) = int(ConfirmedCases[last_row] + d_c*i + 0.5*dd_c*i**2)
        fatalities(i) = confirmed(i) * rate          for i = 1..horizon

    Parameters
    ----------
    train : pandas.DataFrame
        Needs ``ConfirmedCases`` and ``Fatalities`` columns and index labels
        covering ``first_row..last_row``.
    d_c, dd_c : number
        First- and second-order increments used for confirmed cases.
    d_f, dd_f : number
        Accepted for signature compatibility; not used.
    rate : number
        Multiplier turning projected confirmed cases into fatalities.
    first_row, last_row : int, optional
        Index labels bounding the observed prefix (defaults 57 and 69).
    horizon : int, optional
        Number of projected steps (default 30).

    Returns
    -------
    tuple of list
        ``(pred_c, pred_f)``, each of length
        ``(last_row - first_row + 1) + horizon`` (43 with the defaults).
        Projected fatalities are not rounded.
    """
    # ITERATE TAYLOR SERIES
    pred_c = list(train.ConfirmedCases.loc[first_row:last_row].astype(int))
    pred_f = list(train.Fatalities.loc[first_row:last_row].astype(int))
    # The anchor value does not change inside the loop, so look it up once.
    anchor = train.ConfirmedCases.loc[last_row]
    for i in range(1, horizon + 1):
        pred_c.append(int(anchor + d_c*i + 0.5*dd_c*(i**2)))
        pred_f.append(pred_c[-1]*rate)
    return pred_c, pred_f

In [None]:
def apply_taylor2(train, d_c, dd_c, d_f, dd_f, rate, first_row=57, last_row=58, horizon=41):
    """Taylor projection anchored early in the series with a longer horizon.

    The output starts with the observed values for index labels
    ``first_row..last_row`` (inclusive) and is followed by ``horizon``
    projected steps anchored at ``last_row``::

        confirmed(i) = int(ConfirmedCases[last_row] + d_c*i + 0.5*dd_c*i**2)
        fatalities(i) = confirmed(i) * rate          for i = 1..horizon

    Parameters
    ----------
    train : pandas.DataFrame
        Needs ``ConfirmedCases`` and ``Fatalities`` columns and index labels
        covering ``first_row..last_row``.
    d_c, dd_c : number
        First- and second-order increments used for confirmed cases.
    d_f, dd_f : number
        Accepted for signature compatibility; not used.
    rate : number
        Multiplier turning projected confirmed cases into fatalities.
    first_row, last_row : int, optional
        Index labels bounding the observed prefix (defaults 57 and 58).
    horizon : int, optional
        Number of projected steps (default 41).

    Returns
    -------
    tuple of list
        ``(pred_c, pred_f)``, each of length
        ``(last_row - first_row + 1) + horizon`` (43 with the defaults).
        Projected fatalities are not rounded.
    """
    # ITERATE TAYLOR SERIES
    pred_c = list(train.ConfirmedCases.loc[first_row:last_row].astype(int))
    pred_f = list(train.Fatalities.loc[first_row:last_row].astype(int))
    # The anchor value does not change inside the loop, so look it up once.
    anchor = train.ConfirmedCases.loc[last_row]
    for i in range(1, horizon + 1):
        pred_c.append(int(anchor + d_c*i + 0.5*dd_c*(i**2)))
        pred_f.append(pred_c[-1]*rate)
    return pred_c, pred_f

In [None]:
# FORECAST EVERY REGION
# pc / pf collect the apply_taylor output and pc2 / pf2 the apply_taylor2
# output, concatenated region after region in country_list order.
pc, pf, pc2, pf2 = [], [], [], []
pred_c, pred_f, pred_c2, pred_f2 = [], [], [], []
for i, country in enumerate(country_list):
    country_data = train[train['Country_Region'] == country]
    X_train, country_data = prep_data(country_data)

    if len(X_train) == 0:
        # No row with confirmed cases: emit an all-zero block of 43 entries,
        # the same length the two apply_taylor variants return by default.
        pred_c = list(np.zeros(43))
        pred_f = list(np.zeros(43))
        pred_c2 = list(np.zeros(43))
        pred_f2 = list(np.zeros(43))
    else:
        (diff_conf, conf_old, diff_fat, fat_old, dd_conf,
         dc_old, dd_fat, df_old, ratios, ratio) = Calculate_Table(X_train)

        X_train = populate_df_features(X_train, diff_conf, diff_fat, dd_conf, dd_fat, ratios)

        d_c, dd_c, d_f, dd_f, rate = Cal_Series_Avg(X_train, ratio)

        # Both variants share the same fitted increments and rate.
        pred_c, pred_f = apply_taylor(country_data, d_c, dd_c, d_f, dd_f, rate)
        pred_c2, pred_f2 = apply_taylor2(country_data, d_c, dd_c, d_f, dd_f, rate)

    pc.extend(pred_c)
    pf.extend(pred_f)
    pc2.extend(pred_c2)
    pf2.extend(pred_f2)

In [None]:
# Display the lengths of the two flattened confirmed-case lists side by side.
len(pc), len(pc2)

In [None]:
# Convert every entry of the four forecast lists with int()
# (fractional parts are truncated, not rounded).
pc, pf, pc2, pf2 = (
    [int(value) for value in series]
    for series in (pc, pf, pc2, pf2)
)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# PLOT CONFIRMED-CASE FORECASTS
# Only the first 1000 entries of each flattened list are drawn. The two series
# are labelled and the axes named so the curves can be told apart.
plt.figure(figsize=(15, 6))
plt.plot(pc[0:1000], label="pc (apply_taylor)")
plt.plot(pc2[0:1000], label="pc2 (apply_taylor2)")
plt.title("Confirmed")
plt.xlabel("Position in flattened forecast list")
plt.ylabel("Confirmed cases")
plt.legend()
plt.show()

In [None]:
# PLOT FATALITY FORECASTS
# Only the first 1000 entries of each flattened list are drawn. The two series
# are labelled and the axes named so the curves can be told apart.
plt.figure(figsize=(15, 6))
plt.plot(pf[0:1000], label="pf (apply_taylor)")
plt.plot(pf2[0:1000], label="pf2 (apply_taylor2)")
plt.title("Fatalities")
plt.xlabel("Position in flattened forecast list")
plt.ylabel("Fatalities")
plt.legend()
plt.show()

In [None]:
# WRITE SUBMISSION
# The submission is built from the apply_taylor2 lists (pc2 / pf2);
# ForecastId simply numbers the rows 1..len(pc2).
my_submission = pd.DataFrame(
    {
        'ForecastId': [position + 1 for position in range(len(pc2))],
        'ConfirmedCases': pc2,
        'Fatalities': pf2,
    }
)
print(my_submission)
my_submission.to_csv(path_or_buf='submission.csv', index=False)