In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily flatten the directory walk into full file paths and echo each one,
# so every file available under the Kaggle input directory is listed.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# Any results you write to the current directory are saved as output.

In [None]:
## 1. DATA READING
# Load the competition's training and test tables from the Kaggle input folder.
train_csv_path = '/kaggle/input/covid19-global-forecasting-week-1/train.csv'
test_csv_path = '/kaggle/input/covid19-global-forecasting-week-1/test.csv'
train_ds, test_ds = (pd.read_csv(csv_path) for csv_path in (train_csv_path, test_csv_path))

In [None]:
## 2. RENAME COLUMNS (the split of the training data into X and y happens later, when the models are fitted)
# The raw headers contain '/', which rules out attribute-style access
# (e.g. train_ds.CountryRegion), so both frames get the same slash-free names.
slash_free_names = {'Province/State': 'ProvinceState', 'Country/Region': 'CountryRegion'}
for frame in (train_ds, test_ds):
    frame.rename(columns=slash_free_names, inplace=True)
train_ds.head()

In [None]:
## 3. DEALING WITH MISSING VALUES
# Names of the training columns that hold at least one null entry.
column_has_null = train_ds.isnull().any()
cols_with_missing = [col for col, has_null in column_has_null.items() if has_null]
# Countries for which at least one training row carries a province/state value.
province_present = train_ds['ProvinceState'].notna()
countries_with_PSinfo = train_ds.loc[province_present, 'CountryRegion'].unique()
# Only the Province/State column contains missing values. Only Australia, Canada, China,
# Denmark, France, Netherlands, USA and UK contain Province/State info.

In [None]:
## 4. ENCODE DATE
from datetime import date
# Encode every training date as the whole number of days elapsed since the
# earliest training date (which therefore becomes day 0).
#
# The column is assigned in one vectorized step instead of element-wise
# `train_ds.Date[i] = ...`: that chained assignment raises
# SettingWithCopyWarning and silently stops updating the frame when pandas
# copy-on-write is enabled.
#
# The guard makes the cell safe to re-run: once the column is numeric the
# encoding has already been applied, and `d0` from the first run is kept.
if not pd.api.types.is_numeric_dtype(train_ds['Date']):
    train_dates = pd.to_datetime(train_ds['Date'], format='%Y-%m-%d')
    # `d0` stays a plain `datetime.date` because later cells subtract it from
    # `date(...)` objects / wrap it in a Timestamp when encoding the test set.
    d0 = train_dates.min().date()
    train_ds['Date'] = (train_dates - pd.Timestamp(d0)).dt.days

In [None]:
## 5. PREPARING TEST DATA
# Work on a copy so the raw test frame (still needed for ForecastId when the
# submission is written) keeps its original string dates.
test_ds_cp = test_ds.copy()
# Encode test dates on the same scale as the training data: days since `d0`,
# the earliest training date. Done as one vectorized assignment rather than
# element-wise `test_ds_cp.Date[i] = ...`, which is chained assignment and does
# not reliably write back into the frame (no-op under pandas copy-on-write).
test_dates = pd.to_datetime(test_ds['Date'], format='%Y-%m-%d')
test_ds_cp['Date'] = (test_dates - pd.Timestamp(d0)).dt.days

In [None]:
## 6. CREATING POLYNOMIAL REGRESSION MODEL FOR EACH COUNTRY OR PROVINCE(if data contains info)
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Degree of the polynomial fitted to each region's time series.
POLY_DEGREE = 7
# Positional columns of train_ds used for fitting. Kept positional to match the
# original notebook; presumably Date / ConfirmedCases / Fatalities -- verify
# against train_ds.columns if the input schema changes.
DATE_POS, CASES_POS, DEATHS_POS = 5, 6, 7

# One fitted (feature expander, linear regressor) pair per region and target:
# "cc" = the column at CASES_POS, "f" = the column at DEATHS_POS.
# Keys are the bare province name when a province is given, else the country name.
# NOTE(review): because keys are bare names, a province and a country that share
# a name would overwrite each other's models (last one fitted wins).
models_poly_cc = {}
models_lin_cc = {}
models_poly_f = {}
models_lin_f = {}


def _fit_poly_regression(day_numbers, targets):
    """Fit a degree-POLY_DEGREE polynomial regression of `targets` on `day_numbers`.

    Parameters:
        day_numbers: 2-D array with a single column (the encoded day).
        targets: 1-D array of target values, one per row of `day_numbers`.

    Returns:
        (expander, regressor): the PolynomialFeatures fitted on the
        single-column input and the LinearRegression fitted on the expanded
        features. Targets are reshaped to a column so `predict` returns a
        2-D array, which the prediction cell indexes with `[0, 0]`.
    """
    expander = PolynomialFeatures(degree=POLY_DEGREE)
    expanded_days = expander.fit_transform(day_numbers)
    regressor = LinearRegression()
    regressor.fit(expanded_days, targets.reshape(-1, 1))
    return expander, regressor


def _fit_region_models(region_key, region_rows):
    """Fit both target models on `region_rows` and store them under `region_key`."""
    day_numbers = region_rows.iloc[:, DATE_POS:DATE_POS + 1].values
    models_poly_cc[region_key], models_lin_cc[region_key] = _fit_poly_regression(
        day_numbers, region_rows.iloc[:, CASES_POS].values)
    models_poly_f[region_key], models_lin_f[region_key] = _fit_poly_regression(
        day_numbers, region_rows.iloc[:, DEATHS_POS].values)


for country in train_ds.CountryRegion.unique():
    country_rows = train_ds.loc[train_ds.CountryRegion == country]
    if country not in countries_with_PSinfo:
        # No province breakdown: a single model pair for the whole country.
        _fit_region_models(country, country_rows)
        continue
    # Province breakdown available: one model pair per province, each fitted once.
    for province in country_rows['ProvinceState'].unique():
        if pd.isna(province):
            # `== NaN` never matches, so rows without a province are selected
            # with isna() and stored under the country name, which is the key
            # the prediction cell derives for rows with a null province.
            _fit_region_models(country, country_rows[country_rows['ProvinceState'].isna()])
        else:
            _fit_region_models(province, country_rows[country_rows['ProvinceState'] == province])

In [None]:
## 7. MAKING PREDICTIONS
# Model lookup key for every test row: the province when one is given,
# otherwise the country.
province_missing = test_ds_cp.ProvinceState.isnull()
test_ds_cp['ProvinceCountry'] = np.where(province_missing, test_ds_cp.CountryRegion, test_ds_cp.ProvinceState)

test_preds_cc = []
test_preds_f = []

for row_label, test_row in test_ds_cp.iterrows():
    region_key = test_row['ProvinceCountry']
    # A single sample with a single feature: the encoded day number.
    day_as_matrix = [[test_row['Date']]]

    # fit_transform re-derives the polynomial expansion for this one-row input
    # before the matching linear model is evaluated on it.
    expanded_cc = models_poly_cc[region_key].fit_transform(day_as_matrix)
    estimate_cc = models_lin_cc[region_key].predict(expanded_cc)
    expanded_f = models_poly_f[region_key].fit_transform(day_as_matrix)
    estimate_f = models_lin_f[region_key].predict(expanded_f)

    # Predictions come back as 1x1 arrays; the absolute value is stored so
    # no negative counts reach the submission.
    test_preds_cc.append(abs(estimate_cc[0, 0]))
    test_preds_f.append(abs(estimate_f[0, 0]))

In [None]:
## 8. WRITING TO CSV
# Assemble the submission: the forecast ids from the untouched test frame next
# to the two prediction lists, then write it without the index column.
submission_columns = {
    'ForecastId': test_ds['ForecastId'],
    'ConfirmedCases': test_preds_cc,
    'Fatalities': test_preds_f,
}
test_ds_out = pd.DataFrame(submission_columns)
test_ds_out.to_csv('submission.csv', index=False)