<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#COVID19-Local-US-CA-Forecasting-(Week-1)" data-toc-modified-id="COVID19-Local-US-CA-Forecasting-(Week-1)-1">COVID19 Local US-CA Forecasting (Week 1)</a></span></li><li><span><a href="#Technical-specification" data-toc-modified-id="Technical-specification-2">Technical specification</a></span><ul class="toc-item"><li><span><a href="#1.-Hardware-overview" data-toc-modified-id="1.-Hardware-overview-2.1">1. Hardware overview</a></span></li><li><span><a href="#2.-Software-overview" data-toc-modified-id="2.-Software-overview-2.2">2. Software overview</a></span></li></ul></li><li><span><a href="#Required-libraries" data-toc-modified-id="Required-libraries-3">Required libraries</a></span></li><li><span><a href="#Reading-data" data-toc-modified-id="Reading-data-4">Reading data</a></span></li><li><span><a href="#Data-understanding-and-wrangling" data-toc-modified-id="Data-understanding-and-wrangling-5">Data understanding and wrangling</a></span></li><li><span><a href="#Modeling" data-toc-modified-id="Modeling-6">Modeling</a></span><ul class="toc-item"><li><span><a href="#Regression" data-toc-modified-id="Regression-6.1">Regression</a></span></li><li><span><a href="#ARIMA" data-toc-modified-id="ARIMA-6.2">ARIMA</a></span><ul class="toc-item"><li><span><a href="#Confirmed-Cases" data-toc-modified-id="Confirmed-Cases-6.2.1">Confirmed Cases</a></span></li><li><span><a href="#Fatalities" data-toc-modified-id="Fatalities-6.2.2">Fatalities</a></span></li></ul></li></ul></li><li><span><a href="#Prediction" data-toc-modified-id="Prediction-7">Prediction</a></span></li></ul></div>

# Technical specification

All data cleaning and model building for this project, including training, validation and testing, was done in Python 3.7.3. The following software and packages need to be installed:

•	Python <br>
•	Numpy <br>
•	Pandas<br>
•	chart_studio<br>
•	plotly<br>
•	matplotlib<br>
•	cufflinks<br>
•	statsmodels<br>
•	pmdarima<br>
•	scikit-learn<br>


# Required libraries

In [None]:
# used for data analysis
import pandas as pd
import numpy as np


# Data visualization libraries
# 1. matplotlib
import matplotlib.pyplot as plt

# 2. plotly
import cufflinks as cf
import plotly.offline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Notebook-wide cufflinks / plotly configuration, run once after the imports.
# NOTE(review): go_offline() is immediately followed by
# set_config_file(offline=False, ...); the two calls look contradictory —
# confirm which mode is actually intended.
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)
init_notebook_mode(connected=True)

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf 
from statsmodels.tsa.seasonal import seasonal_decompose 
#from pmdarima import auto_arima                        
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse

In [None]:
import warnings
# Install a blanket 'ignore' filter: no warning category is given, so every
# warning raised after this cell is suppressed.
# NOTE(review): this also hides any warnings raised by the modelling cells
# below (e.g. deprecations) — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Reading data

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found anywhere under the input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Any results you write to the current directory are saved as output.

In [None]:
# Loading the test dataset
test = pd.read_csv('/kaggle/input/covid19-local-us-ca-forecasting-week-1/ca_test.csv')

# Loading the train dataset
train = pd.read_csv('/kaggle/input/covid19-local-us-ca-forecasting-week-1/ca_train.csv')

# Data understanding and wrangling

In [None]:
# Display the last rows of the training data (the most recent records if the
# file is in date order — TODO confirm).
train.tail()

In [None]:
# Report the dimensions of the training data: row count, then column count.
print('The train data has',train.shape[0],'rows.')
print('The train data has',train.shape[1],'columns.')

In [None]:
# Plot ConfirmedCases and Fatalities against Date for the whole training set,
# one line+marker trace per column.
fig = go.Figure()

trace_specs = (
    ('ConfirmedCases', 'Confirmed Cases'),
    ('Fatalities', 'Fatalities'),
)
for column, label in trace_specs:
    trace = go.Scatter(
        x=train['Date'],
        y=train[column],
        mode='lines+markers',
        name=label,
    )
    fig.add_trace(trace)

fig.update_layout(title="Confirmed Cases and Fatalities in CA",
                  xaxis_title="Date",
                  yaxis_title="Count")

fig.show()

In [None]:
# Zoom in on the rows dated after 2020-03-08 and plot log(1 + count) for both
# series.
# Assumes 'Date' holds ISO-formatted (YYYY-MM-DD) strings, so the string
# comparison below orders chronologically — TODO confirm.
df = train[train['Date'] > '2020-03-08']
fig = go.Figure()

# np.log1p(x) computes log(1 + x), which stays finite when a count is 0.
fig.add_trace(go.Scatter(x=df['Date'], y=np.log1p(df['ConfirmedCases']),
                    mode='lines+markers',
                    name='Confirmed Cases'))
fig.add_trace(go.Scatter(x=df['Date'], y=np.log1p(df['Fatalities']),
                    mode='lines+markers',
                    name='Fatalities'))
# The title and y-axis label state the transform and the date filter, so the
# figure is not mistaken for raw counts over the full period.
fig.update_layout(
    title="Confirmed Cases and Fatalities in CA (log scale, after 2020-03-08)",
    xaxis_title="Date",
    yaxis_title="log(Count + 1)",
)

fig.show()

In [None]:
# Build the modelling frame: the two target columns, indexed by the parsed
# 'Date' column so the series can be addressed by date.
train_data = train[['ConfirmedCases', 'Fatalities']]
train_data.index = pd.to_datetime(train['Date'])

# Modeling

## Regression

In [None]:
# Linear regression


## ARIMA
### Confirmed Cases

In [None]:
import statsmodels.api as sm

# Two stacked panels: autocorrelation (top) and partial autocorrelation
# (bottom) of the confirmed-case counts, each out to 40 lags.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(train['ConfirmedCases'], ax=ax1, lags=40)
fig = sm.graphics.tsa.plot_pacf(train['ConfirmedCases'], ax=ax2, lags=40)

In [None]:
from pmdarima.arima import auto_arima

# Stepwise order search for the confirmed-case series with differencing fixed
# at d=1 and p, q each searched from 1 up to 3.
# NOTE(review): m=12 and start_P=0 are seasonal settings but seasonal=False is
# passed — presumably they have no effect; confirm against the pmdarima docs.
# NOTE(review): the order found here is not passed on; the ARIMA cell below
# hard-codes order (1,1,0).
stepwise_model_cc = auto_arima(
    train['ConfirmedCases'],
    start_p=1, max_p=3,
    start_q=1, max_q=3,
    d=1,
    m=12, start_P=0, seasonal=False,
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
print(stepwise_model_cc.aic())

# Fit the selected model on the same series again; being the last expression,
# the fitted model is also the cell's displayed output.
stepwise_model_cc.fit(train['ConfirmedCases'])

In [None]:
# Fit an ARIMA model with order (1,1,0) to the date-indexed confirmed-case
# series and display the fit summary.
# NOTE(review): `arima_model` / `arima_result` are reused by the Fatalities
# section further down, so these names only refer to the confirmed-case model
# until that cell runs — execute the notebook top to bottom.
arima_model = ARIMA(train_data['ConfirmedCases'], order = (1,1,0))
arima_result = arima_model.fit()
arima_result.summary()

In [None]:
# Predict confirmed cases for 2020-03-12 through 2020-04-23 and name the
# resulting series "ARIMA Predictions".
# typ="levels" presumably requests predictions on the scale of the original
# series rather than of the differenced series — confirm against the
# statsmodels docs.
# Uses whatever `arima_result` currently holds; it must still be the
# confirmed-case fit from the preceding cell.
arima_pred_conf = arima_result.predict(start = '2020-03-12', end = '2020-04-23', typ="levels").rename("ARIMA Predictions")

In [None]:
# Overlay the ARIMA prediction for confirmed cases on the observed training
# values.
fig = go.Figure()

series_to_plot = (
    ('Prediction', arima_pred_conf.index, arima_pred_conf.values),
    ('Actual', train['Date'], train['ConfirmedCases']),
)
for label, x_values, y_values in series_to_plot:
    fig.add_trace(
        go.Scatter(x=x_values, y=y_values, mode='lines+markers', name=label)
    )

fig.update_layout(title="Confirmed Cases in CA",
                  xaxis_title="Date",
                  yaxis_title="Count")

fig.show()

### Fatalities

In [None]:
import statsmodels.api as sm

# Two stacked panels: autocorrelation (top) and partial autocorrelation
# (bottom) of the fatality counts, each out to 40 lags.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(train['Fatalities'], ax=ax1, lags=40)
fig = sm.graphics.tsa.plot_pacf(train['Fatalities'], ax=ax2, lags=40)

In [None]:
from pmdarima.arima import auto_arima

# Stepwise order search for the fatalities series with differencing fixed at
# d=1 and p, q each searched from 1 up to 3.
# NOTE(review): m=12 and start_P=0 are seasonal settings but seasonal=False is
# passed — presumably they have no effect; confirm against the pmdarima docs.
stepwise_model_f = auto_arima(
    train['Fatalities'],
    start_p=1, max_p=3,
    start_q=1, max_q=3,
    d=1,
    m=12, start_P=0, seasonal=False,
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
print(stepwise_model_f.aic())

In [None]:
# Fit an ARIMA model with order (1,1,0) to the date-indexed fatalities series
# and display the fit summary.
# NOTE(review): this rebinds `arima_model` / `arima_result`, replacing the
# confirmed-case model objects created earlier in the notebook.
arima_model = ARIMA(train_data['Fatalities'], order = (1,1,0))
arima_result = arima_model.fit()
arima_result.summary()

In [None]:
# Predict fatalities for 2020-03-12 through 2020-04-23 and name the resulting
# series "ARIMA Predictions".
# typ="levels" presumably requests predictions on the scale of the original
# series rather than of the differenced series — confirm against the
# statsmodels docs.
arima_pred_fatal = arima_result.predict(start = '2020-03-12', end = '2020-04-23', typ="levels").rename("ARIMA Predictions")

# Prediction

In [None]:
# Put both forecast series side by side, keyed by their target column names.
frame = dict(ConfirmedCases=arima_pred_conf, Fatalities=arima_pred_fatal)
df = pd.DataFrame(frame)

# Index the test rows by parsed date, then left-merge on the index so every
# test row is kept and picks up the forecasts for its date.
test.index = pd.to_datetime(test['Date'])
final = pd.merge(
    test,
    df,
    how='left',
    left_index=True,
    right_index=True,
)
final.head()

In [None]:
# Write the three submission columns to submission.csv in the working
# directory, without the date index.
final[['ForecastId','ConfirmedCases','Fatalities']].to_csv('submission.csv',index=False)