In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from fbprophet import Prophet

In [None]:
# Re-register pandas' date/time converters with matplotlib. Per the original
# note, plots come out broken after the Prophet import unless this is called.
pd.plotting.register_matplotlib_converters()

In [None]:
# Load the train and test splits from CSV files in the working directory.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

# Both frames together, in (train, test) order.
combined = [train_data, test_data]

# EDA

In [None]:
# Peek at the first five rows of the training frame.
train_data.head(n=5)

In [None]:
# Summary statistics of the training frame, as produced by DataFrame.describe().
train_data.describe()

In [None]:
# Totals per date across all rows sharing that date.
# The value columns are selected explicitly so the sum never touches the text
# columns (e.g. Country_Region) or the row Id: recent pandas versions no longer
# silently discard non-numeric columns in groupby().sum().
# NOTE(review): assumes ConfirmedCases and Fatalities are the only columns worth
# summing here - confirm against the CSV schema.
total_cases = train_data.groupby('Date')[['ConfirmedCases', 'Fatalities']].sum()
total_cases.head()

In [None]:
# Two side-by-side panels: all summed series on the left, fatalities alone on
# the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(17, 7))
total_cases.plot(ax=ax1)
total_cases['Fatalities'].plot(ax=ax2, color='orange')

for ax, title in ((ax1, "Global confirmed cases"), (ax2, "Fatalities")):
    ax.set_title(title, size=13)
    ax.set_ylabel("Number of cases", size=13)
    ax.set_xlabel("Date", size=13)
    # Rotate the tick labels that the plot call already placed. Passing the
    # whole index to set_xticklabels() would assign the first few dates to
    # whatever ticks exist (wrong labels) and raises on newer matplotlib when
    # the label count does not match the tick count.
    ax.tick_params(axis='x', labelrotation=45)

In [None]:
# Totals per date for every row whose Country_Region is not 'China'.
# The value columns are selected explicitly so text columns and the row Id are
# never summed (recent pandas no longer drops non-numeric columns silently).
# NOTE(review): assumes ConfirmedCases and Fatalities are the only columns worth
# summing here - confirm against the CSV schema.
outside_china = train_data[train_data['Country_Region'] != 'China']
total_cases_without_china = (
    outside_china.groupby('Date')[['ConfirmedCases', 'Fatalities']].sum()
)

# Two side-by-side panels: both series on the left, fatalities alone on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(17, 7))
fig.suptitle('Excluding china', fontsize=16)
total_cases_without_china.plot(ax=ax1)
total_cases_without_china['Fatalities'].plot(ax=ax2, color='orange')

for ax, title in ((ax1, "Confirmed cases and fatalities"), (ax2, "Fatalities")):
    ax.set_title(title, size=13)
    ax.set_ylabel("Number of cases", size=13)
    ax.set_xlabel("Date", size=13)
    # Rotate the tick labels that the plot call already placed. The previous
    # set_xticklabels(labels=total_cases.index, ...) relabelled the ticks with
    # the first few entries of a *different* frame's index, and raises on newer
    # matplotlib when the label count does not match the tick count.
    ax.tick_params(axis='x', labelrotation=45)

# Convert Train data to Time Series

In [None]:
# Index the data by parsed datetimes so it can be used as a time series.
#
# set_index() returns a NEW frame. The earlier `ts = train_data` was only an
# alias, so assigning ts.index also rewrote train_data's index; after that,
# re-running any earlier groupby(['Date']) cell fails because 'Date' is both an
# index level and a column. Passing a Series (not a column name) keeps the
# original 'Date' column in place, matching the previous result.
ts = train_data.set_index(pd.to_datetime(train_data['Date']))
ts.head()

In [None]:
# Narrow the series to Germany and keep only the two value columns.
# This cell rebinds `ts`, so it expects the unfiltered frame from the cell above.
is_germany = ts['Country_Region'] == 'Germany'
ts = ts.loc[is_germany, ['ConfirmedCases', 'Fatalities']]
ts

In [None]:
# Line chart of both series over the datetime index, with grid lines enabled.
ts.plot.line(grid=True)

In [None]:
# Row-over-row relative change of each series, drawn as a line chart.
relative_change = ts.pct_change()
relative_change.plot.line(grid=True)

In [None]:
# Scatter fatalities against confirmed cases to show how the two move together.
fig, ax = plt.subplots()
ax.scatter(ts['ConfirmedCases'], ts['Fatalities'])
ax.set_title("Slightly exponential trend of Fatalities and Confirmed Cases")
ax.set_xlabel("Confirmed Cases")
ax.set_ylabel("Fatalities")
plt.show()

# Regression and correlation between Fatalities and ConfirmedCases

In [None]:
# Pairwise Pearson correlation between the raw levels of the two series.
ts.corr(method='pearson')

In [None]:
# Correlate the row-over-row relative changes rather than the raw levels.
# pct_change() divides by the previous value, so a step up from 0 yields +/-inf.
# pandas does not treat inf as missing, and a single inf turns the affected
# correlations into NaN - so map infinities to NaN and let corr() skip them.
relative_change = ts.pct_change().replace(
    [float('inf'), float('-inf')], float('nan')
)
relative_change.corr()

In [None]:
# Regress ConfirmedCases on Fatalities with ordinary least squares.
# add_constant() supplies the constant column so that OLS estimates an intercept.
regressors = sm.add_constant(ts['Fatalities'])
model = sm.OLS(ts['ConfirmedCases'], regressors).fit()

In [None]:
# Show the matrix that add_constant() builds from ConfirmedCases.
# NOTE(review): the OLS fit in the previous cell uses Fatalities as the
# regressor, not ConfirmedCases - confirm which matrix was meant to be inspected.
confirmed_with_const = sm.add_constant(ts['ConfirmedCases'])
confirmed_with_const

In [None]:
# Summary of the fitted OLS regression. `model` is rebound to a Prophet instance
# further down, so this cell must run before the forecasting section.
model.summary()

# Univariate TS Forecasting using Facebook Prophet library

In [None]:
# Prophet expects a frame with a 'ds' (timestamp) and a 'y' (value) column.
# Build it from the confirmed-case series, taking timestamps from the index.
confirmed = ts[['ConfirmedCases']].rename(columns={'ConfirmedCases': 'y'})
prophet_ts = confirmed.assign(ds=confirmed.index)[['ds', 'y']]

# Fit a Prophet model with default settings (this rebinds `model`).
model = Prophet()
model.fit(prophet_ts)

In [None]:
# Extend the fitted history by a few periods and predict over the whole range.
horizon = 5
future = model.make_future_dataframe(periods=horizon)
forecast = model.predict(future)

# Last rows of the forecast: point estimate plus its lower/upper bounds.
forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast.loc[:, forecast_columns].tail()

In [None]:
# Plot the forecast. The returned Figure is bound to a name because leaving it
# as the cell's last bare expression makes Jupyter render the same plot twice
# (once inline, once as the cell's output value).
forecast_fig = model.plot(forecast)

In [None]:
# Plot the forecast components. The returned Figure is bound to a name because
# leaving it as the cell's last bare expression makes Jupyter render the same
# plot twice (once inline, once as the cell's output value).
components_fig = model.plot_components(forecast)