In [None]:
# !pip install fbprophet

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file that Kaggle mounted under the input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

### Data Gathering and Preprocessing:

In [None]:
def get_active_indian_case(
    csv_path="/kaggle/input/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv",
    country="India",
):
  """Return the per-date confirmed-case totals for one country.

  The CSV is expected to be in "wide" layout: the identifier columns
  'Province/State', 'Country/Region', 'Lat' and 'Long', followed by one
  column per date holding the count for that date.

  Parameters
  ----------
  csv_path : str
      Location of the confirmed-cases CSV. Defaults to the Kaggle dataset path.
  country : str
      Value of 'Country/Region' to keep. Defaults to "India".

  Returns
  -------
  pandas.DataFrame
      Two columns: 'Date' (the original column header, still a string) and
      'positive_count' (the sum over all rows of the country for that date).
      Rows come back in string-sorted 'Date' order, not chronological order.
  """
  confirmed = pd.read_csv(csv_path)
  # Filter data related to the requested country
  country_rows = confirmed[confirmed['Country/Region'] == country]
  # Reshape wide -> long: every date column becomes a (Date, positive_count) row
  long_df = country_rows.melt(
      id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
      var_name="Date",
      value_name="positive_count",
  )
  # Sum counts per day (a country may be split over several province rows)
  positive_per_day = long_df.groupby("Date", as_index=False)['positive_count'].sum()
  return positive_per_day[['Date', 'positive_count']]

# Load the per-date confirmed-case totals for India; the following cells rename, parse and sort this frame.
positive_cases = get_active_indian_case()


In [None]:
# Peek at five randomly chosen rows to check the frame's layout.
positive_cases.sample(n=5)

So our data frame consists of 2 columns, the Date and Count of positive cases on that date.

The input to Prophet is always a dataframe with two columns: ds and y. The ds (datestamp) column should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. The y column must be numeric, and represents the measurement we wish to forecast.

In [None]:
# Prophet requires the date column to be named `ds` and the target column `y`.
positive_cases = positive_cases.rename(columns={"Date": "ds", "positive_count": "y"})

In [None]:
# Turn the date strings into real timestamps, then order the rows chronologically.
positive_cases = (
    positive_cases
    .assign(ds=pd.to_datetime(positive_cases['ds']))
    .sort_values(by='ds')
)

If you skim through the raw data you will realise that the dates are not in chronological order while they are stored as “string” objects: compared as strings, the date 2/19/20 sorts before 2/2/20, which defies nature, right?

This would give us incorrect plots of positive_cases['y']. Hence, we convert the date strings to pandas datetime values (Timestamps) and sort the rows by them.

In [None]:
# Plot the counts against the `ds` column so the x-axis really shows the
# dates that its "Date" label promises (rather than the row position).
ax = positive_cases.plot(x='ds', y='y', legend=False, title="#PositiveCases Vs Day");
ax.set(xlabel="Date", ylabel="#PositiveCases");

From the graph we can deduce that the trend is upward (which is bad) and shows no sign of seasonality or cycles (thank God, we don’t want it to recur at regular intervals. PS: Go Corona Go!)

In [None]:
# Hold out the last TEST_DAYS rows (the frame is sorted by date) for evaluation;
# everything before them is used to fit the models.
TEST_DAYS = 4

# .copy() makes `train` an independent frame, so adding a column to it later
# does not raise pandas' SettingWithCopyWarning.
train = positive_cases.iloc[:-TEST_DAYS].copy()

# Keep the held-out target as a Series indexed by date so it can be drawn
# directly on top of the forecast plots.
test = positive_cases.iloc[-TEST_DAYS:].set_index("ds")["y"]

In [None]:
# Model Initialize
from fbprophet import Prophet
m = Prophet()
m.fit(train)
# Predictions are made on a dataframe with a column `ds` containing the dates
# for which a prediction is to be made. Prophet.make_future_dataframe builds
# such a dataframe, extending a given number of days into the future. By
# default it also includes the dates from the history, so the plot shows the
# model fit as well as the forecast.
future_dates = m.make_future_dataframe(periods=10)
# Prediction
forecast = m.predict(future_dates)
# Register pandas' date converters with matplotlib before plotting the datetime axis.
pd.plotting.register_matplotlib_converters()
# Draw the forecast, then overlay the held-out test points on the same axes.
ax = forecast.plot(x='ds', y='yhat', label='Our forecast', legend=True, figsize=(12, 8))
test.plot(y='y', label='Test Positive Counts', legend=True, ax=ax)
# Label the axes like the other forecast plots in this notebook.
ax.set(xlabel="Date", ylabel="#PositiveCases");

So our model is not that accurate: our blue line doesn’t properly follow the trend. Real time series frequently have abrupt changes in their trajectories. By default, Prophet will automatically detect these changepoints and will allow the trend to adapt appropriately. However, if you wish to have finer control over this process (e.g., Prophet missed a rate change, or is overfitting rate changes in the history), then there are several input arguments you can use.


In [None]:
# Model Initialize
from fbprophet import Prophet
# Tune the trend instead of relying on Prophet's automatic changepoint detection:
# pass hand-picked changepoint dates and an explicit changepoint prior scale.
# NOTE(review): Prophet's docs indicate `changepoint_range` is not used when
# explicit `changepoints` are supplied -- confirm, and drop it if redundant.
m = Prophet(
    changepoint_range=0.95,
    changepoint_prior_scale=0.1,
    changepoints=['2020-03-17', '2020-03-31', '2020-04-05'],
)
m.fit(train)

# Extend the history by 10 days and predict over history + future.
future_dates = m.make_future_dataframe(periods=10)
# Prediction
forecast = m.predict(future_dates)

# Draw the forecast, then overlay the held-out test points on the same axes.
ax = forecast.plot(x='ds', y='yhat', label='Our forecast', legend=True, figsize=(12, 8))
test.plot(y='y', label='Test Positive Counts', legend=True, ax=ax)#,xlim=['2020-04-01','2020-04-15'])
ax.set(xlabel="Date", ylabel="#PositiveCases");

Voilà!! The test data points are close to the forecast points.


### Doomsday ??

Prophet implements another growth trend model for g(t), a Nonlinear, Saturating Growth. It is represented in the form of the logistic growth model. It is suitable for problems where growth saturates as it reaches the upper or lower capacity. 

In our problem this capacity is our population: it makes sense that the number of COVID cases will saturate when it reaches the population limit. Let's predict when we will reach Doomsday.

In [None]:
# Model Initialize
from fbprophet import Prophet
m = Prophet(growth = 'logistic')
# Carrying capacity for the logistic trend: the population figure used as the
# upper limit the case count can saturate at.
population = 1380004385
# Logistic growth needs a `cap` column on the training frame. assign() builds
# a new frame instead of writing into a slice of `positive_cases`, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run.
train = train.assign(cap=population)
m.fit(train)
# Future Prediction
future_dates = m.make_future_dataframe(periods=120)
# The frame passed to predict() must carry the same capacity column.
future_dates['cap'] = population
forecast = m.predict(future_dates)
# Plotting
ax = forecast.plot(x='ds', y='yhat', label='Our forecast', legend=True, figsize=(12, 8))
test.plot(y='y', label='Test Positive Counts', legend=True, ax=ax)
ax.set(xlabel="Date", ylabel="#PositiveCases");

In [None]:
# So, going by this forecast, we have approximately 120 days left until Doomsday!!