In [29]:
#importing required libraries
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.graph_objects as go
from fbprophet import Prophet
import pycountry
import plotly.express as px
from collections import namedtuple
from glob import glob


In [30]:
#Extracting data from all files in the folder

all_files = sorted(glob('csse_covid_19_daily_reports/*.csv'))
df = pd.DataFrame()
for file in all_files:
    df1=pd.read_csv(file)
    df1.rename(columns={'Last Update':'Date', 'Last_Update':'Date','Country/Region':'Country','Country_Region':'Country',
                                'Province_State':'Province/State'}, inplace=True)
    df= pd.concat([df,df1[['Date','Province/State','Country','Confirmed', 'Deaths', 'Recovered']]])
    

In [31]:
df.tail()

Unnamed: 0,Date,Province/State,Country,Confirmed,Deaths,Recovered
3954,2020-10-29 04:24:49,,West Bank and Gaza,51948.0,465.0,44985.0
3955,2020-10-29 04:24:49,,Western Sahara,10.0,1.0,8.0
3956,2020-10-29 04:24:49,,Yemen,2061.0,599.0,1366.0
3957,2020-10-29 04:24:49,,Zambia,16285.0,348.0,15559.0
3958,2020-10-29 04:24:49,,Zimbabwe,8320.0,242.0,7845.0


In [32]:
#Group By country ,date
df2 = df.groupby(["Date", "Country", "Province/State"])[['Date', 'Province/State', 'Country', 'Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
df2

Unnamed: 0,Date,Country,Province/State,Confirmed,Deaths,Recovered
0,1/22/2020 17:00,Hong Kong,Hong Kong,0.0,0.0,0.0
1,1/22/2020 17:00,Macau,Macau,1.0,0.0,0.0
2,1/22/2020 17:00,Mainland China,Anhui,1.0,0.0,0.0
3,1/22/2020 17:00,Mainland China,Beijing,14.0,0.0,0.0
4,1/22/2020 17:00,Mainland China,Chongqing,6.0,0.0,0.0
...,...,...,...,...,...,...
99381,4/6/20 2:17,China,Zhejiang,1264.0,1.0,1230.0
99382,4/6/20 2:21,China,Hubei,67803.0,3212.0,64014.0
99383,4/6/20 2:36,China,Gansu,139.0,2.0,132.0
99384,4/6/20 5:30,China,Shanghai,536.0,6.0,389.0


In [34]:
#Filter by country : India
df.query('Country=="India"').groupby("Date")[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,1/30/20 16:00,1.0,0.0,0.0
1,1/31/2020 23:59,1.0,0.0,0.0
2,1/31/2020 8:15,1.0,0.0,0.0
3,2020-02-02T06:03:08,2.0,0.0,0.0
4,2020-02-03T21:43:02,30.0,0.0,0.0
...,...,...,...,...
252,3/29/20 23:08,1024.0,27.0,95.0
253,3/30/20 22:52,1251.0,32.0,102.0
254,4/2/20 23:25,2543.0,72.0,191.0
255,4/4/20 23:34,3082.0,86.0,229.0


In [35]:
# Day wise

df.groupby('Date').sum()

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/22/2020 17:00,555.0,17.0,28.0
1/23/20 17:00,653.0,18.0,30.0
1/24/20 17:00,941.0,26.0,36.0
1/25/20 17:00,1438.0,42.0,39.0
1/26/20 16:00,2118.0,56.0,52.0
...,...,...,...
4/6/20 2:21,67803.0,3212.0,64014.0
4/6/20 2:36,139.0,2.0,132.0
4/6/20 5:30,536.0,6.0,389.0
4/6/20 6:20,373.0,5.0,57.0


In [36]:
#Transforming Data for Forecasting
#The input to Prophet is always a dataframe with two columns: ds and y. 
#The ds (datestamp) column should be of a format expected by Pandas, ideally YYYY-MM-DD for a date The y column must be numeric
confirmed_cases = df.groupby('Date').sum()['Confirmed'].reset_index()
deaths = df.groupby('Date').sum()['Deaths'].reset_index()
recovered_cases = df.groupby('Date').sum()['Recovered'].reset_index()


In [37]:
confirmed_cases.columns = ['ds','y']
confirmed_cases['ds'] = pd.to_datetime(confirmed['ds'])

In [41]:
m = Prophet(interval_width=0.95)
m.fit(confirmed)
future = m.make_future_dataframe(periods=30)
future_confirmed = future.copy() # for non-baseline predictions later on
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds
271,2020-10-19
272,2020-10-20
273,2020-10-21
274,2020-10-22
275,2020-10-23


In [42]:
#The predict method will assign each row in future a predicted value which it names yhat and uncertainty intervals.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
271,2020-10-19,38412050.0,37701440.0,39154140.0
272,2020-10-20,38673380.0,37883640.0,39438320.0
273,2020-10-21,38942280.0,38087860.0,39785850.0
274,2020-10-22,39208210.0,38337790.0,40077980.0
275,2020-10-23,39485000.0,38555490.0,40395490.0


In [43]:
from fbprophet.plot import plot_plotly, plot_components_plotly

plot_plotly(m, forecast)

In [46]:
plot_components_plotly(m, forecast)
