In [1]:
# essential libraries
import json
import random
from urllib.request import urlopen

# storage and analysis
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly
# Initialise plotly's offline notebook mode so figures render inline.
# connected=True loads plotly.js from the CDN instead of embedding it in the
# notebook, so rendering the figures needs internet access.
plotly.offline.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium

# color palette (hex colours shared by the charts, one per case type)
cnf = '#393e46' # confirmed - dark grey
dth = '#ff2e63' # death - red
rec = '#21bf73' # recovered - green
act = '#fe9801' # active case - orange

# converter: register pandas' datetime converters with matplotlib
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# hide warnings
# NOTE: this silences *every* warning for the rest of the session, including
# deprecation warnings raised by the libraries used below.
import warnings
warnings.filterwarnings('ignore')

# html embedding
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import HTML, Javascript, display

# render floats with two decimals in DataFrame output
pd.options.display.float_format = '{:.2f}'.format

***

# Initial Preprocessing and Data Loading

 Code from [Kaggle notebook](https://www.kaggle.com/imdevskp/covid-19-analysis-viz-prediction-comparisons)

In [24]:
# importing datasets
# Names of the four case-count columns used throughout the notebook.
cases = ['Confirmed', 'Deaths', 'Recovered', 'Active']

# Forward slashes resolve on every OS. The previous 'data\covid...' literal
# depended on the invalid '\c' escape sequence and only worked on Windows.
full_table = pd.read_csv('data/covid_19_clean_complete.csv',
                         parse_dates=['Date'])

# Every country/region that appears at least once in the data set.
countries_effected = full_table['Country/Region'].unique().tolist()

# Preview the raw table; it has to be the last expression of the cell,
# otherwise the notebook does not display it.
full_table.head()

In [25]:
# Clean `full_table` in place (safe to re-run: every step is idempotent).

# Fill missing counts BEFORE deriving 'Active'. Deriving it first would turn a
# single missing count into NaN and then into 0 active cases, instead of
# treating only the missing count itself as 0.
count_columns = ['Confirmed', 'Deaths', 'Recovered']
full_table[count_columns] = full_table[count_columns].fillna(0)

# Active cases = confirmed cases that have neither died nor recovered.
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']

# Use a single name for China so it groups as one country.
full_table['Country/Region'] = full_table['Country/Region'].replace('Mainland China', 'China')

# Rows without a province get an empty string rather than NaN.
full_table['Province/State'] = full_table['Province/State'].fillna('')

In [26]:
# latest: rows of the most recent date, split into China and rest of world.
# reset_index() keeps the old row labels in an 'index' column, as before.
full_latest = full_table[full_table['Date'] == full_table['Date'].max()].reset_index()
china_latest = full_latest[full_latest['Country/Region']=='China']
row_latest = full_latest[full_latest['Country/Region']!='China']

# latest condensed: totals per country (per province for China).
# The columns are selected with a *list*; selecting several groupby columns
# with a bare tuple (`[...]['a', 'b']`) raises in pandas >= 2.0.
full_latest_grouped = full_latest.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
china_latest_grouped = china_latest.groupby('Province/State')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
row_latest_grouped = row_latest.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()

# EDA work

  Try to determine which countries are most affected and how active cases and deaths develop over time.

In [5]:
# Rank countries by confirmed cases (highest first) with a fresh 0..n-1 index,
# then shade the numeric cells so larger values stand out.
temp_f = (
    full_latest_grouped
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active
0,China,81003,3203,67017,10783
1,Italy,24747,1809,2335,20603
2,Iran,13938,724,4590,8624
3,"Korea, South",8162,75,510,7577
4,Spain,7798,289,517,6992
5,Germany,5795,11,46,5738
6,France,4513,91,12,4410
7,US,3498,63,12,3423
8,Switzerland,2200,14,4,2182
9,Norway,1221,3,1,1217


In [6]:
# Per-date, per-country frame that drives the animated maps.
# The columns are selected with a list: tuple-style selection of several
# groupby columns raises in pandas >= 2.0.
# NOTE(review): .max() keeps the largest single province/state row of each
# country rather than the country total - confirm that .sum() is not intended.
filter_df = full_table.groupby(['Date', 'Country/Region'])[['Active', 'Deaths']].max().reset_index()

# Dates become strings because they are used as animation frame labels.
filter_df['Date'] = filter_df['Date'].dt.strftime('%m/%d/%Y')

# Marker sizes: a 0.3 power compresses the value range so small outbreaks stay
# visible next to very large ones. Values are clipped at 0 first because a
# negative count would become NaN under a fractional power, and NaN is not a
# valid marker size.
filter_df['Active_size'] = filter_df['Active'].clip(lower=0).pow(0.3)
filter_df['Death_size'] = filter_df['Deaths'].clip(lower=0).pow(0.3)

In [7]:
# Animated world map of active cases: one frame per date, one bubble per
# country, coloured by the active count and sized by its compressed value.
active_map_options = dict(
    title='Active Cases Over Time',
    animation_frame="Date",
    locations="Country/Region",
    locationmode='country names',
    hover_name="Country/Region",
    color="Active",
    range_color=[0, max(filter_df['Active'])],  # fixed scale across all frames
    size='Active_size',
    projection="equirectangular",
    width=900,
)
fig = px.scatter_geo(filter_df, **active_map_options)
fig.show()

In [8]:
# Animated world map of deaths: one frame per date, one bubble per country,
# coloured by the death count and sized by its compressed value.
deaths_map_options = dict(
    title='Deaths Over Time',
    animation_frame="Date",
    locations="Country/Region",
    locationmode='country names',
    hover_name="Country/Region",
    color="Deaths",
    range_color=[0, max(filter_df['Deaths'])],  # fixed scale across all frames
    size='Death_size',
    projection="equirectangular",
    width=900,
)
fig = px.scatter_geo(filter_df, **deaths_map_options)
fig.show()

In [9]:
def get_split_fig(co_list):
    """Show a faceted bar chart of case counts over time for each country.

    For every name in ``co_list`` one plotly figure is displayed with three
    facets (Confirmed, Deaths, Recovered), dates on the x axis and the figure
    titled with the country name. Data is read from the notebook-level
    ``full_table``.

    Parameters
    ----------
    co_list : iterable of str
        Values of the 'Country/Region' column to plot, one figure each.

    Returns
    -------
    None
        Figures are displayed via ``fig.show()``; nothing is returned.
    """
    # Reference code from https://www.kaggle.com/khoongweihao/covid-19-novel-coronavirus-eda-forecasting-cases/notebook
    value_columns = ['Confirmed', 'Deaths', 'Recovered']

    # The aggregation does not depend on the country, so it is computed once
    # instead of once per loop iteration. Columns are selected with a list:
    # tuple-style selection of several groupby columns raises in pandas >= 2.0.
    # NOTE(review): .max() keeps the largest single province/state row of each
    # country rather than the country total - confirm that .sum() is not intended.
    gdf = full_table.groupby(['Date', 'Country/Region'])[value_columns].max()
    gdf = gdf.reset_index()

    for country in co_list:
        temp = gdf[gdf['Country/Region'] == country]
        # Long format: one row per (Date, Case) pair, as px.bar facets expect.
        temp = temp.melt(id_vars='Date', value_vars=value_columns,
                         var_name='Case', value_name='Count')
        fig = px.bar(temp, x="Date", y="Count", color='Case', facet_col="Case",
                     title=country, color_discrete_sequence=[cnf, dth, rec])
        fig.show()

In [10]:
# Countries to break down into Confirmed / Deaths / Recovered bar charts.
countries_to_plot = ["China", "Italy", "Australia", "India"]
get_split_fig(countries_to_plot)

***

# Prediction Using Prophet

### Combined prediction

In [30]:
# Worldwide totals per date, one two-column frame (Date, <count>) per case type.
# The count columns are selected BEFORE summing: `groupby('Date').sum()` on the
# whole table also aggregates every other column (including the text columns),
# which is wasted work and can fail or warn depending on the pandas version.
# The grouping is also done once instead of three times.
daily_totals = full_table.groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum()
confirmed = daily_totals['Confirmed'].reset_index()
deaths = daily_totals['Deaths'].reset_index()
recovered = daily_totals['Recovered'].reset_index()

In [31]:
# Rename the two columns positionally to 'ds' (date) and 'y' (value) - the
# column names Prophet's fit() expects - and make sure 'ds' is datetime typed.
confirmed.columns = ['ds', 'y']
confirmed = confirmed.assign(ds=lambda frame: pd.to_datetime(frame['ds']))

In [None]:
!pip install fbprophet

In [32]:
from fbprophet import Prophet

Unnamed: 0,ds,y
0,2020-01-22,554
1,2020-01-23,652
2,2020-01-24,939
3,2020-01-25,1432
4,2020-01-26,2113
