# <center>COVID 19 ANALYSIS<center>

![title](Res/COVID_19.jpg)

### INTRODUCTION
+ In Junary of 2020 initial report came about posible outbreak within China provincy of Hubei. Since then outbreake has developed through other Chinas provenices and now through most of the continents. While dataset we have is considered as underestimed in terms of numbers, these analysis will present the best guess in effort to describe spread and of the virus and its rates. 

#### Libraries

In [1]:
%matplotlib inline
# Import dependencies
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import plotly.figure_factory as ff
from plotly import subplots
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from datetime import date
from fbprophet import Prophet
import math

#### Preparing Data Sets

In [2]:
# Importing data
covid = pd.read_csv("C:\\Users\\slavi\\PycharmProjects\\COVID19\\Source\\covid_19_data.csv", parse_dates=['ObservationDate'])
conf_rw = pd.read_csv("C:\\Users\\slavi\\PycharmProjects\\COVID19\\Source\\time_series_covid_19_confirmed.csv")

# Formating covid dataset
covid = covid.drop('SNo', axis =1)
covid.columns = ['DATE', 'STATE','COUNTRY','LAST UPDATED','CONFIRMED','DEATH', 'RECOVERED']
covid['DATE'] = covid['DATE'].apply(pd.to_datetime).dt.normalize() 
covid['LAST UPDATED'] = covid['LAST UPDATED'].apply(pd.to_datetime).dt.normalize()

# Selecting subset of information needed in conf_rw dataframe
conf_rw = conf_rw.loc[:,['Province/State','Country/Region','Lat','Long']]
conf_rw.columns = ['STATE','COUNTRY','LAT','LONG']

# Creating covid_geo data frame that includes long and lat. 
covid_geo = pd.merge(covid,conf_rw[['STATE','LAT','LONG']],on='STATE', how='left')

covid.tail(5)

Unnamed: 0,DATE,STATE,COUNTRY,LAST UPDATED,CONFIRMED,DEATH,RECOVERED
2694,2020-02-28,"Seattle, WA",US,2020-02-09,1.0,0.0,1.0
2695,2020-02-28,"Tempe, AZ",US,2020-02-25,1.0,0.0,1.0
2696,2020-02-28,"Lackland, TX (From Diamond Princess)",US,2020-02-24,0.0,0.0,0.0
2697,2020-02-28,"Omaha, NE (From Diamond Princess)",US,2020-02-24,0.0,0.0,0.0
2698,2020-02-28,"Travis, CA (From Diamond Princess)",US,2020-02-24,0.0,0.0,0.0


### Analysis of COVID 19 around the world
+ In order to calculate true death rate, we have to work only with cases that are conlcuded ether recovered or passed away. Deviding number of dead with total onfirmed cases is not accruate since we do not know the outcome of all cases that are still in process. Instead, this would make more sense: death/(death+recovered).

In [3]:
# Death rate analysis on global level
D_vs_R = covid.copy()
D_vs_R['REC'] = 'REC'
D_vs_R['DTH'] = 'DTH'
recovered = pd.pivot_table(D_vs_R.dropna(subset=['RECOVERED']), index='DATE', 
                         columns='REC', values='RECOVERED', aggfunc=np.sum).fillna(method='ffill').reset_index()

death = pd.pivot_table(D_vs_R.dropna(subset=['DEATH']), index='DATE', 
                         columns='DTH', values='DEATH', aggfunc=np.sum).fillna(method='ffill').reset_index()
D_vs_R_df = pd.merge(recovered,death,on='DATE')
D_vs_R_df['RATIO'] = round(D_vs_R_df['DTH'] / (D_vs_R_df['DTH'] + D_vs_R_df['REC'])*100)
D_vs_R_df.head()

Unnamed: 0,DATE,REC,DTH,RATIO
0,2020-01-22,28.0,17.0,38.0
1,2020-01-23,30.0,18.0,38.0
2,2020-01-24,36.0,26.0,42.0
3,2020-01-25,39.0,42.0,52.0
4,2020-01-26,52.0,56.0,52.0


+ Based on these analysis we see that initailly deathrate reached out up to 57% which is most likely due to quicker fatality than recovery rate since individuals in critical condition could pass away within first 7 days while it take several week to recover. 

In [32]:
# ploting Current Deat Rate around the world

cur_ratio = D_vs_R_df[D_vs_R_df['DATE'] == D_vs_R_df['DATE'].max()]
fig_dr = go.Figure()
fig_dr.add_trace(go.Scatter(x=D_vs_R_df.DATE, y=D_vs_R_df.RATIO, mode="lines+markers", line_color='Red', name = 'Current Death Rate' + ' ' + f"{int(cur_ratio['RATIO']):,d}%"))
fig_dr.update_layout(template="ggplot2",title_text = '<b>Death Rate % Around The World </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans", color='black'), showlegend=True) 
fig_dr.update_layout(
    legend=dict(
        x=.75,
        y=0.95,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="Black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Red",
        borderwidth=2
    ))
fig_dr.show()

#### Confirmed Vs. Recovered Vs Death case around the world

In [5]:
# Spread, death and recovered over the time around the world
covid_all = covid.groupby('DATE')['CONFIRMED', 'DEATH', 'RECOVERED'].sum().reset_index()
covid_all= covid_all[covid_all['DATE'] > '2020-01-22']


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [6]:
# Plotting Values for Confirmed, deaths and reocvered cases
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"colspan": 2}, None],[{}, {}]],
    subplot_titles=(f"{int(covid_all.CONFIRMED.max()):,d}" +' ' + "CONFIRMED",
                    f"{int(covid_all.RECOVERED.max()):,d}" +' ' +"RECOVERED",
                    f"{int(covid_all.DEATH.max()):,d}" +' ' +"DEATHS"))

fig.add_trace(go.Bar(x=covid_all['DATE'], y=covid_all['CONFIRMED'], text = covid_all['CONFIRMED'],
                     marker_color='Orange'), row=1, col=1)

fig.add_trace(go.Bar(x=covid_all['DATE'], y=covid_all['RECOVERED'], marker_color='Green'), row=2, col=1)

fig.add_trace(go.Bar(x=covid_all['DATE'], y=covid_all['DEATH'], marker_color='Red'), row=2, col=2)

fig.update_traces(marker_line_color='rgb(8,48,107)',marker_line_width=1.5, opacity=0.8,
                  texttemplate='%{text:.2s}', textposition='outside')

fig['layout']['yaxis1'].update(title='Count', range=[0, covid_all['CONFIRMED'].max() + 10000])
fig['layout']['yaxis2'].update(title='Count', range=[0, covid_all['RECOVERED'].max() + 10000])
fig['layout']['yaxis3'].update(title='Count', range=[0, covid_all['DEATH'].max() + 1000])
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')

fig.update_layout(template="ggplot2",title_text = '<b>CurrentConfirmed Vs. Death Vs Recovered Around The World </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=False)
fig.show()

#### Outbreak Forecasting with fbProphete
+ Using fbProphet algorythm we will atempt to predict total number of cases around the world in next 7 days. We see that upper limit is around 116k where lower end is at 103k.

In [7]:
#Runing fbprophet algorythm on confirmed cases outside of MainLand China. Forecasting 7 days.
covid_nc =  covid[(covid['COUNTRY'] != 'Mainland China') & (covid['COUNTRY'] != 'Others')]
all_df = covid.groupby('DATE')['CONFIRMED', 'DEATH', 'RECOVERED'].sum().reset_index()
all_df = all_df[all_df['DATE'] > '2020-01-22']

df_prophet = all_df.loc[:,["DATE", 'CONFIRMED']]
df_prophet.columns = ['ds','y']
m_d = Prophet(
    yearly_seasonality=False,
    weekly_seasonality = False,
    daily_seasonality = False,
    seasonality_mode = 'additive')
m_d.fit(df_prophet)
future_d = m_d.make_future_dataframe(periods=7)
fcst_daily = m_d.predict(future_d)
fcst_daily[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
39,2020-03-02,102603.612549,95511.729565,110152.659082
40,2020-03-03,105396.708267,98481.951866,113050.879034
41,2020-03-04,108189.803985,100870.657268,115445.678215
42,2020-03-05,110982.899703,103374.754032,118089.8572
43,2020-03-06,113775.995422,106337.895021,121011.123311


In [8]:
# Plotting the predictions
fig_prpht = go.Figure()
trace1 = {
  "fill": None, 
  "mode": "markers",
  "marker_size": 10,
  "name": "Confirmed", 
  "type": "scatter", 
  "x": df_prophet.ds, 
  "y": df_prophet.y
}
trace2 = {
  "fill": "tonexty", 
  "line": {"color": "red"}, 
  "mode": "lines", 
  "name": "upper_band", 
  "type": "scatter", 
  "x": fcst_daily.ds, 
  "y": fcst_daily.yhat_upper
}
trace3 = {
  "fill": "tonexty", 
  "line": {"color": "dimgray"}, 
  "mode": "lines", 
  "name": "lower_band", 
  "type": "scatter", 
  "x": fcst_daily.ds, 
  "y": fcst_daily.yhat_lower
}
trace4 = {
  "line": {"color": "blue"}, 
  "mode": "lines+markers",
  "marker_size": 4,
  "name": "prediction", 
  "type": "scatter", 
  "x": fcst_daily.ds, 
  "y": fcst_daily.yhat
}
data = [trace1, trace2, trace3, trace4]
layout = {
  "title": "Confirmed Cases Time Series", 
  "xaxis": {
      "title": "Dates", 
    "ticklen": 5, 
    "gridcolor": "rgb(255, 255, 255)", 
    "gridwidth": 2, 
    "zerolinewidth": 1
  }, 
  "yaxis": {
    "title": "Confirmed Cases", 
    "ticklen": 5, 
    "gridcolor": "rgb(255, 255, 255)", 
    "gridwidth": 2, 
    "zerolinewidth": 1
  }, 
}
fig_prpht = go.Figure(data=data, layout=layout)
fig_prpht.update_layout(template="ggplot2",title_text = '<b>Forecastng of Spread around the world </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=True)
fig_prpht.update_layout(
    legend=dict(
        x=0.01,
        y=.99,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="Black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Orange",
        borderwidth=2
    ))
fig_prpht.show()

In [9]:
# Creating geographical dataframe in order to plot location
world_geo = covid_geo[covid_geo['DATE'] == covid_geo['DATE'].max()]
world_geo = world_geo.groupby('COUNTRY')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()
world_geo_df = pd.merge(world_geo,covid_geo[['COUNTRY','LAT','LONG']],on='COUNTRY', how='left')
world_geo_df = world_geo_df.drop_duplicates()

# Ploting confirmed cases in China using free source librarry kepler.gl 
import openpyxl
import geopandas as gpd
from keplergl import KeplerGl
world_geo_conf = world_geo_df.loc[:,['CONFIRMED','LAT','LONG']]  
gdf = gpd.GeoDataFrame(world_geo_conf, geometry = gpd.points_from_xy(world_geo_conf.LONG, world_geo_conf.LAT))
# Run config file created for China
%run C:\Users\slavi\PycharmProjects\COVID19\world_hex_config.py
map_1 = KeplerGl()
map_1.add_data(data = gdf, name='Construction')
map_1.config = config
#map_1 = KeplerGl(height=400, data={'China Recovered': gdf}, config=config)
map_1


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'kgu4okl', 'type': …

In [64]:
world_geo_conf.dropna()
map = folium.Map(location=[30, 100], tiles = "CartoDB dark_matter", zoom_start=4.2)
for i in range(0,len(world_geo_conf)):
    folium.Circle(location=[world_geo_conf.iloc[i]['LAT'], world_geo_conf.iloc[i]['LONG']],radius=(math.sqrt(world_geo_conf.iloc[i]['CONFIRMED'])*1200+2 ),
      color='crimson',
      fill=True,
      fill_color='crimson'
   ).add_to(map)


map

ValueError: Location values cannot contain NaNs.

In [75]:
nan_rows = world_geo_conf[world_geo_conf.isnull().any(1)]
nan_rows

Unnamed: 0,CONFIRMED,LAT,LONG,geometry
392,23.0,,,POINT (nan nan)
431,69.0,,,POINT (nan nan)
2065,14.0,,,POINT (nan nan)
4296,1104.0,,,POINT (nan nan)
5994,161.0,,,POINT (nan nan)
6110,92.0,,,POINT (nan nan)
7840,46.0,,,POINT (nan nan)
11282,705.0,,,POINT (nan nan)
18334,62.0,,,POINT (nan nan)


In [10]:
# Save map_1 config to a file if changed
#with open('world_hex_config.py', 'w') as f:
   #f.write('config = {}'.format(map_1.config))

### Analysis On China
+ Based on analysis on death rate in China we can conlude that Mainland China drives death rate number due to vastly predominant number of the cases. 

In [11]:
# Death rate analysis on global level
china_drr = covid[(covid['COUNTRY'] == 'Mainland China')]
china_drr['REC'] = 'REC'
china_drr['DTH'] = 'DTH'
ch_recovered = pd.pivot_table(china_drr.dropna(subset=['RECOVERED']), index='DATE', 
                         columns='REC', values='RECOVERED', aggfunc=np.sum).fillna(method='ffill').reset_index()

ch_death = pd.pivot_table(china_drr.dropna(subset=['DEATH']), index='DATE', 
                         columns='DTH', values='DEATH', aggfunc=np.sum).fillna(method='ffill').reset_index()
china_drr_df = pd.merge(ch_recovered,ch_death,on='DATE')
china_drr_df['RATIO'] = round(china_drr_df['DTH'] / (china_drr_df['DTH'] + china_drr_df['REC'])*100)
china_drr_df.head()

cur_ch_ratio = china_drr_df[china_drr_df['DATE'] == china_drr_df['DATE'].max()]
# ploting Current Deat Rate around the world
fig_dr = go.Figure()
fig_dr.add_trace(go.Scatter(x=china_drr_df.DATE, y=china_drr_df.RATIO, mode="lines+markers", line_color='Red', name = 'Current Death Rate' + ' ' + f"{int(cur_ch_ratio['RATIO']):,d}%"))
fig_dr.update_layout(template="ggplot2",title_text = '<b>Death Rate % In Mainland China </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans", color='black'), showlegend=True) 
fig_dr.update_layout(
    legend=dict(
        x=.75,
        y=0.95,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="Black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Red",
        borderwidth=2
    ))
fig_dr.show()

### Confirmed Vs. Recovered Vs Death in Mainland China

In [12]:
# Spread, death and recovered over the time outside of MainLand China
covid_ch =  covid[(covid['COUNTRY'] == 'Mainland China')]
ch_df = covid_ch.groupby('DATE')['CONFIRMED', 'DEATH', 'RECOVERED'].sum().reset_index()
ch_df = ch_df[ch_df['DATE'] > '2020-01-22']

fig = make_subplots(rows=1, cols=3, subplot_titles=(f"{int(ch_df.CONFIRMED.max()):,d}" +' ' + "CONFIRMED",
                                                    f"{int(ch_df.DEATH.max()):,d}" +' ' + "DEATHS",
                                                    f"{int(ch_df.RECOVERED.max()):,d}" +' ' + "RECOVERED"))

trace1 = go.Scatter(
                x=ch_df['DATE'],
                y=ch_df['CONFIRMED'],
                name="CONFIRMED",
                line_color='orange',
                opacity=0.8)
trace2 = go.Scatter(
                x=ch_df['DATE'],
                y=ch_df['DEATH'],
                name="DEATH",
                line_color='dimgray',
                opacity=0.8)

trace3 = go.Scatter(
                x=ch_df['DATE'],
                y=ch_df['RECOVERED'],
                name="RECOVERED",
                line_color='deepskyblue',
                opacity=0.8)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig.append_trace(trace3, 1, 3)
fig.update_layout(template="ggplot2",title_text = '<b>Spread Vs. Death Vs Recovered within Mainland China </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=False)
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Distribution of cases in Chinas provincies

In [13]:
# Isolating the max values based on last date for china
china_cur  = covid[(covid['COUNTRY'] == 'Mainland China')]
china_cur_st = china_cur[china_cur['DATE'] == china_cur['DATE'].max()]
china_cur_st = china_cur_st.groupby('STATE')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()
china_cur_st.head()



Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,STATE,CONFIRMED,DEATH,RECOVERED
0,Anhui,990.0,6.0,821.0
1,Beijing,410.0,7.0,257.0
2,Chongqing,576.0,6.0,422.0
3,Fujian,296.0,1.0,235.0
4,Gansu,91.0,2.0,82.0


In [14]:
# Ploting distribution between provinces in China
fig = px.treemap(china_cur_st.sort_values(by='CONFIRMED', ascending=False).reset_index(drop=True), 
                 path=["STATE"], values="CONFIRMED", 
                 title='Number of Confirmed Cases in US Cities',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.update_layout(template="ggplot2",title_text = '<b>Current confirmed cases within China`s Provincies </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=True)
fig.show()

### Ploting locations of confirmed cases on map

In [15]:
# Creating geographical dataframe in order to plot location
china_all  = covid_geo[(covid_geo['COUNTRY'] == 'Mainland China')]
china_geo = china_all[china_all['DATE'] == china_all['DATE'].max()]
china_geo = china_geo.groupby('STATE')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()
china_geo = pd.merge(china_geo,china_all[['STATE','LAT','LONG']],on='STATE', how='left')
china_geo = china_geo.drop_duplicates()
china_geo.head()

# Ploting confirmed cases in China using free source librarry kepler.gl 
import openpyxl
import geopandas as gpd
from keplergl import KeplerGl
china_geo_rec = china_geo.loc[:,['RECOVERED','LAT','LONG']]  
gdf = gpd.GeoDataFrame(china_geo_rec, geometry = gpd.points_from_xy(china_geo_rec.LONG, china_geo_rec.LAT))
# Run config file created for China
%run C:\Users\slavi\PycharmProjects\COVID19\ch_hex_config.py
map_1 = KeplerGl()
map_1.add_data(data = gdf, name='Construction')
map_1.config = config
#map_1 = KeplerGl(height=400, data={'China Recovered': gdf}, config=config)
# Save map_1 config to a file if changed
#with open('ch_hex_config.py', 'w') as f:
   #f.write('config = {}'.format(map_1.config))
map_1

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md



Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'twoc80m', 'type': …

### Analysis of outside Mainland China

In [16]:
# Death rate analysis on global level
oc_drr = covid[(covid['COUNTRY'] != 'Mainland China')]
oc_drr['REC'] = 'REC'
oc_drr['DTH'] = 'DTH'
oc_recovered = pd.pivot_table(oc_drr.dropna(subset=['RECOVERED']), index='DATE', 
                         columns='REC', values='RECOVERED', aggfunc=np.sum).fillna(method='ffill').reset_index()

oc_death = pd.pivot_table(oc_drr.dropna(subset=['DEATH']), index='DATE', 
                         columns='DTH', values='DEATH', aggfunc=np.sum).fillna(method='ffill').reset_index()
oc_drr_df = pd.merge(oc_recovered,oc_death,on='DATE')
oc_drr_df['RATIO'] = round(oc_drr_df['DTH'] / (oc_drr_df['DTH'] + oc_drr_df['REC'])*100)

oc_ratio = oc_drr_df[D_vs_R_df['DATE'] == oc_drr_df['DATE'].max()]
# ploting Current Deat Rate around the world
fig_dr = go.Figure()
fig_dr.add_trace(go.Scatter(x=oc_drr_df.DATE, y=oc_drr_df.RATIO, mode="lines+markers", line_color='Red', name = 'Current Death Rate' + ' ' + f"{int(oc_ratio['RATIO']):,d}%"))
fig_dr.update_layout(template="ggplot2",title_text = '<b>Death Rate % outside Mainland China </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans", color='black'), showlegend=True) 
fig_dr.update_layout(
    legend=dict(
        x=.75,
        y=0.95,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="Black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Red",
        borderwidth=2
    ))
fig_dr.show()

### Death rate by Country outside of Mainland China
+ Using previous logic to calculate death ratio per country, we see that at the beginig of outbrake death rate is much higher because there is lag of week or two between death and recovered. It seems that death apears within first week or two and while individuals taking 3 or more weeks to recover. 
+ If the coutnry did not report any recovered cases while reported death cases, the death rate will be 100% at that moment since we still dont know the outcome of all cases. 

In [17]:
# Death rate analysis by country
oc_dr_s = covid[(covid['COUNTRY'] != 'Mainland China')]
oc_dr_s = oc_dr_s[oc_dr_s['DATE'] == oc_dr_s['DATE'].max()]
oc_dr_s = oc_dr_s.groupby('COUNTRY')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()
oc_dr_s['RATIO'] = round(oc_dr_s['DEATH'] / (oc_dr_s['DEATH'] + oc_dr_s['RECOVERED'])*100)

# Ploting confirmed cased outside of Mainland China
oc_dr_s= oc_dr_s.sort_values(by='RATIO', ascending= False)
fig = go.Figure()
fig.add_trace(go.Bar(x=oc_dr_s.COUNTRY,
                     y=oc_dr_s.RATIO,            
                     opacity=0.8,
                     text = oc_dr_s.RATIO,
                     textposition='outside',
                     marker={'color':'orange'}))
fig.update_layout(title={'text': 'Countries outside of MainLand China with their current death rate',
                         'y':0.95, 'x':0.5,'xanchor': 'center', 'yanchor': 'top'},
                         showlegend=False, xaxis_title_text='Country',
                         yaxis_title_text='Count', bargap=0.3)
fig.update_yaxes(range = [0, oc_dr_s['RATIO'].max() + 10])
fig.update_traces(marker_color='Red', marker_line_color='orange',
                  marker_line_width=1, opacity=0.6   )
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Recovery Rate by coutry outside Mainland China
+ Based on analysis below there is several countries who successfully recovering their infected cases with no death reported so far. 

In [18]:
# Death rate analysis by country
oc_drr_s = covid[(covid['COUNTRY'] != 'Mainland China')]
oc_drr_s = oc_drr_s[oc_drr_s['DATE'] == oc_drr_s['DATE'].max()]
oc_drr_s = oc_drr_s.groupby('COUNTRY')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()
oc_drr_s['RATIO'] = round(oc_drr_s['RECOVERED'] / (oc_drr_s['DEATH'] + oc_drr_s['RECOVERED'])*100)

# Ploting confirmed cased outside of Mainland China
oc_drr_s= oc_drr_s.sort_values(by='RATIO', ascending= False)
fig = go.Figure()
fig.add_trace(go.Bar(x=oc_drr_s.COUNTRY,
                     y=oc_drr_s.RATIO,            
                     opacity=0.8,
                     text = oc_drr_s.RATIO,
                     textposition='outside',
                     marker={'color':'orange'}))
fig.update_layout(title={'text': 'Countries outside of MainLand China with their current recovery rate',
                         'y':0.95, 'x':0.5,'xanchor': 'center', 'yanchor': 'top'},
                         showlegend=False, xaxis_title_text='Country',
                         yaxis_title_text='Count', bargap=0.3)
fig.update_yaxes(range = [0, oc_drr_s['RATIO'].max() + 10])
fig.update_traces(marker_color='green', marker_line_color='orange',
                  marker_line_width=1, opacity=0.6   )
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [19]:
# Spread, death and recovered over the time outside of MainLand China
covid_nc =  covid[(covid['COUNTRY'] != 'Mainland China')]
sotoc = covid_nc.groupby('DATE')['CONFIRMED', 'DEATH', 'RECOVERED'].sum().reset_index()
sotoc = sotoc[sotoc['DATE'] > '2020-01-22']

fig = make_subplots(rows=1, cols=3, subplot_titles=(f"{int(sotoc.CONFIRMED.max()):,d}" +' ' + "CONFIRMED",
                                                    f"{int(sotoc.DEATH.max()):,d}" +' ' + "DEATHS",
                                                    f"{int(sotoc.RECOVERED.max()):,d}" +' ' + "RECOVERED"))

trace1 = go.Scatter(
                x=sotoc['DATE'],
                y=sotoc['CONFIRMED'],
                name="CONFIRMED",
                line_color='orange',
                opacity=0.8)
trace2 = go.Scatter(
                x=sotoc['DATE'],
                y=sotoc['DEATH'],
                name="DEATH",
                line_color='dimgray',
                opacity=0.8)

trace3 = go.Scatter(
                x=sotoc['DATE'],
                y=sotoc['RECOVERED'],
                name="RECOVERED",
                line_color='deepskyblue',
                opacity=0.8)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig.append_trace(trace3, 1, 3)
fig.update_layout(template="ggplot2",title_text = '<b>Spread Vs. Death Vs Recovered outside Chine </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=False)
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [20]:
# Isolating max values based on last date for locations outside of Mainland China
current = covid[covid['DATE'] == covid['DATE'].max()]
current = current.groupby('COUNTRY')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()
nonchina  = current[(current['COUNTRY'] != 'Mainland China')]

# Ploting confirmed cased outside of Mainland China
nonchina= nonchina.sort_values(by='CONFIRMED', ascending= False)
fig = go.Figure()
fig.add_trace(go.Bar(x=nonchina.COUNTRY,
                     y=nonchina.CONFIRMED,            
                     opacity=0.8,
                     text = nonchina.CONFIRMED,
                     textposition='outside',
                     marker={'color':'orange'}))
fig.update_layout(title={'text': 'Current count of confirmed cases outside of MainLand China',
                         'y':0.95, 'x':0.5,'xanchor': 'center', 'yanchor': 'top'},
                         showlegend=False, xaxis_title_text='Country',
                         yaxis_title_text='Count', bargap=0.3)
fig.update_yaxes(range = [0, nonchina['CONFIRMED'].max() + 100])
fig.update_traces(marker_color='orange', marker_line_color='red',
                  marker_line_width=1, opacity=0.6   )
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [21]:
# US breakdown by state with heat map
import plotly.express as px
nonchina = nonchina.sort_values(by='CONFIRMED', ascending=True).reset_index(drop=True)
fig = px.bar(nonchina, x='COUNTRY', y='CONFIRMED',
             hover_data=['COUNTRY', 'CONFIRMED'], color='CONFIRMED',text = nonchina.CONFIRMED,
             labels={'pop':'Confirmed Cases in US'}, height=600)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=5, uniformtext_mode='hide')
fig.show()

In [22]:
# Display number of confirmed cased outside of MainLand China
fig_conf = go.Figure()
fig_conf.add_trace(go.Scatter(x=sotoc.DATE, y=sotoc.CONFIRMED, mode="lines+markers", line_color='Orange', name = f"{int(sotoc.CONFIRMED.max()):,d}" + ' ' + 'Confirmed'))
fig_conf.update_layout(template="ggplot2",title_text = '<b>Confiermed cases outside of MainLand China </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans", color='black'), showlegend=True) 
fig_conf.update_layout(
    legend=dict(
        x=0.01,
        y=.98,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="Black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Orange",
        borderwidth=2
    ))
fig_conf.show()


In [23]:
# Display Recover vs Deaths outside of MainLand China
fig_rd = go.Figure()
fig_rd.add_trace(go.Scatter(x=sotoc.DATE, y=sotoc.RECOVERED, mode="lines+markers", name=f"{int(sotoc.RECOVERED.max()):,d}" + ' ' + "RECOVERED",line_color='deepskyblue'))
fig_rd.add_trace(go.Scatter(x=sotoc.DATE, y=sotoc.DEATH, mode="lines+markers", name=f"{int(sotoc.DEATH.max()):,d}" + ' ' + "DEATHS",line_color='dimgray'))
fig_rd.update_layout(template="ggplot2",title_text = '<b>Recovered Vs Death outside of Main China </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=True)
fig_rd.update_layout(
    legend=dict(
        x=0.01,
        y=.98,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=12,
            color="Black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="dimgray",
        borderwidth=2
    ))
fig_rd.show()

### Analysis on US 

In [24]:
# Analyzing US
us_cur = covid[(covid['COUNTRY'] == 'US')]
us_cur_st = us_cur[us_cur['DATE'] == us_cur['DATE'].max()]
us_cur_st = us_cur_st.groupby('STATE')['CONFIRMED','DEATH','RECOVERED'].sum().reset_index()

# Ploting the distribution of cases over the US
fig = px.treemap(us_cur_st.sort_values(by='CONFIRMED', ascending=False).reset_index(drop=True), 
                 path=["STATE"], values="CONFIRMED", 
                 title='Number of Confirmed Cases in US Cities',
                 color_discrete_sequence = px.colors.qualitative.Prism)
fig.update_layout(template="ggplot2",title_text = '<b>Current breakdown of confirmed cases within US </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=True)
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Confirmed Vs. Recovered Vs. Death in US

In [25]:
# Spread, death and recovered over the time for US
us_cur = us_cur.groupby('DATE')['CONFIRMED', 'DEATH', 'RECOVERED'].sum().reset_index()

fig = make_subplots(rows=1, cols=3, subplot_titles=(f"{int(us_cur.CONFIRMED.max()):,d}" +' ' + "CONFIRMED",
                                                    f"{int(us_cur.DEATH.max()):,d}" +' ' + "DEATHS",
                                                    f"{int(us_cur.RECOVERED.max()):,d}" +' ' + "RECOVERED"))

trace1 = go.Scatter(
                x=us_cur['DATE'],
                y=us_cur['CONFIRMED'],
                name="CONFIRMED",
                line_color='orange',
                opacity=0.8)
trace2 = go.Scatter(
                x=us_cur['DATE'],
                y=us_cur['DEATH'],
                name="DEATH",
                line_color='dimgray',
                opacity=0.8)

trace3 = go.Scatter(
                x=us_cur['DATE'],
                y=us_cur['RECOVERED'],
                name="RECOVERED",
                line_color='deepskyblue',
                opacity=0.8)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig.append_trace(trace3, 1, 3)
fig.update_layout(template="ggplot2",title_text = '<b>Spread Vs. Death Vs Recovered in US </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=False)
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Analysis on Italy

In [26]:
# Itally Analysis
italy_cur = covid[(covid['COUNTRY'] == 'Italy')]
italy_cur = italy_cur.groupby('DATE')['CONFIRMED', 'DEATH', 'RECOVERED'].sum().reset_index()
italy_cur.tail()

# Ploting the values for Italy
fig_rd = go.Figure()
fig_rd.add_trace(go.Scatter(x=italy_cur.DATE, y=italy_cur.RECOVERED, mode="lines+markers", name="RECOVERED",line_color='green'))
fig_rd.add_trace(go.Scatter(x=italy_cur.DATE, y=italy_cur.DEATH, mode="lines+markers", name="DEATHS",line_color='dimgray'))
fig_rd.add_trace(go.Scatter(x=italy_cur.DATE, y=italy_cur.CONFIRMED, mode="lines+markers", name="CONFIRMED",line_color='orange'))
fig_rd.update_layout(template="ggplot2",title_text = '<b>Current Confirmed Vs Recovered Vs Deaths in Itally </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=True)
fig_rd.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [27]:
# Spread, death and recovered over the time for Italy
fig = make_subplots(rows=1, cols=3, subplot_titles=(f"{int(italy_cur.CONFIRMED.max()):,d}" +' ' + "CONFIRMED",
                                                    f"{int(italy_cur.DEATH.max()):,d}" +' ' + "DEATHS",
                                                    f"{int(italy_cur.RECOVERED.max()):,d}" +' ' + "RECOVERED"))

trace1 = go.Scatter(
                x=italy_cur['DATE'],
                y=italy_cur['CONFIRMED'],
                name="CONFIRMED",
                line_color='orange',
                opacity=0.8)
trace2 = go.Scatter(
                x=italy_cur['DATE'],
                y=italy_cur['DEATH'],
                name="DEATH",
                line_color='dimgray',
                opacity=0.8)

trace3 = go.Scatter(
                x=italy_cur['DATE'],
                y=italy_cur['RECOVERED'],
                name="RECOVERED",
                line_color='deepskyblue',
                opacity=0.8)

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig.append_trace(trace3, 1, 3)
fig.update_layout(template="ggplot2",title_text = '<b>Spread Vs. Death Vs Recovered in Italy </b>',
                  font=dict(family="Arial, Balto, Courier New, Droid Sans",color='black'), showlegend=False)
fig.show()

In [34]:
map = folium.Map(location=[30, 100], tiles = "CartoDB dark_matter", zoom_start=4.2)
folium.circle(location = [])
map
