COVID_19 ANALYSIS

Analysis Part Two - Diagnosis Rate Research

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
# Load the global confirmed-case time series from the CSSEGISandData/COVID-19 repository
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Collapse the per-province rows into one total per country
df = (
    df.drop(columns=['Province/State', 'Lat', 'Long'])
      .groupby('Country/Region', sort=True)
      .sum()
)
# Keep only the 6/30/20 snapshot ('Country/Region' is the index at this point),
# then turn the country back into a regular column and give the snapshot a readable name
df = (
    df.drop(columns=df.columns.difference(['Country/Region', '6/30/20']))
      .reset_index()
      .rename(columns={"6/30/20": "Total Confirm Number"})
)

In [4]:
# Preview the first 20 rows of the per-country totals
df.head(n=20)

Unnamed: 0,Country/Region,Total Confirm Number
0,Afghanistan,31517
1,Albania,2535
2,Algeria,13907
3,Andorra,855
4,Angola,284
5,Antigua and Barbuda,69
6,Argentina,64530
7,Armenia,25542
8,Australia,7920
9,Austria,17766


In [5]:
# Total confirmed cases for China, kept as a one-row frame
df_China = df[df['Country/Region'] == 'China']
df_China

Unnamed: 0,Country/Region,Total Confirm Number
36,China,84785


In [6]:
# Total confirmed cases for Japan, kept as a one-row frame
df_Japan = df[df['Country/Region'] == 'Japan']
df_Japan

Unnamed: 0,Country/Region,Total Confirm Number
87,Japan,18615


In [7]:
# Total confirmed cases for Italy, kept as a one-row frame
df_Italy = df[df['Country/Region'] == 'Italy']
df_Italy

Unnamed: 0,Country/Region,Total Confirm Number
85,Italy,240578


In [8]:
# Total confirmed cases for Spain, kept as a one-row frame
df_Spain = df[df['Country/Region'] == 'Spain']
df_Spain

Unnamed: 0,Country/Region,Total Confirm Number
158,Spain,249271


In [9]:
# Total confirmed cases for the United Kingdom, kept as a one-row frame
df_UK = df[df['Country/Region'] == 'United Kingdom']
df_UK

Unnamed: 0,Country/Region,Total Confirm Number
178,United Kingdom,314160


In [10]:
# Total confirmed cases for Germany, kept as a one-row frame
df_Germany = df[df['Country/Region'] == 'Germany']
df_Germany

Unnamed: 0,Country/Region,Total Confirm Number
66,Germany,195418


In [11]:
# Total confirmed cases for Australia, kept as a one-row frame
df_Australia = df[df['Country/Region'] == 'Australia']
df_Australia

Unnamed: 0,Country/Region,Total Confirm Number
8,Australia,7920


In [12]:
#Get Total Confirmed Number of US

In [13]:
# Load the US confirmed-case time series (a separate file in the same repository)
url_US = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
df_US = pd.read_csv(filepath_or_buffer=url_US)

In [14]:
# Sum every US row into one total per Country_Region value
df_US = (
    df_US.drop(columns=['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State', 'Combined_Key', 'Lat', 'Long_'])
         .groupby('Country_Region', sort=True)
         .sum()
)
# Keep only the 6/30/20 snapshot and align the column names with the global table
df_US = (
    df_US.drop(columns=df_US.columns.difference(['Country_Region', '6/30/20']))
         .reset_index()
         .rename(columns={"Country_Region": "Country/Region", "6/30/20": "Total Confirm Number"})
)

In [15]:
# Display the aggregated US frame (country label plus its 6/30/20 confirmed total)
df_US

Unnamed: 0,Country/Region,Total Confirm Number
0,US,2635603


In [16]:
#Create Data table for Countries That I Want to Do Research
df_table = pd.DataFrame(columns=['Country/Region', 'Total Confirm Number', 'Population'])
df_table = df_table.append(df_China)
df_table = df_table.append(df_Japan)
df_table = df_table.append(df_Italy)
df_table = df_table.append(df_Spain)
df_table = df_table.append(df_UK)
df_table = df_table.append(df_Germany)
df_table = df_table.append(df_Australia)
df_table = df_table.append(df_US)
df_table

Unnamed: 0,Country/Region,Total Confirm Number,Population
36,China,84785,
87,Japan,18615,
85,Italy,240578,
158,Spain,249271,
178,United Kingdom,314160,
66,Germany,195418,
8,Australia,7920,
0,US,2635603,


In [17]:
# Renumber the rows 0..n-1; the labels carried over from the source frames are discarded
df_table = df_table.reset_index(drop=True)
df_table

Unnamed: 0,Country/Region,Total Confirm Number,Population
0,China,84785,
1,Japan,18615,
2,Italy,240578,
3,Spain,249271,
4,United Kingdom,314160,
5,Germany,195418,
6,Australia,7920,
7,US,2635603,


In [18]:
#2020 population of each country, taken from Worldometer: https://www.worldometers.info/world-population/population-by-country/

#China: 1,439,323,776
#US: 331,002,651
#Japan: 126,476,461
#Italy: 60,461,826
#Spain: 46,754,778
#UK: 67,886,011
#Germany: 83,783,942
#Australia: 25,499,884

In [19]:
# 2020 population of each country in the table.
population_2020 = {
    'China': 1439323776,
    'Japan': 126476461,
    'Italy': 60461826,
    'Spain': 46754778,
    'United Kingdom': 67886011,
    'Germany': 83783942,
    'Australia': 25499884,
    'US': 331002651,
}
# Rows are matched on the country name rather than on row position, so the
# assignment does not depend on the row order of df_table or on the index of
# any other frame.
for country, population in population_2020.items():
    df_table.loc[df_table['Country/Region'] == country, 'Population'] = population
df_table

Unnamed: 0,Country/Region,Total Confirm Number,Population
0,China,84785,1439323776
1,Japan,18615,126476461
2,Italy,240578,60461826
3,Spain,249271,46754778
4,United Kingdom,314160,67886011
5,Germany,195418,83783942
6,Australia,7920,25499884
7,US,2635603,331002651


In [20]:
# Diagnosis rate: confirmed cases as a fraction of the total population
df_table['Diagnosis Rate'] = df_table['Total Confirm Number'] / df_table['Population']
df_table

Unnamed: 0,Country/Region,Total Confirm Number,Population,Diagnosis Rate
0,China,84785,1439323776,5.89061e-05
1,Japan,18615,126476461,0.000147182
2,Italy,240578,60461826,0.00397901
3,Spain,249271,46754778,0.00533146
4,United Kingdom,314160,67886011,0.00462776
5,Germany,195418,83783942,0.0023324
6,Australia,7920,25499884,0.00031059
7,US,2635603,331002651,0.00796248


In [21]:
# CSS applied to header (th) cells
th_props = [
    ('font-size', '12px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
    ('color', '#000000'),
    ('background-color', '#F5F5F5'),
]

# CSS applied to body (td) cells
td_props = [
    ('font-size', '12px'),
    ('color', '#000000'),
]

# One rule per selector, in the form set_table_styles expects
styles = [
    {"selector": "th", "props": th_props},
    {"selector": "td", "props": td_props},
]

# Light-to-red colormap; darker cells mark larger values within each column
cm = sns.light_palette("red", as_cmap=True)
df_tb1 = (
    df_table.style
    .background_gradient(cmap=cm, subset=['Total Confirm Number', 'Population', 'Diagnosis Rate'])
    .format({'Diagnosis Rate': "{:.8%}"})
    .set_table_styles(styles)
)
df_tb1

Unnamed: 0,Country/Region,Total Confirm Number,Population,Diagnosis Rate
0,China,84785,1439323776,0.00589061%
1,Japan,18615,126476461,0.01471815%
2,Italy,240578,60461826,0.39790065%
3,Spain,249271,46754778,0.53314551%
4,United Kingdom,314160,67886011,0.46277576%
5,Germany,195418,83783942,0.23324040%
6,Australia,7920,25499884,0.03105896%
7,US,2635603,331002651,0.79624831%


In [22]:
#Get China Hubei Data
#Population of Hubei Province 2019: 59,270,000

In [23]:
# Re-read the global confirmed-case file to get province-level rows.
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
df_h1 = pd.read_csv(url).drop(columns=['Lat', 'Long'])
# Narrow down to the Hubei row of China.
df_h2 = df_h1.loc[df_h1['Country/Region'] == 'China']
df_h2 = df_h2.loc[df_h2['Province/State'] == 'Hubei']
# Relabel the country as 'China/Hubei'. replace() returns a new frame here
# instead of mutating the filtered slice in place, which avoids pandas'
# chained-assignment (SettingWithCopy) hazard.
df_h2 = df_h2.replace({'China': 'China/Hubei'})
df_h2

Unnamed: 0,Province/State,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,6/22/20,6/23/20,6/24/20,6/25/20,6/26/20,6/27/20,6/28/20,6/29/20,6/30/20,7/1/20
62,Hubei,China/Hubei,444,444,549,761,1058,1423,3554,3554,...,68135,68135,68135,68135,68135,68135,68135,68135,68135,68135


In [24]:
# Keep only the region label and the 6/30/20 snapshot, renumber the single row
# from 0 and give the snapshot a readable name. Every step returns a new frame,
# so the filtered slice is never mutated in place.
df_h2 = (
    df_h2.drop(columns=df_h2.columns.difference(['Country/Region', '6/30/20']))
         .reset_index(drop=True)
         .rename(columns={"6/30/20": "Total Confirm Number"})
)
df_h2

Unnamed: 0,Country/Region,Total Confirm Number
0,China/Hubei,68135


In [25]:
#Get US New York State Data
#Population of New York State 2019: 19,450,000

In [26]:
# Re-read the US confirmed-case file and total it per state/territory
url_US = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
df_n1 = (
    pd.read_csv(url_US)
      .drop(columns=['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Combined_Key', 'Country_Region', 'Lat', 'Long_'])
      .groupby('Province_State', sort=True)
      .sum()
      .reset_index()
)
df_n1

Unnamed: 0,Province_State,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,6/22/20,6/23/20,6/24/20,6/25/20,6/26/20,6/27/20,6/28/20,6/29/20,6/30/20,7/1/20
0,Alabama,0,0,0,0,0,0,0,0,0,...,30454,31097,32064,33206,34183,35083,35441,37175,38045,38962
1,Alaska,0,0,0,0,0,0,0,0,0,...,758,760,789,813,833,851,880,901,937,975
2,American Samoa,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Arizona,0,0,0,0,1,1,1,1,1,...,54599,58378,60190,63281,66659,70124,73920,74545,79228,84105
4,Arkansas,0,0,0,0,0,0,0,0,0,...,16083,16678,17375,18062,18740,19310,19818,20257,20777,21197
5,California,0,0,0,0,2,2,2,2,2,...,184765,191477,196024,201112,206844,210692,215487,223646,231418,238681
6,Colorado,0,0,0,0,0,0,0,0,0,...,30689,30877,31139,31463,31780,32006,32290,32494,32698,33012
7,Connecticut,0,0,0,0,0,0,0,0,0,...,45782,45899,45913,45994,46059,46206,46303,46362,46514,46572
8,Delaware,0,0,0,0,0,0,0,0,0,...,10820,10847,10889,10980,11017,11091,11226,11376,11474,11510
9,Diamond Princess,0,0,0,0,0,0,0,0,0,...,49,49,49,49,49,49,49,49,49,49


In [27]:
# Confirmed cases for New York State on 6/30/20.
df_n2 = df_n1.loc[df_n1['Province_State'] == 'New York']
# drop() is called without inplace=True: mutating the .loc slice in place is
# what raised pandas' SettingWithCopyWarning.
df_n2 = df_n2.drop(columns=df_n2.columns.difference(['Province_State', '6/30/20']))
# Align column names with the country table, renumber the row from 0 and
# relabel the state as 'US/New York'.
df_n2 = (
    df_n2.rename(columns={"Province_State": "Country/Region", "6/30/20": "Total Confirm Number"})
         .reset_index(drop=True)
         .replace({'New York': 'US/New York'})
)
df_n2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Country/Region,Total Confirm Number
0,US/New York,393454


In [28]:
#Replace China with China/Hubei and US with US/New York
#Create New Table

In [29]:
# Add the Hubei and New York rows to the country table.
# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# stacks the frames and renumbers the rows 0..n-1 in one step.
df_table = pd.concat([df_table, df_h2, df_n2], ignore_index=True)

In [30]:
# 2019 populations: Hubei Province 59,270,000 and New York State 19,450,000.
# Rows are selected by region label instead of by position, so the assignment
# does not depend on row order or on the index of any other frame.
df_table.loc[df_table['Country/Region'] == 'China/Hubei', 'Population'] = 59270000
df_table.loc[df_table['Country/Region'] == 'US/New York', 'Population'] = 19450000
df_table

Unnamed: 0,Country/Region,Total Confirm Number,Population,Diagnosis Rate
0,China,84785,1439323776,5.89061e-05
1,Japan,18615,126476461,0.000147182
2,Italy,240578,60461826,0.00397901
3,Spain,249271,46754778,0.00533146
4,United Kingdom,314160,67886011,0.00462776
5,Germany,195418,83783942,0.0023324
6,Australia,7920,25499884,0.00031059
7,US,2635603,331002651,0.00796248
8,China/Hubei,68135,59270000,
9,US/New York,393454,19450000,


In [31]:
# Remove the nationwide China and US rows now that Hubei and New York stand in
# for them. Filtering by name (rather than by row positions 0 and 7) keeps the
# cell correct if the row order changes and makes it safe to re-run: a second
# run finds nothing left to remove instead of deleting two other rows.
for replaced_country in ('China', 'US'):
    df_table = df_table.loc[df_table['Country/Region'] != replaced_country]
# reset_index returns a fresh frame, so the column assignment below does not
# write into a filtered slice.
df_table = df_table.reset_index(drop=True)
# Recompute the rate so the newly added Hubei / New York rows get a value too.
df_table['Diagnosis Rate'] = df_table['Total Confirm Number'] / df_table['Population']

In [32]:
# CSS applied to header (th) cells
th_props = [
    ('font-size', '12px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
    ('color', '#000000'),
    ('background-color', '#F5F5F5'),
]

# CSS applied to body (td) cells
td_props = [
    ('font-size', '12px'),
    ('color', '#000000'),
]

# One rule per selector, in the form set_table_styles expects
styles = [
    {"selector": "th", "props": th_props},
    {"selector": "td", "props": td_props},
]

# Light-to-red colormap; darker cells mark larger values within each column
cm = sns.light_palette("red", as_cmap=True)
df_tb2 = (
    df_table.style
    .background_gradient(cmap=cm, subset=['Total Confirm Number', 'Population', 'Diagnosis Rate'])
    .format({'Diagnosis Rate': "{:.8%}"})
    .set_table_styles(styles)
)
df_tb2

Unnamed: 0,Country/Region,Total Confirm Number,Population,Diagnosis Rate
0,Japan,18615,126476461,0.01471815%
1,Italy,240578,60461826,0.39790065%
2,Spain,249271,46754778,0.53314551%
3,United Kingdom,314160,67886011,0.46277576%
4,Germany,195418,83783942,0.23324040%
5,Australia,7920,25499884,0.03105896%
6,China/Hubei,68135,59270000,0.11495698%
7,US/New York,393454,19450000,2.02289974%


Analysis Part Three - Mortality Rate Analysis

In [33]:
# Load the death-count time series: one global file and one US file
url_dh = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
url_dh_us = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'
df_dh = pd.read_csv(filepath_or_buffer=url_dh)
df_dh_us = pd.read_csv(filepath_or_buffer=url_dh_us)

In [34]:
# Deaths recorded for Hubei, China on 6/30/20.

# Work on a copy without the coordinates; df_dh itself is left untouched.
df_dh_hubei = df_dh.drop(columns=['Lat', 'Long'])
df_dh_hubei = df_dh_hubei.loc[df_dh_hubei['Country/Region'] == 'China']
df_dh_hubei = df_dh_hubei.loc[df_dh_hubei['Province/State'] == 'Hubei']
# Relabel the country so the row can sit next to whole countries in one table.
# Each call below returns a new frame, so the filtered slice is never mutated
# in place (the pattern pandas flags with SettingWithCopyWarning).
df_dh_hubei = df_dh_hubei.replace({'China': 'China/Hubei'})
df_dh_hubei = (
    df_dh_hubei.drop(columns=df_dh_hubei.columns.difference(['Country/Region', '6/30/20']))
               .reset_index(drop=True)
               .rename(columns={"6/30/20": "Total Deaths Number"})
)
df_dh_hubei

Unnamed: 0,Country/Region,Total Deaths Number
0,China/Hubei,4512


In [35]:
# Deaths recorded for New York State on 6/30/20.
# First total the US deaths file per state/territory.
df_dh_us = (
    df_dh_us.drop(columns=['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Combined_Key', 'Country_Region', 'Lat', 'Long_'])
            .groupby('Province_State', sort=True)
            .sum()
            .reset_index()
)

df_dh_ny = df_dh_us.loc[df_dh_us['Province_State'] == 'New York']
# drop() is called without inplace=True: mutating the .loc slice in place is
# what raised pandas' SettingWithCopyWarning.
df_dh_ny = df_dh_ny.drop(columns=df_dh_ny.columns.difference(['Province_State', '6/30/20']))
# Align column names with the country table, renumber the row from 0 and
# relabel the state as 'US/New York'.
df_dh_ny = (
    df_dh_ny.rename(columns={"Province_State": "Country/Region", "6/30/20": "Total Deaths Number"})
            .reset_index(drop=True)
            .replace({'New York': 'US/New York'})
)
df_dh_ny

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Country/Region,Total Deaths Number
0,US/New York,32032


In [36]:
# Deaths for the other countries: collapse per-province rows into one total per country
df_dh = (
    df_dh.drop(columns=['Province/State', 'Lat', 'Long'])
         .groupby('Country/Region', sort=True)
         .sum()
)
# Keep only the 6/30/20 snapshot ('Country/Region' is the index at this point)
df_dh = (
    df_dh.drop(columns=df_dh.columns.difference(['Country/Region', '6/30/20']))
         .reset_index()
         .rename(columns={"6/30/20": "Total Deaths Number"})
)

In [37]:
# One-row deaths frame per country of interest
df_Japan_dh = df_dh[df_dh['Country/Region'] == 'Japan']
df_Italy_dh = df_dh[df_dh['Country/Region'] == 'Italy']
df_Spain_dh = df_dh[df_dh['Country/Region'] == 'Spain']
df_Germany_dh = df_dh[df_dh['Country/Region'] == 'Germany']
df_UK_dh = df_dh[df_dh['Country/Region'] == 'United Kingdom']
df_Australia_dh = df_dh[df_dh['Country/Region'] == 'Australia']

In [38]:
# Create the table used for the mortality analysis.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# per-region frames are stacked with one pd.concat call; ignore_index=True
# renumbers the rows 0..n-1.
df_table_1 = pd.concat(
    [df_h2, df_Japan, df_Italy, df_Spain, df_UK, df_Germany, df_Australia, df_n2],
    ignore_index=True,
)
# Placeholder columns, to be filled in afterwards. None keeps them at object
# dtype so integer values assigned later are stored as entered.
df_table_1['Total Deaths Number'] = None
df_table_1['Population'] = None
df_table_1

Unnamed: 0,Country/Region,Total Confirm Number,Total Deaths Number,Population
0,China/Hubei,68135,,
1,Japan,18615,,
2,Italy,240578,,
3,Spain,249271,,
4,United Kingdom,314160,,
5,Germany,195418,,
6,Australia,7920,,
7,US/New York,393454,,


In [39]:
# Population of each region in the mortality table.
region_population = {
    'China/Hubei': 59270000,
    'Japan': 126476461,
    'Italy': 60461826,
    'Spain': 46754778,
    'United Kingdom': 67886011,
    'Germany': 83783942,
    'Australia': 25499884,
    'US/New York': 19450000,
}
# Rows are matched on the region label rather than on row position, so the
# assignment does not depend on the row order of df_table_1 or on the index of
# any other frame.
for region, population in region_population.items():
    df_table_1.loc[df_table_1['Country/Region'] == region, 'Population'] = population
df_table_1

Unnamed: 0,Country/Region,Total Confirm Number,Total Deaths Number,Population
0,China/Hubei,68135,,59270000
1,Japan,18615,,126476461
2,Italy,240578,,60461826
3,Spain,249271,,46754778
4,United Kingdom,314160,,67886011
5,Germany,195418,,83783942
6,Australia,7920,,25499884
7,US/New York,393454,,19450000


In [40]:
# Deaths frame for each region; the figure is read from its 'Total Deaths Number' column.
deaths_by_region = {
    'China/Hubei': df_dh_hubei,
    'Japan': df_Japan_dh,
    'Italy': df_Italy_dh,
    'Spain': df_Spain_dh,
    'United Kingdom': df_UK_dh,
    'Germany': df_Germany_dh,
    'Australia': df_Australia_dh,
    'US/New York': df_dh_ny,
}
# Rows are matched on the region label rather than on row position, so each
# figure lands on the right row regardless of row order and without relying on
# the index of any other frame.
for region, deaths_frame in deaths_by_region.items():
    df_table_1.loc[df_table_1['Country/Region'] == region, 'Total Deaths Number'] = (
        deaths_frame['Total Deaths Number'].values[0]
    )
df_table_1

Unnamed: 0,Country/Region,Total Confirm Number,Total Deaths Number,Population
0,China/Hubei,68135,4512,59270000
1,Japan,18615,972,126476461
2,Italy,240578,34767,60461826
3,Spain,249271,28355,46754778
4,United Kingdom,314160,43815,67886011
5,Germany,195418,8990,83783942
6,Australia,7920,104,25499884
7,US/New York,393454,32032,19450000


In [41]:
# Derive the two mortality rates, then drop the inputs that are no longer shown

# Deaths as a fraction of confirmed cases
df_table_1['Mortality Rate by Confirmed Number'] = (
    df_table_1['Total Deaths Number'] / df_table_1['Total Confirm Number']
)
# Deaths as a fraction of the whole population
df_table_1['Mortality Rate by Population'] = (
    df_table_1['Total Deaths Number'] / df_table_1['Population']
)
df_table_1 = df_table_1.drop(columns=['Population', 'Total Confirm Number'])
df_table_1

Unnamed: 0,Country/Region,Total Deaths Number,Mortality Rate by Confirmed Number,Mortality Rate by Population
0,China/Hubei,4512,0.0662215,7.61262e-05
1,Japan,972,0.052216,7.68522e-06
2,Italy,34767,0.144514,0.000575024
3,Spain,28355,0.113752,0.000606462
4,United Kingdom,43815,0.139467,0.00064542
5,Germany,8990,0.046004,0.0001073
6,Australia,104,0.0131313,4.07845e-06
7,US/New York,32032,0.0814123,0.00164689


In [42]:
# CSS applied to header (th) cells
th_props = [
    ('font-size', '12px'),
    ('text-align', 'center'),
    ('font-weight', 'bold'),
    ('color', '#000000'),
    ('background-color', '#F5F5F5'),
]

# CSS applied to body (td) cells
td_props = [
    ('font-size', '12px'),
    ('color', '#000000'),
]

# One rule per selector, in the form set_table_styles expects
styles = [
    {"selector": "th", "props": th_props},
    {"selector": "td", "props": td_props},
]

# Light-to-purple colormap; darker cells mark larger values within each column
cm = sns.light_palette("purple", as_cmap=True)
df_tb3 = (
    df_table_1.style
    .background_gradient(cmap=cm, subset=['Total Deaths Number', 'Mortality Rate by Confirmed Number', 'Mortality Rate by Population'])
    .format({'Mortality Rate by Confirmed Number': "{:.8%}", 'Mortality Rate by Population': "{:.8%}"})
    .set_table_styles(styles)
)
df_tb3

Unnamed: 0,Country/Region,Total Deaths Number,Mortality Rate by Confirmed Number,Mortality Rate by Population
0,China/Hubei,4512,6.62214721%,0.00761262%
1,Japan,972,5.22159549%,0.00076852%
2,Italy,34767,14.45144610%,0.05750240%
3,Spain,28355,11.37517000%,0.06064621%
4,United Kingdom,43815,13.94671505%,0.06454202%
5,Germany,8990,4.60039505%,0.01072998%
6,Australia,104,1.31313131%,0.00040784%
7,US/New York,32032,8.14123125%,0.16468895%
