# Covid Around The World

![Drag Racing](../images/covid.jpeg)

## Install Widgets
ipwidgets is a library that allows you to interact with your data.

In [12]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

## Import Data Analysis Libraries
These are Python libraries specifically designed to manuplate  eg import, join datasets, analyise, filter, and clean data.

In [13]:
import pandas as pd
import numpy as np

## Get Source Data
Data is sourced from a git hub repository for Johns Hopkins University COVID Data. The information is updated daily. The link is here:https://github.com/CSSEGISandData. <br>
<b>Note</b> Data is updated daily.


In [47]:
#Recorded deaths since 1/1/2020
death_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")
#Confirmed Cases since 1/1/2020
confirmed_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
#Recoveries since C since 1/1/2020
recovered_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")
#Summanation of data by Country
country_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv", index_col=[0])

In [48]:
# Check country data (Just type in the name of the dataframe and how many rows from the top(head))
country_df.head(3)

Unnamed: 0_level_0,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Afghanistan,2021-02-06 11:22:36,33.93911,67.709953,55265.0,2407.0,48027.0,4831.0,141.965978,,,4.355379,4,AFG
Albania,2021-02-06 11:22:36,41.1533,20.1683,83082.0,1433.0,50219.0,31430.0,2886.997012,,,1.724802,8,ALB
Algeria,2021-02-06 11:22:36,28.0339,1.6596,108629.0,2909.0,74330.0,31390.0,247.722728,,,2.677922,12,DZA


In [49]:
# Check data has been imported correctly
confirmed_df.head(3)


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/27/21,1/28/21,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,54854,54891,54939,55008,55023,55059,55121,55174,55231,55265
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,74567,75454,76350,77251,78127,78992,79934,80941,81993,83082
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,106359,106610,106887,107122,107339,107578,107841,108116,108381,108629


In [50]:
#Get the sape of the data
print (confirmed_df.shape)
print (recovered_df.shape)
print (death_df.shape)

(273, 385)
(258, 385)
(273, 385)


## Data Cleansing
Data Cleansing is an important step in analysing data:
Here we change the column headings to lower case and simplify column heading names, and eliminate missing data. (Note normally you would not eliminate missing data without carrying out a missing data analysis)

In [51]:
# Convert to Lowercase
#country_df
country_df.columns = map(str.lower,country_df.columns)
#confirmed_df
confirmed_df.columns = map(str.lower,confirmed_df.columns)
#recovered_df
recovered_df.columns = map(str.lower,recovered_df.columns)
#death_df
death_df.columns = map(str.lower,death_df.columns)


In [52]:
# Check 
# country_df.head(2)
#confirmed_df.head(2)
recovered_df.head(2)
# death_df.head(2)

Unnamed: 0,province/state,country/region,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/27/21,1/28/21,1/29/21,1/30/21,1/31/21,2/1/21,2/2/21,2/3/21,2/4/21,2/5/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,47549,47583,47606,47609,47679,47723,47798,47982,47995,48018
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,45383,45906,46420,46927,47424,47922,48377,48958,49543,50219


In [53]:
# Rename columns to make it easier to refer to them
country_df = country_df.rename(columns={"country_region":"country","long_":"long"})
confirmed_df =confirmed_df.rename(columns = {'province/state':'state','country/region':'country'} )
recovered_df = recovered_df.rename(columns = {'province/state':'state','country/region':'country' } )
death_df =death_df.rename(columns = {'province/state':'state','country/region':'country' } )

In [54]:
# displaying the datatypes and converting to integers
#display(country_df.dtypes) 
  
# converting 'Field_2' and 'Field_3' from float to int 
country_df['confirmed'] = country_df['confirmed'].apply(np.int64) 
country_df['deaths'] = country_df['deaths'].apply(np.int64) 
country_df['recovered'] = country_df['confirmed'].apply(np.int64) 
country_df['active'] = country_df['deaths'].apply(np.int64)   
# displaying the datatypes 
#display(country_df.dtypes) 

In [55]:
# Recheck
country_df.head(2)
# confirmed_df.head(2)
# recovered_df.head(2)
# death_df.head(2)

Unnamed: 0_level_0,last_update,lat,long,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Afghanistan,2021-02-06 11:22:36,33.93911,67.709953,55265,2407,55265,2407,141.965978,,,4.355379,4,AFG
Albania,2021-02-06 11:22:36,41.1533,20.1683,83082,1433,83082,1433,2886.997012,,,1.724802,8,ALB


## Sort Country Data 
Data is sorted in order of the number of confirmed cases. <br>
**Note** we are creating a new dataframe.

In [56]:
sorted_country_df = country_df.sort_values('confirmed', ascending = False)

In [57]:
sorted_country_df.head(10)

Unnamed: 0_level_0,last_update,lat,long,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
US,2021-02-06 11:22:36,40.0,-100.0,26814845,459571,26814845,459571,8138.873804,,,1.713868,840,USA
India,2021-02-06 11:22:36,20.593684,78.96288,10814304,154918,10814304,154918,783.642727,,,1.432529,356,IND
Brazil,2021-02-06 11:22:36,-14.235,-51.9253,9447165,230034,9447165,230034,4444.482154,,,2.434953,76,BRA
United Kingdom,2021-02-06 11:22:36,55.0,-3.0,3922910,111477,3922910,111477,5778.672729,,,2.841691,826,GBR
Russia,2021-02-06 11:22:36,61.524,105.3188,3907653,75010,3907653,75010,2677.676678,,,1.919567,643,RUS
France,2021-02-06 11:22:36,46.2276,2.2137,3355678,78749,3355678,78749,5140.949058,,,2.346739,250,FRA
Spain,2021-02-06 11:22:36,40.463667,-3.74922,2941990,61386,2941990,61386,6292.382963,,,2.086547,724,ESP
Italy,2021-02-06 11:22:36,41.8719,12.5674,2611659,90618,2611659,90618,4319.517101,,,3.469749,380,ITA
Turkey,2021-02-06 11:22:36,38.9637,35.2433,2516889,26577,2516889,26577,2984.250466,,,1.055946,792,TUR
Germany,2021-02-06 11:22:36,51.165691,10.451526,2282684,61338,2282684,61338,2724.488564,,,2.6871,276,DEU


In [58]:
# Create a function that will colour some of the columns for emphasis
def highlight_col(x):
    r = 'background-color: #7ED5EA'
    p = 'background-color: #63BCE5'
    g = 'background-color: #4B9FE1'
    temp_df = pd.DataFrame ('',index = x.index, columns = x.columns)
    temp_df.iloc[:,4]  = p
    temp_df.iloc[:,5]  = r
    temp_df.iloc[:,6]  = g
    return temp_df

In [59]:
# Print the first 5 and style (calling the function)
sorted_country_df.head(5).style.apply(highlight_col, axis = None)

Unnamed: 0_level_0,last_update,lat,long,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
US,2021-02-06 11:22:36,40.0,-100.0,26814845,459571,26814845,459571,8138.873804,,,1.713868,840,USA
India,2021-02-06 11:22:36,20.593684,78.96288,10814304,154918,10814304,154918,783.642727,,,1.432529,356,IND
Brazil,2021-02-06 11:22:36,-14.235,-51.9253,9447165,230034,9447165,230034,4444.482154,,,2.434953,76,BRA
United Kingdom,2021-02-06 11:22:36,55.0,-3.0,3922910,111477,3922910,111477,5778.672729,,,2.841691,826,GBR
Russia,2021-02-06 11:22:36,61.524,105.3188,3907653,75010,3907653,75010,2677.676678,,,1.919567,643,RUS


In [60]:

sorted_country_df.style.format({
    'confirmed': '{:,.2f}'.format,
    'deaths': '{:,.2f}'.format,
    'recovered': '{:,.2%}'.format,
})
#sorted_country_df.head(3)

Unnamed: 0_level_0,last_update,lat,long,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
US,2021-02-06 11:22:36,40.0,-100.0,26814845.0,459571.0,"2,681,484,500.00%",459571,8138.873804,,,1.713868,840,USA
India,2021-02-06 11:22:36,20.593684,78.96288,10814304.0,154918.0,"1,081,430,400.00%",154918,783.642727,,,1.432529,356,IND
Brazil,2021-02-06 11:22:36,-14.235,-51.9253,9447165.0,230034.0,"944,716,500.00%",230034,4444.482154,,,2.434953,76,BRA
United Kingdom,2021-02-06 11:22:36,55.0,-3.0,3922910.0,111477.0,"392,291,000.00%",111477,5778.672729,,,2.841691,826,GBR
Russia,2021-02-06 11:22:36,61.524,105.3188,3907653.0,75010.0,"390,765,300.00%",75010,2677.676678,,,1.919567,643,RUS
France,2021-02-06 11:22:36,46.2276,2.2137,3355678.0,78749.0,"335,567,800.00%",78749,5140.949058,,,2.346739,250,FRA
Spain,2021-02-06 11:22:36,40.463667,-3.74922,2941990.0,61386.0,"294,199,000.00%",61386,6292.382963,,,2.086547,724,ESP
Italy,2021-02-06 11:22:36,41.8719,12.5674,2611659.0,90618.0,"261,165,900.00%",90618,4319.517101,,,3.469749,380,ITA
Turkey,2021-02-06 11:22:36,38.9637,35.2433,2516889.0,26577.0,"251,688,900.00%",26577,2984.250466,,,1.055946,792,TUR
Germany,2021-02-06 11:22:36,51.165691,10.451526,2282684.0,61338.0,"228,268,400.00%",61338,2724.488564,,,2.6871,276,DEU


In [61]:
# import ipywidgets as widgets
# from ipywidgets import interactive
 
# items = ['All']+sorted(sorted_country_df['country'].unique().tolist())
 
def view2(y=3):
    return sorted_country_df.head(y).style.apply(highlight_col, axis = None) 
a_slider = widgets.IntSlider(min=0, max=30, step=1, value=5)

widgets.interact(view2, y= a_slider)                           

interactive(children=(IntSlider(value=5, description='y', max=30), Output()), _dom_classes=('widget-interact',…

<function __main__.view2(y=3)>

## Import Data Visulation Library
There are many Python Graphical libraries that can be used. In this case we are going to use a library called Plotly. It can be installed using Pip Install or Conda install. It can be found @ https://plotly.com/python/ 

In [62]:
import plotly.express as px

In [63]:
fig = px.scatter(sorted_country_df.head(10), x = 'country', y = 'confirmed', size = 'confirmed',
                color = 'country', hover_name = 'country', size_max = 60)

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['last_update', 'lat', 'long', 'confirmed', 'deaths', 'recovered', 'active', 'incident_rate', 'people_tested', 'people_hospitalized', 'mortality_rate', 'uid', 'iso3'] but received: country

In [64]:
fig.show()

In [65]:
import plotly.graph_objects as go
def plot_cases_for_country(country):
    labels = ['confirmed', 'deaths']
    colors = ['blue', 'red']
    mode_size = [6,8]
    line_size = [4,6]
    df_list = [confirmed_df,death_df]
    fig = go.Figure()
    for i, df in enumerate(df_list):
        if country == 'World' or country == 'world':
            x_data = np.array(list(df.iloc[:,5:].columns))
            y_data = np.sum(np.asarray(df.iloc[:,5:]), axis=0)
        else:
            x_data = np.array(list(df.iloc[:,5:].columns))
            y_data = np.sum(np.asarray(df[df['country']==country].iloc[:,5:]), axis=0)
        fig.add_trace(go.Scatter(x=x_data, y= y_data, mode = 'lines+markers',
                                    name=labels[i],
                                    line = dict(color = colors[i], width = line_size[i]),
                                     connectgaps = True,
                                     text = "Total "+ str(labels[i])+ ": "+ str(y_data[-1])
                                               ))
    fig.show()

In [66]:
# plot_cases_for_country('Ireland')
interact(plot_cases_for_country, country ='World');


interactive(children=(Text(value='World', description='country'), Output()), _dom_classes=('widget-interact',)…

## World Maps
Folium is a Python library used for visualizing geospatial data. In other words, Folium is a Python Library that enables you to import maps from around the world and allows you to represent data on them. For this excercise we will be using the Longitude(long) and Latitude(lat) to identify where our data comes from. Again you will have to install Folium using eithe Pip install or Conda install.

In [67]:
import folium
confirmed_df=confirmed_df.dropna(subset=['long'])

confirmed_df=confirmed_df.dropna(subset=['lat'])

In [68]:
world_map = folium.Map(location =[11,0],tiles = 'cartodbpositron', zoom_start = 2, max_zoom = 6, min_zoom = 2)


for i in range(0,len(confirmed_df)):
    folium.Circle(
        location=[confirmed_df.iloc[i]['lat'], confirmed_df.iloc[i]['long']],
        fill=True,
        radius=(int((np.log(confirmed_df.iloc[i,-1]+1.00001)))+0.2)*50000,
        color='red',
        fill_color='indigo',
        tooltip = "<div style='margin: 0; background-color: black; color: white;'>"+
                    "<h4 style='text-align:center;font-weight: bold'>"+confirmed_df.iloc[i]['country'] + "</h4>"
                    "<hr style='margin:10px;color: white;'>"+
                    "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
                        "<li>Confirmed: "+str(confirmed_df.iloc[i,-1])+"</li>"+
                        "<li>Deaths:   "+str(death_df.iloc[i,-1])+"</li>"+
                        "<li>Death Rate: "+ str(np.round(death_df.iloc[i,-1]/(confirmed_df.iloc[i,-1]+1.00001)*100,2))+ "</li>"+
                    "</ul></div>",
        ).add_to(world_map)

world_map
                             
                             

In [None]:
pd.set_option('display.max_rows', confirmed_df.shape[0]+1)
confirmed_df