# COVID Data Visualization
## Source: https://github.com/owid/covid-19-data/tree/master/public/data

In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [18, 11]
import plotly.express as px
from plotly.subplots import make_subplots

In [2]:
# Load the full Our World in Data COVID-19 dataset straight from GitHub;
# the `date` column is parsed into datetimes at read time.
OWID_CSV_URL = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
df = pd.read_csv(OWID_CSV_URL, parse_dates=['date'])

In [3]:
# Preview the first five rows to check the columns loaded as expected.
df.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
count,114508.0,114505.0,113490.0,103733.0,103888.0,113490.0,113905.0,113902.0,112892.0,103143.0,...,83345.0,82135.0,53636.0,97058.0,114378.0,106842.0,4129.0,4129.0,4275.0,4129.0
mean,1422928.0,6460.530387,6478.195278,36144.06,145.552374,132.53503,17599.630363,83.248566,83.243541,368.820669,...,10.592976,32.7373,50.741617,3.023784,73.259529,0.726512,28797.991911,8.189964,15.036756,701.842569
std,9531309.0,39383.642226,38973.282855,215404.7,793.439125,741.948274,30051.437448,193.749834,163.764583,625.768309,...,10.505467,13.504572,31.753267,2.45328,7.541998,0.150224,83446.267382,16.122688,31.190051,1097.894609
min,1.0,-74347.0,-6223.0,1.0,-1918.0,-232.143,0.001,-3125.829,-272.971,0.001,...,0.1,7.7,1.188,0.1,53.28,0.394,-31959.4,-27.35,-95.92,-1728.844404
25%,1962.0,3.0,9.143,67.0,0.0,0.0,341.505,0.279,1.537,10.179,...,1.9,21.6,19.351,1.3,67.92,0.602,-292.6,-1.6,-1.555,-65.792976
50%,20280.5,92.0,114.143,599.0,2.0,1.714,2672.154,10.6155,14.545,67.531,...,6.3,31.4,49.839,2.4,74.62,0.744,1279.2,4.52,5.64,271.653432
75%,210404.5,922.0,972.96425,5109.0,19.0,15.714,21107.351,81.031,90.47875,444.398,...,19.3,41.1,82.502,3.861,78.74,0.848,17379.8,13.17,21.185,1194.68616
max,234286500.0,905948.0,826439.429,4791862.0,17977.0,14722.857,217440.097,8620.69,3385.473,5978.013,...,44.0,78.1,100.0,13.8,86.75,0.957,675683.4,106.85,372.82,6084.213407


In [23]:
# Total reported deaths per country over the whole period covered by `df`.
#
# The dataset mixes real countries with aggregate rows ("World", "Africa",
# "European Union", ...). Dropping only "World" leaves the other aggregates
# in a chart that is labelled per country. In the OWID data the aggregate
# rows are expected to have an empty `continent` (NOTE(review): confirm
# against the OWID codebook), so keeping rows with a non-null continent
# removes all of them at once.
death_country = (
    df.loc[df['continent'].notna(), ['new_deaths', 'location']]
      .groupby('location')
      .sum()
      .sort_values('new_deaths', ascending=False)
)

# Horizontal bar chart, one bar per country, largest total first.
fig = px.bar(death_country.reset_index(), x="new_deaths", y="location",
             color='location',
             hover_data=["new_deaths", "location"],
             title=f'Deaths due to covid from {df.date.min().date()} to {df.date.max().date()}',
             labels={'new_deaths':'Total Deaths','location':'Country'},
             orientation='h')
fig.show()

In [26]:
# Italy, calendar year 2020: daily new cases (left axis) against cumulative
# cases (right axis) on one figure.
#
# Select on the year itself so that 1 January and 31 December are included;
# strict `>` / `<` comparisons against those dates would drop both days.
df_2020 = df[df['date'].dt.year == 2020]
df_2020_italy = df_2020[df_2020['location'] == 'Italy']

subfig = make_subplots(specs=[[{"secondary_y": True}]])

# Create two independent px.line figures, one per series.
fig = px.line(df_2020_italy, x="date", y="new_cases", render_mode="webgl")
fig2 = px.line(df_2020_italy, x="date", y="total_cases", render_mode="webgl")

# Name the traces and show them in the legend so the two lines can be told apart.
fig.update_traces(name="New cases", showlegend=True)
# The cumulative series is drawn against the secondary (right-hand) y axis.
fig2.update_traces(name="Total cases", showlegend=True, yaxis="y2")

subfig.add_traces(fig.data + fig2.data)
subfig.layout.xaxis.title = "Date"
subfig.layout.yaxis.title = "New cases"
subfig.layout.yaxis2.title = "Total Cases"
subfig.layout.title = 'Daily Covid Cases (New & Cumulative) in Italy'

# Recolouring is necessary, otherwise the lines from fig and fig2 share one colour
# (each px figure starts from the same first colour). `t.marker.color` is
# presumably unset on px.line traces, so this clears the explicit line colour
# and lets the layout assign a distinct colour per trace.
subfig.for_each_trace(lambda t: t.update(line=dict(color=t.marker.color)))
subfig.show()

In [32]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
def filter_by_date(start_date,end_date):
    """Save the rows of ``df`` dated between two days to 'data/filtered_date.csv'.

    Used as an ``interact`` callback, so the result is handed to later steps
    through the CSV file rather than a return value.

    Parameters
    ----------
    start_date, end_date : datetime.date or None
        Inclusive bounds, as delivered by the DatePicker widgets. A cleared
        picker passes ``None``; in that case nothing is written and the
        previously saved file (if any) is left as it was.

    Returns
    -------
    None
    """
    # str(None) is not a date, so comparing it with the date column would fail.
    if start_date is None or end_date is None:
        return

    in_range = (df['date'] >= str(start_date)) & (df['date'] <= str(end_date))
    df_filtered = df[in_range]

    # to_csv does not create missing directories.
    Path('data').mkdir(parents=True, exist_ok=True)
    # index=False keeps the row index out of the file; otherwise it comes back
    # as an extra "Unnamed: 0" column when the CSV is read again.
    df_filtered.to_csv('data/filtered_date.csv', index=False)

def filter_by_country(columns):
    """Save the date-filtered rows for the chosen locations to 'data/filtered_location.csv'.

    The date-filtered data is read from 'data/filtered_date.csv' on every call,
    so the result always reflects the most recently saved date range and the
    function does not rely on a module-level variable having been set first.

    Parameters
    ----------
    columns : iterable of str
        Location names to keep (the SelectMultiple widget passes a tuple).
        An empty selection produces a file with a header and no rows.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If 'data/filtered_date.csv' has not been written yet.
    """
    df_date_filtered = pd.read_csv('data/filtered_date.csv',parse_dates=['date'])
    df_location_filtered = df_date_filtered[df_date_filtered['location'].isin(columns)]
    # index=False avoids adding another "Unnamed: N" column on each CSV round trip.
    df_location_filtered.to_csv('data/filtered_location.csv', index=False)
    
# Date-range pickers, pre-set to the full span of the dataset. `interact`
# runs filter_by_date once immediately and again on every change.
start_picker = widgets.DatePicker(value=df.date.min().date(), description='From')
end_picker = widgets.DatePicker(value=df.date.max().date(), description='To')
interact(filter_by_date, start_date=start_picker, end_date=end_picker)

# Read back what the date filter just saved; its locations populate the
# country selector below.
df_date_filtered = pd.read_csv('data/filtered_date.csv', parse_dates=['date'])
country_selector = widgets.SelectMultiple(
    options=df_date_filtered['location'].unique(),
    description='Country',
    disabled=False,
)
interact(filter_by_country, columns=country_selector)

interactive(children=(DatePicker(value=datetime.date(2020, 1, 1), description='From'), DatePicker(value=dateti…

interactive(children=(SelectMultiple(description='Country', options=('Afghanistan', 'Africa', 'Albania', 'Alge…

<function __main__.filter_by_country(columns)>

In [63]:
# Load the rows saved by the country filter and discard any "Unnamed..."
# columns, which are row indexes picked up during earlier CSV round trips.
df_filtered = (
    pd.read_csv('data/filtered_location.csv', parse_dates=['date'])
      .pipe(lambda frame: frame.loc[:, ~frame.columns.str.startswith("Unnamed")])
)


In [64]:
# Weekly totals of new cases, coloured by the week's mean test positivity rate.
# The aggregation names are passed as strings: handing NumPy callables such as
# np.sum to `agg` is deprecated in recent pandas releases.
weekly_cases = (
    df_filtered
    .resample('W', on='date')
    .agg({'new_cases': 'sum', 'positive_rate': 'mean'})
    .reset_index()
)

# The data aggregates every selected location, so the title lists all of
# them rather than only the first one.
selected_locations = ', '.join(map(str, df_filtered['location'].dropna().unique()))

fig = px.bar(weekly_cases,
             x='date', y='new_cases', color='positive_rate',
             # Bars are weekly sums, so the axis is labelled "Weekly", not "Daily".
             labels={'new_cases':'Weekly new cases','date':'Date','positive_rate':'Test Positivity Rate'},
             title=f'Weekly new cases + Test positivity Rate in {selected_locations}')
fig.show()