# Examining Data from COVID-19 Outbreak

#### Import Libraries

In [1]:
import pandas as pd
from datetime import date, timedelta
import xlrd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

#### Import Latest XLS from ECDC website

In [4]:
# Date of the ECDC workbook to download; pinned to a fixed day rather than date.today()
today = date(2020, 3, 24)

In [5]:
# Zero-padded day and month of `today`, used to fill in the download URL
day, month = today.strftime("%d %m").split()

In [6]:
# Build the download URL from the complete date so the year follows `today`
# instead of being fixed at 2020.
# NOTE: the "disbtribution" spelling is intentional - it is part of the URL this
# notebook downloaded from, so it must not be "corrected".
url = "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-{}.xlsx".format(today.strftime("%Y-%m-%d"))

In [7]:
url

'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-03-24.xlsx'

#### Load into DataFrame

In [8]:
# Fetch the workbook at `url` and parse it into a DataFrame
df = pd.read_excel(url)

In [9]:
df

Unnamed: 0,DateRep,Day,Month,Year,Cases,Deaths,Countries and territories,GeoId
0,2020-03-24,24,3,2020,6,1,Afghanistan,AF
1,2020-03-23,23,3,2020,10,0,Afghanistan,AF
2,2020-03-22,22,3,2020,0,0,Afghanistan,AF
3,2020-03-21,21,3,2020,2,0,Afghanistan,AF
4,2020-03-20,20,3,2020,0,0,Afghanistan,AF
...,...,...,...,...,...,...,...,...
6546,2020-03-19,19,3,2020,2,0,Zambia,ZM
6547,2020-03-24,24,3,2020,0,1,Zimbabwe,ZW
6548,2020-03-23,23,3,2020,0,0,Zimbabwe,ZW
6549,2020-03-22,22,3,2020,1,0,Zimbabwe,ZW


#### Fix Date format from Excel to DateTime

In [10]:
#def read_date(date):
#    return xlrd.xldate.xldate_as_datetime(date, 0)

In [11]:
#df['DateRep'] = pd.to_datetime(df['DateRep'].apply(read_date), errors='coerce')

In [12]:
df

Unnamed: 0,DateRep,Day,Month,Year,Cases,Deaths,Countries and territories,GeoId
0,2020-03-24,24,3,2020,6,1,Afghanistan,AF
1,2020-03-23,23,3,2020,10,0,Afghanistan,AF
2,2020-03-22,22,3,2020,0,0,Afghanistan,AF
3,2020-03-21,21,3,2020,2,0,Afghanistan,AF
4,2020-03-20,20,3,2020,0,0,Afghanistan,AF
...,...,...,...,...,...,...,...,...
6546,2020-03-19,19,3,2020,2,0,Zambia,ZM
6547,2020-03-24,24,3,2020,0,1,Zimbabwe,ZW
6548,2020-03-23,23,3,2020,0,0,Zimbabwe,ZW
6549,2020-03-22,22,3,2020,1,0,Zimbabwe,ZW


#### Set Date as Index

In [13]:
# Index the rows by report date and order them from earliest to latest
df = df.set_index('DateRep').sort_index()

In [14]:
df

Unnamed: 0_level_0,Day,Month,Year,Cases,Deaths,Countries and territories,GeoId
DateRep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-12-31,31,12,2019,0,0,Brazil,BR
2019-12-31,31,12,2019,0,0,Canada,CA
2019-12-31,31,12,2019,0,0,New_Zealand,NZ
2019-12-31,31,12,2019,0,0,Kuwait,KW
2019-12-31,31,12,2019,0,0,Japan,JP
...,...,...,...,...,...,...,...
2020-03-24,24,3,2020,20,1,Hungary,HU
2020-03-24,24,3,2020,20,1,Iceland,IS
2020-03-24,24,3,2020,53,2,India,IN
2020-03-24,24,3,2020,1,0,Guyana,GY


#### Fix Inconsistent Capitalisation of Country Names

In [None]:
# List every distinct country/territory name so inconsistent spellings stand out
countries = df['Countries and territories'].values
np.unique(countries)

In [None]:
# Map each mis-capitalised name to its corrected form and apply the mapping
# to the whole DataFrame in place
df.replace(
    {
        'United kingdom': 'United Kingdom',
        'Czech republic': 'Czech Republic',
        'switzerland': 'Switzerland',
        'CANADA': 'Canada',
    },
    inplace=True,
)

In [None]:
# List the distinct names again, now that the replacements have been applied
countries = df['Countries and territories'].values
np.unique(countries)

#### Examine Global Numbers

In [None]:
# One row per report date: Cases and Deaths summed over all countries
df_global = df.groupby(level=0)[['Cases', 'Deaths']].sum()

In [None]:
df_global.head()

In [None]:
# Append running totals of the daily figures
df_global = df_global.assign(
    Total_Cases=df_global['Cases'].cumsum(),
    Total_Deaths=df_global['Deaths'].cumsum(),
)

In [None]:
df_global

#### Total Confirmed Cases and Deaths Globally

In [None]:
df_global.Cases.sum()

In [None]:
df_global.Deaths.sum()

In [None]:
# Cumulative global curves: one line for cases, one for deaths
fig, ax = plt.subplots(figsize=(15, 7))
ax.plot(df_global.index, df_global['Total_Cases'])
ax.plot(df_global.index, df_global['Total_Deaths'])

# One major tick per week, labelled as abbreviated month + day
ax.xaxis.set_major_locator(mdates.WeekdayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

# Solid black major grid, dotted red minor grid
ax.minorticks_on()
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='red')

ax.set_ylabel('No. of People')
ax.set_title("Total No. of Confirmed Cases and Deaths Globally")
ax.legend(['Total Confirmed Cases', 'Total Deaths'])

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/' + str(today) + ' Total No. of Confirmed Cases and Deaths Globally.png')
plt.show()

#### Daily New Confirmed Cases and Deaths Globally

In [None]:
# Daily global counts as bars; deaths are drawn second, on top of the cases
fig, ax = plt.subplots(figsize=(15, 7))
ax.bar(df_global.index, df_global['Cases'])
ax.bar(df_global.index, df_global['Deaths'])

# One major tick per week, labelled as abbreviated month + day
ax.xaxis.set_major_locator(mdates.WeekdayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

# Solid black major grid, dotted red minor grid
ax.minorticks_on()
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='red')

ax.set_ylabel('No. of People')
ax.set_title("Daily No. of New Confirmed Cases and Deaths Globally")
ax.legend(['New Confirmed Cases', 'New Deaths'])

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/' + str(today) + ' Daily No. of New Confirmed Cases and Deaths Globally.png')
plt.show()

#### Top 20 Countries with the Most Confirmed Cases and Deaths

In [None]:
# Re-key the rows by country/territory name (the report-date index is replaced)
df_countries = df.set_index('Countries and territories')

In [None]:
# Total Cases and Deaths per country/territory.
# Only the two count columns are aggregated: summing the Day/Month/Year numbers
# or the GeoId strings of every row is meaningless, and the cells visible below
# read nothing but Cases and Deaths from this frame.
df_countries = df_countries[['Cases', 'Deaths']].groupby('Countries and territories').sum()

In [None]:
# Smallest case totals first, so the largest totals end up in the last rows
df_countries = df_countries.sort_values(by='Cases')

In [None]:
# Top-20 chart data: df_countries is ordered by ascending Cases (see the sort
# above), so its final 20 rows are the countries with the most confirmed cases
labels = df_countries.index[-20:]
Cases = df_countries['Cases'].iloc[-20:]
Deaths = df_countries['Deaths'].iloc[-20:]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots(figsize=(15, 7))
# These bars are sums over every report date, i.e. totals, so the legend says
# "Total ..." (the previous "New ..." wording belonged to the daily charts)
rects1 = ax.bar(x - width/2, Cases, width, label='Total Confirmed Cases')
rects2 = ax.bar(x + width/2, Deaths, width, label='Total Deaths')

# Axis label, title and one rotated tick label per country
ax.set_ylabel('No. of People')
ax.set_title('Top 20 Countries with the Most Confirmed Cases and Deaths')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)

# Solid black major grid, dotted red minor grid
ax.minorticks_on()
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='red')
ax.legend()


def autolabel(rects, axes=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Parameters
    ----------
    rects : iterable
        Bars to annotate. Each item must provide ``get_height()``,
        ``get_x()`` and ``get_width()`` (e.g. what ``ax.bar`` returns).
    axes : optional
        Axes object whose ``annotate`` method draws the labels. When omitted,
        the notebook-level ``ax`` bound at call time is used, so existing
        ``autolabel(rects)`` calls keep working unchanged.
    """
    target = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        target.annotate(
            '{}'.format(height),
            # anchor point: horizontal centre of the bar, at its top edge
            xy=(rect.get_x() + rect.get_width() / 2, height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center', va='bottom')


# Write the value above every bar of both series
for bars in (rects1, rects2):
    autolabel(bars)

fig.tight_layout()

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/' + str(today) + ' Top 20 Countries with the Most Confirmed Cases and Deaths.png')
plt.show()

#### Find Latest Data from Yesterday

In [None]:
# The day before the report date
yesterday = today - timedelta(days=1)

In [None]:
# Rows reported yesterday, re-keyed by country/territory name.
# - The key is wrapped in pd.Timestamp because newer pandas releases raise
#   KeyError when a DatetimeIndex is looked up with a plain datetime.date
#   (assumes DateRep was parsed as datetimes, as the displayed index suggests).
# - The list-style key makes .loc return a DataFrame even if one row matches.
# - set_index returns a new frame instead of modifying the .loc selection in
#   place, which avoids pandas' SettingWithCopyWarning.
df_yesterday = df.loc[[pd.Timestamp(yesterday)]].set_index('Countries and territories')

In [None]:
# Fewest new cases first, so the highest counts end up in the last rows
df_yesterday = df_yesterday.sort_values(by='Cases')

In [None]:
df_yesterday

#### Total No. of Confirmed Cases and Deaths Yesterday Globally

In [None]:
df_yesterday.Cases.sum()

In [None]:
df_yesterday.Deaths.sum()

In [None]:
# Single-group bar chart of yesterday's worldwide sums
labels = ['Yesterday']
Cases = df_yesterday['Cases'].sum()
Deaths = df_yesterday['Deaths'].sum()

width = 0.35  # the width of the bars
x = np.arange(len(labels))  # the label locations

fig, ax = plt.subplots(figsize=(7, 7))
# Cases bar sits to the left of the tick, deaths bar to the right
rects1 = ax.bar(x - width/2, Cases, width, label='Total New Confirmed Cases')
rects2 = ax.bar(x + width/2, Deaths, width, label='Total New Deaths')

# Axis label, dated title and the single tick label
ax.set(
    ylabel='No. of People',
    title='Total No. of Confirmed Cases and Deaths Globally Yesterday (' + str(yesterday) + ')',
)
ax.set_xticks(x)
ax.set_xticklabels(labels)

# Solid black major grid, dotted red minor grid
ax.minorticks_on()
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='red')
ax.legend()


def autolabel(rects, axes=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Parameters
    ----------
    rects : iterable
        Bars to annotate. Each item must provide ``get_height()``,
        ``get_x()`` and ``get_width()`` (e.g. what ``ax.bar`` returns).
    axes : optional
        Axes object whose ``annotate`` method draws the labels. When omitted,
        the notebook-level ``ax`` bound at call time is used, so existing
        ``autolabel(rects)`` calls keep working unchanged.
    """
    target = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        target.annotate(
            '{}'.format(height),
            # anchor point: horizontal centre of the bar, at its top edge
            xy=(rect.get_x() + rect.get_width() / 2, height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center', va='bottom')


# Write the value above every bar of both series
for bars in (rects1, rects2):
    autolabel(bars)

fig.tight_layout()

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/' + str(today) + ' Total No. of Confirmed Cases and Deaths Globally Yesterday (' + str(yesterday) + ').png')
plt.show()

#### Top 20 Countries with the Most New Confirmed Cases and Deaths Yesterday

In [None]:
# Last 20 rows of df_yesterday, i.e. the 20 highest new-case counts when the
# frame is sorted by ascending Cases
labels = df_yesterday.index[-20:]
Cases = df_yesterday['Cases'][-20:]
Deaths = df_yesterday['Deaths'][-20:]

width = 0.35  # the width of the bars
x = np.arange(len(labels))  # the label locations

fig, ax = plt.subplots(figsize=(15, 7))
# Cases bar sits to the left of each tick, deaths bar to the right
rects1 = ax.bar(x - width/2, Cases, width, label='New Confirmed Cases')
rects2 = ax.bar(x + width/2, Deaths, width, label='New Deaths')

# Axis label, dated title and one rotated tick label per country
ax.set(
    ylabel='No. of People',
    title='Top 20 Countries with the Most New Confirmed Cases and Deaths Yesterday (' + str(yesterday) + ')',
)
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)

# Solid black major grid, dotted red minor grid
ax.minorticks_on()
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='red')
ax.legend()


def autolabel(rects, axes=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Parameters
    ----------
    rects : iterable
        Bars to annotate. Each item must provide ``get_height()``,
        ``get_x()`` and ``get_width()`` (e.g. what ``ax.bar`` returns).
    axes : optional
        Axes object whose ``annotate`` method draws the labels. When omitted,
        the notebook-level ``ax`` bound at call time is used, so existing
        ``autolabel(rects)`` calls keep working unchanged.
    """
    target = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        target.annotate(
            '{}'.format(height),
            # anchor point: horizontal centre of the bar, at its top edge
            xy=(rect.get_x() + rect.get_width() / 2, height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center', va='bottom')


# Write the value above every bar of both series
for bars in (rects1, rects2):
    autolabel(bars)

fig.tight_layout()

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/' + str(today) + ' Top 20 Countries with the Most New Confirmed Cases and Deaths Yesterday ('
            + str(yesterday) + ').png')
plt.show()

#### Find Data for Specific Countries (e.g. DE, UK, CN) and Compare

(Can also be EU-wide by using ```df.loc[df['EU'] == "EU"]```, provided the data has an `EU` column.)

In [None]:
# Rows for the two countries being compared, selected by their GeoId code
df_sub1 = df[df['GeoId'] == "UK"]
df_sub2 = df[df['GeoId'] == "US"]

#### Daily No. of New Confirmed Cases and Deaths in X & Y

In [None]:
# Two stacked panels: df_sub1 on top, df_sub2 underneath
fig, axs = plt.subplots(2, 1, figsize=(15, 10))

# Country names used in the panel titles and in the output file name
title1 = df_sub1['Countries and territories'].unique()[0]
title2 = df_sub2['Countries and territories'].unique()[0]

for panel, frame, country in zip(axs, (df_sub1, df_sub2), (title1, title2)):
    # Daily cases first, daily deaths drawn over them
    panel.bar(frame.index, frame['Cases'])
    panel.bar(frame.index, frame['Deaths'])

    # One major tick per week, labelled as abbreviated month + day
    panel.xaxis.set_major_locator(mdates.WeekdayLocator())
    panel.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

    # Solid black major grid, dotted red minor grid
    panel.minorticks_on()
    panel.grid(which='major', linestyle='-', linewidth='0.5', color='black')
    panel.grid(which='minor', linestyle=':', linewidth='0.5', color='red')
    panel.set_ylabel('No. of People')

    panel.title.set_text("Daily No. of New Confirmed Cases and Deaths in " + country)
    panel.legend(['New Confirmed Cases', 'New Deaths'], loc=2)

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/' + str(today) + ' Daily No. of New Confirmed Cases and Deaths in ' + title1 + ' & ' + title2 + '.png')
plt.show()

#### Total No. of Confirmed Cases and Deaths in X & Y

In [None]:
# Add running totals to each country subset.
# assign() builds a new DataFrame rather than writing columns into the
# filtered selections taken from df, which is what makes pandas emit
# SettingWithCopyWarning for the direct `df_sub1[...] = ...` form.
df_sub1 = df_sub1.assign(
    Total_Cases=df_sub1['Cases'].cumsum(),
    Total_Deaths=df_sub1['Deaths'].cumsum(),
)

df_sub2 = df_sub2.assign(
    Total_Cases=df_sub2['Cases'].cumsum(),
    Total_Deaths=df_sub2['Deaths'].cumsum(),
)


In [None]:
# Two stacked panels: df_sub1 on top, df_sub2 underneath
fig, axs = plt.subplots(2, 1, figsize=(15, 10))

# Country names used in the panel titles and in the output file name
title1 = df_sub1['Countries and territories'].unique()[0]
title2 = df_sub2['Countries and territories'].unique()[0]

for panel, frame, country in zip(axs, (df_sub1, df_sub2), (title1, title2)):
    # Cumulative cases and cumulative deaths as two lines
    panel.plot(frame.index, frame['Total_Cases'])
    panel.plot(frame.index, frame['Total_Deaths'])

    # One major tick per week, labelled as abbreviated month + day
    panel.xaxis.set_major_locator(mdates.WeekdayLocator())
    panel.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

    # Solid black major grid, dotted red minor grid
    panel.minorticks_on()
    panel.grid(which='major', linestyle='-', linewidth='0.5', color='black')
    panel.grid(which='minor', linestyle=':', linewidth='0.5', color='red')
    panel.set_ylabel('No. of People')

    panel.title.set_text("Total No. of Confirmed Cases and Deaths in " + country)
    panel.legend(['Total Confirmed Cases', 'Total Deaths'], loc=2)

# Save under Images/ with the report date as file-name prefix, then display
fig.savefig('Images/'+str(today) + ' Total No. of Confirmed Cases and Deaths in ' + title1 + ' & ' + title2 + '.png')
plt.show()