# COVID-19 Visualizations

This notebook provides some basic visualizations of COVID-19 data. The data comes from the European Centre for Disease Prevention and Control (ECDC).

# Table of Contents
- [Import Modules](#import)
- [Setup](#setup)
- [Scrape & Download](#scrape)
- [Load Data](#load)

# Import Modules <a id='import'></a>

In [2]:
import pandas as pd
import numpy as np
from multiprocessing import Pool
import os
import configparser

# Setup <a id='setup'></a>

In [3]:
# Bind directory references (absolute paths of the project checkout)
REPO_DIR = "/repos/covid19"
DATA_DIR = REPO_DIR + "/data"
SOURCE_DIR = REPO_DIR + "/source"
CONFIG_DIR = REPO_DIR + "/config"

# Import functions from helpers.py, which lives in SOURCE_DIR. The import is
# resolved by temporarily making SOURCE_DIR the working directory; the
# try/finally guarantees the working directory is set back to REPO_DIR even
# when the import itself fails.
os.chdir(SOURCE_DIR)
try:
    from helpers import dir_df, from_url, scrape_link
finally:
    os.chdir(REPO_DIR)

# Dataframe display setup: do not truncate columns when rendering a frame
pd.set_option('display.max_columns', None)

# Parse config file to get url of ECDC website (having a config file is
# overkill for this but it is what it is).
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse, so an empty result means the config was not loaded.
# Fail here with a clear message instead of a KeyError on the lookup below.
config = configparser.ConfigParser()
config_path = CONFIG_DIR + "/covid19.cfg"
if not config.read(config_path):
    raise FileNotFoundError("Config file not found or unreadable: " + config_path)
url = config['specs']['Webpage']

# Scrape & Download <a id='scrape'></a>

In [8]:
# Scrape the webpage for links to .xls files. Exactly one link is expected;
# any other count (including zero) is treated as a scrape failure, and the
# message reports the actual count instead of assuming "more than one".
xls_links = scrape_link(url, ".xls")
assert len(xls_links) == 1, (
    "Scrape returned " + str(len(xls_links)) + " links to .xls files; expected exactly one"
)

# Keep the list and the single link under separate names; url_down is the
# one download link used from here on.
url_down = xls_links[0]

# Download the file into the data directory. The cell displays whatever
# from_url returns.
from_url(url=url_down, dest=DATA_DIR + "/2020_03_15_covid19.xls")

'File could not be downloaded. Check URL'

In [11]:
import wget

# Second download attempt for the same link, this time through the wget
# package rather than the project's from_url helper.
xls_dest = DATA_DIR + "/2020_03_15_covid19.xls"
wget.download(url_down, xls_dest)

AttributeError: 'list' object has no attribute 'decode'

# Load Data <a id='load'></a>

In [155]:
# Build dataframe of all data files available - usually 1 per day
file_df = dir_df(DATA_DIR)
if file_df.empty:
    raise FileNotFoundError("No data files found in " + DATA_DIR)

# The first 10 characters of the file name hold the file date (YYYY_MM_DD),
# which sorts chronologically as a plain string.
# NOTE(review): assumes every file listed by dir_df follows that naming - confirm.
file_df['FILE_DATE'] = file_df.FILE_NAME.str[0:10]

# sort_values returns a new frame, so the result has to be assigned back.
# The index is reset so that position 0 and label 0 both refer to the most
# recent file.
file_df = (
    file_df
    .sort_values(by='FILE_DATE', ascending=False)
    .reset_index(drop=True)
)
latest_file = file_df.iloc[0]

# Read top (most recent) file & rename columns (lord knows what Gaul1Nuts1 means)
df = pd.read_excel(latest_file['FILE_PATH'],
                   names=["DATE_REP", "COUNTRY_NAME",
                          "NEW_CASES", "NEW_DEATHS",
                          "COUNTRY_ABR", "GAUL1NUTS1", "EU"])

# Data cleanup: stamp every row with the date of the file it was loaded from
df['AS_OF_DATE'] = pd.to_datetime(latest_file['FILE_DATE'],
                                  format="%Y_%m_%d")

In [70]:
# Display the table of available data files built in the load step
file_df

Unnamed: 0,FILE_NAME,FILE_PATH,FILE_SIZE,FILE_DATE
0,2020_03_13_covid19.xls,/repos/covid19/data/2020_03_13_covid19.xls,577536,2020_03_13
1,2020_03_12_covid19.xls,/repos/covid19/data/2020_03_12_covid19.xls,567808,2020_03_12


In [69]:
# Summarize the loaded dataframe: column names, non-null counts, dtypes and
# memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4905 entries, 0 to 4904
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   DATE_REP      4905 non-null   datetime64[ns]
 1   COUNTRY_NAME  4905 non-null   object        
 2   NEW_CASES     4905 non-null   int64         
 3   NEW_DEATHS    4905 non-null   int64         
 4   COUNTRY_ABR   4905 non-null   object        
 5   GAUL1NUTS1    485 non-null    object        
 6   EU            4905 non-null   object        
 7   AS_OF_DATE    4905 non-null   datetime64[ns]
dtypes: datetime64[ns](2), int64(2), object(4)
memory usage: 306.7+ KB


In [27]:
# Preview the top 5 rows of the loaded dataframe (the default for head())
df.head()

Unnamed: 0,DATE_REP,COUNTRY_NAME,NEW_CASES,NEW_DEATHS,COUNTRY_ABR,GAUL1NUTS1,EU,AS_OF_DATE
0,2020-03-11,Afghanistan,3,0,AF,,Non-EU/EEA,2020-03-13
1,2020-03-08,Afghanistan,3,0,AF,,Non-EU/EEA,2020-03-13
2,2020-03-02,Afghanistan,0,0,AF,,Non-EU/EEA,2020-03-13
3,2020-03-01,Afghanistan,0,0,AF,,Non-EU/EEA,2020-03-13
4,2020-02-29,Afghanistan,0,0,AF,,Non-EU/EEA,2020-03-13


In [87]:
# Scratch check: does a known data file exist in the data directory?
test = "/".join([DATA_DIR, "2020_03_13_covid19.xls"])

# The dirname value is computed but not shown; only the final expression of
# the cell (the existence check) is displayed.
os.path.dirname(test)
os.path.exists(test)

True

In [106]:
# Scratch check of from_url with the overwrite and make_dir options enabled.
# NOTE(review): the URL looks deliberately malformed to exercise the failure
# path - confirm before reusing it.
test_url = "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-ssssss2020-03-14_1.xls"
file = DATA_DIR + "/test/test.xls"
from_url(
    url=test_url,
    dest=file,
    overwrite=True,
    make_dir=True,
)

'File could not be downloaded. Check URL'

In [161]:
# Show the current working directory of the kernel
os.getcwd()

'/repos/covid19'

In [195]:
# Re-read the config file and look up the webpage url again.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse, so an empty result means the config was not loaded.
# Fail here with a clear message instead of a KeyError on the lookup below.
config = configparser.ConfigParser()
config_path = CONFIG_DIR + "/covid19.cfg"
if not config.read(config_path):
    raise FileNotFoundError("Config file not found or unreadable: " + config_path)
url = config['specs']['Webpage']