# Cryptocurrencies and COVID-19

In [1]:
%%capture
# Install the packages from where the data is going to be obtained
!pip install covid19dh
!pip install --upgrade covid19dh
!pip install yfinance

In [2]:
# Import the required libraries
import numpy as np
import pandas as pd
from covid19dh import covid19
import yfinance as yf
import datetime as dt

In [3]:
# Fetch the COVID dataset through the covid19dh package.
# The call returns two values: the data frame and a second object kept in `src`.
Covid_data, src = covid19(verbose=False, raw=False)

In [4]:
# Preview the first rows of the COVID dataset to see which columns are available
Covid_data.head()

Unnamed: 0,id,date,vaccines,tests,confirmed,recovered,deaths,hosp,vent,icu,...,currency,administrative_area_level,administrative_area_level_1,administrative_area_level_2,administrative_area_level_3,latitude,longitude,key,key_apple_mobility,key_google_mobility
0,AFG,2020-01-01,0.0,0,0,0,0,0.0,0,0,...,AFN,1,Afghanistan,,,33.0,65.0,,,AF
1,AFG,2020-01-02,0.0,0,0,0,0,0.0,0,0,...,AFN,1,Afghanistan,,,33.0,65.0,,,AF
2,AFG,2020-01-03,0.0,0,0,0,0,0.0,0,0,...,AFN,1,Afghanistan,,,33.0,65.0,,,AF
3,AFG,2020-01-04,0.0,0,0,0,0,0.0,0,0,...,AFN,1,Afghanistan,,,33.0,65.0,,,AF
4,AFG,2020-01-05,0.0,0,0,0,0,0.0,0,0,...,AFN,1,Afghanistan,,,33.0,65.0,,,AF


In [5]:
# Keep only the country ID, date, number of confirmed cases and number of deaths
Covid_data = Covid_data.loc[:, ['id', 'date', 'confirmed', 'deaths']]

In [6]:
# Confirm that only the four selected columns remain, and inspect their dtypes and non-null counts
Covid_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93331 entries, 0 to 93330
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   id         93331 non-null  object        
 1   date       93331 non-null  datetime64[ns]
 2   confirmed  93331 non-null  int64         
 3   deaths     93331 non-null  int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 3.6+ MB


In [7]:
# List the distinct country IDs present in the dataset
Covid_data['id'].unique()

array(['AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ASM', 'ATG',
       'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR',
       'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB',
       'BRN', 'BTN', 'BWA', 'CAC', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN',
       'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB',
       'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM', 'DPC', 'DZA',
       'ECU', 'EGY', 'ERI', 'ESP', 'EST', 'ETH', 'FIN', 'FJI', 'FRA',
       'GAB', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB', 'GNB', 'GNQ', 'GPC',
       'GRC', 'GRD', 'GTM', 'GUM', 'GUY', 'HND', 'HRV', 'HTI', 'HUN',
       'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM',
       'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KNA', 'KOR', 'KWT',
       'LAO', 'LBN', 'LBR', 'LBY', 'LCA', 'LIE', 'LKA', 'LSO', 'LTU',
       'LUX', 'LVA', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEX', 'MHL',
       'MKD', 'MLI', 'MLT', 'MMR', 'MNE', 'MNG', 'MNP', 'MOZ', 'MRT',
       'MSZ', 'MUS',

In [8]:
# The 38 OECD countries, written as the three-letter codes used in the `id` column
OECD_list = [
    "AUS", "AUT", "BEL", "CAN", "CHL", "COL", "CRI", "CZE",
    "DNK", "ESP", "EST", "FIN", "FRA", "GBR", "DEU", "GRC",
    "HUN", "IRL", "ISL", "ISR", "ITA", "JPN", "KOR", "LVA",
    "LTU", "LUX", "MEX", "NLD", "NOR", "POL", "NZL", "PRT",
    "SVK", "SVN", "SWE", "CHE", "TUR", "USA",
]

In [9]:
# Boolean mask that is True for rows whose country ID is one of the OECD codes.
# `isin` checks exact membership (not substring containment), so an ID that merely
# contains an OECD code as part of a longer string is not selected, and the test
# runs vectorised instead of calling a Python lambda once per row.
mask_OECD = Covid_data['id'].isin(OECD_list)

In [10]:
# Filter the data down to the masked rows and confirm that only the desired countries remain
Covid_OECD = Covid_data.loc[mask_OECD]
Covid_OECD['id'].unique()

array(['AUS', 'AUT', 'BEL', 'CAN', 'CHE', 'CHL', 'COL', 'CRI', 'CZE',
       'DEU', 'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR', 'GRC', 'HUN',
       'IRL', 'ISL', 'ISR', 'ITA', 'JPN', 'KOR', 'LTU', 'LUX', 'LVA',
       'MEX', 'NLD', 'NOR', 'NZL', 'POL', 'PRT', 'SVK', 'SVN', 'SWE',
       'TUR', 'USA'], dtype=object)

In [11]:
# Inspect the OECD subset: 17,822 rows = 469 days of data for each of the 38 countries,
# with no missing values in any of the four columns
Covid_OECD.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17822 entries, 4221 to 87702
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   id         17822 non-null  object        
 1   date       17822 non-null  datetime64[ns]
 2   confirmed  17822 non-null  int64         
 3   deaths     17822 non-null  int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 696.2+ KB


In [12]:
# Add up confirmed cases and deaths across all OECD countries for each date,
# then turn `date` back into a regular column
Covid_OECD_final = (
    Covid_OECD
    .groupby('date')[['confirmed', 'deaths']]
    .sum()
    .reset_index()
)
Covid_OECD_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 469 entries, 0 to 468
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       469 non-null    datetime64[ns]
 1   confirmed  469 non-null    int64         
 2   deaths     469 non-null    int64         
dtypes: datetime64[ns](1), int64(2)
memory usage: 11.1 KB


In [13]:
# Report the first and last dates covered by the aggregated OECD series
oecd_dates = Covid_OECD_final['date']
print(f"start date: {oecd_dates.min()} ")
print(f"end date: {oecd_dates.max()}")

start date: 2020-01-01 00:00:00 
end date: 2021-04-13 00:00:00


## Crypto data