# Collecting data and saving it as CSV files

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import codecs
import pandas as pd
import datetime

Collecting data on international borders from Wikipedia

In [None]:
# Wikipedia list of countries and territories by land and maritime borders.
WIKI_URL = (
    "https://en.wikipedia.org/wiki/"
    "List_of_countries_and_territories_by_land_and_maritime_borders"
)

In [None]:
# Download the Wikipedia page and extract the table listing current international borders.
response = requests.get(WIKI_URL, timeout=30)  # bounded wait instead of hanging forever
response.raise_for_status()  # fail loudly rather than parsing an HTTP error page
website_url = response.text  # NOTE: despite its name, this holds the page HTML, not a URL
soup = BeautifulSoup(website_url, 'lxml')
data_table = soup.find('table', {'class': 'wikitable sortable'})
if data_table is None:
    raise RuntimeError(
        "Could not find the 'wikitable sortable' table on the page; "
        "the page layout may have changed"
    )
# The first three <tr> elements are skipped (presumably header rows - confirm against the page).
rows = data_table.find_all('tr')[3:]

In [None]:
# Build one record per table row: [country_name, neighbour_1, neighbour_2, ...].
countries = []
for table_row in rows:
    cells = table_row.find_all("td")
    # Rows without data cells, or whose first cell has no link, carry no country name.
    if not cells or cells[0].a is None:
        continue
    country = cells[0].a.string
    if country is None:
        # The link has no single text node (e.g. nested markup), so no usable name.
        continue
    record = [country]
    neighbours_cell = cells[-1]  # the last column holds the neighbouring countries
    if neighbours_cell.small is not None:
        # Remove the first <small> element so that links inside it are not collected below.
        neighbours_cell.small.decompose()
    for link in neighbours_cell.find_all("a"):
        neighbour = link.string
        # Skip links without plain text and bracketed links such as "[1]" (footnote markers).
        if neighbour and not neighbour.startswith('['):
            record.append(neighbour)
    countries.append(record)

In [None]:
# This code was used to save the border data to CSV. Do not re-run it: after the first run, the saved data was modified for consistency.
Saving data as CSV file
# Write the scraped rows to ../data/borders_raw.csv, but only if the file does not exist yet.
# Mode "x" (exclusive creation) refuses to overwrite an existing file, so an accidental
# re-run (e.g. "Run All") cannot clobber data that was saved earlier.
# newline="" lets the csv module control line endings; "utf-8-sig" writes a BOM as before.
try:
    with open("../data/borders_raw.csv", "x", encoding="utf-8-sig", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(countries)
except FileExistsError:
    print("../data/borders_raw.csv already exists - leaving it untouched")

Downloading COVID-19 data

In [4]:
# ECDC open-data endpoint for the COVID-19 case distribution in CSV form.
COVID_URL = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'

In [5]:
# Download the COVID-19 CSV and store it under ../data/.
r = requests.get(COVID_URL, allow_redirects=True, timeout=60)
r.raise_for_status()  # do not save an HTTP error page as if it were the dataset
# The context manager guarantees the file is flushed and closed.
with open("../data/covid_data.csv", "wb") as covid_file:
    covid_file.write(r.content)
len(r.content)  # number of bytes downloaded, displayed as the cell output

1123627

Loading data

In [2]:
# Read the previously downloaded COVID-19 CSV into a DataFrame.
covid_csv_path = "../data/covid_data.csv"
cases = pd.read_csv(covid_csv_path)

In [3]:
# Upper-case the country names, keep the date / cases / deaths columns,
# and index the rows by country name.
selected_columns = ['day', 'month', 'year', 'cases', 'deaths', 'countriesAndTerritories']
cases = (
    cases
    .assign(countriesAndTerritories=cases['countriesAndTerritories'].map(str.upper))
    .loc[:, selected_columns]
    .set_index('countriesAndTerritories')
)

In [4]:
# NumPy arrays like this one will be used in the graph representation:
# a dictionary with country names as keys and one array per country.
# Example: the array for Afghanistan.
cases.loc['AFGHANISTAN', :].to_numpy()

array([[  25,    5, 2020,  584,    2],
       [  24,    5, 2020,  782,   11],
       [  23,    5, 2020,  540,   12],
       [  22,    5, 2020,  531,    6],
       [  21,    5, 2020,  492,    9],
       [  20,    5, 2020,  581,    5],
       [  19,    5, 2020,  408,    4],
       [  18,    5, 2020,  262,    1],
       [  17,    5, 2020,    0,    0],
       [  16,    5, 2020, 1063,   32],
       [  15,    5, 2020,  113,    6],
       [  14,    5, 2020,  259,    3],
       [  13,    5, 2020,  280,    5],
       [  12,    5, 2020,  285,    2],
       [  11,    5, 2020,  369,    5],
       [  10,    5, 2020,  255,    6],
       [   9,    5, 2020,  215,    3],
       [   8,    5, 2020,  171,    2],
       [   7,    5, 2020,  168,    9],
       [   6,    5, 2020,  330,    5],
       [   5,    5, 2020,  190,    5],
       [   4,    5, 2020,  235,   13],
       [   3,    5, 2020,  134,    4],
       [   2,    5, 2020,  164,    4],
       [   1,    5, 2020,  222,    4],
       [  30,    4, 2020,

In [9]:
# Load the border list into a dict: first field of each CSV row -> list of the remaining fields.
# Every name is normalised (spaces -> underscores, upper-cased) so it can be compared
# with the upper-cased country names of the COVID dataset.
# csv.reader is used instead of str.split(',') so that quoted fields containing commas
# stay in one piece; newline="" is what the csv module expects.
borders = {}
with open("../data/borders.csv", mode="r", encoding="utf-8-sig", newline="") as file:
    for fields in csv.reader(file):
        if not fields:
            continue  # blank line - nothing to record
        neighbours = [field.replace(' ', '_').upper() for field in fields]
        borders[neighbours[0]] = neighbours[1:]

In [11]:
# All country names present in the borders dataset, in file order.
keys = [*borders]

Creating a mapping between the country names in the COVID dataset and those in the borders dataset

In [21]:
# Distinct country names (the index of `cases`), in order of first appearance.
countries_from_covid_dataset = cases.index.drop_duplicates().tolist()

In [22]:
# Print every COVID-dataset country whose upper-cased name has no entry in `borders`.
missing_from_borders = (
    name for name in countries_from_covid_dataset if name.upper() not in borders
)
for name in missing_from_borders:
    print(name)

BERMUDA
BONAIRE, SAINT EUSTATIUS AND SABA
BRUNEI_DARUSSALAM
CASES_ON_AN_INTERNATIONAL_CONVEYANCE_JAPAN
CONGO
COTE_DIVOIRE
CZECHIA
ESWATINI
FALKLAND_ISLANDS_(MALVINAS)
GAMBIA
GUINEA_BISSAU
HOLY_SEE
SAO_TOME_AND_PRINCIPE
TIMOR_LESTE
UNITED_REPUBLIC_OF_TANZANIA
UNITED_STATES_OF_AMERICA


In [24]:
# Maps a country name as spelled in the COVID dataset (key) to the spelling used in the
# borders dataset (value), for the names reported as unmatched.
# NOTE(review): keys and values are mixed-case here, while the `cases` index and the
# `borders` keys are upper-cased; presumably these names are normalised before lookup - confirm.
mapping_countries_names = {
    'Bermuda': 'United_Kingdom',
    'Bonaire, Saint Eustatius and Saba': 'Netherlands',
    'Brunei_Darussalam': 'Brunei',
    # 'Cases_on_an_international_conveyance_Japan' is the Diamond Princess - not a country,
    # so it is deliberately left unmapped.
    'Congo': 'Republic_of_the_Congo',
    'Cote_dIvoire': "Côte_d'Ivoire",
    'Czechia': 'Czech_Republic',
    'Eswatini': 'Eswatini_(Swaziland)',
    'Falkland_Islands_(Malvinas)': 'Falkland_Islands',
    'Gambia': 'The_Gambia',
    'Guinea_Bissau': 'Guinea-Bissau',
    'Holy_See': 'Vatican_City',
    'Sao_Tome_and_Principe': 'São_Tomé_and_Príncipe',
    'Timor_Leste': 'East_Timor',
    'United_Republic_of_Tanzania': 'Tanzania',
    'United_States_of_America': 'United_States',
}