# Spain Covid Cases

In [1]:
import datetime
import os
import time
import joblib
import numpy as np
import pandas as pd

# choose font - can be deactivated
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Inconsolata']
# need many figures for index.ipynb and germany.ipynb
rcParams['figure.max_open_warning'] = 50

import matplotlib.pyplot as plt
plt.style.use('ggplot')

# suppress warning
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

LW = 3   # line width

# set up joblib memory to avoid re-fetching files
joblib_location = "./cachedir"
joblib_memory = joblib.Memory(joblib_location, verbose=0)

%config InlineBackend.figure_formats = ['svg']
%matplotlib inline
# Alternative plotting backend for interative data exploration
# %matplotlib notebook

from coronavirus import overview, fetch_data_germany, germany_get_region

# If you want to edit the source in the notebook, try "%load coronavirus.py" 
# and comment out the import statement above.

In [10]:
def clear_cache():
    """Need to run this before new data for the day is created"""
    joblib_memory.clear()

In [None]:
#spanish_cases_web = "https://covid19.isciii.es/resources/serie_historica_acumulados.csv"
#cases_web = pd.read_csv(spanish_cases_web, encoding="ISO-8859-1", engine="python", skipfooter=4)

In [3]:
spanish_regions = ["Melilla", "Islas Baleares", "Navarra", "Asturias", "Extremadura", "Canarias",
                   "Cataluña", "País Vasco", "Castilla y León", "Com. Valenciana", "Castilla-La Mancha",
                   "Aragón", "Madrid", "Galicia", "Ceuta", "La Rioja", "Murcia", "Cantabria",
                   "Andalucía"]

In [None]:
#acronyms = list(set(cases_web["CCAA"]))
#regions = dict(zip(acronyms, spanish_regions))

In [4]:
def rename_columns(spanish_data):
    """Rename columns for non-spanish speakers.    
    """
    return spanish_data.rename(columns={'CCAA': 'Admin. region code',
                                        'FECHA': 'Date',
                                        'CASOS': 'Cases',
                                        'Hospitalizados': 'Hospitalized',
                                        'UCI': 'ICU',
                                        'Fallecidos': 'Deceases',
                                        'Recuperados': 'Recovered'}, inplace=True)

In [11]:
#@joblib_memory.cache
def fetch_data_germany_last_execution():
    return datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")

#@joblib_memory.cache
def fetch_data_spain():
    """Data source is https://covid19.isciii.es. The text on the webpage implies that 
    the data comes from the Minitry of Health. """

    datasource = "https://covid19.isciii.es/resources/serie_historica_acumulados.csv"
    t0 = time.time()
    print(f"Please be patient - downloading data from {datasource} ...")
    spain = pd.read_csv(datasource, encoding="ISO-8859-1", engine="python", skipfooter=4)
    rename_columns(spain)
    delta_t = time.time() - t0
    print(f"Completed downloading {len(spain)} rows in {delta_t:.1f} seconds.")

    g2 = spain.set_index(pd.to_datetime(spain['Date']))
    g2.drop(columns=['Date'],inplace=True)
    g2.index.name = 'date'
    last_day = g2.index.max()
    sel = g2.index == last_day
    cleaned = g2.drop(g2[sel].index, inplace=False)
    fetch_data_germany_last_execution()
    return cleaned

In [12]:
spain = fetch_data_spain()
spain

Please be patient - downloading data from https://covid19.isciii.es/resources/serie_historica_acumulados.csv ...
Completed downloading 1045 rows in 0.3 seconds.


Unnamed: 0_level_0,Admin. region code,Cases,Hospitalized,ICU,Deceases,Recovered
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-20,AN,,,,,
2020-02-20,AR,,,,,
2020-02-20,AS,,,,,
2020-02-20,IB,1.0,,,,
2020-02-20,CN,1.0,,,,
...,...,...,...,...,...,...
2020-04-14,ML,102.0,43.0,3.0,2.0,24.0
2020-04-14,MC,1520.0,574.0,97.0,109.0,513.0
2020-04-14,NC,4246.0,1731.0,124.0,252.0,808.0
2020-04-14,PV,11475.0,5750.0,471.0,902.0,5428.0


In [13]:
def map_regions(spanish_data):
    acronyms = list(set(spanish_data['Admin. region code']))
    regions = dict(zip(acronyms, spanish_regions))
    spanish_data['Admin. region'] = spanish_data['Admin. region code'].map(regions)
    #print(spanish_data.head())
    spanish_data.drop(columns=['Admin. region code'], inplace=True)
    return spanish_data

In [14]:
map_regions(spain)

Unnamed: 0_level_0,Cases,Hospitalized,ICU,Deceases,Recovered,Admin. region
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-20,,,,,,Cantabria
2020-02-20,,,,,,Castilla y León
2020-02-20,,,,,,Castilla-La Mancha
2020-02-20,1.0,,,,,Com. Valenciana
2020-02-20,1.0,,,,,Melilla
...,...,...,...,...,...,...
2020-04-14,102.0,43.0,3.0,2.0,24.0,Aragón
2020-04-14,1520.0,574.0,97.0,109.0,513.0,Navarra
2020-04-14,4246.0,1731.0,124.0,252.0,808.0,Murcia
2020-04-14,11475.0,5750.0,471.0,902.0,5428.0,País Vasco


In [None]:
def spain_get_region(adm_region=None):
    spain = fetch_data_spain()
    """Returns two time series: (cases, deaths)"""
    assert adm_region, "Need to provide a value for the administrative region"
    
    if adm_region:
        assert state in germany['Bundesland'].values, \
            f"{state} not in available German states. These are {sorted(germany['Bundesland'].drop_duplicates())}"

        land = germany[germany['Bundesland'] == state]
        land = land.set_index(pd.to_datetime(land['Meldedatum']))
        land.index.name = 'date'
        land.sort_index(inplace=True)

        # group over multiple rows for the same date
        # (this will also group over the different landkreise in the state)
        cases = land["AnzahlFall"].groupby('date').agg('sum').cumsum()
        cases.country = f'Germany-{state}'
        cases.label = 'cases'

        # group over all multiple entries per day
        deaths = land["AnzahlTodesfall"].groupby('date').agg('sum').cumsum()
        deaths.country = f'Germany-{state}'
        deaths.label = 'deaths'

        return cases, deaths

    if landkreis:
        assert landkreis in germany['Landkreis'].values, \
            f"{state} not in available German states. These are {sorted(germany['Landkreis'].drop_duplicates())}"

        lk = germany[germany["Landkreis"] == landkreis]
        lk.index = pd.to_datetime(lk['Meldedatum'])
        lk.index.name = 'date'
        lk = lk.sort_index()

        cases = lk["AnzahlFall"].groupby('date').agg('sum').cumsum()
        cases.country = f'Germany-{landkreis}'
        cases.label = 'cases'

        deaths = lk["AnzahlTodesfall"].groupby('date').agg('sum').cumsum()
        deaths.country = f'Germany-{landkreis}'
        deaths.label = 'deaths'

        return cases, deaths

In [None]:
overview("Spain");

In [None]:
# Convert dates to datetime objects
cases['Date'] = pd.to_datetime(cases.Date, dayfirst=True)

In [None]:
# Choose a region, e.g. Andalucia == AN
cases_Andalucia = cases[cases["ISO code adm. region"] == "AN"]

In [None]:
deaths_Andalucia = cases_Andalucia["Deceases"].values

# Apparently this is the cumulative sum

In [None]:
%matplotlib notebook

fig, ax = plt.subplots()
ax.plot(deaths_Andalucia)
ax.set_ylabel("Number of deaths")
ax.set_xlabel("Days")
plt.tight_layout();

In [None]:
cases_Andalucia_daily = cases_Andalucia.sort_values("Date").groupby(["Date"])["Deceases"].sum()

In [None]:
%matplotlib notebook

fig, ax = plt.subplots()
ax.plot(cases_Andalucia_daily, 'o')
# to make the tick on x plot more sparse
#plt.xticks(range(cases_Andalucia_daily.shape[0])[::3],
#           [str(x) for x in cases_Andalucia_daily.index.tolist()][::3],
#           rotation = 45)
ax.xaxis.set_tick_params(rotation=45)
fig.tight_layout();

In [None]:
cases_Andalucia_daily

In [None]:
counties_hamburg_cases = counties_hamburg["cases"].values[0]
cases_hamburg_sum = sum(cases_hamburg["AnzahlFall"])

if counties_hamburg_cases != cases_hamburg_sum:
    print("Warning, inconsistent case numbers!")
    print(f"Reported total cases in Hamburg {counties_hamburg_cases}, sum of cases {cases_hamburg_sum}")