# COVID-19 Situation in Spain
I will zoom into Spain in this kernel to visualize how COVID-19 spread in each autonomous community (CCAA).

**Please upvote both kernel and dataset** if you find it useful!


## Libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
py.offline.init_notebook_mode(connected=True)

import geopandas

## Data Load

In [None]:
# Load every input table from the Kaggle datasets attached to this kernel.
# The "_long" per-CCAA files are the ones used further down, through their
# 'fecha', 'CCAA' and 'total' columns.
infected = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_covid19_casos_long.csv')
# NOTE(review): uci_beds, masks, national and gdf are loaded here but are not
# referenced anywhere in the visible part of the notebook.
uci_beds = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_camas_uci_2017.csv')
recovered = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_covid19_altas_long.csv')
death = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_covid19_fallecidos_long.csv')
hospitalized = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_covid19_hospitalizados_long.csv')
masks = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_covid19_mascarillas.csv')
uci = pd.read_csv('/kaggle/input/covid19-in-spain/ccaa_covid19_uci_long.csv')
national = pd.read_csv('/kaggle/input/covid19-in-spain/nacional_covid19.csv')
# Nationwide figures broken down by age range ('rango_edad') and sex ('sexo').
age_range = pd.read_csv('/kaggle/input/covid19-in-spain/nacional_covid19_rango_edad.csv')
# GeoJSON read with geopandas from a second attached dataset.
gdf = geopandas.read_file('/kaggle/input/spain-geojson/shapefiles_ccaa_espana.geojson')

Coordinates for mapping each CCAA

In [None]:
# Maps each CCAA name to a [latitude, longitude] pair used to place its
# marker on the map. The keys must match the values of the 'CCAA' column
# exactly, because they are looked up with plain dictionary indexing.
locations = {'Andalucía':[37.38,-5.97],
            'Aragón':[41.64,-0.88],
            'Asturias':[43.36,-5.85],
            'Baleares':[39.57,2.65],
            'Canarias':[28.09,-15.41],
            'Cantabria':[43.46,-3.8],
            'Castilla-La Mancha':[38.98,-3.92],
            'Castilla y León':[41.65,-4.77],
            'Cataluña':[41.39,2.17],
            'Ceuta':[35.89,-5.34],
            'C. Valenciana':[39.37,-0.8],
            'Extremadura':[39.71,-6.16],
            'Galicia':[43.12,-8.46],
            'Madrid':[40.49,-3.71],
            'Melilla':[35.29,-2.95],
            'Murcia':[38.00,-1.48],
            'Navarra':[42.66,-1.64],
            'País Vasco':[43.23,-2.85],
            'La Rioja':[42.27,-2.51]}

In [None]:
# Latest value of 'fecha' in the infected series; it selects the "last report"
# rows further down. It is taken here, while the column is still as read from
# the CSV (the pd.to_datetime conversion only happens in a later cell).
# NOTE(review): the other series are assumed to share this same last date — confirm.
max_date = infected['fecha'].max()

# General analysis over time

We will see how the virus evolved over time in each CCAA:

In [None]:
# Keep only the columns the animated map needs and drop the national
# aggregate row ('Total'), which has no coordinates of its own.
# The explicit .copy() makes map_df an independent frame, so the columns that
# later cells add to it do not raise pandas' SettingWithCopyWarning.
map_df = infected.loc[infected['CCAA'] != 'Total', ['fecha', 'CCAA', 'total']].copy()

In [None]:
# Look up the [latitude, longitude] pair of every row's CCAA in `locations`.
# A comprehension builds each list in one pass; concatenating a new
# one-element list per row would re-copy the whole list on every iteration.
# A CCAA missing from `locations` still raises KeyError.
lat = [locations[ca][0] for ca in map_df['CCAA']]
long = [locations[ca][1] for ca in map_df['CCAA']]

In [None]:
# Attach the coordinates as new columns; `lat` and `long` hold one entry per
# row of map_df, in row order.
map_df['lat']= lat
map_df['long'] = long

In [None]:
# Compress the range of totals (power 0.3) before using it as marker size,
# presumably so the largest regions do not dwarf the smallest ones.
map_df['size'] = map_df['total'].pow(0.3)

# Animated bubble map: one frame per value of 'fecha', colour scaled from 0
# to the overall maximum so that colours are comparable across frames.
fig = px.scatter_geo(
    map_df,
    lat='lat',
    lon='long',
    color='total',
    color_continuous_scale="portland",
    size='size',
    animation_frame='fecha',
    range_color=[0, map_df['total'].max()],
    projection="mercator",
    center={'lat': 40, 'lon': -3},
)
fig.update_layout(
    title='COVID-19 Total infected people over time',
    height=600,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
# Zoom in by changing only the projection scale. Assigning a brand-new
# Projection object here would replace the whole projection setting and
# silently drop the "mercator" type requested above.
fig.layout.geo.projection.scale = 7.
fig.show()

In [None]:
def dateplot(x, y, **kwargs):
    """Plot column ``y`` against column ``x`` on the current matplotlib axes.

    The frame to draw is taken from the mandatory ``data`` keyword argument
    (a ``KeyError`` is raised when it is absent); every other keyword
    argument is forwarded untouched to ``data.plot``. The grid is always
    switched off. This is the callback handed to ``FacetGrid.map_dataframe``
    in the cells below.
    """
    current_axes = plt.gca()
    frame = kwargs.pop("data")
    frame.plot(x=x, y=y, ax=current_axes, grid=False, **kwargs)

In [None]:
# Parse the raw 'fecha' column of each per-CCAA frame into pandas datetimes,
# overwriting the column in place.
for frame in (infected, hospitalized, uci, recovered, death):
    frame['fecha'] = pd.to_datetime(frame['fecha'])

Because the following variables grow roughly exponentially, the plots below use a logarithmic scale, on which exponential growth appears as a straight line and the trend is easier to analyze.

### Infected over time

In [None]:
# Drop the national aggregate so that only individual CCAA get a facet.
infected = infected[infected['CCAA'] != 'Total']
g = sns.FacetGrid(infected, col="CCAA", col_wrap=5, height=3.5)
# One line per CCAA, drawn on a logarithmic y axis.
g.map_dataframe(dateplot, "fecha", "total")
g.set(yscale='log')
# Shade the area under each curve.
g.map(plt.fill_between, 'fecha', 'total', alpha=0.2)
# Facet titles are set once; an intermediate "{col_name} CCAA" title would be
# overwritten by this call anyway.
g.set_titles("{col_name}")
# Leave room above the grid for the figure-level title.
plt.subplots_adjust(top=0.92)
# Bind the returned Text to a throwaway name: it keeps the cell from echoing
# it while `g` stays bound to the FacetGrid.
_ = g.fig.suptitle('Evolution of total infected in CCAA (log scale)')

### UCI over time

In [None]:
# Drop the national aggregate so that only individual CCAA get a facet.
uci = uci[uci['CCAA'] != 'Total']
g = sns.FacetGrid(uci, col="CCAA", col_wrap=5, height=3.5)
# One line per CCAA, drawn on a logarithmic y axis.
g.map_dataframe(dateplot, "fecha", "total")
g.set(yscale='log')
# Shade the area under each curve.
g.map(plt.fill_between, 'fecha', 'total', alpha=0.2)
# Facet titles are set once; an intermediate "{col_name} CCAA" title would be
# overwritten by this call anyway.
g.set_titles("{col_name}")
# Leave room above the grid for the figure-level title.
plt.subplots_adjust(top=0.92)
# Bind the returned Text to a throwaway name: it keeps the cell from echoing
# it while `g` stays bound to the FacetGrid.
_ = g.fig.suptitle('Evolution of total UCI patients in CCAA (log scale)')

### Hospitalized over time

In [None]:
# Drop the national aggregate so that only individual CCAA get a facet.
hospitalized = hospitalized[hospitalized['CCAA'] != 'Total']
g = sns.FacetGrid(hospitalized, col="CCAA", col_wrap=5, height=3.5)
# One line per CCAA, drawn on a logarithmic y axis.
g.map_dataframe(dateplot, "fecha", "total")
g.set(yscale='log')
# Shade the area under each curve.
g.map(plt.fill_between, 'fecha', 'total', alpha=0.2)
# Facet titles are set once; an intermediate "{col_name} CCAA" title would be
# overwritten by this call anyway.
g.set_titles("{col_name}")
# Leave room above the grid for the figure-level title.
plt.subplots_adjust(top=0.92)
# Bind the returned Text to a throwaway name: it keeps the cell from echoing
# it while `g` stays bound to the FacetGrid.
_ = g.fig.suptitle('Evolution of total hospitalized in CCAA (Log Scale) ')

### Recovered over time

In [None]:
# Drop the national aggregate so that only individual CCAA get a facet.
recovered = recovered[recovered['CCAA'] != 'Total']
g = sns.FacetGrid(recovered, col="CCAA", col_wrap=5, height=3.5)
# One line per CCAA, drawn on a logarithmic y axis.
g.map_dataframe(dateplot, "fecha", "total")
g.set(yscale='log')
# Shade the area under each curve.
g.map(plt.fill_between, 'fecha', 'total', alpha=0.2)
# Facet titles are set once; an intermediate "{col_name} CCAA" title would be
# overwritten by this call anyway.
g.set_titles("{col_name}")
# Leave room above the grid for the figure-level title.
plt.subplots_adjust(top=0.92)
# Bind the returned Text to a throwaway name: it keeps the cell from echoing
# it while `g` stays bound to the FacetGrid.
_ = g.fig.suptitle('Evolution of total recovered in CCAA (Log Scale)')

### Deaths over time

In [None]:
# Drop the national aggregate so that only individual CCAA get a facet.
death = death[death['CCAA'] != 'Total']
g = sns.FacetGrid(death, col="CCAA", col_wrap=5, height=3.5)
# One line per CCAA, drawn on a logarithmic y axis.
g.map_dataframe(dateplot, "fecha", "total")
g.set(yscale='log')
# Shade the area under each curve.
g.map(plt.fill_between, 'fecha', 'total', alpha=0.2)
# Facet titles are set once; an intermediate "{col_name} CCAA" title would be
# overwritten by this call anyway.
g.set_titles("{col_name}")
# Leave room above the grid for the figure-level title.
plt.subplots_adjust(top=0.92)
# Bind the returned Text to a throwaway name: it keeps the cell from echoing
# it while `g` stays bound to the FacetGrid.
_ = g.fig.suptitle('Evolution of total deaths in CCAA (log scale)')

# Last Report Situation

In [None]:
# For each series keep only the rows reported on `max_date`.
# NOTE(review): `max_date` was derived from the infected series alone; a
# series whose last report falls on another day yields an empty frame here.
infected_last, recovered_last, hospitalized_last, death_last, uci_last = (
    frame[frame['fecha'] == max_date]
    for frame in (infected, recovered, hospitalized, death, uci)
)

In [None]:
# One row per CCAA, one column per indicator, all taken from the last report.
# Each series is keyed by its own 'CCAA' column so that pandas aligns the
# values by region. Gluing the raw `.values` arrays together positionally
# would silently mix regions up (or fail on a length mismatch) whenever the
# frames are not in exactly the same row order.
_last_reports = {
    'Infected': infected_last,
    'Hospitalized': hospitalized_last,
    'UCI': uci_last,
    'Recovered': recovered_last,
    'Death': death_last,
}
df_an = pd.DataFrame(
    {name: frame.set_index('CCAA')['total'] for name, frame in _last_reports.items()}
)
# Keep the row order (and the set of regions) of the infected series; a
# region absent from another series shows up as NaN in that column.
df_an = df_an.reindex(infected_last['CCAA'])

In [None]:
# Separate the national aggregate row from the per-CCAA rows.
# NOTE(review): earlier cells already filter the 'Total' rows out of every
# source frame, so df_total looks empty when the notebook runs top to bottom
# — confirm whether it is still needed.
is_total = df_an.index == 'Total'
df_total = df_an[is_total]
df_an = df_an[~is_total]

In [None]:
# Chart title, e.g. "COVID-2019 as of <YYYY-MM-DD>".
d = pd.to_datetime(str(max_date)).strftime('%Y-%m-%d')
title = 'COVID-2019'
chart_title = title + ' as of ' + d
ccaa = df_an.index.to_list()
print('Number of CCAA with confirmed cases = ',len(ccaa))

# Inner ring: one sector per CCAA, sized by its infected total. All of them
# name `title` as their parent; `title` itself is not listed in `ids`.
# `ids` and `labels` are independent copies so that extending them below
# never touches `ccaa` or each other.
ids = list(ccaa)
labels = list(ccaa)
parents = [title] * len(ccaa)
values = df_an['Infected'].to_list()

# Every column except 'Infected' becomes a child sector of its CCAA.
classifications = df_an.columns.drop('Infected').values

# Outer ring: one sector per (CCAA, classification) pair. Appending in place
# avoids rebuilding four ever-growing lists on each iteration, and a single
# .loc[row, column] lookup replaces the chained df_an.loc[cty][c].
for cty in ccaa:
    for c in classifications:
        ids.append(cty + '-' + c)
        parents.append(cty)
        labels.append(c)
        values.append(df_an.loc[cty, c])

trace = go.Sunburst(
    ids=ids,
    labels=labels,
    parents=parents,
    values=values,
    outsidetextfont={"size": 20, "color": "#377eb8"},
    marker={"line": {"width": 2}}
)

layout = go.Layout(
    title = chart_title + "<br>(click on CCAA)",
    margin = go.layout.Margin(t=100, l=0, r=0, b=0),
    sunburstcolorway=["#636efa","#ef553b","#00cc96"]
)

fig = go.Figure([trace], layout)

py.iplot(fig)

# Patients analysis

As a first approach, I will compare the effects of COVID-19 in each age group without taking gender into account.

### Preparing the DataFrame

In [None]:
# Discard the aggregate 'Total' row and the '80 y +' age range.
age_range = age_range[~age_range['rango_edad'].isin(['Total', '80 y +'])]
# Rows where 'sexo' is 'ambos' ("both") carry the figures with no gender split.
no_gender = age_range[age_range['sexo'] == 'ambos']

In [None]:
# Bar chart of confirmed cases per age range for the non-gendered rows.
# NOTE(review): no_gender is not restricted to a single report; if it holds
# several rows per age range, the bars show seaborn's aggregate of them
# rather than one report's figure — confirm this is intended.
g = sns.catplot(
    x="rango_edad",
    y="casos_confirmados",
    hue="sexo",
    data=no_gender,
    kind="bar",
    height=5,
    aspect=3,
    palette="muted",
)
g.despine(left=True)
g.set_ylabels("Total infected")

In [None]:
# Keep only the rows whose first column equals that column's maximum
# (presumably the most recent report date — confirm against the CSV).
# .copy() makes `last` an independent frame: later cells overwrite several of
# its columns, which on a plain slice raises SettingWithCopyWarning.
last = age_range[age_range.iloc[:, 0] == age_range.iloc[:, 0].max()].copy()

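In case the `ingresos_uci` column contains a stray 'i' instead of a number, replace it with 0 so that the column can be converted to integers: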

In [None]:
# Replace the stray 'i' placeholder with 0, then cast the column to int.
# The column is addressed by name throughout, so the clean-up no longer relies
# on 'ingresos_uci' sitting at position 5, and one vectorised assignment
# replaces the row-by-row loop.
placeholder_rows = last['ingresos_uci'] == 'i'
last.loc[placeholder_rows, 'ingresos_uci'] = 0

last['ingresos_uci'] = last['ingresos_uci'].astype(int)

In order to compare between different categories, we should normalize the data (each column is divided by its Euclidean norm):

In [None]:
# Scale each indicator by the Euclidean norm of its own column so that the
# four curves share a comparable range.
# NOTE(review): the norm is computed over every row of `last`, i.e. over all
# values of 'sexo' together — confirm this is the intended normalisation.
for column in ('casos_confirmados', 'hospitalizados', 'ingresos_uci', 'fallecidos'):
    last[column] = last[column] / np.linalg.norm(last[column])

In [None]:
# Split the last report into the combined ('ambos') rows and the remaining,
# gender-specific rows.
is_combined = last['sexo'] == 'ambos'
last_ambos = last[is_combined]
last_gender = last[~is_combined]

### COVID-19 vs age groups

In [None]:
# One line per normalised indicator, all plotted against the age range.
plt.figure(figsize=(15,5))
curves = (
    ('casos_confirmados', 'green', 'Total infected'),
    ('hospitalizados', 'red', 'Hospitalized'),
    ('ingresos_uci', 'yellow', 'UCI'),
    ('fallecidos', 'black', 'Death'),
)
for column, colour, caption in curves:
    plt.plot(last_ambos['rango_edad'], last_ambos[column], color=colour, label=caption)
plt.title('COVID-19 vs age groups')
plt.legend()

The results are as expected: the virus mostly affects older people. We can see that the death curve peaks at around 80-89 years old.
The most surprising result is the "Total infected" line, as its 'peak' spans from 40 to 80 years, which means the disease is present in most of the population.


### COVID-19 vs Gender

In [None]:
# sns.relplot is a figure-level function that creates its own figure (sized
# through `height` and `aspect`), so no separate plt.figure() call is made:
# it would only leave an empty extra figure in the output.
sns.relplot(x='rango_edad',y ='casos_confirmados', hue = 'sexo',kind='line',data = last_gender,height=5,aspect=4)
# The title goes to the current axes, i.e. the ones relplot just drew on.
plt.title('Comparison between men and women: Total infections')

In [None]:
# sns.relplot is a figure-level function that creates its own figure (sized
# through `height` and `aspect`), so no separate plt.figure() call is made:
# it would only leave an empty extra figure in the output.
sns.relplot(x='rango_edad',y ='hospitalizados', hue = 'sexo',kind='line',data = last_gender,height=5,aspect=4)
# The title goes to the current axes, i.e. the ones relplot just drew on.
plt.title('Comparison between men and women: Hospitalized')

In [None]:
# sns.relplot is a figure-level function that creates its own figure (sized
# through `height` and `aspect`), so no separate plt.figure() call is made:
# it would only leave an empty extra figure in the output.
sns.relplot(x='rango_edad',y ='ingresos_uci', hue = 'sexo',kind='line',data = last_gender,height=5,aspect=4)
# The title goes to the current axes, i.e. the ones relplot just drew on.
plt.title('Comparison between men and women: UCI')

In [None]:
# sns.relplot is a figure-level function that creates its own figure (sized
# through `height` and `aspect`), so no separate plt.figure() call is made:
# it would only leave an empty extra figure in the output.
sns.relplot(x='rango_edad',y ='fallecidos', hue = 'sexo',kind='line',data = last_gender,height=5,aspect=4)
# The title goes to the current axes, i.e. the ones relplot just drew on.
plt.title('Comparison between men and women: Death')

As we can see, in general the virus is more dangerous for men than for women, although the total number of infected people is higher among women until 60 years old.
This can be easily explained with the previous chart, as the age ranges in which women are more infected than men are the less dangerous ages.
On the other hand, there are more men than women infected in the dangerous ages (60+ years old).

---
To be continued...