# Master BigData UCM 2023

## 1. Datos COVID



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import plotnine
from plotnine import *

### Carga de datos

Partimos del fichero ya guardado en disco tras su lectura y un primer preprocesado.

In [None]:
# Load the DataFrame that was pickled to disk after the initial read and pre-processing.
# NOTE(review): unpickling can execute arbitrary code, so this file must come from a trusted source.
covid = pd.read_pickle("datos_covid.pkl")

In [None]:
# Dimensions of the loaded frame as (rows, columns).
covid.shape

In [None]:
# Summary statistics for every column; include='all' also covers the non-numeric ones.
covid.describe(include = 'all')

# 2. Visualizaciones con ggplot

Además del contenido recogido en la documentación teórica, es aconsejable seguir este excelente (y práctico) tutorial:

https://realpython.com/ggplot-python/


## 2.1 Barplot por sexo y edad



In [None]:
# Totals of every numeric column for each (sexo, grupo_edad) pair.
# Going through to_records() turns the group keys back into ordinary columns.
totales_sexo_edad = covid.groupby(['sexo', 'grupo_edad']).sum(numeric_only = True)
covid2 = pd.DataFrame(totales_sexo_edad.to_records())


In [None]:
# Show the aggregated frame.
covid2

In [None]:
# Keep only the rows where neither sexo nor grupo_edad is labelled 'NC'.
# Both conditions are combined into a single mask built from the current frame:
# filtering in two steps would index the already-filtered frame with a mask
# computed on the unfiltered one, which makes pandas reindex the mask and emit
# a "Boolean Series key will be reindexed" UserWarning.
mascara_sexo = covid2['sexo'] != 'NC'
mascara_grupo_edad = covid2['grupo_edad'] != 'NC'
covid2 = covid2[mascara_sexo & mascara_grupo_edad]


In [None]:
# Show the frame again after the 'NC' rows have been removed.
covid2

In [None]:
# Bar chart of num_hosp per grupo_edad.
# stat="identity" uses the column value as the bar height instead of counting rows.
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_hosp'))
    + geom_bar(stat = "identity")
)

In [None]:
# Bar chart of num_uci per grupo_edad.
# stat="identity" uses the column value as the bar height instead of counting rows.
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_uci'))
    + geom_bar(stat = "identity")
)

In [None]:
# num_hosp per grupo_edad, one bar per sexo placed side by side (position "dodge").
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_hosp', fill = 'sexo'))
    + geom_bar(position = 'dodge', stat = "identity")
)

In [None]:
# num_casos per grupo_edad, one bar per sexo placed side by side (position "dodge").
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_casos', fill = 'sexo'))
    + geom_bar(position = 'dodge', stat = "identity")
)

In [None]:
# Same dodged num_casos chart, stored in a variable so further layers can be added to it.
grafico = (
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_casos', fill = 'sexo'))
    + geom_bar(position = 'dodge', stat = "identity")
)

In [None]:
# Derive a second plot from grafico with the x and y axes swapped (horizontal bars).
grafico2 = grafico + coord_flip()

In [None]:
# Render the flipped plot.
grafico2

In [None]:
# num_uci per grupo_edad and sexo as horizontal dodged bars, with the fill colours
# set by hand (assigned to the sexo values in scale order).
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_uci', fill = 'sexo'))
    + geom_bar(position = 'dodge', stat = "identity")
    + coord_flip()
    + scale_fill_manual(values = ("blue", "pink"))
)

In [None]:
# num_uci per grupo_edad and sexo as horizontal dodged bars, default fill colours.
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_uci', fill = 'sexo'))
    + geom_bar(stat = "identity", position = "dodge")
    + coord_flip()
)

In [None]:
# num_def per grupo_edad and sexo as horizontal dodged bars, with the fill colours
# set by hand (assigned to the sexo values in scale order).
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_def', fill = 'sexo'))
    + geom_bar(stat = "identity", position = "dodge")
    + coord_flip()
    + scale_fill_manual(values = ("blue", "pink"))
)

In [None]:
# num_def per grupo_edad and sexo as dodged bars, drawn on a log10 y axis.
(
    ggplot(covid2, aes(x = 'grupo_edad', y = 'num_def', fill = 'sexo'))
    + geom_bar(stat = "identity", position = "dodge")
    + scale_y_log10()
)

## 2.2 Olas (series temporales)



In [None]:
# Totals of every numeric column for each (sexo, fecha) pair.
# numeric_only=True matches the aggregation used for covid2 and keeps text
# columns such as grupo_edad out of the sum; without it, recent pandas versions
# either concatenate the strings or raise, depending on the column dtype.
covid3 = covid.groupby(['sexo', 'fecha']).sum(numeric_only = True)
# Going through to_records() turns the group keys back into ordinary columns.
covid3 = pd.DataFrame(covid3.to_records())
# Drop the rows whose sexo is labelled 'NC'.
mascara_sexo = covid3['sexo'] != 'NC'
covid3 = covid3[mascara_sexo]


In [None]:
# Preview the first rows of the per-sex, per-date totals.
covid3.head()

In [None]:
# Line plot of num_casos over fecha, with the x tick labels rotated 45 degrees.
# NOTE(review): no colour/group aesthetic is mapped here, so rows belonging to
# different sexo values are not drawn as separate lines - confirm this is intended.
(
    ggplot(covid3, aes(x = "fecha", y = "num_casos"))
    + geom_line()
    + theme(
        axis_text_x = element_text(angle = 45, hjust = 1, size = 10)
    )
)

In [None]:
# Smoothed num_casos over fecha, one curve per sexo (LOWESS smoother, span 0.02).
# Stored in g so the plot can be restyled and saved afterwards.
g = (
    ggplot(covid3, aes(x = "fecha", y = "num_casos", color = 'sexo'))
    + geom_smooth(span = 0.02, method = "lowess")
)

In [None]:
# Render the smoothed plot.
g

In [None]:
# Rebind g to a copy of the plot with the figure size set to (12, 10).
g = g + theme(figure_size=(12, 10))

In [None]:
# Render the plot again with the new figure size.
g

In [None]:
# Write the plot to olas01.png in the current working directory at 600 dpi.
g.save('olas01.png', dpi=600)

In [None]:
# Display g with the x tick labels rotated by -45 degrees; g itself is not reassigned.
g + theme(
    axis_text_x = element_text(vjust = 1, angle = -45)
)

In [None]:
# Raw (unsmoothed) num_casos over fecha, one coloured line per sexo.
(
    ggplot(covid3, aes(x = "fecha", y = "num_casos", color = 'sexo'))
    + geom_line()
)

In [None]:
# Smoothed num_casos over fecha, one curve per sexo (LOWESS smoother, span 0.05).
(
    ggplot(covid3, aes(x = "fecha", y = "num_casos", color = 'sexo'))
    + geom_smooth(span = 0.05, method = "lowess")
)

In [None]:
# Smoothed num_uci over fecha, one curve per sexo (LOWESS smoother, span 0.10).
(
    ggplot(covid3, aes(x = "fecha", y = "num_uci", color = 'sexo'))
    + geom_smooth(span = 0.10, method = "lowess")
)

In [None]:
# Smoothed num_def over fecha, one curve per sexo (span 0.05).
# NOTE(review): this is the only cell that asks for method "loess" while the
# other smoothed plots use "lowess"; plotnine's loess smoother appears to need
# the optional scikit-misc package - confirm the difference is intended.
(
    ggplot(covid3, aes(x = "fecha", y = "num_def", color = 'sexo'))
    + geom_smooth(span = 0.05, method = "loess")
)