# Visualización de datos con matplotlib y seaborn

In [1]:
# Carga de módulos de matplotlib
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Carga de seaborn
import seaborn as sns

# Carga de pandas
import pandas as pd

## Conjuntos de datos

### COVID

#### Datos generales

In [2]:
# Load the general COVID dataset; the CSV file is semicolon-delimited
covid_general = pd.read_csv("datos/05_30_22_CSV_GENERAL.csv", sep=";")

In [3]:
# Preview the raw frame as loaded (pandas truncates the display of long/wide frames)
covid_general

Unnamed: 0,FECHA,SE,positivos,nue_posi,conf_lab,conf_nexo,muj_posi,hom_posi,extranj_posi,costar_posi,...,CEACOINS_UCI,CEACOINS_SALON,PAUT_UCI,PAUT_SALON,HVITO_UCI,HVITO_SALON,UNICAR_SAL,UNICAR_UCI,ARS-SIQ_SAL,ARS_SIQ_UCI
0,06/03/2020,10,2,2,,,,,,,...,,,,,,,,,,
1,07/03/2020,10,7,5,,,,,,,...,,,,,,,,,,
2,08/03/2020,11,10,3,,,,,,,...,,,,,,,,,,
3,09/03/2020,11,12,2,,,,,,,...,,,,,,,,,,
4,10/03/2020,11,13,1,,,7.0,6.0,3.0,10.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
811,26/05/2022,21,896712,3499,3272.0,227.0,460886.0,435826.0,102478.0,794234.0,...,0.0,0.0,0.0,2.0,0.0,4.0,0.0,0.0,0.0,0.0
812,27/05/2022,21,899404,2692,2491.0,201.0,462372.0,437032.0,102727.0,796677.0,...,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0
813,28/05/2022,21,901542,2138,1907.0,231.0,463595.0,437947.0,103010.0,798532.0,...,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0
814,29/05/2022,22,903213,1671,1603.0,68.0,464547.0,438666.0,103241.0,799972.0,...,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0


In [4]:
# Summarize the raw frame: number of entries, number of columns, dtypes and memory usage
covid_general.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 816 entries, 0 to 815
Columns: 144 entries, FECHA to ARS_SIQ_UCI
dtypes: float64(118), int64(11), object(15)
memory usage: 918.1+ KB


In [5]:
# Column reduction: keep only the nine columns listed below; every other
# column of the loaded frame is dropped.
covid_general = covid_general.loc[
    :,
    [
        "FECHA",
        "positivos",
        "activos",
        "RECUPERADOS",
        "fallecidos",
        "nue_posi",
        "nue_falleci",
        "salon",
        "UCI",
    ],
]

In [6]:
# Display the frame after the column reduction
covid_general

Unnamed: 0,FECHA,positivos,activos,RECUPERADOS,fallecidos,nue_posi,nue_falleci,salon,UCI
0,06/03/2020,2,2,0,0,2,0,,
1,07/03/2020,7,7,0,0,5,0,,
2,08/03/2020,10,10,0,0,3,0,,
3,09/03/2020,12,12,0,0,2,0,,
4,10/03/2020,13,13,0,0,1,0,,
...,...,...,...,...,...,...,...,...,...
811,26/05/2022,896712,31441,856766,8505,3499,6,322.0,47.0
812,27/05/2022,899404,33124,857772,8508,2692,3,328.0,47.0
813,28/05/2022,901542,34163,858862,8517,2138,9,338.0,44.0
814,29/05/2022,903213,34507,860184,8522,1671,5,334.0,51.0


In [7]:
# Normalize the column names: lowercase the upper-case ones and spell out the
# abbreviated ones. Columns missing from the mapping keep their current name.
nombres_nuevos = {
    "FECHA": "fecha",
    "RECUPERADOS": "recuperados",
    "nue_posi": "nuevos_positivos",
    "nue_falleci": "nuevos_fallecidos",
    "UCI": "uci",
}
covid_general = covid_general.rename(columns=nombres_nuevos)

In [8]:
# Display the frame to check the renamed columns
covid_general

Unnamed: 0,fecha,positivos,activos,recuperados,fallecidos,nuevos_positivos,nuevos_fallecidos,salon,uci
0,06/03/2020,2,2,0,0,2,0,,
1,07/03/2020,7,7,0,0,5,0,,
2,08/03/2020,10,10,0,0,3,0,,
3,09/03/2020,12,12,0,0,2,0,,
4,10/03/2020,13,13,0,0,1,0,,
...,...,...,...,...,...,...,...,...,...
811,26/05/2022,896712,31441,856766,8505,3499,6,322.0,47.0
812,27/05/2022,899404,33124,857772,8508,2692,3,328.0,47.0
813,28/05/2022,901542,34163,858862,8517,2138,9,338.0,44.0
814,29/05/2022,903213,34507,860184,8522,1671,5,334.0,51.0


In [12]:
# Inspect the dtypes and non-null counts of the reduced, renamed frame
covid_general.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 816 entries, 0 to 815
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   fecha              816 non-null    object 
 1   positivos          816 non-null    int64  
 2   activos            816 non-null    int64  
 3   recuperados        816 non-null    int64  
 4   fallecidos         816 non-null    int64  
 5   nuevos_positivos   816 non-null    int64  
 6   nuevos_fallecidos  816 non-null    int64  
 7   salon              791 non-null    float64
 8   uci                791 non-null    float64
dtypes: float64(2), int64(6), object(1)
memory usage: 57.5+ KB


In [13]:
# Convert the "fecha" column from day/month/year strings to datetime values.
# The explicit format avoids any day/month ambiguity while parsing.
fechas = pd.to_datetime(covid_general["fecha"], format="%d/%m/%Y")
covid_general["fecha"] = fechas

In [14]:
# Display the frame to check the converted date column
covid_general

Unnamed: 0,fecha,positivos,activos,recuperados,fallecidos,nuevos_positivos,nuevos_fallecidos,salon,uci
0,2020-03-06,2,2,0,0,2,0,,
1,2020-03-07,7,7,0,0,5,0,,
2,2020-03-08,10,10,0,0,3,0,,
3,2020-03-09,12,12,0,0,2,0,,
4,2020-03-10,13,13,0,0,1,0,,
...,...,...,...,...,...,...,...,...,...
811,2022-05-26,896712,31441,856766,8505,3499,6,322.0,47.0
812,2022-05-27,899404,33124,857772,8508,2692,3,328.0,47.0
813,2022-05-28,901542,34163,858862,8517,2138,9,338.0,44.0
814,2022-05-29,903213,34507,860184,8522,1671,5,334.0,51.0


In [15]:
# Inspect the dtypes again to confirm the new type of the date column
covid_general.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 816 entries, 0 to 815
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   fecha              816 non-null    datetime64[ns]
 1   positivos          816 non-null    int64         
 2   activos            816 non-null    int64         
 3   recuperados        816 non-null    int64         
 4   fallecidos         816 non-null    int64         
 5   nuevos_positivos   816 non-null    int64         
 6   nuevos_fallecidos  816 non-null    int64         
 7   salon              791 non-null    float64       
 8   uci                791 non-null    float64       
dtypes: datetime64[ns](1), float64(2), int64(6)
memory usage: 57.5 KB


#### Datos cantonales

In [21]:
def _leer_csv_cantonal(ruta):
    """Read one canton-level COVID CSV file.

    The four canton-level files are read with the same options: semicolon
    as field separator and ISO-8859-1 encoding, which is needed to read
    accented and other non-ASCII characters correctly.

    Parameters
    ----------
    ruta : str
        Path to the CSV file.

    Returns
    -------
    pandas.DataFrame
        The file contents as returned by ``pd.read_csv``.
    """
    return pd.read_csv(ruta, sep=";", encoding="iso-8859-1")


# Load positive cases
covid_cantonal_positivos = _leer_csv_cantonal("datos/05_30_22_CSV_POSITIVOS.csv")

# Load active cases
covid_cantonal_activos = _leer_csv_cantonal("datos/05_30_22_CSV_ACTIVOS.csv")

# Load recovered cases
covid_cantonal_recuperados = _leer_csv_cantonal("datos/05_30_22_CSV_RECUP.csv")

# Load deaths
covid_cantonal_fallecidos = _leer_csv_cantonal("datos/05_30_22_CSV_FALLECIDOS.csv")

In [22]:
# Preview one of the canton-level frames (deaths) as loaded
covid_cantonal_fallecidos

Unnamed: 0,cod_provin,provincia,cod_canton,canton,21/04/2020,22/04/2020,23/04/2020,24/04/2020,25/04/2020,26/04/2020,...,21/05/2022,22/05/2022,23/05/2022,24/05/2022,25/05/2022,26/05/2022,27/05/2022,28/05/2022,29/05/2022,30/05/2022
0,1.0,San José,112.0,Acosta,0.0,0.0,0.0,0.0,0.0,0.0,...,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0
1,1.0,San José,110.0,Alajuelita,0.0,0.0,0.0,0.0,0.0,0.0,...,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0
2,1.0,San José,106.0,Aserrí,0.0,0.0,0.0,0.0,0.0,0.0,...,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0
3,1.0,San José,118.0,Curridabat,0.0,0.0,0.0,0.0,0.0,0.0,...,143.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,144.0,145.0
4,1.0,San José,103.0,Desamparados,1.0,1.0,1.0,1.0,1.0,1.0,...,445.0,445.0,445.0,445.0,445.0,446.0,446.0,446.0,446.0,446.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,7.0,Limón,702.0,Pococí,0.0,0.0,0.0,0.0,0.0,0.0,...,193.0,193.0,193.0,193.0,193.0,193.0,193.0,194.0,195.0,195.0
80,7.0,Limón,703.0,Siquirres,0.0,0.0,0.0,0.0,0.0,0.0,...,87.0,87.0,87.0,87.0,87.0,87.0,87.0,87.0,87.0,87.0
81,7.0,Limón,704.0,Talamanca,0.0,0.0,0.0,0.0,0.0,0.0,...,72.0,72.0,72.0,72.0,72.0,73.0,73.0,73.0,73.0,73.0
82,9.0,Otros,999.0,Otros,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0


In [23]:
# Column reduction: keep the province and canton names plus the single
# date column "30/05/2022" in each of the four canton-level frames.
columnas_cantonales = ["provincia", "canton", "30/05/2022"]

covid_cantonal_positivos = covid_cantonal_positivos[columnas_cantonales]
covid_cantonal_fallecidos = covid_cantonal_fallecidos[columnas_cantonales]
covid_cantonal_recuperados = covid_cantonal_recuperados[columnas_cantonales]
covid_cantonal_activos = covid_cantonal_activos[columnas_cantonales]

In [24]:
# Display the positive-cases frame after the column reduction
covid_cantonal_positivos

Unnamed: 0,provincia,canton,30/05/2022
0,San José,Acosta,4104.0
1,San José,Alajuelita,18973.0
2,San José,Aserrí,10880.0
3,San José,Curridabat,14518.0
4,San José,Desamparados,43283.0
...,...,...,...
79,Limón,Pococí,20449.0
80,Limón,Siquirres,10349.0
81,Limón,Talamanca,5468.0
82,Otros,Otros,352.0


In [25]:
# Drop the rows in which every remaining column is null. Rows that have at
# least one non-null value are kept.
covid_cantonal_positivos = covid_cantonal_positivos.dropna(axis="index", how="all")
covid_cantonal_fallecidos = covid_cantonal_fallecidos.dropna(axis="index", how="all")
covid_cantonal_recuperados = covid_cantonal_recuperados.dropna(axis="index", how="all")
covid_cantonal_activos = covid_cantonal_activos.dropna(axis="index", how="all")

In [26]:
# Display the active-cases frame after dropping the all-null rows
covid_cantonal_activos

Unnamed: 0,provincia,canton,30/05/2022
0,San José,Acosta,278.0
1,San José,Alajuelita,575.0
2,San José,Aserrí,416.0
3,San José,Curridabat,472.0
4,San José,Desamparados,1550.0
...,...,...,...
78,Limón,Matina,63.0
79,Limón,Pococí,505.0
80,Limón,Siquirres,133.0
81,Limón,Talamanca,63.0


In [27]:
# Remove the rows whose canton is "Otros", keeping every other row.
covid_cantonal_positivos = covid_cantonal_positivos.loc[
    covid_cantonal_positivos["canton"].ne("Otros")
]
covid_cantonal_fallecidos = covid_cantonal_fallecidos.loc[
    covid_cantonal_fallecidos["canton"].ne("Otros")
]
covid_cantonal_recuperados = covid_cantonal_recuperados.loc[
    covid_cantonal_recuperados["canton"].ne("Otros")
]
covid_cantonal_activos = covid_cantonal_activos.loc[
    covid_cantonal_activos["canton"].ne("Otros")
]


In [28]:
# Display the active-cases frame after removing the "Otros" rows
covid_cantonal_activos

Unnamed: 0,provincia,canton,30/05/2022
0,San José,Acosta,278.0
1,San José,Alajuelita,575.0
2,San José,Aserrí,416.0
3,San José,Curridabat,472.0
4,San José,Desamparados,1550.0
...,...,...,...
77,Limón,Limón,65.0
78,Limón,Matina,63.0
79,Limón,Pococí,505.0
80,Limón,Siquirres,133.0


In [29]:
# Rename the date column of each frame to the name of the figure it holds,
# so the four frames no longer share an identical column label.
columna_fecha = "30/05/2022"

covid_cantonal_positivos = covid_cantonal_positivos.rename(
    columns={columna_fecha: "positivos"}
)
covid_cantonal_fallecidos = covid_cantonal_fallecidos.rename(
    columns={columna_fecha: "fallecidos"}
)
covid_cantonal_recuperados = covid_cantonal_recuperados.rename(
    columns={columna_fecha: "recuperados"}
)
covid_cantonal_activos = covid_cantonal_activos.rename(
    columns={columna_fecha: "activos"}
)

In [30]:
# Display the recovered-cases frame to check the renamed column
covid_cantonal_recuperados

Unnamed: 0,provincia,canton,recuperados
0,San José,Acosta,3807.0
1,San José,Alajuelita,18217.0
2,San José,Aserrí,10382.0
3,San José,Curridabat,13901.0
4,San José,Desamparados,41287.0
...,...,...,...
77,Limón,Limón,16565.0
78,Limón,Matina,5709.0
79,Limón,Pococí,19749.0
80,Limón,Siquirres,10129.0


### Pasajeros del Titanic

In [32]:
# Load the passengers from the training dataset (comma-separated, default encoding)
titanic = pd.read_csv("datos/entrenamiento.csv")

# Display the data
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
