# Analytics on GeodataFrames - COVID 19 CASE

In the selected case, we will focus on positive cases, centering on the population most vulnerable to COVID-19, which includes middle-aged adults (40-59 years) and older adults (60+ years).

First, we read the data stored in Google Drive.








In [3]:
import pandas as pd

# Lee el archivo especificando el delimitador como ";"
covid19 = pd.read_csv("positivos_covid.csv", delimiter=';')

covid19.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4585360 entries, 0 to 4585359
Data columns (total 10 columns):
 #   Column           Dtype  
---  ------           -----  
 0   FECHA_CORTE      int64  
 1   DEPARTAMENTO     object 
 2   PROVINCIA        object 
 3   DISTRITO         object 
 4   METODODX         object 
 5   EDAD             float64
 6   SEXO             object 
 7   FECHA_RESULTADO  float64
 8   UBIGEO           float64
 9   id_persona       float64
dtypes: float64(4), int64(1), object(5)
memory usage: 349.8+ MB


In [5]:
#check
covid19.head()

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona
0,20241203,TUMBES,TUMBES,TUMBES,AG,46.0,FEMENINO,20221207.0,240101.0,203499.0
1,20241203,LIMA,LIMA,JESUS MARIA,AG,69.0,FEMENINO,20230822.0,150113.0,221397.0
2,20241203,SAN MARTIN,MOYOBAMBA,MOYOBAMBA,AG,55.0,FEMENINO,20240108.0,220101.0,295651.0
3,20241203,AREQUIPA,CAYLLOMA,COPORAQUE,AG,50.0,MASCULINO,20230824.0,40506.0,851625.0
4,20241203,LIMA,LIMA,JESUS MARIA,AG,58.0,MASCULINO,20221217.0,150113.0,287786.0


## We begin data cleaning

In [7]:
covid19 = covid19.drop(columns=['FECHA_CORTE', 'METODODX', 'id_persona'])

#check
covid19.head()


Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,EDAD,SEXO,FECHA_RESULTADO,UBIGEO
0,TUMBES,TUMBES,TUMBES,46.0,FEMENINO,20221207.0,240101.0
1,LIMA,LIMA,JESUS MARIA,69.0,FEMENINO,20230822.0,150113.0
2,SAN MARTIN,MOYOBAMBA,MOYOBAMBA,55.0,FEMENINO,20240108.0,220101.0
3,AREQUIPA,CAYLLOMA,COPORAQUE,50.0,MASCULINO,20230824.0,40506.0
4,LIMA,LIMA,JESUS MARIA,58.0,MASCULINO,20221217.0,150113.0


In [8]:
# Extraemos solo el año de la columna FECHA_RESULTADO en el DataFrame covid19
covid19['FECHA_RESULTADO'] = covid19['FECHA_RESULTADO'].astype(str).str[:4]

# Eliminar filas con NaN en EDAD
covid19 = covid19.dropna(subset=['EDAD'])
covid19['EDAD'] = covid19['EDAD'].astype(int)


# Convertimos la columna EDAD a enteros para remover el ".0"
covid19['EDAD'] = covid19['EDAD'].astype(int)

#check
covid19.head()



Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,EDAD,SEXO,FECHA_RESULTADO,UBIGEO
0,TUMBES,TUMBES,TUMBES,46,FEMENINO,2022,240101.0
1,LIMA,LIMA,JESUS MARIA,69,FEMENINO,2023,150113.0
2,SAN MARTIN,MOYOBAMBA,MOYOBAMBA,55,FEMENINO,2024,220101.0
3,AREQUIPA,CAYLLOMA,COPORAQUE,50,MASCULINO,2023,40506.0
4,LIMA,LIMA,JESUS MARIA,58,MASCULINO,2022,150113.0


In [9]:
# years in data
covid19.FECHA_RESULTADO.value_counts()

FECHA_RESULTADO
2022    2132009
2021    1307581
2020    1022565
2023      93361
2024      27074
nan        2023
1899        394
Name: count, dtype: int64

In [10]:
# Primero eliminamos los valores NaN de la columna FECHA_RESULTADO y luego filtramos los valores no deseados como '1899'
covid19 = covid19[~covid19['FECHA_RESULTADO'].isin(['nan'])]

# Convertimos FECHA_RESULTADO a string por seguridad y filtramos los valores no deseados
covid19 = covid19[~covid19['FECHA_RESULTADO'].isin(['1899'])]

In [11]:
# Verificamos que tenemos la periodización correcta
covid19.FECHA_RESULTADO.value_counts()

FECHA_RESULTADO
2022    2132009
2021    1307581
2020    1022565
2023      93361
2024      27074
Name: count, dtype: int64

In [12]:
# Mostramos los valores mínimo y máximo en la columna 'EDAD' del DataFrame, para verificar que está todo ok
edad_min = covid19['EDAD'].min()
edad_max = covid19['EDAD'].max()

edad_min, edad_max


(0, 125)

In [13]:
# Creamos una nueva columna 'Grupo_Edad' en el DataFrame covid19 con las categorías de edad especificadas
covid19['Grupo_Edad'] = pd.cut(
    covid19['EDAD'],
    bins=[0, 17, 39, 59, float('inf')],
    labels=["Niños y adolescentes (0-17 años)", "Adultos jóvenes (18-39 años)", "Adultos de mediana edad (40-59 años)", "Personas mayores (60+ años)"]
)
covid19.head()

Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,Grupo_Edad
0,TUMBES,TUMBES,TUMBES,46,FEMENINO,2022,240101.0,Adultos de mediana edad (40-59 años)
1,LIMA,LIMA,JESUS MARIA,69,FEMENINO,2023,150113.0,Personas mayores (60+ años)
2,SAN MARTIN,MOYOBAMBA,MOYOBAMBA,55,FEMENINO,2024,220101.0,Adultos de mediana edad (40-59 años)
3,AREQUIPA,CAYLLOMA,COPORAQUE,50,MASCULINO,2023,40506.0,Adultos de mediana edad (40-59 años)
4,LIMA,LIMA,JESUS MARIA,58,MASCULINO,2022,150113.0,Adultos de mediana edad (40-59 años)


In [14]:
covid19.Grupo_Edad.value_counts()

Grupo_Edad
Adultos jóvenes (18-39 años)            2044590
Adultos de mediana edad (40-59 años)    1485974
Personas mayores (60+ años)              726577
Niños y adolescentes (0-17 años)         308273
Name: count, dtype: int64

In [15]:
# Filtrar el DataFrame para excluir los grupos etarios especificados
covid19_vulnerables = covid19[~covid19['Grupo_Edad'].isin(["Niños y adolescentes (0-17 años)", "Adultos jóvenes (18-39 años)"])]
covid19_vulnerables.Grupo_Edad.value_counts()

Grupo_Edad
Adultos de mediana edad (40-59 años)    1485974
Personas mayores (60+ años)              726577
Niños y adolescentes (0-17 años)              0
Adultos jóvenes (18-39 años)                  0
Name: count, dtype: int64

## Reshaping to Long


We keep only the two most vulnerable groups, People per level, by distrit by year:

In [17]:
indexList=['FECHA_RESULTADO','DEPARTAMENTO','PROVINCIA','Grupo_Edad']
aggregator={'Grupo_Edad':[len]}
covid19_vulnerables=covid19_vulnerables.groupby(indexList,observed=True).agg(aggregator)
covid19_vulnerables

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Grupo_Edad
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,len
FECHA_RESULTADO,DEPARTAMENTO,PROVINCIA,Grupo_Edad,Unnamed: 4_level_2
2020,AMAZONAS,BAGUA,Adultos de mediana edad (40-59 años),2580
2020,AMAZONAS,BAGUA,Personas mayores (60+ años),1521
2020,AMAZONAS,BONGARA,Adultos de mediana edad (40-59 años),129
2020,AMAZONAS,BONGARA,Personas mayores (60+ años),69
2020,AMAZONAS,CHACHAPOYAS,Adultos de mediana edad (40-59 años),696
...,...,...,...,...
2024,TUMBES,ZARUMILLA,Adultos de mediana edad (40-59 años),5
2024,TUMBES,ZARUMILLA,Personas mayores (60+ años),4
2024,UCAYALI,CORONEL PORTILLO,Adultos de mediana edad (40-59 años),38
2024,UCAYALI,CORONEL PORTILLO,Personas mayores (60+ años),19


Sending the counts to wide columns:

In [19]:
Covid19Draft=covid19_vulnerables.unstack(3).fillna(0) #leftmost index in rows
Covid19Draft

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Grupo_Edad,Grupo_Edad
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,len,len
Unnamed: 0_level_2,Unnamed: 1_level_2,Grupo_Edad,Adultos de mediana edad (40-59 años),Personas mayores (60+ años)
FECHA_RESULTADO,DEPARTAMENTO,PROVINCIA,Unnamed: 3_level_3,Unnamed: 4_level_3
2020,AMAZONAS,BAGUA,2580.0,1521.0
2020,AMAZONAS,BONGARA,129.0,69.0
2020,AMAZONAS,CHACHAPOYAS,696.0,262.0
2020,AMAZONAS,CONDORCANQUI,922.0,288.0
2020,AMAZONAS,EN INVESTIGACIÓN,17.0,18.0
...,...,...,...,...
2024,TUMBES,CONTRALMIRANTE VILLAR,0.0,4.0
2024,TUMBES,TUMBES,17.0,15.0
2024,TUMBES,ZARUMILLA,5.0,4.0
2024,UCAYALI,CORONEL PORTILLO,38.0,19.0


In [20]:
Covid19Draft['ALARMA_pct']=Covid19Draft.iloc[:,1]/(Covid19Draft.iloc[:,0] + Covid19Draft.iloc[:,1])
covid19_vulnerables_Alarm_w=Covid19Draft['ALARMA_pct'].unstack('FECHA_RESULTADO').fillna(0)
covid19_vulnerables_Alarm_w

Unnamed: 0_level_0,FECHA_RESULTADO,2020,2021,2022,2023,2024
DEPARTAMENTO,PROVINCIA,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AMAZONAS,BAGUA,0.370885,0.391144,0.339266,0.533333,0.458333
AMAZONAS,BONGARA,0.348485,0.363825,0.305233,0.500000,0.600000
AMAZONAS,CHACHAPOYAS,0.273486,0.321394,0.268201,0.417476,0.440860
AMAZONAS,CONDORCANQUI,0.238017,0.339367,0.205714,0.000000,0.000000
AMAZONAS,EN INVESTIGACIÓN,0.514286,0.392857,0.458333,0.333333,0.000000
...,...,...,...,...,...,...
UCAYALI,ATALAYA,0.325243,0.241379,0.344828,0.000000,0.000000
UCAYALI,CORONEL PORTILLO,0.387321,0.342441,0.328023,0.404255,0.333333
UCAYALI,EN INVESTIGACIÓN,0.335516,0.375000,0.255208,0.500000,0.000000
UCAYALI,PADRE ABAD,0.309686,0.332174,0.279487,0.071429,0.000000


Notice the data type:

In [22]:
covid19_vulnerables_Alarm_w.columns

Index(['2020', '2021', '2022', '2023', '2024'], dtype='object', name='FECHA_RESULTADO')

We should have text not numbers:

In [24]:
covid19_vulnerables_Alarm_w.columns=['year'+str(x) for x in covid19_vulnerables_Alarm_w.columns]

In [25]:
#then
covid19_vulnerables_Alarm_w

Unnamed: 0_level_0,Unnamed: 1_level_0,year2020,year2021,year2022,year2023,year2024
DEPARTAMENTO,PROVINCIA,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AMAZONAS,BAGUA,0.370885,0.391144,0.339266,0.533333,0.458333
AMAZONAS,BONGARA,0.348485,0.363825,0.305233,0.500000,0.600000
AMAZONAS,CHACHAPOYAS,0.273486,0.321394,0.268201,0.417476,0.440860
AMAZONAS,CONDORCANQUI,0.238017,0.339367,0.205714,0.000000,0.000000
AMAZONAS,EN INVESTIGACIÓN,0.514286,0.392857,0.458333,0.333333,0.000000
...,...,...,...,...,...,...
UCAYALI,ATALAYA,0.325243,0.241379,0.344828,0.000000,0.000000
UCAYALI,CORONEL PORTILLO,0.387321,0.342441,0.328023,0.404255,0.333333
UCAYALI,EN INVESTIGACIÓN,0.335516,0.375000,0.255208,0.500000,0.000000
UCAYALI,PADRE ABAD,0.309686,0.332174,0.279487,0.071429,0.000000


In [26]:
# as usual
covid19_vulnerables_Alarm_w.reset_index(inplace=True)
covid19_vulnerables_Alarm_w

Unnamed: 0,DEPARTAMENTO,PROVINCIA,year2020,year2021,year2022,year2023,year2024
0,AMAZONAS,BAGUA,0.370885,0.391144,0.339266,0.533333,0.458333
1,AMAZONAS,BONGARA,0.348485,0.363825,0.305233,0.500000,0.600000
2,AMAZONAS,CHACHAPOYAS,0.273486,0.321394,0.268201,0.417476,0.440860
3,AMAZONAS,CONDORCANQUI,0.238017,0.339367,0.205714,0.000000,0.000000
4,AMAZONAS,EN INVESTIGACIÓN,0.514286,0.392857,0.458333,0.333333,0.000000
...,...,...,...,...,...,...,...
216,UCAYALI,ATALAYA,0.325243,0.241379,0.344828,0.000000,0.000000
217,UCAYALI,CORONEL PORTILLO,0.387321,0.342441,0.328023,0.404255,0.333333
218,UCAYALI,EN INVESTIGACIÓN,0.335516,0.375000,0.255208,0.500000,0.000000
219,UCAYALI,PADRE ABAD,0.309686,0.332174,0.279487,0.071429,0.000000


In [136]:
!pip install geopandas

Defaulting to user installation because normal site-packages is not writeable


Let's call a map:

In [139]:
mapLink='https://github.com/SocialAnalytics-StrategicIntelligence/GeoDF_Analytics/raw/main/maps/ProvsINEI2023.zip'

import geopandas as gpd

provmap=gpd.read_file(mapLink)

provmap.info()

ModuleNotFoundError: No module named 'geopandas'

Let me create a column, concatenating two:

In [None]:
provmap['location']=['+'.join(x[0]) for x in zip(provmap.iloc[:,3:5].values)]
provmap.head(10)

I will do the same with the data frame:

In [None]:
covid19_vulnerables_Alarm_w['location']=['+'.join(x[0]) for x in zip(covid19_vulnerables_Alarm_w.iloc[:,:2].values)]
covid19_vulnerables_Alarm_w.head()

## Preprocessing

The names from non-english speaking countries may come with some symbols that may cause trouble (', ~). Let's get rid of those:

In [None]:
!pip install unidecode

In [None]:
import unidecode


byePunctuation=lambda x: unidecode.unidecode(x)
covid19_vulnerables_Alarm_w['location']=covid19_vulnerables_Alarm_w['location'].apply(byePunctuation)
provmap['location']=provmap['location'].apply(byePunctuation)

In [None]:
# replacing dashes and multiple spaces by a simple space
covid19_vulnerables_Alarm_w['location']=covid19_vulnerables_Alarm_w.location.str.replace("\-|\_|\s+","",regex=True)
provmap['location']=provmap.location.str.replace("\-|\_|\s+","",regex=True)

## Merging

We need to merge both tables now. That can happen effectively if both tables have a **key** column: a column (or collection of them) whose values in one table are the same in the other one.

The match need not be exact, but only common values in the *key* are merged.

Let's find out what is NOT matched in each table:

In [None]:
nomatch_df=set(covid19_vulnerables_Alarm_w.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(covid19_vulnerables_Alarm_w.location)

This is what could not be matched:

In [None]:
len(nomatch_df), len(nomatch_gdf)

The right way to go is using fuzzy merging (remember we need the fuzz):

In [None]:
!pip install thefuzz

In [None]:
# pick the closest match from nomatch_gdf for a value in nomatch_df
from thefuzz import process
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]

If you are comfortable, you prepare a dictionary of changes:

In [None]:
# is this OK?
{dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}

In [None]:
# then:
changesinDF={dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}

Now, make the replacements:

In [None]:
covid19_vulnerables_Alarm_w.replace({'location': changesinDF}, inplace=True)

Is it over?

In [None]:
nomatch_df=set(covid19_vulnerables_Alarm_w.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(covid19_vulnerables_Alarm_w.location)

[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]

Now the merge can happen:

In [None]:
covid19_vulnerables_Alarm_map=provmap.merge(covid19_vulnerables_Alarm_w, on='location',how='left',indicator='flag')

In [None]:
# check
covid19_vulnerables_Alarm_map.info()

In [None]:
# avoid poblems with fillna()
covid19_vulnerables_Alarm_map['flag']=covid19_vulnerables_Alarm_map.flag.astype(str)

We can get rid of some columns:

In [None]:
covid19_vulnerables_Alarm_map.info()

In [None]:
bye=['DEPARTAMENTO', 'CCPP','CCDD']
covid19_vulnerables_Alarm_map.drop(columns=bye,inplace=True)

# keeping
covid19_vulnerables_Alarm_map.head()


In [None]:
# filling with zeroes
covid19_vulnerables_Alarm_map.fillna(0,inplace=True)

We can save this geoDF:

In [None]:
import os
covid19_vulnerables_Alarm_map.to_file(
    os.path.join('/content/drive/MyDrive/2024-2/Herramientas cuantitativas', "provinciasPeru.gpkg"),
    layer='provinciasCovid19',
    driver="GPKG"
)

## Exploring one variable

This time, we explore statistically one variable in the map:

In [None]:
# statistics
covid19_vulnerables_Alarm_map.year2022.describe()

A visual look:

In [None]:
import seaborn as sea

sea.boxplot(covid19_vulnerables_Alarm_map.year2022, color='yellow',orient='h')

In [None]:
from sklearn.preprocessing import QuantileTransformer
qt = QuantileTransformer(n_quantiles=100, random_state=0,output_distribution='normal')
qt_result=qt.fit_transform(covid19_vulnerables_Alarm_map[['year2022']])
sea.boxplot(qt_result, color='yellow',orient='h')

In [None]:
covid19_vulnerables_Alarm_map['year_2022_qt']=qt_result

## Spatial Correlation

### Neighboorhood

We can compute the neighborhood in a map using different algorithms:

In [None]:
!pip install libpysal

In [None]:
from libpysal.weights import Queen, Rook, KNN

# rook

w_rook = Rook.from_dataframe(covid19_vulnerables_Alarm_map,use_index=False)

In [None]:
# rook
w_queen = Queen.from_dataframe(covid19_vulnerables_Alarm_map,use_index=False)

In [None]:
# k nearest neighbors
w_knn = KNN.from_dataframe(covid19_vulnerables_Alarm_map, k=8)

Let's understand the differences:

In [None]:
# first one
covid19_vulnerables_Alarm_map.head(1)

In [None]:
# amount neighbors of that district
w_rook.neighbors[0]

In [None]:
# see
base=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.PROVINCIA_x=="CHACHAPOYAS"].plot()
covid19_vulnerables_Alarm_map.iloc[w_rook.neighbors[0] ,].plot(ax=base,facecolor="yellow",edgecolor='k')
covid19_vulnerables_Alarm_map.head(1).plot(ax=base,facecolor="red")

Let's do the same:

In [None]:
w_queen.neighbors[0]

In [None]:
base=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.PROVINCIA_x=="CHACHAPOYAS"].plot()
covid19_vulnerables_Alarm_map.iloc[w_queen.neighbors[0] ,].plot(ax=base,facecolor="yellow",edgecolor='k')
covid19_vulnerables_Alarm_map.head(1).plot(ax=base,facecolor="red")

In [None]:
w_knn.neighbors[0]

In [None]:
base=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.PROVINCIA_x=="CHACHAPOYAS"].plot()
covid19_vulnerables_Alarm_map.iloc[w_knn.neighbors[0] ,].plot(ax=base,facecolor="yellow",edgecolor='k')
covid19_vulnerables_Alarm_map.head(1).plot(ax=base,facecolor="red")

Let me pay attention to the queen results:

In [None]:
# all the neighbors by row
w_queen.neighbors

In [None]:
# the matrix of neighboorhood:

pd.DataFrame(*w_queen.full()).astype(int) # 1 means both are neighbors

In [None]:
# pct of neighboorhood (density)
w_queen.pct_nonzero

In [None]:
# a province with NO neighbor?
w_queen.islands

## Moran's correlation

We need the neighboorhood matrix (the weight matrix) to compute spatial correlation: if the variable value is correlated with the values of its neighbors - which proves a spatial effect.

In [None]:
# needed for spatial correlation
w_queen.transform = 'R'

In [None]:
pd.DataFrame(*w_queen.full()).sum(axis=1) # 1 means both are neighbors

Spatial correlation is measured by the Moran's I statistic:

In [None]:
!pip install esda


Spatial correlation is measured by the Moran's I statistic:

In [None]:
from esda.moran import Moran

morancovid19 = Moran(covid19_vulnerables_Alarm_map['year_2022_qt'], w_queen)
morancovid19.I,morancovid19.p_sim

The Moran's I is significant. Let's see:

In [None]:
!pip install splot

In [None]:
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt

fig, ax = moran_scatterplot(morancovid19)
ax.set_xlabel('Covid19_alarma_share')
ax.set_ylabel('SpatialLag_Covid19_alarma_share')

### Local Spatial Correlation

We can compute a LISA (local Moran) for each case. That will help us find spatial clusters (spots) and spatial outliers:

* A **hotSpot** is a polygon whose value in the variable is high AND is surrounded with polygons with also high values.

* A **coldSpot** is a polygon whose value in the variable is low AND is surrounded with polygons with also low values.

* A **coldOutlier** is a polygon whose value in the variable is low BUT is surrounded with polygons with  high values.

* A **hotOutlier** is a polygon whose value in the variable is high BUT is surrounded with polygons with  low values.

It is also possible that no significant correlation is detected. Let's see those values:

In [None]:
# The scatterplot with local info

from esda.moran import Moran_Local

# calculate Moran_Local and plot
lisacovid19 = Moran_Local(y=covid19_vulnerables_Alarm_map['year_2022_qt'], w=w_knn,seed=2022)
fig, ax = moran_scatterplot(lisacovid19,p=0.05)
ax.set_xlabel('Covid19_alarma_share')
ax.set_ylabel('SpatialLag_Covid19_alarma_share');

In [None]:
from splot.esda import plot_local_autocorrelation
plot_local_autocorrelation(lisacovid19, covid19_vulnerables_Alarm_map,'year_2022_qt')
plt.show()

Let me add that data to my gdf:

In [None]:
# quadrant
lisacovid19.q

In [None]:
# significance
lisacovid19.p_sim

In [None]:
# quadrant: 1 HH,  2 LH,  3 LL,  4 HL
pd.Series(lisacovid19.q).value_counts()

The info in **lisacovid19.q** can not be used right away, we need to add if the local spatial correlation is significant:

In [None]:
covid19_vulnerables_Alarm_map['Covid19_quadrant']=[l if p <0.05 else 0 for l,p in zip(lisacovid19.q,lisacovid19.p_sim)  ]
covid19_vulnerables_Alarm_map['Covid19_quadrant'].value_counts()

Now, we recode:

In [None]:
labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

covid19_vulnerables_Alarm_map['Covid19_quadrant_names']=[labels[i] for i in covid19_vulnerables_Alarm_map['Covid19_quadrant']]

covid19_vulnerables_Alarm_map['Covid19_quadrant_names'].value_counts()


Let's replot:

In [None]:
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])




f, ax = plt.subplots(1, figsize=(12,12))


plt.title('Spots and Outliers')

covid19_vulnerables_Alarm_map.plot(column='Covid19_quadrant_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='white',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()

In [None]:
covid19_vulnerables_Alarm_map.explore("Covid19_quadrant_names", categorical=True,tooltip='location',cmap=myColMap)

In [None]:
import folium

map1=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.Covid19_quadrant_names=='1 hotSpot']
map2=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.Covid19_quadrant_names=='2 coldOutlier']
map3=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.Covid19_quadrant_names=='3 coldSpot']
map4=covid19_vulnerables_Alarm_map[covid19_vulnerables_Alarm_map.Covid19_quadrant_names=='4 hotOutlier']

m = map1.explore(
    color="red",
    tooltip=False,  # hide tooltip
    popup=["location"],  # (on-click)
    name="hotSpot"  # name of the layer in the map
)

map2.explore(
    m=m, # notice
    color="green",
    tooltip=False,
    popup=["location"],
    name="coldOutlier"
)

map3.explore(
    m=m,
    color="black",
    tooltip=False,
    popup=["location"],
    name="coldSpot",
)

map4.explore(
    m=m,
    color="orange",
    tooltip=False,
    popup=["location"],
    name="hotOutlier",
)

folium.TileLayer("CartoDB positron", show=False).add_to(m)  # use folium to add alternative tiles
folium.LayerControl(collapsed=True).add_to(m)  # use folium to add layer control

m  # show map

## Bivariate LISA

In [None]:
#from esda.moran import Moran_BV, Moran_Local_BV
from esda.moran import Moran_BV

mbi = Moran_BV(covid19_vulnerables_Alarm_map['year2021'],  covid19_vulnerables_Alarm_map['year2022'],  w_queen)
mbi.I,mbi.p_sim

In [None]:
# The scatterplot with local info
from esda.moran import Moran_Local_BV

# calculate Moran_Local and plot
lisacovid19_bv = Moran_Local_BV(y=covid19_vulnerables_Alarm_map['year2021'],
                               x=covid19_vulnerables_Alarm_map['year2022'],
                               w=w_queen)

fig, ax = moran_scatterplot(lisacovid19_bv, p=0.05,aspect_equal=True)

ax.set_xlabel('Covid19_2022')
ax.set_ylabel('SpatialLag_Covid19_2021')
plt.show()

In [None]:
covid19_vulnerables_Alarm_map['Covid19_quadrant_21_22']=[l if p <0.05 else 0 for l,p in zip(lisacovid19_bv.q,lisacovid19_bv.p_sim)  ]

labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']

covid19_vulnerables_Alarm_map['Covid19_quadrant_21_22_names']=[labels[i] for i in covid19_vulnerables_Alarm_map['Covid19_quadrant_21_22']]


In [None]:
# see new columns
covid19_vulnerables_Alarm_map

In [None]:
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])




f, ax = plt.subplots(1, figsize=(12,12))


plt.title('Spots and Outliers')

covid19_vulnerables_Alarm_map.plot(column='Covid19_quadrant_21_22_names',
                categorical=True,
                cmap=myColMap,
                linewidth=0.1,
                edgecolor='white',
                legend=True,
                legend_kwds={'loc': 'center left',
                             'bbox_to_anchor': (0.7, 0.6)},
                ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()

In [None]:
# the map with the spots and outliers

from splot.esda import lisa_cluster
f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
fig = lisa_cluster(lisacovid19,
                   covid19_vulnerables_Alarm_map,ax=ax,
                   legend_kwds={'loc': 'center left',
                                'bbox_to_anchor': (0.7, 0.6)})

# Use github para almacenar, publicar y presentar su trabajo

Enlace al repositorio de la tarea 3: https://github.com/luispachecoc/covid_19

Enlace a GitHub Pages: https://luispachecoc.github.io/covid_19/

In [None]:
!jupyter nbconvert --to html "/content/drive/MyDrive/2024-2/Herramientas_cuantitativas/Index.ipynb"