# Reclasificación de AGEBS
Este es un nuevo repositorio para la reclasificación de AGEBS. El objetivo es crear un nuevo conjunto de AGEBS que sea más homogéneo y que permita una mejor comparación entre las diferentes variables de interés.

## Carga de librerías

In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import glob

## Carga de datos

In [3]:
rename_quimicos = {
  'co': '$CO$',
  'hcho': '$HCHO$',
  'o3': '$O_3$',
  'so2': '$SO_2$',
  'no2': '$NO_2$'
}

### Datos de SENTINEL

In [4]:
sentinel_dir = "data/sentinel/AGEBS/month/*.csv"
sentinel_files = glob.glob(sentinel_dir)
sentinel_files.sort()
sentinel_files[:5]

['data/sentinel/AGEBS/month\\sum_CO_2018-11-01.csv',
 'data/sentinel/AGEBS/month\\sum_CO_2018-12-01.csv',
 'data/sentinel/AGEBS/month\\sum_CO_2019-01-01.csv',
 'data/sentinel/AGEBS/month\\sum_CO_2019-02-01.csv',
 'data/sentinel/AGEBS/month\\sum_CO_2019-03-01.csv']

In [5]:
pd.read_csv(sentinel_files[0]).head()

Unnamed: 0,sum,CVE_LOC,CVE_MUN,CVE_ENT,CVEGEO,CVE_AGEB,system:index
0,0.052075,1,1,6,0600100010081,0081,0
1,0.023302,1,1,6,060010001019A,019A,1
2,0.008745,1,1,6,0600100010359,0359,2
3,0.077275,1,1,6,0600100010151,0151,3
4,0.076924,1,1,6,0600100010166,0166,4


In [6]:
sentinel_files[0].split("\\")[1].split("_")[1]

'CO'

In [7]:
def read_sentinel(files):
  df_full = pd.DataFrame()
  for f in files:
    quimico = f.split("\\")[1].split("_")[1]
    quimico = rename_quimicos[quimico.lower()]

    date = f.split("\\")[1].split("_")[2].split(".")[0]
    cols_to_keep = ["CVEGEO", "sum"]
    df = pd.read_csv(f, usecols=cols_to_keep)
    
    df["quimico"] = quimico
    df["date"] = date
    df_full = pd.concat([df_full, df])
  return df_full

df_sentinel = read_sentinel(sentinel_files)
df_sentinel.head()

Unnamed: 0,sum,CVEGEO,quimico,date
0,0.052075,0600100010081,$CO$,2018-11-01
1,0.023302,060010001019A,$CO$,2018-11-01
2,0.008745,0600100010359,$CO$,2018-11-01
3,0.077275,0600100010151,$CO$,2018-11-01
4,0.076924,0600100010166,$CO$,2018-11-01


In [8]:
# set df_sentinel from long to wide
df_sentinel_pivot = pd.pivot_table(df_sentinel,index=["CVEGEO", "date"],columns="quimico", values="sum").dropna()
df_sentinel_pivot.reset_index(inplace=True)
df_sentinel_pivot.head()

quimico,CVEGEO,date,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$
0,600100010081,2018-11-01,0.052075,0.000757,0.000491,1.018031,0.000308
1,600100010081,2018-12-01,0.151151,0.001729,0.00056,1.030936,-2.6e-05
2,600100010081,2019-01-01,0.21108,0.001839,0.00057,1.048855,0.001457
3,600100010081,2019-02-01,0.226947,0.001311,0.000558,0.991918,0.001848
4,600100010081,2019-03-01,0.272456,0.001478,0.000607,1.135778,0.001786


In [9]:
# df_sentinel_pivot from monthly to yearly
df_sentinel_pivot["year"] = df_sentinel_pivot["date"].apply(lambda x: x[:4])
df_sentinel_pivot.head()

quimico,CVEGEO,date,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,year
0,600100010081,2018-11-01,0.052075,0.000757,0.000491,1.018031,0.000308,2018
1,600100010081,2018-12-01,0.151151,0.001729,0.00056,1.030936,-2.6e-05,2018
2,600100010081,2019-01-01,0.21108,0.001839,0.00057,1.048855,0.001457,2019
3,600100010081,2019-02-01,0.226947,0.001311,0.000558,0.991918,0.001848,2019
4,600100010081,2019-03-01,0.272456,0.001478,0.000607,1.135778,0.001786,2019


In [10]:
# count months per year per ageb
df_sentinel_pivot["count"] = df_sentinel_pivot.groupby(["CVEGEO", "year"])["date"].transform("count")
df_sentinel_pivot = df_sentinel_pivot[df_sentinel_pivot["count"] > 10]
df_sentinel_pivot

quimico,CVEGEO,date,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,year,count
2,0600100010081,2019-01-01,0.211080,0.001839,0.000570,1.048855,0.001457,2019,12
3,0600100010081,2019-02-01,0.226947,0.001311,0.000558,0.991918,0.001848,2019,12
4,0600100010081,2019-03-01,0.272456,0.001478,0.000607,1.135778,0.001786,2019,12
5,0600100010081,2019-04-01,0.287970,0.001254,0.000635,1.111413,0.002244,2019,12
6,0600100010081,2019-05-01,0.321284,0.002491,0.000618,1.212021,0.000165,2019,12
...,...,...,...,...,...,...,...,...,...
26994,060100001138A,2022-07-01,0.026194,0.000248,0.000112,0.213964,0.000251,2022,11
26995,060100001138A,2022-08-01,0.027008,0.000163,0.000115,0.217196,0.000038,2022,11
26996,060100001138A,2022-09-01,0.025355,0.000123,0.000101,0.194637,0.000083,2022,11
26997,060100001138A,2022-10-01,0.033969,0.000197,0.000111,0.191612,0.000119,2022,11


In [11]:
df_sentinel_pivot.year.unique()

array(['2019', '2020', '2021', '2022'], dtype=object)

In [12]:
df_sentinel_sum = df_sentinel_pivot.groupby(["CVEGEO"]).sum().reset_index()

# devide by 4 to get average
for col in ["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$"]:
  df_sentinel_sum[col] = df_sentinel_sum[col] / 4
df_sentinel_sum.drop(columns=["count"], inplace=True)
df_sentinel_sum.head()

  df_sentinel_sum = df_sentinel_pivot.groupby(["CVEGEO"]).sum().reset_index()


quimico,CVEGEO,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$
0,600100010081,2.546332,0.017304,0.006821,13.622742,0.00658
1,600100010109,3.176153,0.02071,0.008328,16.739348,0.007977
2,600100010147,1.82761,0.012467,0.004884,9.781003,0.005085
3,600100010151,3.791038,0.025777,0.010156,20.258907,0.009859
4,600100010166,3.549575,0.023447,0.009389,18.672414,0.009003


### Censo 2020

In [15]:
censo2020ageb = pd.read_csv("data\censo2020\conjunto_de_datos_ageb_urbana_06_cpv2020.csv")
censo2020ageb

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBFEM,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
0,6,Colima,0,Total de la entidad Colima,0,Total de la entidad,0000,0,731391,370769,...,80251,206736,132395,114164,43881,22695,9173,12085,82366,2698
1,6,Colima,1,Armería,0,Total del municipio,0000,0,27626,13686,...,1955,7125,2636,3927,501,277,539,734,5097,193
2,6,Colima,1,Armería,1,Total de la localidad urbana,0000,0,15368,7632,...,1305,3912,1569,1862,387,186,227,379,2637,87
3,6,Colima,1,Armería,1,Total AGEB urbana,0081,0,1389,708,...,159,371,182,194,54,16,22,44,240,8
4,6,Colima,1,Armería,1,Ciudad de Armería,0081,1,41,18,...,5,12,4,4,*,0,*,*,10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14737,6,Colima,10,Villa de Álvarez,1,Ciudad de Villa de Álvarez,138A,4,0,0,...,0,0,0,0,0,0,0,0,0,0
14738,6,Colima,10,Villa de Álvarez,1,Ciudad de Villa de Álvarez,138A,5,63,31,...,*,18,9,7,4,5,*,0,7,0
14739,6,Colima,10,Villa de Álvarez,1,Ciudad de Villa de Álvarez,138A,6,14,8,...,0,4,0,*,0,*,0,*,4,0
14740,6,Colima,10,Villa de Álvarez,1,Ciudad de Villa de Álvarez,138A,7,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
censo2020ageb.NOM_LOC.unique()

array(['Total de la entidad', 'Total del municipio',
       'Total de la localidad urbana', 'Total AGEB urbana',
       'Ciudad de Armería', 'Cofradía de Juárez', 'Rincón de López',
       'Colima', 'Comala', 'Suchitlán', 'Coquimatlán',
       'Pueblo Juárez (La Magdalena)', 'Cuauhtémoc', 'Quesería',
       'El Trapiche', 'Ixtlahuacán', 'Manzanillo', 'El Colomo', 'Jalipa',
       'Minatitlán', 'Tecomán', 'Colonia Bayardo', 'Cerro de Ortega',
       'Cofradía de Morelos', 'Madrid', 'Ciudad de Villa de Álvarez'],
      dtype=object)

In [17]:
locs_project = [
  "Colima",
  'Ciudad de Villa de Álvarez'
  'Tecomán',
  # Pendiente Caleras:  No hay datos según Censo 2020
  'Cofradía de Morelos',
  'Minatitlán',
  # Pendiente 'Canoas': No hay datos según Censo 2020
  'Cuauhtémoc', 
  'Quesería',
  'Manzanillo'
]

In [18]:
censo2020ageb = censo2020ageb[censo2020ageb.NOM_LOC.isin(locs_project)]
censo2020ageb

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBFEM,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
643,6,Colima,2,Colima,1,Colima,0021,1,0,0,...,0,0,0,0,0,0,0,0,0,0
644,6,Colima,2,Colima,1,Colima,0021,2,146,72,...,26,40,35,18,12,5,*,*,11,0
645,6,Colima,2,Colima,1,Colima,0021,3,0,0,...,0,0,0,0,0,0,0,0,0,0
646,6,Colima,2,Colima,1,Colima,0021,4,238,134,...,41,74,63,41,33,9,4,*,15,*
647,6,Colima,2,Colima,1,Colima,0021,5,54,28,...,8,16,13,4,*,*,*,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11604,6,Colima,9,Tecomán,28,Cofradía de Morelos,0126,37,36,20,...,0,11,0,8,0,*,*,*,11,0
11605,6,Colima,9,Tecomán,28,Cofradía de Morelos,0126,38,10,5,...,0,*,0,*,0,0,0,*,*,0
11606,6,Colima,9,Tecomán,28,Cofradía de Morelos,0126,39,16,5,...,0,5,0,3,0,0,0,0,3,0
11607,6,Colima,9,Tecomán,28,Cofradía de Morelos,0126,40,23,10,...,0,6,*,3,*,*,0,0,5,0


### Datos del Indice de Marginación

In [13]:
im_src = "data/IM/IMU_2020.xls"
im = pd.read_excel(im_src, sheet_name="IMU_2020")
im = im[im["ENT"] == 6]
im_cols_to_keep = ["CVE_AGEB", "NOM_ENT","NOM_MUN", "NOM_LOC","POB_TOTAL","GM_2020"]
im = im[im_cols_to_keep]
im.rename(columns={"CVE_AGEB": "CVEGEO"}, inplace=True)
im

Unnamed: 0,CVEGEO,NOM_ENT,NOM_MUN,NOM_LOC,POB_TOTAL,GM_2020
4815,0600100010081,Colima,Armería,Ciudad de Armería,1389,Medio
4816,0600100010109,Colima,Armería,Ciudad de Armería,1856,Medio
4817,0600100010147,Colima,Armería,Ciudad de Armería,1078,Medio
4818,0600100010151,Colima,Armería,Ciudad de Armería,1671,Medio
4819,0600100010166,Colima,Armería,Ciudad de Armería,1463,Alto
...,...,...,...,...,...,...
5309,0601000011341,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,945,Muy bajo
5310,0601000011356,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,255,Muy bajo
5311,0601000011360,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,414,Muy bajo
5312,0601000011375,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,1878,Muy bajo


In [14]:
im.NOM_LOC.unique()

array(['Ciudad de Armería', 'Cofradía de Juárez', 'Rincón de López',
       'Colima', 'Comala', 'Suchitlán', 'Coquimatlán',
       'Pueblo Juárez (La Magdalena)', 'Cuauhtémoc', 'Quesería',
       'El Trapiche', 'Ixtlahuacán', 'Manzanillo', 'El Colomo', 'Jalipa',
       'Minatitlán', 'Tecomán', 'Colonia Bayardo', 'Cerro de Ortega',
       'Madrid', 'Ciudad de Villa de Álvarez'], dtype=object)

In [183]:
im.GM_2020.unique()

rename_gm = {
  'Muy alto': 5, 
  'Alto': 4,
  'Medio': 3,
  'Bajo': 2,
  'Muy bajo': 1
}

In [184]:
im["GM_2020"] = im["GM_2020"].apply(lambda x: rename_gm[x])
im.head()

Unnamed: 0,CVEGEO,NOM_ENT,NOM_MUN,NOM_LOC,POB_TOTAL,GM_2020
4815,600100010081,Colima,Armería,Ciudad de Armería,1389,3
4816,600100010109,Colima,Armería,Ciudad de Armería,1856,3
4817,600100010147,Colima,Armería,Ciudad de Armería,1078,3
4818,600100010151,Colima,Armería,Ciudad de Armería,1671,3
4819,600100010166,Colima,Armería,Ciudad de Armería,1463,4


### Recolección de metricas

In [185]:
df_sentinel_sum
im

metricas = pd.merge(df_sentinel_sum, im, on="CVEGEO")
metricas

Unnamed: 0,CVEGEO,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,NOM_ENT,NOM_MUN,NOM_LOC,POB_TOTAL,GM_2020
0,0600100010081,2.546332,0.017304,0.006821,13.622742,0.006580,Colima,Armería,Ciudad de Armería,1389,3
1,0600100010109,3.176153,0.020710,0.008328,16.739348,0.007977,Colima,Armería,Ciudad de Armería,1856,3
2,0600100010147,1.827610,0.012467,0.004884,9.781003,0.005085,Colima,Armería,Ciudad de Armería,1078,3
3,0600100010151,3.791038,0.025777,0.010156,20.258907,0.009859,Colima,Armería,Ciudad de Armería,1671,3
4,0600100010166,3.549575,0.023447,0.009389,18.672414,0.009003,Colima,Armería,Ciudad de Armería,1463,4
...,...,...,...,...,...,...,...,...,...,...,...
494,0601000011341,0.909909,0.006516,0.002780,5.075055,0.002552,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,945,1
495,0601000011356,0.782473,0.005305,0.002522,4.367105,0.002215,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,255,1
496,0601000011360,1.191221,0.008269,0.003564,6.475284,0.002907,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,414,1
497,0601000011375,1.859310,0.013077,0.005605,10.146833,0.005725,Colima,Villa de Álvarez,Ciudad de Villa de Álvarez,1878,1


In [186]:
metricas.NOM_MUN.unique()

array(['Armería', 'Colima', 'Comala', 'Coquimatlán', 'Cuauhtémoc',
       'Ixtlahuacán', 'Manzanillo', 'Minatitlán', 'Tecomán',
       'Villa de Álvarez'], dtype=object)

### Cargar agebs (shapefile)

In [187]:
colima = gpd.read_file("data/06_colima/06a.shp")
colima = colima[["CVEGEO", "area","geometry"]]
colima.head()

Unnamed: 0,CVEGEO,area,geometry
0,0600100010081,0.28,"POLYGON ((2293464.136 773222.003, 2293464.942 ..."
1,060010001019A,0.15,"POLYGON ((2293790.913 773288.458, 2293796.024 ..."
2,0600100010359,0.05,"POLYGON ((2292728.301 773695.719, 2292723.607 ..."
3,0600100010151,0.42,"POLYGON ((2293388.338 773721.118, 2293407.275 ..."
4,0600100010166,0.41,"POLYGON ((2294660.453 773180.454, 2294661.055 ..."


In [188]:
colima_metricas = pd.merge(colima, metricas, on="CVEGEO")
colima_metricas["densidad"] = colima_metricas["POB_TOTAL"] / colima_metricas["area"]
colima_metricas.head()

Unnamed: 0,CVEGEO,area,geometry,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,NOM_ENT,NOM_MUN,NOM_LOC,POB_TOTAL,GM_2020,densidad
0,0600100010081,0.28,"POLYGON ((2293464.136 773222.003, 2293464.942 ...",2.546332,0.017304,0.006821,13.622742,0.00658,Colima,Armería,Ciudad de Armería,1389,3,4960.714286
1,060010001019A,0.15,"POLYGON ((2293790.913 773288.458, 2293796.024 ...",1.143618,0.007762,0.003062,6.112355,0.002945,Colima,Armería,Ciudad de Armería,889,3,5926.666667
2,0600100010359,0.05,"POLYGON ((2292728.301 773695.719, 2292723.607 ...",0.424047,0.002876,0.001137,2.270659,0.001142,Colima,Armería,Ciudad de Armería,818,2,16360.0
3,0600100010151,0.42,"POLYGON ((2293388.338 773721.118, 2293407.275 ...",3.791038,0.025777,0.010156,20.258907,0.009859,Colima,Armería,Ciudad de Armería,1671,3,3978.571429
4,0600100010166,0.41,"POLYGON ((2294660.453 773180.454, 2294661.055 ...",3.549575,0.023447,0.009389,18.672414,0.009003,Colima,Armería,Ciudad de Armería,1463,4,3568.292683


In [189]:
# import minmax scaler from sklearn
from sklearn.preprocessing import MinMaxScaler


In [190]:
cols_to_normalize = ["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]


df = colima_metricas[colima_metricas["NOM_MUN"] == "Armería"]
df.head()

Unnamed: 0,CVEGEO,area,geometry,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,NOM_ENT,NOM_MUN,NOM_LOC,POB_TOTAL,GM_2020,densidad
0,0600100010081,0.28,"POLYGON ((2293464.136 773222.003, 2293464.942 ...",2.546332,0.017304,0.006821,13.622742,0.00658,Colima,Armería,Ciudad de Armería,1389,3,4960.714286
1,060010001019A,0.15,"POLYGON ((2293790.913 773288.458, 2293796.024 ...",1.143618,0.007762,0.003062,6.112355,0.002945,Colima,Armería,Ciudad de Armería,889,3,5926.666667
2,0600100010359,0.05,"POLYGON ((2292728.301 773695.719, 2292723.607 ...",0.424047,0.002876,0.001137,2.270659,0.001142,Colima,Armería,Ciudad de Armería,818,2,16360.0
3,0600100010151,0.42,"POLYGON ((2293388.338 773721.118, 2293407.275 ...",3.791038,0.025777,0.010156,20.258907,0.009859,Colima,Armería,Ciudad de Armería,1671,3,3978.571429
4,0600100010166,0.41,"POLYGON ((2294660.453 773180.454, 2294661.055 ...",3.549575,0.023447,0.009389,18.672414,0.009003,Colima,Armería,Ciudad de Armería,1463,4,3568.292683


In [191]:
df[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].head()
df[cols_to_normalize] = df[cols_to_normalize].round(3)
ex1 = df[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].head()

# save to excel
ex1.to_excel("data/06_colima/test.xlsx", index=False)
# # round to 2 decimals

ex1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,CVEGEO,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,GM_2020,densidad
0,0600100010081,2.546,0.017,0.007,13.623,0.007,3,4960.714
1,060010001019A,1.144,0.008,0.003,6.112,0.003,3,5926.667
2,0600100010359,0.424,0.003,0.001,2.271,0.001,2,16360.0
3,0600100010151,3.791,0.026,0.01,20.259,0.01,3,3978.571
4,0600100010166,3.55,0.023,0.009,18.672,0.009,4,3568.293


In [192]:
# use minmax scaler to normalize data between 0 and 1
scaler = MinMaxScaler()
#df[cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
df[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].head()
df[cols_to_normalize] = df[cols_to_normalize].round(3)
ex2 = df[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].head()
ex2.to_excel("data/06_colima/test2.xlsx", index=False)
ex2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,CVEGEO,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,GM_2020,densidad
0,0600100010081,0.36,0.364,0.389,0.365,0.412,0.333,0.241
1,060010001019A,0.151,0.159,0.167,0.153,0.176,0.333,0.296
2,0600100010359,0.044,0.045,0.056,0.044,0.059,0.0,0.894
3,0600100010151,0.546,0.568,0.556,0.553,0.588,0.333,0.184
4,0600100010166,0.51,0.5,0.5,0.508,0.529,0.667,0.161


In [193]:
df["metrica"] = df[["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].sum(axis=1)
df[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad", "metrica"]].head()
df[cols_to_normalize] = df[cols_to_normalize].round(3)

ex2 = df[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad", "metrica"]].head()
ex2.to_excel("data/06_colima/test3.xlsx", index=False)
ex2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,CVEGEO,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,GM_2020,densidad,metrica
0,0600100010081,0.36,0.364,0.389,0.365,0.412,0.333,0.241,2.464
1,060010001019A,0.151,0.159,0.167,0.153,0.176,0.333,0.296,1.435
2,0600100010359,0.044,0.045,0.056,0.044,0.059,0.0,0.894,1.142
3,0600100010151,0.546,0.568,0.556,0.553,0.588,0.333,0.184,3.328
4,0600100010166,0.51,0.5,0.5,0.508,0.529,0.667,0.161,3.375


In [194]:
df_mun = pd.DataFrame()

for mun in colima_metricas.NOM_MUN.unique():
  scaler = MinMaxScaler()
  df = colima_metricas[colima_metricas["NOM_MUN"] == mun].copy()
  
  # normalize data by max min scaler
  cols_to_normalize = ["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  
  df["metrica"] = df[["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].sum(axis=1)
  df_mun = pd.concat([df_mun, df.sort_values(by="metrica", ascending=False).head(5)])


df_mun = gpd.GeoDataFrame(df_mun)

  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])


In [195]:
df_loc = pd.DataFrame()

for mun in colima_metricas.NOM_LOC.unique():
  scaler = MinMaxScaler()
  df = colima_metricas[colima_metricas["NOM_LOC"] == mun].copy()
  
  # normalize data by max min scaler
  cols_to_normalize = ["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])

  df["metrica"] = df[["$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].sum(axis=1)
  df_loc = pd.concat([df_loc, df.sort_values(by="metrica", ascending=False).head(5)])
  
df_loc = gpd.GeoDataFrame(df_loc)

  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
  df.loc[:, cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])


In [202]:
cols_to_normalize

['$CO$', '$HCHO$', '$NO_2$', '$O_3$', '$SO_2$', 'GM_2020', 'densidad']

In [201]:
final = df_mun[["CVEGEO","NOM_MUN","NOM_LOC","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad", "metrica"]].head()
final[cols_to_normalize] = final[cols_to_normalize].round(3)
final.to_excel("data/06_colima/test4.xlsx", index=False)

In [197]:
df_mun

Unnamed: 0,CVEGEO,area,geometry,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,NOM_ENT,NOM_MUN,NOM_LOC,POB_TOTAL,GM_2020,densidad,metrica
8,060010001026A,0.67,"POLYGON ((2294375.304 773528.093, 2294457.218 ...",0.869752,0.89065,0.88467,0.87291,0.897586,Colima,Armería,Ciudad de Armería,510,1.0,0.0,5.415568
7,0600100010255,0.75,"POLYGON ((2293245.282 771844.136, 2293189.417 ...",1.0,1.0,1.0,1.0,1.0,Colima,Armería,Ciudad de Armería,768,0.333333,0.01507,5.348404
13,0600100070240,0.52,"POLYGON ((2294422.660 776477.447, 2294441.730 ...",0.686505,0.701042,0.717691,0.704684,0.675471,Colima,Armería,Cofradía de Juárez,1212,0.666667,0.090005,4.242065
20,0600100350325,0.48,"POLYGON ((2297387.169 785871.370, 2297381.704 ...",0.608884,0.608165,0.626817,0.612862,0.600746,Colima,Armería,Rincón de López,1433,0.666667,0.127544,3.851686
4,0600100010166,0.41,"POLYGON ((2294660.453 773180.454, 2294661.055 ...",0.509635,0.514443,0.513976,0.50783,0.525731,Colima,Armería,Ciudad de Armería,1463,0.666667,0.160969,3.39925
42,0600200010483,1.82,"POLYGON ((2319196.848 804654.646, 2319197.508 ...",1.0,1.0,1.0,1.0,1.0,Colima,Colima,Colima,742,0.5,0.023066,5.523066
39,0600200010464,1.21,"POLYGON ((2320207.018 803190.824, 2320268.911 ...",0.653495,0.6773,0.646226,0.663636,0.700817,Colima,Colima,Colima,90,0.5,0.0,3.841474
24,0600200011320,1.11,"POLYGON ((2315751.665 805258.386, 2315730.950 ...",0.610972,0.613015,0.610502,0.604411,0.547246,Colima,Colima,Colima,1520,0.25,0.089615,3.325761
100,0600200010021,0.88,"POLYGON ((2321869.250 806149.642, 2321862.311 ...",0.473514,0.477652,0.494071,0.476165,0.515735,Colima,Colima,Colima,1637,0.25,0.123583,2.810719
23,0600200011316,0.9,"POLYGON ((2315743.919 806145.039, 2315816.271 ...",0.505104,0.505054,0.511014,0.501588,0.426649,Colima,Colima,Colima,1110,0.25,0.080201,2.77961


In [198]:
df_loc[["CVEGEO","$CO$","$HCHO$","$NO_2$","$O_3$","$SO_2$","GM_2020", "densidad"]].head()

Unnamed: 0,CVEGEO,$CO$,$HCHO$,$NO_2$,$O_3$,$SO_2$,GM_2020,densidad
8,060010001026A,0.869752,0.89065,0.88467,0.87291,0.897586,1.0,0.0
7,0600100010255,1.0,1.0,1.0,1.0,1.0,0.333333,0.01507
4,0600100010166,0.509635,0.514443,0.513976,0.50783,0.525731,0.666667,0.160969
3,0600100010151,0.545624,0.56756,0.557554,0.552654,0.577758,0.333333,0.184495
16,0600100010109,0.453978,0.452059,0.453631,0.453213,0.463375,0.333333,0.243997


In [199]:
df_loc.NOM_LOC.unique()

array(['Ciudad de Armería', 'Cofradía de Juárez', 'Rincón de López',
       'Colima', 'Comala', 'Suchitlán', 'Coquimatlán',
       'Pueblo Juárez (La Magdalena)', 'El Trapiche', 'Cuauhtémoc',
       'Quesería', 'Ixtlahuacán', 'Manzanillo', 'El Colomo', 'Jalipa',
       'Minatitlán', 'Tecomán', 'Cerro de Ortega', 'Colonia Bayardo',
       'Madrid', 'Ciudad de Villa de Álvarez'], dtype=object)