In [1]:
import os
import geopandas as gpd
import pandas as pd
import numpy as np

from shapely import Polygon, MultiPolygon

In [2]:
# Cell that connects the notebook to Google Drive (Google Colab only).
from google.colab import drive

drive.mount('/content/drive')

# Switch to the project folder on the shared drive, unless the current
# working directory already ends with that folder's path.
already_in_project = os.getcwd().endswith('Censo IBGE 2022/Compatibilização')
if not already_in_project:
    os.chdir('/content/drive/Shareddrives/SIG LabCidade/projetos/Censo IBGE 2022/Compatibilização')

Mounted at /content/drive


In [12]:
# Name of the output folder (relative to the current working directory)
# that receives the files produced by this notebook.
nome_compat = 'Distritos_RMSP_Dados'

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate isdir() test followed by mkdir().
os.makedirs(nome_compat, exist_ok=True)

## Importação de dados

In [13]:
# Load the two input layers from their GeoPackage files (paths are relative
# to the current working directory): malha1 from the 2010-2022 folder and
# malha2 from the 2000-2010-2022 folder.
malha1, malha2 = (
    gpd.read_file(caminho)
    for caminho in (
        '2010-2022-RMSP-Dados/PICs_RMSP_1022_agg.gpkg',
        '2000-2010-2022-RMSP-Dados/PICs_RMSP_001022_agg.gpkg',
    )
)

In [14]:
# Derive the district key from the first 11 characters of CD_PERIMETRO.
# NOTE(review): assumes CD_PERIMETRO holds strings whose 11-character prefix
# identifies the district - confirm against the code layout.
# The vectorized .str accessor replaces a row-wise apply; missing values
# propagate as NaN instead of raising TypeError.
malha1['CD_DIST'] = malha1['CD_PERIMETRO'].str[:11]
malha2['CD_DIST'] = malha2['CD_PERIMETRO'].str[:11]

In [15]:
def removeHoles(geom, area_min=1):
    """Drop small interior rings (holes) from a polygonal geometry.

    Every interior ring of every polygon part is rebuilt as a polygon and
    kept only when its area is strictly greater than ``area_min``; smaller
    holes are filled. Exterior rings are left untouched.

    Parameters
    ----------
    geom : shapely Polygon or MultiPolygon, or None
        Geometry to clean. ``None`` and empty geometries are returned as-is.
    area_min : float, default 1
        Area threshold, in squared units of the geometry's coordinates
        (presumably square metres - confirm against the layer CRS).

    Returns
    -------
    A Polygon when the input has a single part, otherwise a MultiPolygon.
    """
    # Missing or empty geometries have no parts to rebuild; without this
    # guard None raises AttributeError and an empty geometry raises
    # IndexError when the (empty) list of parts is indexed below.
    if geom is None or geom.is_empty:
        return geom

    parts = [geom] if isinstance(geom, Polygon) else list(geom.geoms)

    cleaned = []
    for part in parts:
        # Keep only the holes that are larger than the threshold.
        kept_holes = [ring for ring in part.interiors if Polygon(ring).area > area_min]
        cleaned.append(Polygon(part.exterior.coords, holes=kept_holes))

    return MultiPolygon(cleaned) if len(cleaned) > 1 else cleaned[0]

# Build one geometry per district by dissolving malha2 on CD_DIST.
# Only the key and the geometry are handed to dissolve(): the attribute
# columns it would otherwise aggregate were discarded right afterwards.
agg_dist = malha2[['CD_DIST', 'geometry']].dissolve(by='CD_DIST')

# Fill the small holes left inside each dissolved district.
agg_dist['geometry'] = agg_dist['geometry'].apply(removeHoles)

# Bring CD_DIST back from the index as a regular column.
agg_dist = agg_dist.reset_index()[['CD_DIST', 'geometry']]

In [18]:
# Column -> aggregation function used when summarising malha1 by district.
# Every listed column is summed.
aggcols_malha1 = dict.fromkeys(
    [
        'EXPANSAO_HA_COND_HOR',
        'EXPANSAO_HA_CONJ_COND_VERT',
        'EXPANSAO_HA_FAV_OCUP_LOT',
        'EXPANSAO_HA_IND_GALP',
        'EXPANSAO_HA_HOR',
        'EXPANSAO_HA_OUTRO',
        'EXPANSAO_HA_RODOVIA',
        'EXPANSAO_HA_VERT',
    ],
    'sum',
)

# Column -> aggregation function used when summarising malha2 by district.
# Every listed column is summed.
aggcols_malha2 = dict.fromkeys(
    [
        'DOM_COL_2022',
        'DOM_PAR_2022',
        'DOM_PI_2000',
        'DOM_PI_2010',
        'DOM_PO_2000',
        'DOM_PO_2010',
        'DOM_PO_2022',
        'DOM_PO_IMPUT_2022',
        'DOM_PP_2000',
        'DOM_PP_2010',
        'DOM_TOT_2000',
        'DOM_TOT_2010',
        'DOM_TOT_2022',
        'POP_DOM_PO_2000',
        'POP_DOM_PO_2022',
        'POP_DOM_PP_2000',
        'POP_DOM_PP_2010',
        'POP_TOT_2000',
        'POP_TOT_2010',
        'POP_TOT_2022',
        'AREA_URB_HA_2000',
        'AREA_URB_HA_2010',
        'AREA_URB_HA_2022',
        'AU_EMP_00A10',
        'AU_EMP_10A22',
        'GAR_00A10',
        'GAR_10A22',
        'TT_UNID_00A10',
        'TT_UNID_10A22',
    ],
    'sum',
)

In [19]:
# Sum the malha1 columns per district and attach them to the district layer.
# The left join keeps every district even when it has no matching rows.
agg_malha = (
    malha1
    .pivot_table(index='CD_DIST', aggfunc=aggcols_malha1)
    .reset_index()
)
agg_dist = agg_dist.merge(agg_malha, how='left', on='CD_DIST')

# Same procedure for the malha2 columns.
agg_malha = (
    malha2
    .pivot_table(index='CD_DIST', aggfunc=aggcols_malha2)
    .reset_index()
)
agg_dist = agg_dist.merge(agg_malha, how='left', on='CD_DIST')

In [20]:
# Tag the EXPANSAO_HA* columns with the period suffix. Columns that already
# carry the suffix are skipped so that re-running this cell does not append
# it a second time.
agg_dist = agg_dist.rename(columns={
    col: f'{col}_2010a2022'
    for col in agg_dist.columns
    if col.startswith('EXPANSAO_HA') and not col.endswith('_2010a2022')
})

In [21]:
# District area divided by 10,000 and rounded to three decimals.
# NOTE(review): this is hectares only if the layer CRS is in metres - confirm.
agg_dist['AREA_HA'] = (agg_dist['geometry'].area / 10000).round(3)

In [22]:
# District area divided by 10,000 (unrounded), computed once instead of
# three times per census year inside the loop.
# NOTE(review): this is hectares only if the layer CRS is in metres - confirm.
area_ha = agg_dist['geometry'].area / 10000

for year in ['2000', '2010', '2022']:
    # Population density
    agg_dist[f'DENS_POP_TOT_HA_{year}'] = agg_dist[f'POP_TOT_{year}'] / area_ha

    # Household density (total)
    agg_dist[f'DENS_DOM_TOT_HA_{year}'] = agg_dist[f'DOM_TOT_{year}'] / area_ha

    # Household density (occupied)
    agg_dist[f'DENS_DOM_PO_HA_{year}'] = agg_dist[f'DOM_PO_{year}'] / area_ha

    # Average residents per household: 2000 and 2010 use the *_PP_* columns,
    # 2022 uses the *_PO_* columns (presumably the only variant available
    # for that year - confirm). Districts with zero households yield
    # inf/NaN rather than raising.
    if year != '2022':
        agg_dist[f'MED_POP_DOM_PP_{year}'] = agg_dist[f'POP_DOM_PP_{year}'] / agg_dist[f'DOM_PP_{year}']
    else:
        agg_dist[f'MED_POP_DOM_PO_{year}'] = agg_dist[f'POP_DOM_PO_{year}'] / agg_dist[f'DOM_PO_{year}']

In [23]:
# Write the district layer as a GeoPackage inside the output folder.
agg_dist.to_file(
    f'{nome_compat}/DISTR_001022_RMSP.gpkg',
    driver='GPKG',
)