In [1]:
import os
import geopandas as gpd
import pandas as pd
import numpy as np

In [2]:
### Cell that connects the notebook to Google Drive (Colab only)
from google.colab import drive
drive.mount('/content/drive')

# The relative paths used by the later cells are resolved from the project folder,
# so move there unless the working directory already ends with it.
destino = '/content/drive/Shareddrives/SIG LabCidade/projetos/Censo IBGE 2022/Compatibilização'
ja_posicionado = os.getcwd().endswith('Censo IBGE 2022/Compatibilização')
if not ja_posicionado:
    os.chdir(destino)

Mounted at /content/drive


In [None]:
# Run identifiers: `nome_compat` is the folder the exports are written to,
# `nome_A` / `nome_B` label the two censuses in the output file and layer names.
nome_compat = '2010-2022-RMSP-RMBS-AUJ'
nome_A, nome_B = '2010', '2022'

## Importar geometria de distritos

In [3]:
# Build the district polygons from the 2022 preliminary mesh:
# keep the district/mesoregion attributes and the geometry, merge the geometries
# that share those attributes, then reproject to EPSG:31983 (the CRS in which
# areas are measured later on).
# After dissolve() the four key fields become the index of gdf_dist.
chaves_distrito = ['CD_DIST', 'NM_DIST', 'CD_MESO', 'NM_MESO']
gdf_dist = (
    gpd.read_file('../../../dados/IBGE 2022 Agregados preliminares por setores censitários/georreferenciados/SP_Malha_Preliminar_2022_c_tributos.gpkg')
    [chaves_distrito + ['geometry']]
    .dissolve(by=chaves_distrito)
    .to_crs("EPSG:31983")
)

## Importar dados IBGE

Dados comuns entre censos:

| Código | Variável | Localização em A | Localização em B |
|--------|----------|------------------|------------------|
| pop_tot | População total | Domicilio02_SP_V001 | v0001 |
| pop_dom_po | População em domicílios particulares | Domicilio02_SP_V002 (apenas permanentes) | v0005b = v0005 * v0007 |
| dom_tot | Domicílios totais | Domicilio01_SP_V001 | v0002 |
| dom_po | Domicílios particulares ocupados | Domicilio01_SP_V002 + DomicilioRenda_SP_V001 | v0007 |

In [None]:
# Census A tract-level data.
# Each dict in `loc_variaveis` is one group of source files mapped to the columns
# wanted from them. The files of a group are stacked row-wise, and every group is
# then joined to `data_A` by tract code.
loc_variaveis = [
    {
    'Basico_SP1': ['Cod_distrito'],
    'Basico_SP2': ['Cod_distrito'],},
    {
    'Domicilio01_SP1': ['V001', 'V002'],
    'Domicilio01_SP2': ['V001', 'V002'],},
    {
    'Domicilio02_SP1': ['V001', 'V002'],
    'Domicilio02_SP2': ['V001', 'V002'],},
    {
    'DomicilioRenda_SP1': ['V001'],
    'DomicilioRenda_SP2': ['V001'],}
]

# Start from the tract codes listed in the compatibility matrix. The path is built
# from the run identifiers so it points at the same folder the exports use.
data_A = pd.read_csv(f'{nome_compat}/matriz_compat_{nome_A}.csv', sep='\t', dtype={'CD_GEOCODI':str, 'CD_PERIMETRO':str})
data_A = data_A.dropna(subset='CD_GEOCODI')
data_A = data_A[['CD_GEOCODI']]

for group in loc_variaveis:
    # Collect the parts of the group and concatenate once, instead of growing a
    # DataFrame inside the loop (quadratic, and concatenating with an empty
    # frame is deprecated in recent pandas).
    partes = []
    for file, variaveis in group.items():
        df_temp = pd.read_csv(f'../../../dados/IBGE 2010 Agregados por setores censitários/{file}.csv',
                            sep=';',
                            usecols=['Cod_setor']+variaveis,
                            dtype={'Cod_setor':str,
                                   'Cod_distrito':str},
                            encoding='latin-1')
        for v in variaveis:
            # 'X' cells are replaced by 0; 'V...' columns become floats,
            # any other requested column is kept as text.
            if v.startswith('V'):
                df_temp[v] = df_temp[v].replace('X',0).astype(float)
            else:
                df_temp[v] = df_temp[v].replace('X',0).astype(str)
        # Prefix each variable with the file name minus its last character, so
        # both files of a group end up with the same column names.
        df_temp = df_temp.rename(columns={k:f'{file[:-1]}_{k}' for k in variaveis})
        df_temp = df_temp.rename(columns={'Cod_setor':'CD_GEOCODI'})
        partes.append(df_temp)
    df_group = pd.concat(partes)
    # Left join: every tract of the matrix is kept, tracts absent from the files get NaN.
    data_A = data_A.merge(df_group, on='CD_GEOCODI', how='left')

data_A = data_A.rename(columns={'Basico_SP_Cod_distrito':'CD_DIST'})

In [None]:
# Census B tract-level data
data_B = pd.read_csv('../../../dados/IBGE 2022 Agregados preliminares por setores censitários/Agregados_preliminares_por_setores_censitarios_SP.csv',
            sep=';',
            dtype={'CD_SETOR':str,
                   'CD_DIST':str,
                   'CD_MESO':str})
# Keep the district/mesoregion identifiers and the v0001-v0007 variables.
# CD_SETOR is not part of this selection, so no tract-code rename is applied
# afterwards (renaming a column that was already dropped would silently do nothing).
colunas_B = ['CD_DIST', 'NM_DIST', 'CD_MESO', 'NM_MESO',
             'v0001', 'v0002', 'v0003', 'v0004', 'v0005', 'v0006', 'v0007']
data_B = data_B[colunas_B]

## Reagregação

In [4]:
# Load the 'PICs_RMSP_001022_agg' layer and derive CD_DIST as the first
# 11 characters of each perimeter code.
# NOTE(review): the preview of this frame shows 12-character CD_PERIMETRO values,
# so CD_DIST here is an 11-character prefix — confirm it has the same format as
# the CD_DIST key used by the district geometry before joining on it.
data = gpd.read_file('2000-2010-2022-RMSP-Dados/PICs_RMSP_001022_agg.gpkg')
codigos_perimetro = data['CD_PERIMETRO']
data['CD_DIST'] = codigos_perimetro.map(lambda codigo: codigo[:11])

In [5]:
# Preview the loaded table (displayed by the notebook as the cell's last expression)
data

Unnamed: 0,CD_PERIMETRO,DOM_COL_2022,DOM_PAR_2022,DOM_PI_2000,DOM_PI_2010,DOM_PO_2000,DOM_PO_2010,DOM_PO_2022,DOM_PO_IMPUT_2022,DOM_PP_2000,...,AREA_URB_HA_2000,AREA_URB_HA_2010,AREA_URB_HA_2022,GAR_00A10,GAR_10A22,TT_UNID_00A10,TT_UNID_10A22,AGSN_2010,geometry,CD_DIST
0,350390100001,3.0,119.0,0.0,0.0,134.0,109.0,95.0,11.0,134.0,...,18.219,18.219,18.219,,,,,False,"POLYGON ((365187.154 7411875.347, 365167.154 7...",35039010000
1,350390100002,1.0,174.0,1.0,0.0,150.0,151.0,163.0,22.0,149.0,...,8.689,8.689,8.689,,,,,False,"POLYGON ((365164.286 7412334.372, 365118.286 7...",35039010000
2,350390100003,0.0,292.0,1.0,0.0,132.0,175.0,250.0,26.0,131.0,...,12.029,12.031,12.031,,,,,False,"POLYGON ((365610.151 7412311.349, 365594.640 7...",35039010000
3,350390100004,0.0,273.0,0.0,0.0,170.0,238.0,241.0,11.0,170.0,...,13.800,13.800,13.800,,,,,False,"POLYGON ((365551.152 7412059.348, 365563.152 7...",35039010000
4,350390100005,0.0,535.0,0.0,0.0,279.0,416.0,485.0,116.0,279.0,...,12.347,17.189,18.049,,,,,False,"POLYGON ((365236.033 7413154.763, 365243.513 7...",35039010000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22721,355650300031,0.0,366.0,0.0,0.0,299.0,322.0,324.0,8.0,299.0,...,,,,,,,,False,"POLYGON ((312910.158 7432835.148, 312848.386 7...",35565030003
22722,355650300032,0.0,187.0,0.0,0.0,128.0,175.0,172.0,5.0,128.0,...,,,,,,,,False,"POLYGON ((313348.714 7433507.594, 313364.521 7...",35565030003
22723,355650300033,0.0,279.0,1.0,0.0,196.0,260.0,225.0,11.0,195.0,...,,,,,,,,False,"POLYGON ((313710.698 7433263.356, 313661.726 7...",35565030003
22724,355650300034,,,,,,,,,,...,,,,,,,,True,"POLYGON ((311199.831 7433997.862, 311192.785 7...",35565030003


## Renomeações

In [None]:
# Map the year-prefixed source codes to readable indicator names carrying a year
# suffix, then order the columns alphabetically.
# NOTE(review): `agg` is not created by any cell visible in this notebook —
# confirm where it is built; on a fresh kernel this cell would otherwise raise NameError.
new_names = {
    # census B variables
    '2022_v0001': 'pop_tot_2022',
    '2022_v0002': 'dom_tot_2022',
    '2022_v0003': 'dom_par_2022',
    '2022_v0004': 'dom_col_2022',
    '2022_v0005b': 'pop_dom_po_2022',
    '2022_v0006b': 'dom_po_imput_2022',
    '2022_v0007': 'dom_po_2022',
    '2022_v0005': 'med_pop_dom_po_2022',
    '2022_v0006': 'pct_dom_po_imput_2022',
    # census A variables
    '2010_Domicilio01_SP_V001': 'dom_tot_2010',
    '2010_Domicilio01_SP_V002': 'dom_pp_2010',
    '2010_Domicilio02_SP_V001': 'pop_tot_2010',
    '2010_Domicilio02_SP_V002': 'pop_dom_pp_2010',
    '2010_DomicilioRenda_SP_V001': 'dom_pi_2010',
    '2010_dom_po': 'dom_po_2010',
}

# Keys that are absent from `agg` are ignored by rename().
agg = (
    agg
    .rename(columns=new_names)
    .pipe(lambda tabela: tabela[sorted(tabela.columns)])
)

## Cálculos de variação

In [None]:
# Attach the aggregated indicators to the district polygons (matched on CD_DIST,
# keeping every district) and keep only the columns of `agg` plus the geometry.
colunas_saida = list(agg.columns) + ['geometry']
gdf_agg = gdf_dist.merge(agg, on='CD_DIST', how='left')[colunas_saida]

# Polygon area divided by 10,000 — hectares, assuming the CRS unit is the metre.
# Computed once and shared by the three density fields.
area_ha = gdf_agg['geometry'].area / 10000

# Absolute changes between the two censuses (2022 minus 2010)
delta_pop_tot = gdf_agg['pop_tot_2022'] - gdf_agg['pop_tot_2010']
delta_dom_tot = gdf_agg['dom_tot_2022'] - gdf_agg['dom_tot_2010']
delta_dom_po = gdf_agg['dom_po_2022'] - gdf_agg['dom_po_2010']

gdf_agg['var_pop_tot'] = delta_pop_tot    # total population
gdf_agg['var_dom_tot'] = delta_dom_tot    # total households
gdf_agg['var_dom_po'] = delta_dom_po      # occupied private households

# The same changes expressed per hectare of district area
gdf_agg['var_dens_pop_tot_ha'] = delta_pop_tot / area_ha
gdf_agg['var_dens_dom_tot_ha'] = delta_dom_tot / area_ha
gdf_agg['var_dens_dom_po_ha'] = delta_dom_po / area_ha

# Change in the average number of residents per household: the 2022 value is read
# from med_pop_dom_po_2022, the 2010 value is pop_dom_pp_2010 / dom_pp_2010.
media_2010 = gdf_agg['pop_dom_pp_2010'] / gdf_agg['dom_pp_2010']
gdf_agg['var_med_pop_dom'] = gdf_agg['med_pop_dom_po_2022'] - media_2010

## Exportação

In [None]:
# Write the district table as a tab-separated file with comma decimals and no index column
caminho_csv = f'{nome_compat}/agg_distritos_{nome_A}-{nome_B}.csv'
agg.to_csv(caminho_csv, sep='\t', decimal=',', index=False)

In [None]:
# Save the district geometries with their indicators as a GeoPackage layer
# named after the two censuses being compared.
camada = f'{nome_A}-{nome_B}'
gdf_agg.to_file(f'{nome_compat}/distritos_agg.gpkg', driver='GPKG', layer=camada)