<br>

# Macrorregiões

In [1]:
import pandas as pd
from open_geodata import geo

from sp_mpsp_divadmin.paths import input_path_tab, output_path_tabs

<br>

## Read Data

### Nomes dos Municípios

Lê o arquivo CSV com os nomes dos municípios.

In [2]:
# Load the municipality-name table through open_geodata's `geo.load_dataset`.
# The captured output below shows two columns: id_municipio and municipio_nome.
df_mun = geo.load_dataset('tab.sp.tab_municipio_nome')

# Inspect: print the schema summary, then show the first rows as the cell output.
df_mun.info()
df_mun.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id_municipio    645 non-null    int64 
 1   municipio_nome  645 non-null    object
dtypes: int64(1), object(1)
memory usage: 10.2+ KB


Unnamed: 0,id_municipio,municipio_nome
0,3500105,Adamantina
1,3500204,Adolfo
2,3500303,Aguaí
3,3500402,Águas da Prata
4,3500501,Águas de Lindóia


<br>

## IDs vinculando Municípios com Áreas Regionais

In [3]:
# Link table municipality -> regional area (AR); only the two key columns
# needed for the joins further down are read from the file.
df_ar = pd.read_csv(
    filepath_or_buffer=input_path_tab / 'tab_ids_municipio.csv',
    usecols=['id_municipio', 'id_ar'],
)

# Inspect: schema summary, then the first rows as the cell output.
df_ar.info()
df_ar.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   id_municipio  645 non-null    int64
 1   id_ar         645 non-null    int64
dtypes: int64(2)
memory usage: 10.2 KB


Unnamed: 0,id_municipio,id_ar
0,3500105,10
1,3500204,13
2,3500303,3
3,3500402,3
4,3500501,3


In [4]:
# Link table regional area (id_ar) -> macro-region (id_macrorregiao).
df_ar_macro = pd.read_csv(input_path_tab / 'tab_ids_macrorregioes.csv')

# Inspect: schema summary, then the first rows as the cell output.
df_ar_macro.info()
df_ar_macro.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   id_ar            16 non-null     int64
 1   id_macrorregiao  16 non-null     int64
dtypes: int64(2)
memory usage: 384.0 bytes


Unnamed: 0,id_ar,id_macrorregiao
0,1,4
1,2,4
2,3,2
3,4,1
4,5,5


In [5]:
# Macro-region table. Every column is prefixed with 'macrorregiao_' so the
# names stay unambiguous once this table is merged into the municipality frame.
df_macro = pd.read_csv(
    input_path_tab / 'tab_macrorregioes.csv'
).add_prefix('macrorregiao_')

# Inspect: schema summary, then the first rows as the cell output.
df_macro.info()
df_macro.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   macrorregiao_id    5 non-null      int64 
 1   macrorregiao_nome  5 non-null      object
dtypes: int64(1), object(1)
memory usage: 208.0+ bytes


Unnamed: 0,macrorregiao_id,macrorregiao_nome
0,1,São Paulo
1,2,Campinas
2,3,Piracicaba
3,4,Bauru
4,5,Ribeirão Preto


<br>

## Município - AR

In [6]:
# Attach the municipality name to each (id_municipio, id_ar) row.
# - `on=` replaces the redundant identical left_on/right_on pair.
# - validate='m:1' makes pandas raise a MergeError if id_municipio is not
#   unique in df_mun, instead of silently duplicating rows in the result.
df = pd.merge(
    df_ar,
    df_mun,
    how='left',
    on='id_municipio',
    validate='m:1',
)

# Results
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id_municipio    645 non-null    int64 
 1   id_ar           645 non-null    int64 
 2   municipio_nome  645 non-null    object
dtypes: int64(2), object(1)
memory usage: 15.2+ KB


Unnamed: 0,id_municipio,id_ar,municipio_nome
0,3500105,10,Adamantina
1,3500204,13,Adolfo
2,3500303,3,Aguaí
3,3500402,3,Águas da Prata
4,3500501,3,Águas de Lindóia


<br>

## IDs vinculando Áreas Regionais com Macrorregiões

In [7]:
# Attach the macro-region id to each municipality through its regional-area
# id (id_ar).
# - `on=` replaces the redundant identical left_on/right_on pair.
# - validate='m:1' makes pandas raise a MergeError if id_ar is not unique in
#   df_ar_macro, instead of silently duplicating municipality rows.
df = pd.merge(
    df,
    df_ar_macro,
    how='left',
    on='id_ar',
    validate='m:1',
)

# Results
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id_municipio     645 non-null    int64 
 1   id_ar            645 non-null    int64 
 2   municipio_nome   645 non-null    object
 3   id_macrorregiao  645 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 20.3+ KB


Unnamed: 0,id_municipio,id_ar,municipio_nome,id_macrorregiao
0,3500105,10,Adamantina,4
1,3500204,13,Adolfo,5
2,3500303,3,Aguaí,2
3,3500402,3,Águas da Prata,2
4,3500501,3,Águas de Lindóia,2


<br>

## Município - Macrorregiões

In [8]:
# Attach the macro-region name to each municipality.
# The key columns have different names on each side, so left_on/right_on are
# required here; the duplicated key column (macrorregiao_id) stays in the
# result of this merge.
# validate='m:1' makes pandas raise a MergeError if macrorregiao_id is not
# unique in df_macro, instead of silently duplicating municipality rows.
df = pd.merge(
    df,
    df_macro,
    how='left',
    left_on='id_macrorregiao',
    right_on='macrorregiao_id',
    validate='m:1',
)

# Results
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id_municipio       645 non-null    int64 
 1   id_ar              645 non-null    int64 
 2   municipio_nome     645 non-null    object
 3   id_macrorregiao    645 non-null    int64 
 4   macrorregiao_id    645 non-null    int64 
 5   macrorregiao_nome  645 non-null    object
dtypes: int64(4), object(2)
memory usage: 30.4+ KB


Unnamed: 0,id_municipio,id_ar,municipio_nome,id_macrorregiao,macrorregiao_id,macrorregiao_nome
0,3500105,10,Adamantina,4,4,Bauru
1,3500204,13,Adolfo,5,5,Ribeirão Preto
2,3500303,3,Aguaí,2,2,Campinas
3,3500402,3,Águas da Prata,2,2,Campinas
4,3500501,3,Águas de Lindóia,2,2,Campinas


In [9]:
# Drop the helper join keys that are not part of the final table.
# The frame is rebound to a new object instead of being mutated with
# inplace=True, and `columns=` states the axis explicitly.
# errors='ignore' is kept so the cell can be re-run after the columns are
# already gone without raising a KeyError.
df = df.drop(columns=['id_ar', 'macrorregiao_id'], errors='ignore')

# Results
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id_municipio       645 non-null    int64 
 1   municipio_nome     645 non-null    object
 2   id_macrorregiao    645 non-null    int64 
 3   macrorregiao_nome  645 non-null    object
dtypes: int64(2), object(2)
memory usage: 20.3+ KB


Unnamed: 0,id_municipio,municipio_nome,id_macrorregiao,macrorregiao_nome
0,3500105,Adamantina,4,Bauru
1,3500204,Adolfo,5,Ribeirão Preto
2,3500303,Aguaí,2,Campinas
3,3500402,Águas da Prata,2,Campinas
4,3500501,Águas de Lindóia,2,Campinas


In [10]:
# Write the final municipality -> macro-region table to the output folder;
# the row index is not written to the file.
df.to_csv(
    path_or_buf=output_path_tabs / 'tab_municipio_macrorregioes.csv',
    index=False,
)

# Show what was saved: schema summary, then the first rows as the cell output.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 645 entries, 0 to 644
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id_municipio       645 non-null    int64 
 1   municipio_nome     645 non-null    object
 2   id_macrorregiao    645 non-null    int64 
 3   macrorregiao_nome  645 non-null    object
dtypes: int64(2), object(2)
memory usage: 20.3+ KB


Unnamed: 0,id_municipio,municipio_nome,id_macrorregiao,macrorregiao_nome
0,3500105,Adamantina,4,Bauru
1,3500204,Adolfo,5,Ribeirão Preto
2,3500303,Aguaí,2,Campinas
3,3500402,Águas da Prata,2,Campinas
4,3500501,Águas de Lindóia,2,Campinas
