# Intergração dados transporte IBGE e DNIT

Bases utilizadas:
- DNIT: https://servicos.dnit.gov.br/dadospnct/PesquisaOD/BaseDeDados
- IBGE: https://www.ibge.gov.br/geociencias/organizacao-do-territorio/redes-e-fluxos-geograficos/15794-rodoviarias-e-hidroviarias.html

## Importandos bibliotecas

In [1]:
import pandas as pd
from functools import reduce

## Carregando bases

Dados do IBGE: 
- VAR05 - Frequência de saídas de veículos hidroviários no par de ligação
- VAR06 - Frequência de saídas de veículos rodoviários no par de ligação
- VAR07 - Frequência total de saídas de veículos no par de ligação

In [2]:
#Carregando dados do IBGE
df_ibge = pd.read_excel(open('data/Base_de_dados_ligacoes_rodoviarias_e_hidroviarias_2016.xlsx', 'rb'), sheet_name='Base de dados', 
                               dtype = {'ID': int, 'CODMUNDV_A': int, 'CODMUNDV_B': int}) 
df_ibge.head()

Unnamed: 0,ID,COD_UF_A,UF_A,CODMUNDV_A,NOMEMUN_A,COD_UF_B,UF_B,CODMUNDV_B,NOMEMUN_B,VAR01,...,VAR05,VAR06,VAR07,VAR08,VAR09,VAR10,VAR11,VAR12,VAR13,VAR14
0,13026031304260,13,AM,1302603,Manaus,13,AM,1304260,Uarini,Núcleo de Metrópole (Manaus),...,7.5,0.0,7.5,-60.023335,-3.134691,-65.155346,-2.983609,0.0,Não,0.023148
1,13021081302603,13,AM,1302108,Japurá,13,AM,1302603,Manaus,Centro Local,...,3.0,0.0,3.0,-66.996903,-1.880845,-60.023335,-3.134691,3.0,Não,0.02662
2,51038095108402,51,MT,5103809,Figueirópolis D'Oeste,51,MT,5108402,Várzea Grande,Centro Local,...,0.0,42.0,42.0,-58.737825,-15.446135,-56.144664,-15.658065,0.0,Sim,0.028
3,13000291302603,13,AM,1300029,Alvarães,13,AM,1302603,Manaus,Centro Local,...,7.5,0.0,7.5,-64.813535,-3.216584,-60.023335,-3.134691,0.0,Não,0.03125
4,13000291301308,13,AM,1300029,Alvarães,13,AM,1301308,Codajás,Centro Local,...,6.0,0.0,6.0,-64.813535,-3.216584,-62.06343,-3.843046,0.0,Não,0.036232


In [3]:
# Carregando dados DNIT
nomes_planilhas = ["BD_1PNT2016", "BD_2PNT2016", "BD_1PNT2017", "BD_2PNT2017"] 
lista_df_dnit = []

for nome in nomes_planilhas:
    df_lido = pd.read_excel(open('data/PesquisaOD_2016_2017.xlsx', 'rb'), sheet_name=nome) 
    lista_df_dnit.append(df_lido)

## Plotando primeiro df da lista
lista_df_dnit[0].head()

Unnamed: 0,idPesquisa,idOD,idPosto,Sentido,DataIniPesq,DataFimPesq,POrigem,MOrigem,UFOrigem,NodeIDOrigem,...,MunDesembCarga,UFDesembCarga,LocalDesembCarga,IndoPegarCarga,MunParadaObr1,UFParadaObr1,MunParadaObr2,UFParadaObr2,considerar_parOD?,considerar_socio?
0,1,1,84,AB,02/07/2016 09:06:51,02/07/2016 09:14:43,Brasil,Seberi,RS,16646.0,...,,,,,,,,,1,1
1,1,2,84,AB,02/07/2016 09:15:19,02/07/2016 09:26:04,Brasil,São José do Inhacorá,RS,16575.0,...,,,,,,,,,1,1
2,1,3,84,AB,02/07/2016 09:26:29,02/07/2016 09:35:39,Brasil,Seberi,RS,16646.0,...,,,,,,,,,1,1
3,1,4,84,AB,02/07/2016 09:36:06,02/07/2016 09:50:54,Brasil,Palmitinho,RS,16603.0,...,,,,,,,,,1,1
4,1,5,84,AB,02/07/2016 09:51:04,02/07/2016 10:01:18,Brasil,Seberi,RS,16646.0,...,,,,,,,,,1,1


## Pré-processamento 

### IBGE

In [4]:
## Duplicando dados do IBGE para que as ligações tenham os 2 sentidos

df_ibge_1 = df_ibge[["CODMUNDV_A","CODMUNDV_B","VAR05","VAR06"]].copy(deep=True)
df_ibge_2 = df_ibge[["CODMUNDV_A","CODMUNDV_B","VAR05","VAR06"]].copy(deep=True)
df_ibge_2 = df_ibge_2.rename(columns={"CODMUNDV_B": "CODMUNDV_A", "CODMUNDV_A": "CODMUNDV_B"})
df_ibge_final = pd.concat([df_ibge_1, df_ibge_2])
df_ibge_final[df_ibge_final.duplicated()]

## Removendo valores 0 de frequência 
# -> Quando adiciona rodoviario e hidroviario não valores 0 em ambas colunas
df_ibge_final = df_ibge_final[(df_ibge_final["VAR05"]>0) | (df_ibge_final["VAR06"]>0)]
df_ibge_final.shape

## Renomeando colunas do df do IBGE e DNIT
#IBGE
df_ibge_final = df_ibge_final.rename(columns={"VAR05": "ibge_hidro", "VAR06": "ibge_rodov", "CODMUNDV_A": "origem", "CODMUNDV_B": "destino"})
df_ibge_final.head()


Unnamed: 0,origem,destino,ibge_hidro,ibge_rodov
0,1302603,1304260,7.5,0.0
1,1302108,1302603,3.0,0.0
2,5103809,5108402,0.0,42.0
3,1300029,1302603,7.5,0.0
4,1300029,1301308,6.0,0.0


### DNIT

In [5]:
def normalizar_semana_dnit(freq, tipoFreq, classe, qtd_pessoas):

    classe_normalizada = 1 if (classe == "O1" or classe == "O2" or classe == "O3") else .25
    freq = 11 if freq == ">10" else freq

    # Transforma todas frequencias para semana
    if(tipoFreq == "Ano"):
        freq_normalizada = 0.019165 * freq 
    elif(tipoFreq == "Eventualmente"):
        freq_normalizada =  0.019165 * freq 
    elif(tipoFreq == "Informação não registrada"):
        freq_normalizada = 0.019165 * 1 
    elif(tipoFreq == "Mês"):
        freq_normalizada = 0.230137 * freq 
    elif(tipoFreq == "Semana"):
        freq_normalizada = 1 * freq 
    elif(tipoFreq == "Dia"):
        freq_normalizada = 7 * freq 

    freq_normalizada_final = freq_normalizada * classe_normalizada
    freq_x_qtdPessoas = freq_normalizada * qtd_pessoas

    return freq_normalizada_final, freq_x_qtdPessoas



In [6]:
def processar_dnit(df):

    # Removendo campos nulos
    df_dnit = df[(df['GeocodOrigem'].notna()) & (df['GeocodDestino'].notna())]
    df_dnit = df_dnit.astype({"GeocodOrigem": int, "GeocodDestino": int})

    # Verificando valores nulos ou "Infomação não registrada" em QtdPessoasVeic
    df_dnit['QtdPessoasVeic'] = df_dnit['QtdPessoasVeic'].fillna(0)
    df_dnit['QtdPessoasVeic'] = df_dnit['QtdPessoasVeic'].replace("Informação não registrada", 0)

    # Normalizando para semana freq e qtd de pessoas
    df_dnit["freq_normalizada"], df_dnit["freq_x_qtdPessoas"]  = zip(*df_dnit.apply(lambda row: normalizar_semana_dnit(row["FreqQnt"],row["FreqPeriodo"],row["Classe"],row["QtdPessoasVeic"]), axis = 1))

    #  Filtrando observações por tipo de automóvel
    df_dnit_filtrado = df_dnit[(df_dnit['Classe']=="P2") 
             | (df_dnit['Classe']=="O1") 
             | (df_dnit['Classe']=="O2") 
             | (df_dnit['Classe']=="O3")] 

    # Fazendo a soma das frequências por ligação
    df_dnit_sum = pd.DataFrame({'freq_semanal' : df_dnit.groupby( [ "GeocodOrigem", "GeocodDestino" ] )["freq_normalizada"].sum(),
                                'dnit_total_pessoas' : df_dnit.groupby( [ "GeocodOrigem", "GeocodDestino" ] )["freq_x_qtdPessoas"].sum()
                                }).reset_index()
    df_dnit_filtrado_sum = pd.DataFrame({'freq_semanal_selecao' : df_dnit_filtrado.groupby( [ "GeocodOrigem", "GeocodDestino" ] )["freq_normalizada"].sum(),
                                         'dnit_total_pessoas_selecao' : df_dnit_filtrado.groupby( [ "GeocodOrigem", "GeocodDestino" ] )["freq_x_qtdPessoas"].sum()
                                         }).reset_index()
    
    #Removendo ligações que possuem mesma origem e destino
    df_dnit_sum = df_dnit_sum[df_dnit_sum["GeocodOrigem"]!=df_dnit_sum["GeocodDestino"]]
    df_dnit_filtrado_sum = df_dnit_filtrado_sum[df_dnit_filtrado_sum["GeocodOrigem"]!=df_dnit_filtrado_sum["GeocodDestino"]]
    
    #Merge original com filtrado
    df_dnit_final = df_dnit_sum.merge(df_dnit_filtrado_sum, left_on=[ "GeocodOrigem", "GeocodDestino" ], right_on = [ "GeocodOrigem", "GeocodDestino" ], how="outer")

    return df_dnit_final 

In [7]:
# Processando dados do DNIT
lista_df_dnit_proc = []
for df in lista_df_dnit:
    lista_df_dnit_proc.append(processar_dnit(df))

# Nome das tabelas 
lista_nome_tabela = ["2016_1","2016_2","2017_1","2017_2",]

#DNIT
for df_dnit, nome_tabela in zip(lista_df_dnit_proc, lista_nome_tabela):
    df_dnit.rename(columns={"freq_semanal": "dnit_" + nome_tabela, "freq_semanal_selecao": "dnit_sel_" + nome_tabela, 
                            "dnit_total_pessoas": "dnit_pessoas_" + nome_tabela, "dnit_total_pessoas_selecao": "dnit_pessoas_sel_" + nome_tabela,
                            "GeocodOrigem": "origem", "GeocodDestino": "destino"}, inplace=True)

In [8]:
lista_df_dnit_proc[0].shape

(42442, 6)

In [9]:
# lista_df_dnit_proc[3]
lista_df_dnit_proc[0][(lista_df_dnit_proc[0]["dnit_sel_2016_1"].notna())].head()

Unnamed: 0,origem,destino,dnit_2016_1,dnit_pessoas_2016_1,dnit_sel_2016_1,dnit_pessoas_sel_2016_1
75,1100049,4212502,0.004791,0.11499,0.004791,0.11499
80,1100049,5005400,0.019165,0.80493,0.019165,0.80493
91,1100049,5106752,4.527391,188.347881,4.460274,188.252056
95,1100049,5205000,0.460274,21.632878,0.460274,21.632878
142,1100098,3541505,0.019165,0.88159,0.019165,0.88159


### Merge diferentes pesquisas DNIT

In [10]:
df_dnit = reduce(lambda df1,df2: pd.merge(df1,df2,on=['origem','destino'], how='outer'), lista_df_dnit_proc)

In [11]:
df_dnit[df_dnit["dnit_sel_2016_1"].notna()].head()

Unnamed: 0,origem,destino,dnit_2016_1,dnit_pessoas_2016_1,dnit_sel_2016_1,dnit_pessoas_sel_2016_1,dnit_2016_2,dnit_pessoas_2016_2,dnit_sel_2016_2,dnit_pessoas_sel_2016_2,dnit_2017_1,dnit_pessoas_2017_1,dnit_sel_2017_1,dnit_pessoas_sel_2017_1,dnit_2017_2,dnit_pessoas_2017_2,dnit_sel_2017_2,dnit_pessoas_sel_2017_2
75,1100049,4212502,0.004791,0.11499,0.004791,0.11499,,,,,,,,,,,,
80,1100049,5005400,0.019165,0.80493,0.019165,0.80493,,,,,,,,,,,,
91,1100049,5106752,4.527391,188.347881,4.460274,188.252056,,,,,,,,,0.5,0.0,,
95,1100049,5205000,0.460274,21.632878,0.460274,21.632878,,,,,,,,,,,,
142,1100098,3541505,0.019165,0.88159,0.019165,0.88159,,,,,,,,,,,,


In [12]:
df_dnit.shape

(239151, 18)

## Realizando merge das bases

### Merge por tabela

In [13]:
#Realiza o merge entre os dataframes
def merge_dnit_ibge(df_dnit, df_ibge):
    df = df_ibge.merge(df_dnit, left_on=['origem','destino'], right_on = ['origem','destino'], how="outer")
    return df

In [14]:
lista_df_dnit_proc[3]

Unnamed: 0,origem,destino,dnit_2017_2,dnit_pessoas_2017_2,dnit_sel_2017_2,dnit_pessoas_sel_2017_2
0,1100015,1100049,5.732834,33.667918,,
1,1100015,1100122,0.902740,11.221918,,
2,1100015,1100254,1.754093,15.230137,,
3,1100015,1100288,0.250000,2.000000,,
4,1100015,1100304,0.812326,6.747906,,
...,...,...,...,...,...,...
74575,5300108,5221577,0.057534,0.460274,,
74576,5300108,5221601,0.134234,1.073868,,
74577,5300108,5221700,8.680789,94.166901,7.0,84.0
74578,5300108,5221809,0.014374,0.172485,,


In [15]:
lista_df_merge = []
for df_dnit in lista_df_dnit_proc:
    lista_df_merge.append(merge_dnit_ibge(df_dnit, df_ibge_final))

In [16]:
# Verificando merge
# lista_df_merge[2][lista_df_merge[2].dnit.isna()]
lista_df_merge[1]

Unnamed: 0,origem,destino,ibge_hidro,ibge_rodov,dnit_2016_2,dnit_pessoas_2016_2,dnit_sel_2016_2,dnit_pessoas_sel_2016_2
0,1302603,1304260,7.5,0.0,,,,
1,1302108,1302603,3.0,0.0,,,,
2,5103809,5108402,0.0,42.0,,,,
3,1300029,1302603,7.5,0.0,,,,
4,1300029,1301308,6.0,0.0,,,,
...,...,...,...,...,...,...,...,...
180467,5300108,5220405,,,0.004791,0.038330,,
180468,5300108,5220454,,,4.353399,74.717551,3.452055,69.0411
180469,5300108,5221197,,,0.177394,1.649289,,
180470,5300108,5221577,,,0.004791,0.038330,,


### Merge todas as tabelas

In [17]:
df_dnit = reduce(lambda left,right: pd.merge(left,right,on=['origem','destino'], how='outer'), lista_df_dnit_proc)

In [18]:
df_merge = df_ibge_final.merge(df_dnit, left_on=['origem','destino'], right_on = ['origem','destino'], how="outer")

In [19]:
df_merge

Unnamed: 0,origem,destino,ibge_hidro,ibge_rodov,dnit_2016_1,dnit_pessoas_2016_1,dnit_sel_2016_1,dnit_pessoas_sel_2016_1,dnit_2016_2,dnit_pessoas_2016_2,dnit_sel_2016_2,dnit_pessoas_sel_2016_2,dnit_2017_1,dnit_pessoas_2017_1,dnit_sel_2017_1,dnit_pessoas_sel_2017_1,dnit_2017_2,dnit_pessoas_2017_2,dnit_sel_2017_2,dnit_pessoas_sel_2017_2
0,1302603,1304260,7.5,0.0,,,,,,,,,,,,,,,,
1,1302108,1302603,3.0,0.0,,,,,,,,,,,,,,,,
2,5103809,5108402,0.0,42.0,0.874651,0.172485,,,,,,,,,,,,,,
3,1300029,1302603,7.5,0.0,,,,,,,,,,,,,,,,
4,1300029,1301308,6.0,0.0,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331111,5300108,5219258,,,,,,,,,,,,,,,0.177394,2.397195,,
331112,5300108,5219506,,,,,,,,,,,,,,,0.115069,0.920548,,
331113,5300108,5219902,,,,,,,,,,,,,,,0.557534,2.690411,,
331114,5300108,5220504,,,,,,,,,,,,,,,0.264374,1.191650,,


## Análise das tabelas geradas - Somente rodoviário

### Tabelas separadas

Todos os automóveis

In [21]:
for nome, df, nome_df in zip(nomes_planilhas, lista_df_merge, lista_nome_tabela):

    total = df.shape[0]
    
    ibge_total = df[(df.ibge_rodov.notna()) & (df["dnit_" + nome_df].isna())].shape[0] 
    ibge_values = round(ibge_total * 100/total,2) 

    dnit_total = df[(df.ibge_rodov.isna()) & (df["dnit_" + nome_df].notna())].shape[0]
    dnit_values = round(dnit_total * 100/total,2)

    merge_total = df[(df.ibge_rodov.notna()) & (df["dnit_" + nome_df].notna())].shape[0]
    merge_values = round(merge_total * 100/total,2)

    print(nome)
    print("Total de ligações: " + str(total) + " - 100%")
    print("Somente IBGE: " + str(ibge_total) + " - " + str(ibge_values) + "%")
    print("Somente DNIT: " + str(dnit_total) + " - " + str(dnit_values) + "%")
    print("IBGE + DNIT: " + str(merge_total) + " - " +  str(merge_values) + "%")
    print()

BD_1PNT2016
Total de ligações: 164923 - 100%
Somente IBGE: 122481 - 74.27%
Somente DNIT: 33647 - 20.4%
IBGE + DNIT: 8795 - 5.33%

BD_2PNT2016
Total de ligações: 180472 - 100%
Somente IBGE: 119705 - 66.33%
Somente DNIT: 49196 - 27.26%
IBGE + DNIT: 11571 - 6.41%

BD_1PNT2017
Total de ligações: 211370 - 100%
Somente IBGE: 111915 - 52.95%
Somente DNIT: 80094 - 37.89%
IBGE + DNIT: 19361 - 9.16%

BD_2PNT2017
Total de ligações: 191422 - 100%
Somente IBGE: 116842 - 61.04%
Somente DNIT: 60146 - 31.42%
IBGE + DNIT: 14434 - 7.54%



Somente automóveis das classe P2, O1, O2, O3

In [22]:
for nome, df, nome_df in zip(nomes_planilhas, lista_df_merge, lista_nome_tabela):

    #Removendo linhas que há somente dados do DNIT de outras classes
    df = df[(df.ibge_rodov.notna()) | (df["dnit_sel_" + nome_df].notna())]

    total = df.shape[0]
    
    ibge_total = df[(df.ibge_rodov.notna()) & (df["dnit_sel_" + nome_df].isna())].shape[0] 
    ibge_values = round(ibge_total * 100/total,2) 

    dnit_total = df[(df.ibge_rodov.isna()) & (df["dnit_sel_" + nome_df].notna())].shape[0]
    dnit_values = round(dnit_total * 100/total,2)

    merge_total = df[(df.ibge_rodov.notna()) & (df["dnit_sel_" + nome_df].notna())].shape[0]
    merge_values = round(merge_total * 100/total,2)

    print(nome)
    print("Total de ligações: " + str(total) + " - 100%")
    print("Somente IBGE: " + str(ibge_total) + " - " + str(ibge_values) + "%")
    print("Somente DNIT: " + str(dnit_total) + " - " + str(dnit_values) + "%")
    print("IBGE + DNIT: " + str(merge_total) + " - " +  str(merge_values) + "%")
    print()

BD_1PNT2016
Total de ligações: 132145 - 100%
Somente IBGE: 130065 - 98.43%
Somente DNIT: 869 - 0.66%
IBGE + DNIT: 1211 - 0.92%

BD_2PNT2016
Total de ligações: 132945 - 100%
Somente IBGE: 129302 - 97.26%
Somente DNIT: 1669 - 1.26%
IBGE + DNIT: 1974 - 1.48%

BD_1PNT2017
Total de ligações: 133766 - 100%
Somente IBGE: 128046 - 95.72%
Somente DNIT: 2490 - 1.86%
IBGE + DNIT: 3230 - 2.41%

BD_2PNT2017
Total de ligações: 133272 - 100%
Somente IBGE: 129026 - 96.81%
Somente DNIT: 1996 - 1.5%
IBGE + DNIT: 2250 - 1.69%



### Tabela única

In [23]:
df_merge.head()

Unnamed: 0,origem,destino,ibge_hidro,ibge_rodov,dnit_2016_1,dnit_pessoas_2016_1,dnit_sel_2016_1,dnit_pessoas_sel_2016_1,dnit_2016_2,dnit_pessoas_2016_2,dnit_sel_2016_2,dnit_pessoas_sel_2016_2,dnit_2017_1,dnit_pessoas_2017_1,dnit_sel_2017_1,dnit_pessoas_sel_2017_1,dnit_2017_2,dnit_pessoas_2017_2,dnit_sel_2017_2,dnit_pessoas_sel_2017_2
0,1302603,1304260,7.5,0.0,,,,,,,,,,,,,,,,
1,1302108,1302603,3.0,0.0,,,,,,,,,,,,,,,,
2,5103809,5108402,0.0,42.0,0.874651,0.172485,,,,,,,,,,,,,,
3,1300029,1302603,7.5,0.0,,,,,,,,,,,,,,,,
4,1300029,1301308,6.0,0.0,,,,,,,,,,,,,,,,


In [24]:
total = df_merge.shape[0]
dnit_notna = (df_merge.dnit_2016_1.notna()) | (df_merge.dnit_2016_2.notna()) | (df_merge.dnit_2017_1.notna()) | (df_merge.dnit_2017_2.notna())
dnit_isna = (df_merge.dnit_2016_1.isna()) & (df_merge.dnit_2016_2.isna()) & (df_merge.dnit_2017_1.isna()) & (df_merge.dnit_2017_2.isna())
dnit_notna_todos = (df_merge.dnit_2016_1.notna()) & (df_merge.dnit_2016_2.notna()) & (df_merge.dnit_2017_1.notna()) & (df_merge.dnit_2017_2.notna())

total_ibge = df_merge[df_merge.ibge_rodov.notna()].shape[0]
porc_ibge = round(total_ibge*100/total, 2)
total_dnit = df_merge[dnit_notna].shape[0]
porc_dnit = round(total_dnit*100/total, 2)

total_only_ibge = df_merge[(df_merge.ibge_rodov.notna()) & (dnit_isna)].shape[0] #Somente há valores do IBGE 
porc_only_ibge = round( total_only_ibge * 100/total,2) 

total_only_dnit = df_merge[(df_merge.ibge_rodov.isna()) & (dnit_notna)].shape[0]  #Ao menos uma pesquisa do dnit
porc_only_dnit = round(total_only_dnit * 100/total,2)

total_merge = df_merge[(df_merge.ibge_rodov.notna()) & (dnit_notna)].shape[0] #Ao menos uma pesquisa do dnit + ibge
porc_merge = round(total_merge * 100/total,2) 

total_merge_2 = df_merge[(df_merge.ibge_rodov.notna()) & (dnit_notna_todos)].shape[0] #Todos as pesquisas do dnit
porc_merge_2 = round(df_merge[dnit_notna_todos].shape[0]* 100/total,2)

total_todos_dnit = df_merge[dnit_notna_todos].shape[0]#Todos do completo do dnit sem o IBGE
porc_todos_dnit = round(total_todos_dnit* 100/total,2)

print("Total de ligações: " + str(total) + " - 100%")
print()
print("Total IBGE: " + str(total_ibge) + " - " + str(porc_ibge) + "%")
print("Total DNIT: " + str(total_dnit) + " - " + str(porc_dnit) + "%")
print()
print("Somente IBGE: " + str(total_only_ibge) + " - " + str(porc_only_ibge) + "%")
print("Somente DNIT (ao menos 1 pesquisa): " + str(total_only_dnit) + " - " + str(porc_only_dnit) + "%")
print("IBGE + DNIT (ao menos 1 pesquisa): "+ str(total_merge) + " - " + str(porc_merge) + "%")
print()
print("IBGE + todos DNIT: " + str(total_merge_2) + " - " + str(porc_merge_2) + "%")
print("Todos DNIT (sem o IBGE): " + str(total_todos_dnit) + " - " + str(porc_todos_dnit) + "%")


Total de ligações: 331116 - 100%

Total IBGE: 131276 - 39.65%
Total DNIT: 239151 - 72.23%

Somente IBGE: 91965 - 27.77%
Somente DNIT (ao menos 1 pesquisa): 199840 - 60.35%
IBGE + DNIT (ao menos 1 pesquisa): 39311 - 11.87%

IBGE + todos DNIT: 182 - 0.09%
Todos DNIT (sem o IBGE): 311 - 0.09%


## Salvando CSV

In [None]:
df_merge.to_csv('ibge_dnit.csv',index=False)