# Cálculo de probabilidade de fluxo

Autora: Mariama Oliveira
 
Objetivo: Calcular a probabilidade de deslocamento de uma cidade para outra e a quantidade de passageiros.

In [77]:
import pandas as pd

In [78]:
# Read the flow files: one table for air travel and one for road travel

df_aereo = pd.read_csv("../data/integrado/fluxo_transporte/aereos_final.csv")
df_rodoviario = pd.read_csv("../data/integrado/fluxo_transporte/rodoviarios_final.csv")

In [79]:
# Remove trips that involve cities outside Brazil.
# "origem" and "destino" hold "CITY/STATE" text. Split once, from the right, so the
# result always has at most two parts: a plain split("/") produces extra columns (and
# the two-column assignment fails) as soon as one value contains a second "/".
df_rodoviario[['mun_origem','estado_origem']] = df_rodoviario.origem.str.rsplit("/", n=1, expand=True)
df_rodoviario[['mun_destino','estado_destino']] = df_rodoviario.destino.str.rsplit("/", n=1, expand=True)

# A two-character state part is the marker used here for a Brazilian municipality;
# rows with a missing state part compare as False and are dropped as well.
origem_no_brasil = df_rodoviario["estado_origem"].str.len() == 2
destino_no_brasil = df_rodoviario["estado_destino"].str.len() == 2
df_rodoviario = df_rodoviario[origem_no_brasil & destino_no_brasil]

In [80]:
# Keep only trips whose origin and destination both have a municipality code
df_rodoviario = df_rodoviario.dropna(subset=["cod_mun_destino", "cod_mun_origem"])


In [81]:
# Fix input formats: municipality codes are stored as integers in both tables
df_aereo = df_aereo.astype({"cod_mun_origem": int, "cod_mun_destino": int})

df_rodoviario = df_rodoviario.astype({"cod_mun_origem": int, "cod_mun_destino": int})

In [82]:
# Display the air-travel table (one row per origin, destination and month)
df_aereo

Unnamed: 0,Origem,Destino,Mes,passageiros,cod_mun_origem,cod_mun_destino,Ano
0,AGUA BOA/MT,CONFRESA/MT,12,2.0,5100201,5103353,2019
1,AGUA BOA/MT,SAO FELIX DO ARAGUAIA/MT,3,6.0,5100201,5107859,2019
2,AGUA BOA/MT,SAO FELIX DO ARAGUAIA/MT,4,0.0,5100201,5107859,2019
3,AGUA BOA/MT,SAO FELIX DO ARAGUAIA/MT,5,5.0,5100201,5107859,2019
4,AGUA BOA/MT,SAO FELIX DO ARAGUAIA/MT,6,16.0,5100201,5107859,2019
...,...,...,...,...,...,...,...
11803,VITORIA/ES,UNA/BA,3,0.0,3205309,2932507,2019
11804,VITORIA/ES,VITORIA/ES,1,0.0,3205309,3205309,2019
11805,VITORIA/ES,VITORIA/ES,4,0.0,3205309,3205309,2019
11806,VITORIA/ES,VITORIA/ES,5,0.0,3205309,3205309,2019


In [83]:
# Display the road-travel table, now with the split municipality/state columns
df_rodoviario

Unnamed: 0,origem,destino,MesViagem,QuantidaDeBilhetes,cod_mun_origem,cod_mun_destino,mun_origem,estado_origem,mun_destino,estado_destino
0,ABADIA DOS DOURADOS/MG,CAMPINAS/SP,05/2019,1,3100104,3509502,ABADIA DOS DOURADOS,MG,CAMPINAS,SP
1,ABADIA DOS DOURADOS/MG,SAO PAULO/SP,07/2019,1,3100104,3550308,ABADIA DOS DOURADOS,MG,SAO PAULO,SP
2,ABADIANIA/GO,ALEXANIA/GO,01/2019,65,5200100,5200308,ABADIANIA,GO,ALEXANIA,GO
3,ABADIANIA/GO,ALEXANIA/GO,02/2019,41,5200100,5200308,ABADIANIA,GO,ALEXANIA,GO
4,ABADIANIA/GO,ALEXANIA/GO,03/2019,67,5200100,5200308,ABADIANIA,GO,ALEXANIA,GO
...,...,...,...,...,...,...,...,...,...,...
275534,ZORTEA/SC,BELO HORIZONTE/MG,11/2019,1,4219853,3106200,ZORTEA,SC,BELO HORIZONTE,MG
275535,ZORTEA/SC,BELO HORIZONTE/MG,12/2019,22,4219853,3106200,ZORTEA,SC,BELO HORIZONTE,MG
275536,ZORTEA/SC,FLORIANOPOLIS/SC,01/2020,21,4219853,4205407,ZORTEA,SC,FLORIANOPOLIS,SC
275537,ZORTEA/SC,FLORIANOPOLIS/SC,11/2019,39,4219853,4205407,ZORTEA,SC,FLORIANOPOLIS,SC


In [84]:
# Total passengers of each origin/destination pair, summed over all months in the data
df_total_aereo = (
    df_aereo
    .groupby(['cod_mun_origem', 'cod_mun_destino'])['passageiros']
    .sum()
    .rename("passageiros_aereo")
    .reset_index()
)

df_total_rodov = (
    df_rodoviario
    .groupby(['cod_mun_origem', 'cod_mun_destino'])['QuantidaDeBilhetes']
    .sum()
    .rename("passageiros_rodov")
    .reset_index()
)

In [85]:
# Display the road totals per origin/destination pair
df_total_rodov

Unnamed: 0,cod_mun_origem,cod_mun_destino,passageiros_rodov
0,1100023,1100304,7
1,1100023,1200401,31
2,1100023,3119401,3
3,1100023,3127701,152
4,1100023,3131307,212
...,...,...,...
36913,5300108,5221197,5
36914,5300108,5221601,25
36915,5300108,5222005,291
36916,5300108,5222302,15


In [86]:
# Display the air totals per origin/destination pair
df_total_aereo

Unnamed: 0,cod_mun_origem,cod_mun_destino,passageiros_aereo
0,1100049,5002704,107.0
1,1100049,5108402,30864.0
2,1100122,3509502,87.0
3,1100122,5002704,165.0
4,1100122,5108402,34977.0
...,...,...,...
1730,5300108,4314902,298553.0
1731,5300108,5002704,88331.0
1732,5300108,5108402,221055.0
1733,5300108,5208707,124936.0


In [87]:
# Put air and road totals in the same frame. The outer join keeps pairs that appear
# in only one of the two tables; their missing passenger count is filled with 0
# before the two modes are added up.
df_total = (
    df_total_rodov
    .merge(df_total_aereo, how='outer', on=["cod_mun_origem", "cod_mun_destino"])
    .fillna({"passageiros_rodov": 0, "passageiros_aereo": 0})
    .assign(passageiros_total=lambda pares: pares['passageiros_rodov'] + pares['passageiros_aereo'])
)
df_total

Unnamed: 0,cod_mun_origem,cod_mun_destino,passageiros_rodov,passageiros_aereo,passageiros_total
0,1100023,1100304,7.0,0.0,7.0
1,1100023,1200401,31.0,0.0,31.0
2,1100023,3119401,3.0,0.0,3.0
3,1100023,3127701,152.0,0.0,152.0
4,1100023,3131307,212.0,0.0,212.0
...,...,...,...,...,...
38112,5300108,3518800,0.0,694548.0,694548.0
38113,5300108,3548906,0.0,0.0,0.0
38114,5300108,3549904,0.0,0.0,0.0
38115,5300108,4125506,0.0,205731.0,205731.0


## Calculando a probabilidade do fluxo

In [88]:
# Total outgoing flow of each origin municipality: road, air and combined
df_fluxo_total = (
    df_total
    .groupby("cod_mun_origem", as_index=False)[["passageiros_rodov", "passageiros_aereo", "passageiros_total"]]
    .sum()
)

In [89]:
# Display the per-origin totals
df_fluxo_total

Unnamed: 0,cod_mun_origem,passageiros_rodov,passageiros_aereo,passageiros_total
0,1100023,5302.0,0.0,5302.0
1,1100049,3221.0,30971.0,34192.0
2,1100114,1325.0,0.0,1325.0
3,1100122,5554.0,35229.0,40783.0
4,1100155,1252.0,0.0,1252.0
...,...,...,...,...
1894,5221403,23669.0,0.0,23669.0
1895,5221601,1084.0,0.0,1084.0
1896,5221858,8214.0,0.0,8214.0
1897,5222005,12.0,0.0,12.0


In [90]:
# Flow probability of one origin/destination pair relative to its origin city
def calculo_probabilidade(row, df_fluxo_total):
    """Return the share of an origin's outgoing flow carried by one pair.

    Parameters
    ----------
    row : mapping-like (e.g. ``pd.Series``)
        One origin/destination record. It must provide ``cod_mun_origem``,
        ``passageiros_rodov``, ``passageiros_aereo`` and ``passageiros_total``.
        No other key is read.
    df_fluxo_total : pd.DataFrame
        Totals per origin, with the columns ``cod_mun_origem``,
        ``passageiros_rodov``, ``passageiros_aereo`` and ``passageiros_total``.
        When several rows share the origin code, the first one is used.

    Returns
    -------
    pd.Series
        Indexed by ``prob_rodov``, ``prob_aereo`` and ``prob_total``. Each value
        is the row's flow divided by the origin's total for the same column, or
        0 when that total is 0.

    Raises
    ------
    IndexError
        If ``df_fluxo_total`` has no row for the origin code of ``row``.
    """
    colunas_fluxo = ("passageiros_rodov", "passageiros_aereo", "passageiros_total")

    # Totals leaving the origin city of this row (first matching record)
    mesma_origem = df_fluxo_total["cod_mun_origem"] == row["cod_mun_origem"]
    totais_origem = df_fluxo_total[mesma_origem].to_dict(orient='records')[0]

    # An origin with no flow in a given mode gets probability 0 instead of a division by zero
    probabilidades = [
        0 if totais_origem[coluna] == 0 else row[coluna] / totais_origem[coluna]
        for coluna in colunas_fluxo
    ]

    return pd.Series(probabilidades, index=['prob_rodov', 'prob_aereo', 'prob_total'])

In [91]:

# Probabilities for every origin/destination pair, then attached to the flow table.
# NOTE(review): calculo_probabilidade filters df_fluxo_total once per row, so this
# apply does a full scan of the totals for each pair; a merge on cod_mun_origem
# would presumably be much faster — confirm equivalence before switching.
df_prob = df_total.apply(calculo_probabilidade, axis=1, args=(df_fluxo_total,))
df_final = df_total.join(df_prob)

In [92]:
# Display the computed probabilities (same row order as df_total)
df_prob

Unnamed: 0,prob_rodov,prob_aereo,prob_total
0,0.001320,0.000000,0.001320
1,0.005847,0.000000,0.005847
2,0.000566,0.000000,0.000566
3,0.028668,0.000000,0.028668
4,0.039985,0.000000,0.039985
...,...,...,...
38112,0.000000,0.085709,0.079242
38113,0.000000,0.000000,0.000000
38114,0.000000,0.000000,0.000000
38115,0.000000,0.025388,0.023472


In [99]:
# Save flows and probabilities to the computed-data folder, without the index column
df_final.to_csv("../data/calculado/calculo_qtd_fluxo.csv", index=False)