In [1]:
import sys
import json
import pickle
import pandas as pd
import googlemaps as gmaps
from tqdm import tqdm
from datatable import dt, f
from datetime import datetime
from utils import get_path, data_load
sys.path.insert(0, '../')
from secret import API_KEY

# Descrição

Uses the Google Maps Distance Matrix API to get the travel distance and travel time for each origin–destination pair.

[Documentação](https://developers.google.com/maps/documentation/distance-matrix/start#maps_http_distancematrix_start-py)

[Repositório API Python](https://github.com/googlemaps/google-maps-services-python)

# Fontes de dados

## Procedimentos

In [2]:
# Source table for this notebook. The original note lists two file stems:
# procs_ano | procs_bienio (presumably interchangeable inputs; confirm).
path_proc = get_path('GESTANTES', 'procs_ano.csv.gzip')
df_proc = pd.read_csv(filepath_or_buffer=path_proc)
df_proc

Unnamed: 0,ano,parto_normal,origem,destino,distancia (km),tempo (min),dist_pond,tempo_pond,criticidade,capitais,socioecon,origem_latitude,origem_longitude,destino_latitude,destino_longitude,count
0,2010,0,NT/RO/11005/110001,NT/RO/11005/110001/2679477,0.0000,0.000000,0.0000,0.000000,0,0_0,1_1,-11.9283,-61.9953,-11.92830,-61.9953,49
1,2010,0,NT/RO/11005/110001,NT/RO/11003/110012/2495279,130.7705,152.868333,261.5410,305.736667,2,0_0,1_2,-11.9283,-61.9953,-10.87770,-61.9322,2
2,2010,0,NT/RO/11005/110001,NT/RO/11001/110013/2808617,357.5110,427.840000,357.5110,427.840000,2,0_0,1_1,-11.9283,-61.9953,-9.44363,-61.9818,1
3,2010,0,NT/RO/11005/110001,NT/RO/11002/110018/2496534,114.0612,112.283333,114.0612,112.283333,2,0_0,1_3,-11.9283,-61.9953,-11.67200,-61.1980,1
4,2010,0,NT/RO/11005/110001,NT/RO/11005/110028/2495228,48.6099,50.140000,194.4396,200.560000,1,0_0,1_1,-11.9283,-61.9953,-11.72710,-61.7714,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461847,2019,1,CO/DF/53001/530010,CO/DF/53001/530010/0010545,0.0000,0.000000,0.0000,0.000000,0,1_1,5_5,-15.7795,-47.9297,-15.77950,-47.9297,479
461848,2019,1,CO/DF/53001/530010,CO/DF/53001/530010/2645157,0.0000,0.000000,0.0000,0.000000,0,1_1,5_5,-15.7795,-47.9297,-15.77950,-47.9297,1373
461849,2019,1,CO/DF/53001/530010,CO/DF/53001/530010/2650355,0.0000,0.000000,0.0000,0.000000,0,1_1,5_5,-15.7795,-47.9297,-15.77950,-47.9297,5
461850,2019,1,CO/DF/53001/530010,CO/DF/53001/530010/2672197,0.0000,0.000000,0.0000,0.000000,0,1_1,5_5,-15.7795,-47.9297,-15.77950,-47.9297,2483


# Gerando tabelas

## Rotas

In [3]:
# Number of trailing characters removed from each 'destino' code. In the rows
# displayed above, this is a '/' followed by a 7-character code, so the result
# has the same number of segments as 'origem'.
DESTINO_SUFFIX_LEN = 8

# Key columns: every column whose name mentions the origin or the destination
# (the two codes plus their latitude/longitude columns).
route_keys = [c for c in df_proc.columns if 'origem' in c or 'destino' in c]
cols_route = route_keys + ['count']

# One row per distinct origin/destination pair, with 'count' summed over all
# source rows of that pair, ordered from most to least frequent.
df_route = (
    df_proc[cols_route]
    # Vectorised slice instead of a per-row lambda.
    .assign(destino=lambda d: d['destino'].str[:-DESTINO_SUFFIX_LEN])
    # Drop rows where the truncated destination equals the origin.
    .loc[lambda d: d['origem'] != d['destino']]
    # Group by the explicit key list rather than by column position.
    .groupby(route_keys, as_index=False)['count']
    .sum()
    .sort_values(by='count', ascending=False)
    .reset_index(drop=True)
)
df_route

Unnamed: 0,origem,destino,origem_latitude,origem_longitude,destino_latitude,destino_longitude,count
0,CO/MT/51002/510840,CO/MT/51002/510340,-15.64580,-56.1322,-15.60100,-56.0974,22807
1,SD/MG/31008/315460,SD/MG/31008/310620,-19.76210,-44.0844,-19.91020,-43.9266,17147
2,ND/PE/26010/260790,ND/PE/26010/261160,-8.11298,-35.0150,-8.04666,-34.8771,16012
3,CO/GO/52004/521250,CO/DF/53001/530010,-16.25300,-47.9500,-15.77950,-47.9297,15461
4,ND/SE/28006/280480,ND/SE/28001/280030,-10.84680,-37.1231,-10.90910,-37.0677,15090
...,...,...,...,...,...,...,...
68568,SD/ES/32004/320370,SD/MG/31041/313770,-20.46520,-41.4156,-20.15390,-41.6228,1
68569,ND/BA/29009/293270,ND/BA/29015/291760,-14.59630,-39.2851,-13.52480,-39.9640,1
68570,SD/ES/32004/320380,SD/ES/32004/320230,-20.95090,-41.3460,-20.76680,-41.6734,1
68571,SD/ES/32004/320380,SD/ES/32004/320480,-20.95090,-41.3460,-21.02740,-41.6636,1


### Intra RJ

In [4]:
# Keep only routes whose origin AND destination codes both contain '/RJ/'.
origem_rj, destino_rj = (
    df_route[col].str.contains('/RJ/') for col in ('origem', 'destino')
)
both_rj = origem_rj & destino_rj
df_route_rj = df_route[both_rj]
df_route_rj

Unnamed: 0,origem,destino,origem_latitude,origem_longitude,destino_latitude,destino_longitude,count
5,SD/RJ/33005/330350,SD/RJ/33005/330285,-22.7556,-43.4603,-22.8028,-43.4601,14875
16,SD/RJ/33005/330045,SD/RJ/33005/330285,-22.7640,-43.3992,-22.8028,-43.4601,11761
17,SD/RJ/33005/330350,SD/RJ/33005/330045,-22.7556,-43.4603,-22.7640,-43.3992,11658
27,SD/RJ/33006/330490,SD/RJ/33006/330330,-22.8268,-43.0634,-22.8832,-43.1034,7848
29,SD/RJ/33005/330045,SD/RJ/33005/330510,-22.7640,-43.3992,-22.8058,-43.3729,7263
...,...,...,...,...,...,...,...
63432,SD/RJ/33002/330130,SD/RJ/33002/330020,-22.4812,-42.2066,-22.8697,-42.3326,1
63435,SD/RJ/33002/330130,SD/RJ/33006/330330,-22.4812,-42.2066,-22.8832,-43.1034,1
63439,SD/RJ/33002/330130,SD/RJ/33008/330415,-22.4812,-42.2066,-22.1031,-41.4693,1
63440,SD/RJ/33002/330130,SD/RJ/33009/330080,-22.4812,-42.2066,-22.4658,-42.6523,1


In [5]:
# Sanity check: the number of distinct (origem, destino) code pairs, the number
# of distinct coordinate combinations, and the number of rows must all agree,
# i.e. every row is a unique route both by code and by coordinates.
cols_code = ['origem', 'destino']
cols_latlon = list(df_route_rj.columns[2:-1])  # columns between the codes and 'count'
rows_code = df_route_rj.groupby(by=cols_code).ngroups
rows_latlon = df_route_rj.groupby(by=cols_latlon).ngroups
n_routes = len(df_route_rj)
rows_code == rows_latlon == n_routes

True

# Coletando dados

In [6]:
# Build the API client from the package itself rather than from the `gmaps`
# alias: this cell rebinds `gmaps` to the client, so the original
# `gmaps.Client(...)` raised AttributeError when the cell was run a second time.
# The name `gmaps` is kept because later cells call `gmaps.distance_matrix`.
import googlemaps
gmaps = googlemaps.Client(key=API_KEY)

In [7]:
def get_matrix(origens, destinos, mode='transit', departure_time=None,
               traffic_model='best_guess'):
  """Query the Distance Matrix endpoint through the module-level `gmaps` client.

  Args:
    origens: origins forwarded as `origins` (callers in this notebook pass a
      list holding one (latitude, longitude) tuple).
    destinos: destinations forwarded as `destinations` (same format).
    mode: travel mode forwarded to the API; defaults to 'transit'.
    departure_time: departure time forwarded to the API. When None, the fixed
      reference time 2022-11-07 06:00 is used.
    traffic_model: traffic model forwarded to the API; defaults to 'best_guess'.

  Returns:
    Whatever `gmaps.distance_matrix` returns, unmodified.
  """
  if departure_time is None:
    # Naive datetime; how it is mapped to a timezone is decided by the
    # googlemaps client -- TODO confirm.
    departure_time = datetime(year=2022, month=11, day=7, hour=6)
  # NOTE(review): the Distance Matrix documentation describes traffic_model as
  # applying to driving requests; confirm whether it has any effect for 'transit'.
  return gmaps.distance_matrix(
    origins=origens,
    destinations=destinos,
    mode=mode,
    language='pt-BR',
    units='metric',
    departure_time=departure_time,
    traffic_model=traffic_model,
  )

In [8]:
# One API request per route. `infos` ends up with exactly one entry per row of
# df_route_rj, in row order: a dict of distance/duration fields when the
# element status is 'OK', otherwise None.
infos = []
for _, route in tqdm(df_route_rj.iterrows(), total=len(df_route_rj)):
  origin = [(route['origem_latitude'], route['origem_longitude'])]
  destin = [(route['destino_latitude'], route['destino_longitude'])]
  response = get_matrix(origin, destin)
  # A single origin and a single destination are sent, so only the first
  # row / first element of the response is read.
  element = response['rows'][0]['elements'][0]
  if element['status'] != 'OK':
    infos.append(None)
    continue
  # Flatten to distance_text, distance_value, duration_text, duration_value.
  infos.append({
    '_'.join([metric, field]): element[metric][field]
    for metric in ('distance', 'duration')
    for field in ('text', 'value')
  })
len(infos)

100%|██████████| 1316/1316 [03:27<00:00,  6.33it/s]


1316

100%|██████████| 1398/1398 [03:25<00:00,  6.82it/s]

100%|██████████| 1316/1316 [03:27<00:00,  6.33it/s]

In [23]:
# Share of routes for which no result was stored (None entries in `infos`).
# The percentage is computed from the data rather than from hard-coded counts,
# and an empty list does not divide by zero.
n_missing = infos.count(None)
n_total = len(infos)
pct_missing = (n_missing / n_total * 100) if n_total else 0.0
print(f'missing: {n_missing} out of {n_total} ({pct_missing:.2f}%)')

missing: 626 out of 1316 (47.57%)


In [10]:
# Show the first non-missing entry as a sample of the collected fields.
# Identity check (`is not None`) instead of `!= None`; nothing is printed when
# every entry is None.
first_info = next((item for item in infos if item is not None), None)
if first_info is not None:
  print(first_info)

{'distance_text': '10,3 km', 'distance_value': 10281, 'duration_text': '1 hora 16 minutos', 'duration_value': 4587}


# Exportar tabela

In [11]:
# Persist the collected results. The file handle is not called `f` because that
# would rebind the `f` imported from datatable at the top of the notebook.
path_infos = get_path('GESTANTES', 'gmaps/infos_6am.pickle')
with open(path_infos, 'wb') as fh:
    pickle.dump(infos, fh, protocol=pickle.HIGHEST_PROTOCOL)

In [12]:
# Round-trip check: reload the pickle and compare it with the in-memory list.
# NOTE: pickle.load executes arbitrary code from the file; only use it on files
# you trust (here, the file at path_infos).
# The handle is not called `f`, to avoid rebinding the `f` imported from datatable.
with open(path_infos, 'rb') as fh:
    b = pickle.load(fh)
print(infos == b)

True


In [14]:
# Export the routes that have no result. `infos` is used as a positional
# boolean mask, so it must have one entry per row of df_route_rj, in row order.
path_miss = get_path('GESTANTES', 'gmaps/missing.xlsx')
is_missing = [item is None for item in infos]
df_route_rj[is_missing].to_excel(path_miss, index=False)