In [1]:
import sys
import json
import pickle
import pandas as pd
import googlemaps as gmaps
from tqdm import tqdm
from pathlib import Path
from datatable import dt, f, by
from datetime import datetime
from utils import get_path, data_load
sys.path.insert(0, '../')
from secret import API_KEY

# Descrição

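Get the travel distance and time between origin and destination municipalities (the origin–destination pairs found in the SIH records) using the Google Maps Distance Matrix API.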

[Documentação](https://developers.google.com/maps/documentation/distance-matrix/start#maps_http_distancematrix_start-py)

[Repositório API Python](https://github.com/googlemaps/google-maps-services-python)

# Fontes de dados

## Municípios

In [2]:
# Municipality reference table; its latitude/longitude columns are used further
# down as the coordinates of each route's origin and destination.
path_munic = get_path('LOCALIDADES', 'municipios.csv.gzip')
df_munic = dt.fread(path_munic)
df_munic

Unnamed: 0_level_0,cod_municipio,nome_municipio,capital,uf,cod_uf,nome_uf,regiao,latitude,longitude
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,520005,Abadia de Goiás,0,GO,52,Goiás,Centro-Oeste,−16.7573,−49.4412
1,310010,Abadia dos Dourados,0,MG,31,Minas Gerais,Sudeste,−18.4831,−47.3916
2,520010,Abadiânia,0,GO,52,Goiás,Centro-Oeste,−16.197,−48.7057
3,310020,Abaeté,0,MG,31,Minas Gerais,Sudeste,−19.1551,−45.4444
4,150010,Abaetetuba,0,PA,15,Pará,Norte,−1.72183,−48.8788
5,230010,Abaiara,0,CE,23,Ceará,Nordeste,−7.34588,−39.0416
6,290010,Abaíra,0,BA,29,Bahia,Nordeste,−13.2488,−41.6619
7,290020,Abaré,0,BA,29,Bahia,Nordeste,−8.72073,−39.1162
8,410010,Abatiá,0,PR,41,Paraná,Sul,−23.3049,−50.3133
9,420005,Abdon Batista,0,SC,42,Santa Catarina,Sul,−27.6126,−51.0233


## SIH

### Colunas

In [3]:
# SIH columns to keep (keys) and the names they receive in this notebook (values).
cols_sih = {
  'cod_municipio': 'origem',  # presumably the patient's municipality — TODO confirm
  'hosp_cod_municipio': 'destino',  # presumably the hospital's municipality — TODO confirm
}

### Carregando tabela

In [4]:
path_sih = get_path('SIH', 'sih.jay')
df_sih = dt.fread(path_sih, columns=cols_sih)
# Rename to origem/destino and keep only those two columns.
df_sih.names = cols_sih
df_sih = df_sih[:, list(cols_sih.values())]
# Discard records whose origin and destination municipality are the same.
df_sih = df_sih[f.origem != f.destino, :]
# Count records per (origem, destino) pair: add a column of ones and sum it by group.
df_sih['count'] = 1
df_sih = df_sih[:, dt.sum(f.count), by('origem', 'destino')]
# Most frequent routes first.
df_sih = df_sih.sort(-f.count)
df_sih

Unnamed: 0_level_0,origem,destino,count
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,510840,510340,22807
1,315460,310620,17147
2,260790,261160,16012
3,521250,530010,15461
4,280480,280030,15090
5,330350,330285,14875
6,230370,230440,14545
7,320500,320530,14543
8,150080,150140,14521
9,432300,431490,14273


### Lat & Lon

In [5]:
def get_latlon_table_by_ref(df_munic, ref):
  """Build a coordinate lookup table whose columns are prefixed with ``ref``.

  Args:
    df_munic: frame with ``cod_municipio``, ``latitude`` and ``longitude``
      columns, indexable as ``frame[:, [names]]``.
    ref: name given to the municipality-code column; also used as the prefix
      of the two coordinate columns (``<ref>_latitude``, ``<ref>_longitude``).

  Returns:
    A new three-column frame, renamed as described above and keyed on ``ref``
    (this notebook passes the result to ``dt.join``).
  """
  renames = {
    'cod_municipio': ref,
    'latitude': f'{ref}_latitude',
    'longitude': f'{ref}_longitude',
  }
  # The mapping's keys, in order, are exactly the columns to select.
  table = df_munic[:, list(renames)]
  table.names = renames
  table.key = ref
  return table

In [6]:
# Attach the coordinates of both endpoints by joining on the municipality code.
# NOTE(review): df_sih is overwritten here, so re-running this cell without first
# re-running the SIH loading cell applies the joins to an already-joined frame.
df_latlon_orig = get_latlon_table_by_ref(df_munic, 'origem')
df_latlon_dest = get_latlon_table_by_ref(df_munic, 'destino')
df_sih = df_sih[:, :, dt.join(df_latlon_orig)]
df_sih = df_sih[:, :, dt.join(df_latlon_dest)]
df_sih

Unnamed: 0_level_0,origem,destino,count,origem_latitude,origem_longitude,destino_latitude,destino_longitude
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,510840,510340,22807,−15.6458,−56.1322,−15.601,−56.0974
1,315460,310620,17147,−19.7621,−44.0844,−19.9102,−43.9266
2,260790,261160,16012,−8.11298,−35.015,−8.04666,−34.8771
3,521250,530010,15461,−16.253,−47.95,−15.7795,−47.9297
4,280480,280030,15090,−10.8468,−37.1231,−10.9091,−37.0677
5,330350,330285,14875,−22.7556,−43.4603,−22.8028,−43.4601
6,230370,230440,14545,−3.72797,−38.6619,−3.71664,−38.5423
7,320500,320530,14543,−20.121,−40.3074,−20.3155,−40.3128
8,150080,150140,14521,−1.36391,−48.3743,−1.4554,−48.4898
9,432300,431490,14273,−30.0819,−51.0194,−30.0318,−51.2065


### Parâmetros

In [7]:
def params_in_cols(
    df,
    modes=('driving', 'transit'),
    hours=(6, 12, 18),
    traffic_models=('best_guess', 'optimistic', 'pessimistic')):
  """Expand each route into one row per request-parameter combination.

  Every row of ``df`` is repeated once for each (mode, hour, traffic_model)
  combination, with the combination stored in the new ``mode``,
  ``traffic_model`` and ``hour`` columns. For modes other than ``'driving'``
  only the ``'best_guess'`` traffic model is generated (the Distance Matrix
  API documents ``traffic_model`` as a driving-only option).

  Args:
    df: pandas DataFrame with at least ``origem`` and ``destino`` columns.
      It is not modified.
    modes: travel modes to generate (driving | transit | bicycling | walking).
    hours: departure hours to generate, e.g. ``(6, 12, 18)`` or ``(7, 13, 19)``.
    traffic_models: traffic models to generate
      (best_guess | optimistic | pessimistic).

  Returns:
    A new DataFrame sorted by origem, destino, mode, traffic_model and hour,
    with a fresh 0..n-1 index.

  Raises:
    ValueError: from ``pd.concat`` when the arguments yield no combination
      at all (for example an empty ``modes``).
  """
  dfs = [
    # assign() returns a modified copy, leaving the caller's frame untouched
    df.assign(mode=mode, traffic_model=traffic_model, hour=hour)
    for mode in modes
    for hour in hours
    for traffic_model in traffic_models
    if mode == 'driving' or traffic_model == 'best_guess'
  ]
  df_param = pd.concat(dfs)
  df_param = df_param.sort_values(by=[
    'origem', 'destino', 'mode', 'traffic_model', 'hour'])
  return df_param.reset_index(drop=True)

In [8]:
# params_in_cols works on pandas objects, so convert the datatable frame first.
df_param = params_in_cols(df_sih.to_pandas())
df_param

Unnamed: 0,origem,destino,count,origem_latitude,origem_longitude,destino_latitude,destino_longitude,mode,traffic_model,hour
0,110001,110002,2,-11.9283,-61.9953,-9.90571,-63.0325,driving,best_guess,6
1,110001,110002,2,-11.9283,-61.9953,-9.90571,-63.0325,driving,best_guess,12
2,110001,110002,2,-11.9283,-61.9953,-9.90571,-63.0325,driving,best_guess,18
3,110001,110002,2,-11.9283,-61.9953,-9.90571,-63.0325,driving,optimistic,6
4,110001,110002,2,-11.9283,-61.9953,-9.90571,-63.0325,driving,optimistic,12
...,...,...,...,...,...,...,...,...,...,...
822871,530010,522200,2,-15.7795,-47.9297,-16.74050,-48.5159,driving,pessimistic,12
822872,530010,522200,2,-15.7795,-47.9297,-16.74050,-48.5159,driving,pessimistic,18
822873,530010,522200,2,-15.7795,-47.9297,-16.74050,-48.5159,transit,best_guess,6
822874,530010,522200,2,-15.7795,-47.9297,-16.74050,-48.5159,transit,best_guess,12


# Modelo de Coleta

In [9]:
def get_backup_path(data):
  """Return the path of the JSON backup file for one request configuration.

  The file name is ``<mode>-<traffic_model>-<hour>h-<origem>-<destino>.json``
  inside the ``gmaps`` folder of the ``LOCALIDADES`` data directory, as
  resolved by ``get_path``.

  Args:
    data: mapping with the keys ``mode``, ``traffic_model``, ``hour``,
      ``origem`` and ``destino``.
  """
  hour_tag = f"{data['hour']}h"
  pieces = (
    data['mode'],
    data['traffic_model'],
    hour_tag,
    str(data['origem']),
    str(data['destino']),
  )
  stem = '-'.join(pieces)
  return get_path('LOCALIDADES', f'gmaps/{stem}.json')

In [10]:
def get_travel_info(row):
  """Query the Distance Matrix API for a single origin/destination pair.

  The request departs on 2022-11-07 at ``row['hour']`` and is sent through
  the module-level ``gmaps`` client.

  Args:
    row: dict with the keys ``hour``, ``mode``, ``traffic_model`` and the
      four ``origem_*`` / ``destino_*`` latitude and longitude values.

  Returns:
    A copy of ``row`` extended with ``departure_time`` (as a string), the
    resolved ``origin_addresses`` and ``destination_addresses``, the element
    ``status`` and, for every other field of the response element,
    ``"<field> (text)"`` and ``"<field> (value)"`` entries. These entries
    are empty strings when the status is not ``'OK'``.
  """
  when = datetime(year=2022, month=11, day=7, hour=row['hour'])
  result = row.copy()
  result['departure_time'] = str(when)

  origin = (row['origem_latitude'], row['origem_longitude'])
  destination = (row['destino_latitude'], row['destino_longitude'])
  response = gmaps.distance_matrix(
    origins=[origin],
    destinations=[destination],
    mode=row['mode'],
    traffic_model=row['traffic_model'],
    departure_time=when,
    language='pt-BR',
    units='metric',
  )

  result['origin_addresses'] = response['origin_addresses'][0]
  result['destination_addresses'] = response['destination_addresses'][0]

  # One origin and one destination were sent, so the answer is a single element.
  element = response['rows'][0]['elements'][0]
  status = element['status']
  result['status'] = status
  succeeded = status == 'OK'
  for field in [key for key in element if key != 'status']:
    for part in ('text', 'value'):
      result[f'{field} ({part})'] = element[field][part] if succeeded else ''
  return result

In [11]:
def request_travel_infos(df):
  """Request travel information for every row of ``df`` and back it up as JSON.

  Each row is written to the file given by ``get_backup_path``. Rows whose
  file already exists are skipped, so an interrupted collection can be
  resumed without repeating API requests.

  Args:
    df: pandas DataFrame with the columns read by ``get_backup_path`` and
      ``get_travel_info``.

  Raises:
    Whatever ``get_travel_info`` or ``json.dumps`` raises. In that case no
    file is created for the failing row, so it is retried on the next run.
  """
  for row in tqdm(df.to_dict(orient='records')):
    fpath = Path(get_backup_path(row))
    if fpath.is_file():
      continue
    info = get_travel_info(row)
    # Serialize before touching the disk: opening the file first would leave an
    # empty/partial file behind on a serialization error, and the is_file()
    # check above would then treat the row as already collected.
    payload = json.dumps(info)
    fpath.parent.mkdir(parents=True, exist_ok=True)
    with open(fpath, 'w') as fp:
      fp.write(payload)

# Filtros

## Intra RJ

In [12]:
# Keep only routes whose origin AND destination codes start with '33'. The first
# two digits of a municipality code match cod_uf in the municipality table;
# '33' is presumably Rio de Janeiro, per this section's title — confirm.
origem_rj = df_param['origem'].astype(str).str.startswith('33')
destino_rj = df_param['destino'].astype(str).str.startswith('33')
df_route_rj = df_param[origem_rj & destino_rj]
df_route_rj

Unnamed: 0,origem,destino,count,origem_latitude,origem_longitude,destino_latitude,destino_longitude,mode,traffic_model,hour
497964,330010,330023,1,-23.0011,-44.3196,-22.7528,-41.8846,driving,best_guess,6
497965,330010,330023,1,-23.0011,-44.3196,-22.7528,-41.8846,driving,best_guess,12
497966,330010,330023,1,-23.0011,-44.3196,-22.7528,-41.8846,driving,best_guess,18
497967,330010,330023,1,-23.0011,-44.3196,-22.7528,-41.8846,driving,optimistic,6
497968,330010,330023,1,-23.0011,-44.3196,-22.7528,-41.8846,driving,optimistic,12
...,...,...,...,...,...,...,...,...,...,...
519643,330630,330620,1,-22.5202,-44.0996,-22.4059,-43.6686,driving,pessimistic,12
519644,330630,330620,1,-22.5202,-44.0996,-22.4059,-43.6686,driving,pessimistic,18
519645,330630,330620,1,-22.5202,-44.0996,-22.4059,-43.6686,transit,best_guess,6
519646,330630,330620,1,-22.5202,-44.0996,-22.4059,-43.6686,transit,best_guess,12


# Coletando dados

## Seleção parâmetros

In [None]:
# Pick the single (mode, traffic_model, hour) combination to collect in this
# run, most frequent routes first.
# NOTE(review): the progress logs at the end of the notebook (6h, 12h, 18h)
# suggest the hour below was edited by hand between runs — confirm.
df_select = df_route_rj[
  (
    df_route_rj['mode'] == 'driving' # driving | transit | bicycling | walking
  ) & (
    df_route_rj['traffic_model'] == 'best_guess' # best_guess | optimistic | pessimistic
  ) & (
    df_route_rj['hour'] == 18 # [6, 12, 18] | [7, 13, 19]
  )
].sort_values(by='count', ascending=False)
df_select

Unnamed: 0,origem,destino,count,origem_latitude,origem_longitude,destino_latitude,destino_longitude,mode,traffic_model,hour
508934,330350,330285,14875,-22.7556,-43.4603,-22.8028,-43.4601,driving,best_guess,18
499910,330045,330285,11761,-22.7640,-43.3992,-22.8028,-43.4601,driving,best_guess,18
508802,330350,330045,11658,-22.7556,-43.4603,-22.7640,-43.3992,driving,best_guess,18
515858,330490,330330,7848,-22.8268,-43.0634,-22.8832,-43.1034,driving,best_guess,18
500042,330045,330510,7263,-22.7640,-43.3992,-22.8058,-43.3729,driving,best_guess,18
...,...,...,...,...,...,...,...,...,...,...
507530,330310,330090,1,-21.0390,-41.9697,-21.5691,-41.9187,driving,best_guess,18
507542,330310,330170,1,-21.0390,-41.9697,-22.7858,-43.3049,driving,best_guess,18
507554,330310,330205,1,-21.0390,-41.9697,-21.4296,-41.7014,driving,best_guess,18
507590,330310,330490,1,-21.0390,-41.9697,-22.8268,-43.0634,driving,best_guess,18


## Consulta API

In [14]:
# Build the client from the package's real name so this cell can be re-run:
# once `gmaps` has been rebound to the client, `gmaps.Client` no longer exists
# and a second execution would raise AttributeError.
import googlemaps
# get_travel_info reads the client from the global name `gmaps`.
gmaps = googlemaps.Client(key=API_KEY)

In [15]:
# Sends one request per selected route; routes that already have a backup file are skipped.
request_travel_infos(df_select)

100%|██████████| 1316/1316 [04:29<00:00,  4.89it/s]


RJ driving best_guess 6h

100%|██████████| 1316/1316 [04:07<00:00,  5.32it/s]

RJ driving best_guess 12h

100%|██████████| 1316/1316 [04:28<00:00,  4.91it/s]

RJ driving best_guess 18h

100%|██████████| 1316/1316 [04:29<00:00,  4.89it/s]