In [1]:
# carregar bibliotecas
source('fun/setup.R')
library('httr')
library('jsonlite')

# Aplicar funcoes em paralelo
library('future.apply')

GDAL version >= 3.1.0 | setting mapviewOptions(fgb = TRUE)

Linking to GEOS 3.9.1, GDAL 3.2.2, PROJ 8.0.0


Attaching package: ‘jsonlite’


The following object is masked from ‘package:purrr’:

    flatten


Loading required package: future



In [2]:
# Check: does the current (Jupyter) session support the 'multicore' future backend?
future::supportsMulticore()

In [3]:
# https://stackoverflow.com/questions/40536067/how-to-adjust-future-global-maxsize
# For 850MB: 850*1024^2 = 891289600
# For +1.3GB: 1500*1024^2 = 1572864000
# options(future.globals.maxSize = 891289600)

In [4]:
# Folder layout: every path below is derived from the project data root
pasta_dados       <- "../../yellow_dados"
dados_originais   <- file.path(pasta_dados, "00_dados_originais", "IPEA")
pasta_graphhopper <- file.path(pasta_dados, "07_graphhopper")
pasta_gh_ttmarix  <- file.path(pasta_graphhopper, "03_ttmatrix")
# dir.create(pasta_gh_ttmarix, recursive = TRUE, showWarnings = FALSE)

In [5]:
# ------------------------------------------------------------------------------
# Attach population and opportunity totals to the hexagons
# ------------------------------------------------------------------------------

# Resolution-9 hexagons for São Paulo (~350 m between vertices), reduced to a
# plain data frame in which each centroid is kept as a "c(lon, lat)" string
hex_sp <-
  read_sf(sprintf("%s/aop_hex_grid_v2.gpkg", dados_originais)) %>%
  filter(abbrev_muni == 'spo') %>%
  select(-c(abbrev_muni, name_muni, code_muni)) %>%
  st_centroid() %>%
  mutate(centroides = as.character(geom)) %>%
  st_drop_geometry()

# Opportunities per hexagon
# https://ipeagit.github.io/aopdata/reference/read_landuse.html
open_file <- sprintf('%s/aop_landuse_2019_v2.csv', dados_originais)
dados_ipea <-
  read_delim(open_file, delim = ',', col_types = "cccccddddddddddddddddd") %>%
  filter(abbrev_muni == 'spo') %>%
  select(-c(year, abbrev_muni, name_muni, code_muni)) %>%
  # Keep only the total: sum of the T001, E001, M001, S001 and C001 columns
  mutate(oportunidades = T001 + E001 + M001 + S001 + C001) %>%
  select(id_hex, oportunidades)
# head(dados_ipea)

# Population per hexagon
# https://ipeagit.github.io/aopdata/reference/read_landuse.html
open_file <- sprintf('%s/aop_population_2010_v2.csv', dados_originais)
dados_ipea_pop <-
  read_delim(open_file, delim = ',', col_types = "cccccddddddddddddddddd") %>%
  filter(abbrev_muni == 'spo') %>%
  select(-c(year, abbrev_muni, name_muni, code_muni)) %>%
  # For now only the P001 column is kept, renamed to populacao
  select(id_hex, populacao = P001)
# head(dados_ipea_pop)

# Combine opportunities and population with the hexagons, then keep only the
# hexagons that have both opportunities and population.
# NOTE(review): the original comment said hexagons "without opportunity and
# without population" are discarded, which could also be read as requiring
# only one of the two (`|`) - confirm that `&` is the intended rule.
hex_sp <-
  hex_sp %>%
  left_join(dados_ipea, by = 'id_hex') %>%
  left_join(dados_ipea_pop, by = 'id_hex') %>%
  filter(oportunidades > 0 & populacao > 0)

# Clean up the environment
rm(dados_ipea, dados_ipea_pop)

head(hex_sp)

filter: removed 374,971 rows (96%), 15,059 rows remaining

select: dropped 3 variables (abbrev_muni, name_muni, code_muni)

“st_centroid assumes attributes are constant over geometries of x”
“st_centroid does not give correct centroids for longitude/latitude data”
mutate: new variable 'centroides' (character) with 15,059 unique values and 0% NA

filter: removed 374,971 rows (96%), 15,059 rows remaining

select: dropped 4 variables (year, abbrev_muni, name_muni, code_muni)

mutate: new variable 'oportunidades' (double) with 2,051 unique values and 0% NA

select: dropped 17 variables (T001, T002, T003, T004, E001, …)

filter: removed 374,971 rows (96%), 15,059 rows remaining

select: dropped 4 variables (year, abbrev_muni, name_muni, code_muni)

select: renamed one variable (populacao) and dropped 16 variables

left_join: added one column (oportunidades)

           > rows only in x        0

           > rows only in y  (     0)

           > matched rows     15,059


           > rows 

id_hex,centroides,oportunidades,populacao
<chr>,<chr>,<dbl>,<dbl>
89a8100d9d7ffff,"c(-46.6243269389247, -23.5027719122434)",4696,338
89a8100986fffff,"c(-46.4551023034772, -23.5780741727837)",22,488
89a8103986fffff,"c(-46.7502131069765, -23.4139123856488)",45,2403
89a81015653ffff,"c(-46.690778409091, -23.7834099811074)",114,692
89a81039653ffff,"c(-46.6818936790765, -23.4832021869071)",52,1611
89a8100f437ffff,"c(-46.5971911117835, -23.6207040761765)",927,2807


In [6]:
# ------------------------------------------------------------------------------
# Build lat/lon for origins and destinations (hexagon centroids)
# ------------------------------------------------------------------------------

# Split the "c(lon, lat)" centroid string: every character in the separator
# class is a split point, so the empty pieces land in x, y, z and u and only
# lon and lat carry values
hex_sp <-
  hex_sp %>%
  separate(centroides,
           into = c('x', 'y', 'lon', 'z', 'lat', 'u'),
           sep  = '[c\\(\\), )]') %>%
  select(id_hex, lat, lon)

# hex_sp %>% filter(is.na(lat) | is.na(lon))

# São Paulo hexagons paired with their neighbours, read with all columns as
# character. The first join adds the origin coordinates and the second the
# destination ones, which yields the .x / .y suffixes. Pairs whose destination
# or origin has no coordinates in hex_sp are then removed.
hex_com_vizinhos <-
  read_delim(sprintf("%s/01_hex_spo_res09_23vizinhos.csv", pasta_gh_ttmarix),
             delim = ';',
             col_types = cols(.default = "c")) %>%
  left_join(hex_sp, by = c('id_hex_x' = 'id_hex')) %>%
  left_join(hex_sp, by = c('id_hex_y' = 'id_hex')) %>%
  filter(!is.na(lat.y) & !is.na(lon.y)) %>%
  filter(!is.na(lat.x) & !is.na(lon.x))
# 8859962 / 1267988 = 7 times the number of queries at resolution 8

# Clean up the environment
rm(hex_sp)

select: dropped 6 variables (x, y, z, u, oportunidades, …)

left_join: added 2 columns (lat, lon)

           > rows only in x   11,371,752

           > rows only in y  (         0)

           > matched rows     13,565,952


           > rows total       24,937,704

left_join: added 4 columns (lat.x, lon.x, lat.y, lon.y)

           > rows only in x   13,737,942

           > rows only in y  (         0)

           > matched rows     11,199,762


           > rows total       24,937,704

filter: removed 13,737,942 rows (55%), 11,199,762 rows remaining

filter: removed 2,339,800 rows (21%), 8,859,962 rows remaining



In [7]:
# ------------------------------------------------------------------------------
# Routing between two points
# ------------------------------------------------------------------------------

# Send one routing query to GraphHopper and append the main results, as a
# single row, to the temporary CSV of the current section.
#
# Args:
#   url: full GraphHopper /route request URL for one origin-destination pair.
#   n:   section label, used as the suffix of the output file name.
#
# Returns the value of write_delim() for the row that was written. The row has
# the columns distance, weight, time, speed, poly and url; all columns except
# url are NA when the request fails or the answer is not HTTP 200.
#
# Reads the global `pasta_gh_ttmarix` to locate the output folder.
gh_route <- function(url, n) {
  # url <- 'http://localhost:8989/route/?point=-23.5314933121698%2C-46.634354542765&point=-23.5390199310058%2C-46.6376369484305&profile=bike&instructions=false&calc_points=true&details=average_speed'

  # Run the routing GET against GraphHopper. A transport-level error (server
  # down, connection reset, ...) is treated as a query without result instead
  # of aborting the whole batch.
  # print(url)
  gh_response <- tryCatch(GET(url), error = function(e) NULL)

  # Shorten the URL to "lat,lon;lat,lon" before storing it in the result
  url <-
    url %>%
    str_replace('http:\\/\\/localhost:8989\\/route\\/\\?point=', '') %>%
    str_replace('&point=', ';') %>%
    str_replace_all('%2C', ',') %>%
    str_replace('&profile=bike&instructions=false&calc_points=true&details=average_speed', '')

  # Only an HTTP 200 answer carries a route
  request_ok <- !is.null(gh_response) && status_code(gh_response) == 200

  if (request_ok) {

    # Read the body explicitly as UTF-8 text and parse the JSON. This replaces
    # the implicit coercion of the response object through str_c(), which
    # needed both suppressWarnings() and suppressMessages() to stay quiet.
    parsed <- fromJSON(content(gh_response, as = 'text', encoding = 'UTF-8'))

    # Only the 'paths' element is of interest, as a data frame
    paths <-
      as.data.frame(parsed$paths) %>%
      # Time from milliseconds to seconds, then average speed
      # (km/h, assuming distance is in metres - TODO confirm)
      mutate(time = time / 1000,
             speed = distance / time * 3.6) %>%
      # Drop the extra columns - poly is the geometry of the computed route
      select(distance, weight, time, speed, poly = points) %>%
      mutate(url = url)

  } else {

    # No usable answer from GraphHopper: store a row of NAs for this pair
    paths <- data.frame(distance = NA,
                        weight   = NA,
                        time     = NA,
                        speed    = NA,
                        poly     = NA,
                        url      = url
    )

  }

  # Append to the temporary results file of this section (no header is written
  # because append = TRUE).
  # NOTE(review): this function is called from parallel workers that all append
  # to the same file - confirm that rows cannot interleave.
  out_file <- sprintf('%s/02_ttmatrix_res09_%s.csv', pasta_gh_ttmarix, n)
  write_delim(paths, out_file, delim = ';', append = TRUE)

}

In [8]:
# Add the GraphHopper GET URL for each origin-destination pair:
# point=<lat>%2C<lon> for the origin (.x) followed by the destination (.y)
hex_com_vizinhos <-
  hex_com_vizinhos %>%
  mutate(url = paste0('http://localhost:8989/route/?point=',
                      lat.x, '%2C', lon.x, '&point=',
                      lat.y, '%2C', lon.y, '&profile=bike&instructions=false&calc_points=true&details=average_speed'))

head(hex_com_vizinhos, 2)

mutate: new variable 'url' (character) with 8,859,962 unique values and 0% NA



id_hex_x,id_hex_y,lat.x,lon.x,lat.y,lon.y,url
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
89a8100d9d7ffff,89a8100d9d3ffff,-23.5027719122434,-46.6243269389247,-23.5044472587788,-46.6213157795618,http://localhost:8989/route/?point=-23.5027719122434%2C-46.6243269389247&point=-23.5044472587788%2C-46.6213157795618&profile=bike&instructions=false&calc_points=true&details=average_speed
89a8100d9d7ffff,89a8100d9c3ffff,-23.5027719122434,-46.6243269389247,-23.5013787666731,-46.6212254402001,http://localhost:8989/route/?point=-23.5027719122434%2C-46.6243269389247&point=-23.5013787666731%2C-46.6212254402001&profile=bike&instructions=false&calc_points=true&details=average_speed


In [9]:
# Split the data frame into 7 sections and process one section per run
# nrow(hex_com_vizinhos) # 8,859,962

# Section to process in this run (1 to 7) - change only this value between runs
secao_atual  <- 4L
total_secoes <- 7L
linhas_secao <- 1250000L

stopifnot(secao_atual >= 1L, secao_atual <= total_secoes)

# Sections 1-6 hold 1,250,000 rows each; the last one takes all remaining rows
linha_inicial <- (secao_atual - 1L) * linhas_secao + 1L
linha_final   <- if (secao_atual == total_secoes) {
  nrow(hex_com_vizinhos)
} else {
  secao_atual * linhas_secao
}

# Guard against running this cell again on an already sliced data frame, which
# would otherwise silently produce an empty (or wrong) section
if (linha_inicial > nrow(hex_com_vizinhos)) {
  stop('First row of section ', secao_atual, ' is beyond the end of ',
       'hex_com_vizinhos - was this cell already run?', call. = FALSE)
}

hex_com_vizinhos <- hex_com_vizinhos %>% slice(linha_inicial:linha_final)

# Two-digit section label ('01' ... '07'), used in the output file name
n_secao <- sprintf('%02d', secao_atual)

# nrow(hex_com_vizinhos)
print(n_secao)

head(hex_com_vizinhos, 2)

slice: removed 7,609,962 rows (86%), 1,250,000 rows remaining



[1] "03"


id_hex_x,id_hex_y,lat.x,lon.x,lat.y,lon.y,url
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
89a8100f487ffff,89a8100e247ffff,-23.6315691474912,-46.5944497877008,-23.599514057312,-46.6241167273322,http://localhost:8989/route/?point=-23.6315691474912%2C-46.5944497877008&point=-23.599514057312%2C-46.6241167273322&profile=bike&instructions=false&calc_points=true&details=average_speed
89a8100f487ffff,89a8100e20bffff,-23.6315691474912,-46.5944497877008,-23.6009050842264,-46.6272186938058,http://localhost:8989/route/?point=-23.6315691474912%2C-46.5944497877008&point=-23.6009050842264%2C-46.6272186938058&profile=bike&instructions=false&calc_points=true&details=average_speed


In [10]:
# Run garbage collection and print the memory report. Spell out TRUE because
# `T` is an ordinary variable that can be reassigned.
gc(TRUE)

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,3546509,189.5,13962871,745.7,11495941,614.0
Vcells,50728540,387.1,321667848,2454.2,402084806,3067.7


In [11]:
# Build the travel-time matrix by querying GraphHopper for every URL
# Detach tidylog (presumably so the dplyr verbs inside gh_route() stop logging
# on every call); only when attached, so this cell can be run again
if ('package:tidylog' %in% search()) {
  detach('package:tidylog')
}
# lapply(hex_com_vizinhos$url, gh_route, n = n_secao)

# Run the function for all URLs in parallel (multicore, Jupyter)
(start <- Sys.time())
future::plan(future::multicore)
# Only the CSV written by gh_route() is needed, so each task returns NULL
# instead of gh_route()'s value; this avoids collecting one result per URL in
# the main session
invisible(future.apply::future_lapply(X   = hex_com_vizinhos$url,
                                      FUN = function(url, n) {
                                        gh_route(url, n = n)
                                        NULL
                                      },
                                      n   = n_secao,
                                      # future.packages = c('dplyr'),
                                      future.seed = TRUE))
Sys.time()
Sys.time() - start

[1] "2023-05-23 09:44:17 -03"

[1] "2023-05-23 12:24:24 -03"

Time difference of 2.668822 hours

In [12]:
# Drop the processed section and print the memory report after collection
# (TRUE spelled out: `T` is a reassignable variable)
rm(hex_com_vizinhos)
gc(TRUE)

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,2339481,125.0,103712180,5538.9,129640225,6923.6
Vcells,12078580,92.2,247104908,1885.3,402084806,3067.7
