Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
Sign up
add query column to `geocode()` result? #9
Comments
|
library(hereR)
library(sf)
#> Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
x <- data.frame(
company = c("Schweizerische Bundesbahnen SBB", "Bahnhof AG", "Deutsche Bahn AG"),
address = c("Wylerstrasse 123, 3000 Bern 65", "not_an_address", "Potsdamer Platz 2, 10785 Berlin"),
stringsAsFactors = FALSE
)
locs <- geocode(x$address)
#> Address 'not_an_address' not found.
locs
#> Simple feature collection with 2 features and 10 fields
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 7.44657 ymin: 46.96138 xmax: 13.37525 ymax: 52.51005
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> address street houseNumber
#> 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz Wylerstrasse 123
#> 3 Potsdamer Platz 2, 10785 Berlin, Deutschland Potsdamer Platz 2
#> postalCode district city county state country type
#> 1 3014 Wyler Bern Bern-Mittelland BE CHE point
#> 3 10785 Tiergarten Berlin Berlin Berlin DEU point
#> geometry
#> 1 POINT (7.44657 46.96138)
#> 3 POINT (13.37525 52.51005)
Using the row names of the returned `sf` object:
st_as_sf(data.frame(locs, x[row.names(locs), ]))
#> Simple feature collection with 2 features and 12 fields
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 7.44657 ymin: 46.96138 xmax: 13.37525 ymax: 52.51005
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> address street houseNumber
#> 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz Wylerstrasse 123
#> 3 Potsdamer Platz 2, 10785 Berlin, Deutschland Potsdamer Platz 2
#> postalCode district city county state country type
#> 1 3014 Wyler Bern Bern-Mittelland BE CHE point
#> 3 10785 Tiergarten Berlin Berlin Berlin DEU point
#> company address.1
#> 1 Schweizerische Bundesbahnen SBB Wylerstrasse 123, 3000 Bern 65
#> 3 Deutsche Bahn AG Potsdamer Platz 2, 10785 Berlin
#> geometry
#> 1 POINT (7.44657 46.96138)
#> 3 POINT (13.37525 52.51005)
Thank you for reporting this important issue. |
|
Thanks!
library(hereR)
library(sf)
#> Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
x <- data.frame(
company = c("Schweizerische Bundesbahnen SBB", "Bahnhof AG", "Deutsche Bahn AG"),
address = c("Wylerstrasse 123, 3000 Bern 65", "not_an_address", "Potsdamer Platz 2, 10785 Berlin"),
stringsAsFactors = FALSE
)
locs <- geocode(x$address)
#> Address 'not_an_address' not found.
merge(x, locs, all.x = TRUE, by = "row.names")
#> Row.names company address.x
#> 1 1 Schweizerische Bundesbahnen SBB Wylerstrasse 123, 3000 Bern 65
#> 2 2 Bahnhof AG not_an_address
#> 3 3 Deutsche Bahn AG Potsdamer Platz 2, 10785 Berlin
#> address.y street houseNumber
#> 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz Wylerstrasse 123
#> 2 Potsdamer Platz 2, 10785 Berlin, Deutschland Potsdamer Platz 2
#> 3 <NA> <NA> <NA>
#> postalCode district city county state country type
#> 1 3014 Wyler Bern Bern-Mittelland BE CHE point
#> 2 10785 Tiergarten Berlin Berlin Berlin DEU point
#> 3 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> geometry
#> 1 POINT (7.44657 46.96138)
#> 2 POINT (13.37525 52.51005)
#> 3 POINT EMPTY
Anyway, using ‘real’ columns instead of row names:
x$id <- rownames(x)
locs$id <- rownames(locs)
(y <- merge(x, locs, all.x = TRUE, by = "id"))
#> id company address.x
#> 1 1 Schweizerische Bundesbahnen SBB Wylerstrasse 123, 3000 Bern 65
#> 2 2 Bahnhof AG not_an_address
#> 3 3 Deutsche Bahn AG Potsdamer Platz 2, 10785 Berlin
#> address.y street houseNumber
#> 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz Wylerstrasse 123
#> 2 <NA> <NA> <NA>
#> 3 Potsdamer Platz 2, 10785 Berlin, Deutschland Potsdamer Platz 2
#> postalCode district city county state country type
#> 1 3014 Wyler Bern Bern-Mittelland BE CHE point
#> 2 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 3 10785 Tiergarten Berlin Berlin Berlin DEU point
#> geometry
#> 1 POINT (7.44657 46.96138)
#> 2 POINT EMPTY
#> 3 POINT (13.37525 52.51005)
So adding an id column might be better (like you do with `reverse_geocode()`). Also keep in mind that users might have duplicate addresses in their input. Finally, a case with an empty geometry:
(z <- st_sf(y[c("company", "geometry")]))
#> Simple feature collection with 3 features and 1 field (with 1 geometry empty)
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 7.44657 ymin: 46.96138 xmax: 13.37525 ymax: 52.51005
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> company geometry
#> 1 Schweizerische Bundesbahnen SBB POINT (7.44657 46.96138)
#> 2 Bahnhof AG POINT EMPTY
#> 3 Deutsche Bahn AG POINT (13.37525 52.51005)
resz <- reverse_geocode(z)
#> Warning in (function (res) : Request failed: HTTP status code 400.
How would you merge these reliably? I’d suggest something along the lines of:
z$id <- seq_len(nrow(z))
resz$id <- c(1,3) # because the current id's represent the index of successful queries not the index of the POIs. So this would need fixing.
merge(st_drop_geometry(z), resz, by = "id")
#> id company rank distance level
#> 1 1 Schweizerische Bundesbahnen SBB 1 0 houseNumber
#> 2 3 Deutsche Bahn AG 1 0 houseNumber
#> label country state county
#> 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz CHE BE Bern-Mittelland
#> 2 Potsdamer Platz 2, 10785 Berlin, Deutschland DEU Berlin Berlin
#> city district street houseNumber postalCode
#> 1 Bern Wyler Wylerstrasse 123 3014
#> 2 Berlin Tiergarten Potsdamer Platz 2 10785
#> geometry
#> 1 POINT (7.44657 46.96138)
#> 2 POINT (13.37525 52.51005)
Session info
devtools::session_info()
#> - Session info ---------------------------------------------------------------
#> setting value
#> version R version 3.6.1 (2019-07-05)
#> os Windows 10 x64
#> system x86_64, mingw32
#> ui RTerm
#> language en
#> collate German_Germany.1252
#> ctype German_Germany.1252
#> tz Europe/Berlin
#> date 2019-11-19
#>
#> - Packages -------------------------------------------------------------------
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.1)
#> backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.1)
#> callr 3.3.2 2019-09-22 [1] CRAN (R 3.6.1)
#> class 7.3-15 2019-01-01 [2] CRAN (R 3.6.1)
#> classInt 0.4-2 2019-10-17 [1] CRAN (R 3.6.1)
#> cli 1.1.0 2019-03-19 [1] CRAN (R 3.6.1)
#> codetools 0.2-16 2018-12-24 [1] CRAN (R 3.6.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.1)
#> curl 4.2 2019-09-24 [1] CRAN (R 3.6.1)
#> data.table 1.12.6 2019-10-18 [1] CRAN (R 3.6.1)
#> DBI 1.0.0 2018-05-02 [1] CRAN (R 3.6.1)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.1)
#> devtools 2.2.1 2019-09-24 [1] CRAN (R 3.6.1)
#> digest 0.6.22 2019-10-21 [1] CRAN (R 3.6.1)
#> e1071 1.7-2 2019-06-05 [1] CRAN (R 3.6.1)
#> ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.1)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.1)
#> fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.1)
#> glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.1)
#> hereR * 0.2.0 2019-11-13 [1] CRAN (R 3.6.1)
#> highr 0.8 2019-03-20 [1] CRAN (R 3.6.1)
#> htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.1)
#> jsonlite 1.6 2018-12-07 [1] CRAN (R 3.6.1)
#> KernSmooth 2.23-15 2015-06-29 [2] CRAN (R 3.6.1)
#> knitr 1.26 2019-11-12 [1] CRAN (R 3.6.1)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.1)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.1)
#> pkgbuild 1.0.6 2019-10-09 [1] CRAN (R 3.6.1)
#> pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.1)
#> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.6.1)
#> processx 3.4.1 2019-07-18 [1] CRAN (R 3.6.1)
#> ps 1.3.0 2018-12-21 [1] CRAN (R 3.6.1)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.1)
#> Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.1)
#> remotes 2.1.0.9000 2019-07-22 [1] Github (r-lib/remotes@6e9eaa9)
#> rlang 0.4.1 2019-10-24 [1] CRAN (R 3.6.1)
#> rmarkdown 1.17 2019-11-13 [1] CRAN (R 3.6.1)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.1)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.1)
#> sf * 0.8-0 2019-09-17 [1] CRAN (R 3.6.1)
#> stringi 1.4.3 2019-03-12 [1] CRAN (R 3.6.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.1)
#> testthat 2.3.0 2019-11-05 [1] CRAN (R 3.6.1)
#> units 0.6-5 2019-10-08 [1] CRAN (R 3.6.1)
#> usethis 1.5.1 2019-07-04 [1] CRAN (R 3.6.1)
#> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.1)
#> xfun 0.11 2019-11-12 [1] CRAN (R 3.6.1)
#> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0)
#>
#> [1] C:/Users/daniel/Documents/.R/win-library
#> [2] C:/Program Files/R/R-3.6.1/library |
|
Oh, I meant |
|
Thanks for your feedback and sorry that it took me some time to answer. Yes, merging using […] Since empty entries in the geometry column in […] Therefore I added an `id` column to the output of `geocode()`:
library(hereR)
library(sf)
#> Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
x <- data.frame(
company = c("Schweizerische Bundesbahnen SBB", "Bahnhof AG", "Deutsche Bahn AG"),
address = c("Wylerstrasse 123, 3000 Bern 65", "not_an_address", "Potsdamer Platz 2, 10785 Berlin"),
stringsAsFactors = FALSE
)
(locs <- geocode(x$address))
#> Address(es) 'not_an_address' not found.
#> Simple feature collection with 2 features and 11 fields
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 7.44657 ymin: 46.96138 xmax: 13.37525 ymax: 52.51005
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> id address street
#> 1 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz Wylerstrasse
#> 2 3 Potsdamer Platz 2, 10785 Berlin, Deutschland Potsdamer Platz
#> houseNumber postalCode district city county state country
#> 1 123 3014 Wyler Bern Bern-Mittelland BE CHE
#> 2 2 10785 Tiergarten Berlin Berlin Berlin DEU
#> type geometry
#> 1 point POINT (7.44657 46.96138)
#> 2 point POINT (13.37525 52.51005)
The column `id` holds the row index of the input address, so it can be used to join the source data frame:
(y <- st_as_sf(data.frame(locs, x[locs$id, ])))
#> Simple feature collection with 2 features and 13 fields
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 7.44657 ymin: 46.96138 xmax: 13.37525 ymax: 52.51005
#> epsg (SRID): 4326
#> proj4string: +proj=longlat +datum=WGS84 +no_defs
#> id address street
#> 1 1 Wylerstrasse 123, 3014 Bern Bern, Schweiz Wylerstrasse
#> 3 3 Potsdamer Platz 2, 10785 Berlin, Deutschland Potsdamer Platz
#> houseNumber postalCode district city county state country
#> 1 123 3014 Wyler Bern Bern-Mittelland BE CHE
#> 3 2 10785 Tiergarten Berlin Berlin Berlin DEU
#> type company address.1
#> 1 point Schweizerische Bundesbahnen SBB Wylerstrasse 123, 3000 Bern 65
#> 3 point Deutsche Bahn AG Potsdamer Platz 2, 10785 Berlin
#> geometry
#> 1 POINT (7.44657 46.96138)
#> 3 POINT (13.37525 52.51005)
As for your last point:
The result of `reverse_geocode()` can be merged with the input POIs using the `id` column:
x <- data.frame(
place = c("Barcelona", "Ocean", "Melbourne"),
lng = c(2.173404, 76, 144.963058),
lat = c(41.385063, -28, -37.813629)
) %>% st_as_sf(coords = c("lng", "lat")) %>% st_set_crs(4326)
y <- reverse_geocode(x, landmarks = TRUE, results = 3)
x$id <- seq(1, nrow(x))
(merge(st_drop_geometry(x), y, all.x = TRUE, by = "id"))
#> id place rank distance level name
#> 1 1 Barcelona 1 101.0 landmark Plaça de la Vila de Madrid
#> 2 1 Barcelona 2 276.2 landmark Plaça de Sant Josep Oriol
#> 3 1 Barcelona 3 349.1 landmark Plaça de Vicenç Martorell
#> 4 2 Ocean NA NA <NA> <NA>
#> 5 3 Melbourne 1 455.2 landmark RMIT University City Campus
#> 6 3 Melbourne 2 617.4 landmark Flagstaff Gardens
#> 7 3 Melbourne 3 654.3 landmark Yarra River
#> label country
#> 1 08007 Barcelona (Barcelona), Espanya ESP
#> 2 Plaça de Sant Josep Oriol, 08002 Barcelona (Barcelona), Espanya ESP
#> 3 Carrer de les Ramelleres, 08001 Barcelona (Barcelona), Espanya ESP
#> 4 <NA> <NA>
#> 5 Melbourne VIC 3000, Australia AUS
#> 6 West Melbourne VIC 3003, Australia AUS
#> 7 Melbourne VIC 3000, Australia AUS
#> state geometry
#> 1 Catalunya POINT (2.17221 41.38441)
#> 2 Catalunya POINT (2.17413 41.38244)
#> 3 Catalunya POINT (2.16903 41.384)
#> 4 <NA> POINT EMPTY
#> 5 VIC POINT (144.9648 -37.80718)
#> 6 VIC POINT (144.9544 -37.81043)
#> 7 VIC POINT (144.9407 -37.81627)
The problem starts if an API request fails. My first guess to fix this is to avoid empty geometries in the input of `reverse_geocode()`. But there are other possibilities for a failed request and these also must be handled, which means […]
Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#> setting value
#> version R version 3.6.1 (2019-07-05)
#> os macOS Catalina 10.15.1
#> system x86_64, darwin15.6.0
#> ui X11
#> language (EN)
#> collate de_CH.UTF-8
#> ctype de_CH.UTF-8
#> tz Europe/Zurich
#> date 2019-12-08
#>
#> ─ Packages ──────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
#> backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.0)
#> callr 3.3.2 2019-09-22 [1] CRAN (R 3.6.0)
#> class 7.3-15 2019-01-01 [1] CRAN (R 3.6.1)
#> classInt 0.4-2 2019-10-17 [1] CRAN (R 3.6.0)
#> cli 1.1.0 2019-03-19 [1] CRAN (R 3.6.0)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
#> curl 4.2 2019-09-24 [1] CRAN (R 3.6.0)
#> data.table 1.12.6 2019-10-18 [1] CRAN (R 3.6.0)
#> DBI 1.0.0 2018-05-02 [1] CRAN (R 3.6.0)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.0)
#> devtools 2.2.1 2019-09-24 [1] CRAN (R 3.6.0)
#> digest 0.6.22 2019-10-21 [1] CRAN (R 3.6.0)
#> e1071 1.7-2 2019-06-05 [1] CRAN (R 3.6.0)
#> ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
#> fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.0)
#> glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.0)
#> hereR * 0.2.0.9000 2019-12-08 [1] local
#> htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
#> jsonlite 1.6 2018-12-07 [1] CRAN (R 3.6.0)
#> KernSmooth 2.23-16 2019-10-15 [1] CRAN (R 3.6.0)
#> knitr 1.25 2019-09-18 [1] CRAN (R 3.6.0)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.0)
#> pkgbuild 1.0.6 2019-10-09 [1] CRAN (R 3.6.0)
#> pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.6.0)
#> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.6.0)
#> processx 3.4.1 2019-07-18 [1] CRAN (R 3.6.0)
#> ps 1.3.0 2018-12-21 [1] CRAN (R 3.6.0)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0)
#> Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.0)
#> remotes 2.1.0 2019-06-24 [1] CRAN (R 3.6.0)
#> rlang 0.4.1 2019-10-24 [1] CRAN (R 3.6.0)
#> rmarkdown 1.16 2019-10-01 [1] CRAN (R 3.6.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
#> sf * 0.8-0 2019-09-17 [1] CRAN (R 3.6.0)
#> stringi 1.4.3 2019-03-12 [1] CRAN (R 3.6.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
#> testthat 2.2.1 2019-07-25 [1] CRAN (R 3.6.0)
#> units 0.6-5 2019-10-08 [1] CRAN (R 3.6.0)
#> usethis 1.5.1 2019-07-04 [1] CRAN (R 3.6.0)
#> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0)
#> xfun 0.10 2019-10-01 [1] CRAN (R 3.6.0)
#> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library |
Add an "id" column to the output of geocode(), closes #9.
Potential use case: When geocoding, I would like to bind a source data frame to the results sf object. I.e. in the following example I would like to have the company name in the sf object.
However, I don’t have an identical column in both data frames with which I could merge both. How would I be able to do that? (In this simple example it would be possible to
`rbind()` both objects, but that rests on the (fragile) assumption that both objects have the same `nrow()`).
One possible solution would be to add the query vector as a column to the results object.