Skip to content

Commit

Permalink
Make columns on spatial output consistent
Browse files Browse the repository at this point in the history
  • Loading branch information
dieghernan committed Jul 31, 2023
1 parent 756cf9f commit 3a6e48f
Show file tree
Hide file tree
Showing 7 changed files with 423 additions and 10 deletions.
4 changes: 3 additions & 1 deletion R/data_spatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#' The dataset contains 2016 observations (rows) and 12 variables (columns).
#'
#' The object contains the following columns:
#' * **id**: JSON id code, the same as **NUTS_ID**. See **NUTS_ID** below for
#' further clarification.
#' * **LEVL_CODE**: NUTS level code: 0 (national level), 1 (major
#' socio-economic regions), 2 (basic regions for the application of regional
#' policies) or 3 (small regions).
Expand Down Expand Up @@ -47,9 +49,9 @@
#' * 0: no classification provided (e.g. in the case of NUTS 1 and NUTS 2
#' regions)
#' * **FID**: Same as NUTS_ID.
#' * **geometry**: geospatial information.
#' * **geo**: Same as NUTS_ID, added for for easier joins with dplyr. However,
#' it is recommended to use other identical fields for this purpose.
#' * **geometry**: geospatial information.
#'
#' Dataset updated: 2023-06-29. For a more recent version, please use
#' [giscoR::gisco_get_nuts()] function.
Expand Down
52 changes: 47 additions & 5 deletions R/get_eurostat_geospatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#'
#' The objects downloaded from GISCO should contain all or some of the
#' following variable columns:
#' * **id**: JSON id code, the same as **NUTS_ID**. See **NUTS_ID** below for
#' further clarification.
#' * **LEVL_CODE**: NUTS level code: 0 (national level), 1 (major
#' socio-economic regions), 2 (basic regions for the application of regional
#' policies) or 3 (small regions).
Expand Down Expand Up @@ -75,9 +77,9 @@
#' * 0: no classification provided (e.g. in the case of NUTS 1 and NUTS 2
#' regions)
#' * **FID**: Same as NUTS_ID.
#' * **geometry**: geospatial information.
#' * **geo**: Same as NUTS_ID, added for for easier joins with dplyr. However,
#' it is recommended to use other identical fields for this purpose.
#' * **geometry**: geospatial information.
#'
#' @author
#' Markus Kainu <markuskainu@gmail.com>, Diego Hernangomez
Expand Down Expand Up @@ -175,10 +177,12 @@ get_eurostat_geospatial <- function(output_class = "sf",
nuts_level <- match.arg(nuts_level, c("all", 0:3))

# Performance - If df requested resolution and crs are meaningless. Switching
# to 60 and 4326 for speed
# to 60 and 4326 for speed (except for 2003, no available)
if (output_class == "df") {
resolution <- "60"
crs <- "4326"

if (as.integer(year) == 2003) resolution <- "20"
}


Expand Down Expand Up @@ -249,17 +253,16 @@ get_eurostat_geospatial <- function(output_class = "sf",
)
}


# Just to capture potential NULL outputs from giscoR - this happen
# on some errors
if (is.null(shp)) {
return(NULL)
}

# Post-data treatments
# Manage col names
shp <- geo_names(shp)

# Adding a `geo` column for easier joins with dplyr
shp$geo <- shp$NUTS_ID
# to df
if (output_class == "df") {
# Remove geometry
Expand All @@ -268,3 +271,42 @@ get_eurostat_geospatial <- function(output_class = "sf",

return(shp)
}


# Helper function, add names and reorder
geo_names <- function(x) {
# Case for border (BN), there are no NUTS_ID , do nothing
if (!"NUTS_ID" %in% names(x)) {
return(x)
}


# Add `id` and `geo` column for easier joins with dplyr
x$geo <- x$NUTS_ID
x$id <- x$geo

# Arrange names in proper order
sfcol <- attr(x, "sf_column")
rest <- c(
"id", "LEVL_CODE", "NUTS_ID", "CNTR_CODE", "NAME_LATN", "NUTS_NAME",
"MOUNT_TYPE", "URBN_TYPE", "COAST_TYPE", "FID", "geo"
)

# Check what needed columns are not present in the source file
miss_cols <- setdiff(unique(c(rest, sfcol)), names(x))
extra_cols <- setdiff(names(x), unique(c(rest, sfcol)))


# Add missing cols with NAs
list_df <- lapply(miss_cols, function(x) {
template_df <- data.frame(somecol = NA)
names(template_df) <- x
template_df
})
x <- dplyr::bind_cols(c(list(x), list_df))

# Final column order
order_cols <- unique(c(rest, extra_cols, sfcol))
xend <- x[, order_cols]
xend
}
14 changes: 12 additions & 2 deletions data-raw/eurostat_geodata_60_2016.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@ library(giscoR)
library(tidyverse)

# Current names internal)
aa <- eurostat::eurostat_geodata_60_2016

from_gisco <- gisco_get_nuts(
year = 2016, resolution = 60,
epsg = 4326, update_cache = TRUE,
verbose = TRUE
)

from_gisco$geo <- from_gisco$NUTS_ID
from_gisco$id <- from_gisco$NUTS_ID

# End

Expand All @@ -22,4 +21,15 @@ unique(sf::st_is_valid(from_gisco))
eurostat_geodata_60_2016 <- eurostat_geodata_60_2016 %>%
arrange(LEVL_CODE, NUTS_ID)

# Arrange names in proper order
sfcol <- attr(eurostat_geodata_60_2016, "sf_column")
rest <- c(
"id", "LEVL_CODE", "NUTS_ID", "CNTR_CODE", "NAME_LATN",
"NUTS_NAME", "MOUNT_TYPE", "URBN_TYPE", "COAST_TYPE",
"FID", "geo"
)

reorder <- intersect(unique(c(rest, sfcol)), names(eurostat_geodata_60_2016))
eurostat_geodata_60_2016 <- eurostat_geodata_60_2016[, reorder]

usethis::use_data(eurostat_geodata_60_2016, overwrite = TRUE, compress = "xz")
Binary file modified data/eurostat_geodata_60_2016.rda
Binary file not shown.
4 changes: 3 additions & 1 deletion man/eurostat_geodata_60_2016.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_eurostat_geospatial.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3a6e48f

Please sign in to comment.