Make columns on spatial output consistent

rOpenGov · Jul 31, 2023 · 3a6e48f · 3a6e48f
1 parent 756cf9f
commit 3a6e48f
Show file tree

Hide file tree

Showing 7 changed files with 423 additions and 10 deletions.
diff --git a/R/data_spatial.R b/R/data_spatial.R
@@ -14,6 +14,8 @@
 #' The dataset contains 2016 observations (rows) and 12 variables (columns).
 #'
 #' The object contains the following columns:
+#' * **id**: JSON id code, the same as **NUTS_ID**. See **NUTS_ID** below for
+#'   further clarification.
 #' * **LEVL_CODE**: NUTS level code: 0 (national level), 1 (major
 #'   socio-economic regions), 2 (basic regions for the application of regional
 #'   policies) or 3 (small regions).
@@ -47,9 +49,9 @@
 #'   * 0: no classification provided (e.g. in the case of NUTS 1 and NUTS 2
 #'     regions)
 #' * **FID**: Same as NUTS_ID.
-#' * **geometry**: geospatial information.
 #' * **geo**: Same as NUTS_ID, added for for easier joins with dplyr. However,
 #'   it is recommended to use other identical fields for this purpose.
+#' * **geometry**: geospatial information.
 #'
 #' Dataset updated: 2023-06-29. For a more recent version, please use
 #'   [giscoR::gisco_get_nuts()] function.

diff --git a/R/get_eurostat_geospatial.R b/R/get_eurostat_geospatial.R
@@ -42,6 +42,8 @@
 #'
 #' The objects downloaded from GISCO should contain all or some of the
 #' following variable columns:
+#' * **id**: JSON id code, the same as **NUTS_ID**. See **NUTS_ID** below for
+#'   further clarification.
 #' * **LEVL_CODE**: NUTS level code: 0 (national level), 1 (major
 #'   socio-economic regions), 2 (basic regions for the application of regional
 #'   policies) or 3 (small regions).
@@ -75,9 +77,9 @@
 #'   * 0: no classification provided (e.g. in the case of NUTS 1 and NUTS 2
 #'     regions)
 #' * **FID**: Same as NUTS_ID.
-#' * **geometry**: geospatial information.
 #' * **geo**: Same as NUTS_ID, added for for easier joins with dplyr. However,
 #'   it is recommended to use other identical fields for this purpose.
+#' * **geometry**: geospatial information.
 #'
 #' @author
 #' Markus Kainu <markuskainu@gmail.com>, Diego Hernangomez
@@ -175,10 +177,12 @@ get_eurostat_geospatial <- function(output_class = "sf",
   nuts_level <- match.arg(nuts_level, c("all", 0:3))
 
   # Performance - If df requested resolution and crs are meaningless. Switching
-  # to 60 and 4326 for speed
+  # to 60 and 4326 for speed (except for 2003, no available)
   if (output_class == "df") {
     resolution <- "60"
     crs <- "4326"
+
+    if (as.integer(year) == 2003) resolution <- "20"
   }
 
 
@@ -249,17 +253,16 @@ get_eurostat_geospatial <- function(output_class = "sf",
     )
   }
 
-
   # Just to capture potential NULL outputs from giscoR - this happen
   # on some errors
   if (is.null(shp)) {
     return(NULL)
   }
 
   # Post-data treatments
+  # Manage col names
+  shp <- geo_names(shp)
 
-  # Adding a `geo` column for easier joins with dplyr
-  shp$geo <- shp$NUTS_ID
   # to df
   if (output_class == "df") {
     # Remove geometry
@@ -268,3 +271,42 @@ get_eurostat_geospatial <- function(output_class = "sf",
 
   return(shp)
 }
+
+
+# Helper function, add names and reorder
+geo_names <- function(x) {
+  # Case for border (BN), there are no NUTS_ID , do nothing
+  if (!"NUTS_ID" %in% names(x)) {
+    return(x)
+  }
+
+
+  # Add `id` and `geo` column for easier joins with dplyr
+  x$geo <- x$NUTS_ID
+  x$id <- x$geo
+
+  # Arrange names in proper order
+  sfcol <- attr(x, "sf_column")
+  rest <- c(
+    "id", "LEVL_CODE", "NUTS_ID", "CNTR_CODE", "NAME_LATN", "NUTS_NAME",
+    "MOUNT_TYPE", "URBN_TYPE", "COAST_TYPE", "FID", "geo"
+  )
+
+  # Check what needed columns are not present in the source file
+  miss_cols <- setdiff(unique(c(rest, sfcol)), names(x))
+  extra_cols <- setdiff(names(x), unique(c(rest, sfcol)))
+
+
+  # Add missing cols with NAs
+  list_df <- lapply(miss_cols, function(x) {
+    template_df <- data.frame(somecol = NA)
+    names(template_df) <- x
+    template_df
+  })
+  x <- dplyr::bind_cols(c(list(x), list_df))
+
+  # Final column order
+  order_cols <- unique(c(rest, extra_cols, sfcol))
+  xend <- x[, order_cols]
+  xend
+}
diff --git a/data-raw/eurostat_geodata_60_2016.R b/data-raw/eurostat_geodata_60_2016.R
@@ -3,15 +3,14 @@ library(giscoR)
 library(tidyverse)
 
 # Current names internal)
-aa <- eurostat::eurostat_geodata_60_2016
-
 from_gisco <- gisco_get_nuts(
   year = 2016, resolution = 60,
   epsg = 4326, update_cache = TRUE,
   verbose = TRUE
 )
 
 from_gisco$geo <- from_gisco$NUTS_ID
+from_gisco$id <- from_gisco$NUTS_ID
 
 # End
 
@@ -22,4 +21,15 @@ unique(sf::st_is_valid(from_gisco))
 eurostat_geodata_60_2016 <- eurostat_geodata_60_2016 %>%
   arrange(LEVL_CODE, NUTS_ID)
 
+# Arrange names in proper order
+sfcol <- attr(eurostat_geodata_60_2016, "sf_column")
+rest <- c(
+  "id", "LEVL_CODE", "NUTS_ID", "CNTR_CODE", "NAME_LATN",
+  "NUTS_NAME", "MOUNT_TYPE", "URBN_TYPE", "COAST_TYPE",
+  "FID", "geo"
+)
+
+reorder <- intersect(unique(c(rest, sfcol)), names(eurostat_geodata_60_2016))
+eurostat_geodata_60_2016 <- eurostat_geodata_60_2016[, reorder]
+
 usethis::use_data(eurostat_geodata_60_2016, overwrite = TRUE, compress = "xz")
diff --git a/data/eurostat_geodata_60_2016.rda b/data/eurostat_geodata_60_2016.rda
diff --git a/man/eurostat_geodata_60_2016.Rd b/man/eurostat_geodata_60_2016.Rd
diff --git a/man/get_eurostat_geospatial.Rd b/man/get_eurostat_geospatial.Rd