In [1]:
library(terra)
library(geodata)
library(dplyr)
library(tidyr)
library(eurostat)
library(sf)

terra 1.7.78


Attaching package: ‘dplyr’


The following objects are masked from ‘package:terra’:

    intersect, union


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘tidyr’


The following object is masked from ‘package:terra’:

    extract


Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE



In [2]:
# Import the data from meta
path_data_fb <- "../../data/task_44/meta/gadm1_nuts3_counties-gadm1_nuts3_counties - FB Social Connectedness Index - October 2021.tsv"
# df <- read.table(file = path_data_fb, sep = "\t", header = TRUE)

In [3]:
# tail(df)

In [4]:
# Import the table with the levels of the counties
regions <- read.table(file = "../../data/task_44/meta/gadm1_nuts3_counties_levels.csv", sep = ',', header = TRUE)
regions <- regions |> rename(nodeCode = key)

tail(regions)

Unnamed: 0_level_0,nodeCode,level
Unnamed: 0_level_1,<chr>,<chr>
8003,LKA24_318,gadm2
8004,LKA24_319,gadm2
8005,LKA25_320,gadm2
8006,LKA25_321,gadm2
8007,LKA25_322,gadm2
8008,LKA25_323,gadm2


In [5]:
get_nuts3_country <- function(nuts3_code, nuts3_shapefile){
  nuts3_region <- nuts3_shapefile[nuts3_shapefile$id == nuts3_code, ]
  country <- countrycode::countrycode(nuts3_region$CNTR_CODE, origin = "eurostat", destination = "iso3c")
  return(country)
}

In [6]:
# Attach the country to the levels table
regions$country_ISO3 <- sub("^([A-Z]+).*", "\\1", regions$nodeCode)

# Overwrite nuts3 countries with ISO 3 codes
nuts3_shapefile <- eurostat::get_eurostat_geospatial(nuts_level = 3, year = 2016, output_class = "sf")
nuts3_keys <- regions[regions$level == "nuts3", ]$nodeCode
nuts3_countries <- sapply(nuts3_keys, get_nuts3_country, nuts3_shapefile = nuts3_shapefile)
regions[regions$level == "nuts3", ]$country_ISO3 <- nuts3_countries

tail(regions)

Extracting data from eurostat::eurostat_geodata_60_2016



Unnamed: 0_level_0,nodeCode,level,country_ISO3
Unnamed: 0_level_1,<chr>,<chr>,<chr>
8003,LKA24_318,gadm2,LKA
8004,LKA24_319,gadm2,LKA
8005,LKA25_320,gadm2,LKA
8006,LKA25_321,gadm2,LKA
8007,LKA25_322,gadm2,LKA
8008,LKA25_323,gadm2,LKA


In [7]:
regions[regions$level == "gadm1", ]

Unnamed: 0_level_0,nodeCode,level,country_ISO3
Unnamed: 0_level_1,<chr>,<chr>,<chr>
49,DZA1,gadm1,DZA
50,DZA2,gadm1,DZA
51,DZA3,gadm1,DZA
52,DZA4,gadm1,DZA
53,DZA5,gadm1,DZA
54,DZA6,gadm1,DZA
55,DZA7,gadm1,DZA
56,DZA8,gadm1,DZA
57,DZA9,gadm1,DZA
58,DZA10,gadm1,DZA


In [8]:
# Get the GADM1 data
gadm1_regions <- regions[regions$level == "gadm1", ]
gadm1_keys <- unique(gadm1_regions$country_ISO3)
gadm1_data <- gadm(gadm1_keys, level = 1, path = "../../data/task_44/", version="3.6")
gadm1_data$nodeCode <- gsub("\\.", "", sub("_1$", "", gadm1_data$GID_1))
gadm1_data <- gadm1_data[match(gadm1_regions$nodeCode, gadm1_data$nodeCode), ]

gadm1_data <- gadm1_data[, (names(gadm1_data) %in% c("nodeCode", "NAME_1"))]

centroids <- terra::centroids(gadm1_data)
coords <- terra::crds(centroids)
gadm1_data$longitude <- coords[, 1]
gadm1_data$latitude <- coords[, 2]

rm(centroids, coords, gadm1_regions, gadm1_keys)

In [9]:
# Merge the data
gadm1_data <- as.data.frame(gadm1_data)
regions <- regions |> 
  left_join(gadm1_data, by = c("nodeCode" = "nodeCode")) |>
  rename(nodeLabel = NAME_1)

rm(gadm1_data)

In [10]:
# Get the GADM2 data
gadm2_regions <- regions[regions$level == "gadm2", ]
gadm2_keys <- unique(gadm2_regions$country_ISO3)
gadm2_data <- gadm(gadm2_keys, level = 2, path = "../../data/task_44/", version="3.6")
gadm2_data$nodeCode <- gsub("(\\w+)\\.(\\d+)\\.(\\d+)_1$", "\\1\\2_\\3", gadm2_data$GID_2)

gadm2_data <- gadm2_data[, (names(gadm2_data) %in% c("nodeCode", "NAME_2"))]

centroids <- terra::centroids(gadm2_data)
coords <- terra::crds(centroids)
gadm2_data$longitude <- coords[, 1]
gadm2_data$latitude <- coords[, 2]

rm(centroids, coords, gadm2_regions, gadm2_keys)

In [11]:
# Convert the data to a data frame
gadm2_data <- as.data.frame(gadm2_data) |> rename(nodeLabel = NAME_2)

# Change the nodeCode to the format of the GADM2 data
gadm2_data <- gadm2_data |>
  separate(nodeCode, into = c("prefix", "n2"), sep = c("_"), remove = TRUE) |>
  separate(prefix, into = c("prefix", "n1"), sep = c("(?<=^[A-Z]{3})"), remove = TRUE) |>
  group_by(prefix) |>
  mutate(sequential_number = row_number()) |>
  ungroup() |>
  mutate(nodeCode = paste0(prefix, n1, "_", sequential_number)) |>
  select(c("nodeCode", "nodeLabel", "longitude", "latitude"))

In [12]:
# Merge the data
regions <- regions |> 
  left_join(gadm2_data, by = c("nodeCode" = "nodeCode"), suffix = c("_x", "_y")) |>
  mutate(nodeLabel = coalesce(nodeLabel_x, nodeLabel_y)) |>
  select(-nodeLabel_x, -nodeLabel_y) |>
  mutate(longitude = coalesce(longitude_x, longitude_y)) |>
  mutate(latitude = coalesce(latitude_x, latitude_y)) |>
  select(-longitude_x, -longitude_y, -latitude_x, -latitude_y)

rm(gadm2_data)

In [56]:
# Get the nuts3 data
nuts3_regions <- regions[regions$level == "nuts3", ]
nuts3_keys <- unique(nuts3_regions$nodeCode)
nuts3_shapefile <- eurostat::get_eurostat_geospatial(nuts_level = 3, year = 2016, output_class = "sf")
nuts3_data <- nuts3_shapefile[match(nuts3_keys, nuts3_shapefile$id), ]

centroids <- st_centroid(nuts3_data$geometry)
coords <- st_coordinates(centroids)
nuts3_data$longitude <- coords[, 1]
nuts3_data$latitude <- coords[, 2]

rm(centroids, coords, nuts3_regions, nuts3_keys)

Extracting data from eurostat::eurostat_geodata_60_2016



In [57]:
# Convert the data to a data frame
nuts3_data <- as.data.frame(nuts3_data) 

nuts3_data <- nuts3_data |>
  rename(nodeLabel = NAME_LATN) |>
  rename(nodeCode = id) |>
  select(c("nodeCode", "nodeLabel", "longitude", "latitude"))

In [58]:
nuts3_data

Unnamed: 0_level_0,nodeCode,nodeLabel,longitude,latitude
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>
818,HR043,Krapinsko-zagorska županija,15.920217,46.07827
36,AT314,Steyr-Kirchdorf,14.324103,47.85302
37,AT315,Traunviertel,13.690190,47.82881
322,DE600,Hamburg,9.506859,53.67060
323,DE711,"Darmstadt, Kreisfreie Stadt",8.680591,49.90291
324,DE712,"Frankfurt am Main, Kreisfreie Stadt",8.660673,50.11875
325,DE713,"Offenbach am Main, Kreisfreie Stadt",8.780571,50.09518
326,DE714,"Wiesbaden, Kreisfreie Stadt",8.257287,50.07088
327,DE715,Bergstraße,8.665372,49.61520
91,BE353,Arr. Philippeville,4.568933,50.16439


In [62]:
# # Merge the data
regions_prova <- regions |> 
  left_join(nuts3_data, by = c("nodeCode" = "nodeCode"), suffix=c("_x", "_y"))
  # mutate(nodeLabel = coalesce(nodeLabel_x, nodeLabel_y)) |>
  # select(-nodeLabel_x, -nodeLabel_y) |>
  # mutate(longitude = coalesce(longitude_x, longitude_y)) |>
  # mutate(latitude = coalesce(latitude_x, latitude_y)) |>
  # select(-longitude_x, -longitude_y, -latitude_x, -latitude_y)

ERROR: Error in eval(expr, envir, enclos): object 'nodeLabel_x' not found
