Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

I think both the article and the regional functions can be released #173

Merged
merged 2 commits into from
Feb 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions R/harmonize_geo_code.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,20 +137,22 @@ harmonize_geo_code <- function (dat) {

tmp2 <- rbind ( tmp_s, tmp_a1, tmp_a2 )

not_found_geo <- unique(dat$geo[! dat$geo %in% tmp$geo ])
not_found_geo <- unique(dat$geo[! dat$geo %in% tmp2$geo ])
not_eu_regions <- not_found_geo[! substr(not_found_geo,1,2) %in% eu_countries$code]

## Checking if there are unmatched EU regions-------------------------

not_found_eu_regions <- not_found_geo[ substr(not_found_geo,1,2) %in% eu_countries$code]

if ( length(not_found_eu_regions)>0 ) {
warning ( "The following geo labels were not found in the correspondence table:")
message ( paste(not_found_eu_regions, collapse = ", "))
if ( any(geo%in% c("SI02", "SI01", "EL1", "EL2"))) {
if ( any( not_found_eu_regions %in% c("SI02", "SI01", "EL1", "EL2"))) {
message ( "Some or all of these regions use codes earlier than NUTS2013 definition.")
}
}

if ( any(grepl("XX", not_found_eu_regions ))) {
message ( "Some or all of these regions use data that cannot be connected to a regional unit.")
}

tmp_not_found <- dat %>%
filter ( geo %in% not_found_eu_regions ) %>%
mutate ( nuts_level = nchar(geo)-2,
Expand All @@ -164,25 +166,39 @@ harmonize_geo_code <- function (dat) {
geo == "EL2" ~ "EL6",
geo == "SI01" ~ "SI03",
geo == "SI02" ~ "SI04",
substr(geo,3,4) == "XX" ~ geo,
TRUE ~ NA_character_ )) %>%
mutate ( code16 = case_when (
geo == "EL1" ~ "EL5",
geo == "EL2" ~ "EL6",
geo == "SI01" ~ "SI03",
geo == "SI02" ~ "SI04",
substr(geo,3,4) == "XX" ~ geo,
TRUE ~ NA_character_) ) %>%
mutate ( name = dplyr::case_when (
geo == "SI01" ~ "Vzhodna Slovenija",
geo == "SI02" ~ "Zahodna Slovenija",
geo == "EL1" ~ "Voreia Ellada",
geo == "EL2" ~ "Kentriki Ellada",
substr(geo,3,4) == "XX" ~ "data not related to any territorial unit",
TRUE ~ NA_character_)) %>%
mutate ( change = dplyr::case_when (
geo %in% c("EL1", "EL2") ~ "boundary shift in 2013 (NUTS2010 coding)",
geo %in% c("SI01", "SI02") ~ "boundary shift in 2013 (NUTS2010 coding)",
substr(geo,3,4) == "XX" ~ "data not related to any territorial unit",
TRUE ~ NA_character_ )) %>%
mutate ( resolution = "You should control these changes and see how they affect your data.")

still_not_found_vector <- tmp_not_found %>%
filter ( is.na(code16)) %>%
select (geo) %>% unlist () %>%
unique()

if ( length(still_not_found_vector)>0) {
warning ( "The following geo labels were not found in the correspondence table:",
paste(still_not_found_vector, collapse = ", "), ".")
}

tmp2 <- rbind ( tmp2, tmp_not_found )

}
Expand All @@ -204,6 +220,9 @@ harmonize_geo_code <- function (dat) {

## Check if all original rows are handled correctly ------------------
if ( length(dat$geo [! dat$geo %in% tmp3$geo ])>0 ) {

unique ( dat$geo [! dat$geo %in% tmp3$geo ])

message (tmp3 %>% anti_join (dat))
message (dat %>% anti_join (tmp3))
stop ("Not all original rows were checked.")
Expand All @@ -216,7 +235,7 @@ harmonize_geo_code <- function (dat) {
if ( any(tmp3$change == 'not in EU - not controlled') ) {

not_EU_country_vector <- tmp3 %>%
filter ( tmp2$change == 'not in EU - not controlled' ) %>%
filter ( change == 'not in EU - not controlled' ) %>%
select ( geo )

not_eu_observations <- nrow (not_EU_country_vector)
Expand All @@ -229,7 +248,7 @@ harmonize_geo_code <- function (dat) {
"In this data frame not controlled countries: ",
paste (not_EU_country_vector,
collapse = ", "), " \n",
"with alltogether ", not_eu_observations, " observations/rows.")
"with altogether ", not_eu_observations, " observations/rows.")
}

## Reorder columns for readability -------------------------------
Expand Down
Loading