In [1]:
library(tidycensus)
library(tidyr)
library(stringr)
library(dplyr)

# Register the Census API key taken from the CENSUS_API_KEY environment variable.
census_api_key(Sys.getenv("CENSUS_API_KEY"))

# Build the variable lookup table from the 2020 ACS 5-year variable dictionary:
#   name    -> variable_key
#   concept -> dataset   (lower-cased, spaces replaced by "_")
#   label   -> variable  (lower-cased, "!!" and spaces replaced by "_", ":" removed)
# The `geography` column is dropped.
variable_data <- load_variables(2020, "acs5", cache = TRUE) %>%
    rename(variable_key = name, dataset = concept, variable = label) %>%
    mutate(
        dataset = gsub(" ", "_", tolower(dataset)),
        variable = gsub(":", "", gsub("!!| ", "_", tolower(variable)))
    ) %>%
    select(-geography)


getCensusData <- function(table) {
    # Download tract-level 2020 ACS 5-year estimates for Washington state and
    # attach the variable labels from the global `variable_data` lookup.
    #
    # Args:
    #   table: ACS table ID, passed straight to get_acs(table = ...).
    #
    # Returns:
    #   The result of merge()-ing the ACS rows with `variable_data` on
    #   `variable_key`, with `state`, `GEOID` and `variable_key` removed.
    #   `tract` is numeric and `county` is lower-cased without the trailing
    #   " county".
    #
    # Relies on `variable_data` being defined in the calling environment.
    get_acs(
        geography = "tract",
        table = table,
        year = 2020,
        state = "WA",
        survey = "acs5",
        cache_table = TRUE
    ) %>%
        # Split on the comma AND any whitespace after it. Splitting on ","
        # alone leaves a leading blank in `county` and `state`, so county
        # values came out as " adams" rather than "adams".
        separate(NAME, c("tract", "county", "state"), sep = ",\\s*") %>%
        mutate(
            tract = as.double(gsub("Census Tract ", "", tract)),
            county = gsub(" county", "", tolower(county))
        ) %>%
        # get_acs() calls the variable code `variable`; rename it so it lines
        # up with the key column of `variable_data`.
        rename(variable_key = variable) %>%
        merge(variable_data, by = "variable_key") %>%
        select(-state, -GEOID, -variable_key)
}

getUrbanCensusData <- function(table) {
    # Fetch the 2020 ACS 5-year estimates for `table` at urban-area level,
    # keep the rows whose NAME contains "Spokane", and attach the variable
    # labels from the global `variable_data` lookup.
    urban_areas <- get_acs(
        geography = "urban area",
        table = table,
        year = 2020,
        survey = "acs5",
        cache_table = TRUE
    )

    spokane_rows <- filter(urban_areas, str_detect(NAME, "Spokane"))

    # Align the ACS variable code column with the lookup's key column.
    keyed_rows <- rename(spokane_rows, variable_key = variable)
    labelled_rows <- merge(keyed_rows, variable_data, by = "variable_key")

    select(labelled_rows, -GEOID, -NAME, -variable_key)
}




Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


To install your API key for use in future sessions, run this function with `install = TRUE`.



In [2]:
# Normalise a variable dictionary returned by load_variables().
#
# Args:
#   variables: data frame with `name`, `label` and `concept` columns.
#
# Returns:
#   The same rows with the columns renamed to `variable_key`, `variable` and
#   `dataset`; both text columns are lower-cased with spaces replaced by "_",
#   and `variable` additionally has "!!" replaced by "_" and ":" removed.
#   A `geography` column is dropped when present.
cleanVariableData <- function(variables) {
  variables %>%
    rename(variable_key = name, dataset = concept, variable = label) %>%
    mutate(
      dataset = gsub(" ", "_", tolower(dataset)),
      variable = tolower(variable),
      variable = gsub("!!", "_", variable),
      variable = gsub(" ", "_", variable),
      variable = gsub(":", "", variable)
    ) %>%
    # any_of() makes the drop a no-op for dictionaries without `geography`,
    # so both dictionaries always end up with the same columns.
    select(-any_of("geography"))
}

# Detailed tables of the 2020 ACS 5-year survey.
variable_data <- cleanVariableData(
  load_variables(2020, "acs5", cache = FALSE)
)

# Subject tables of the 2020 ACS 5-year survey.
variable_data2 <- cleanVariableData(
  load_variables(2020, "acs5/subject", cache = FALSE)
)

# Single lookup covering both detailed and subject table variables.
variable_data <- bind_rows(variable_data, variable_data2)


In [14]:
# Preview the first rows of variable_data.
head(variable_data)

variable_key,variable,dataset
<chr>,<chr>,<chr>
B01001_001,estimate_total,sex_by_age
B01001_002,estimate_total_male,sex_by_age
B01001_003,estimate_total_male_under_5_years,sex_by_age
B01001_004,estimate_total_male_5_to_9_years,sex_by_age
B01001_005,estimate_total_male_10_to_14_years,sex_by_age
B01001_006,estimate_total_male_15_to_17_years,sex_by_age


In [3]:
# County names handed to get_acs(county = ...) by getCensusData().
counties <- c(
  "adams",
  "asotin",
  "ferry",
  "garfield",
  "lincoln",
  "pend oreille",
  "spokane",
  "stevens",
  "whitman"
)

getCensusData <- function(table) {
  # Return the raw get_acs() result for `table`: tract-level 2020 ACS 5-year
  # data for state "WA", restricted to the counties in the global `counties`
  # vector. The table cache is not used.
  #
  # get_acs() options that were tried and left disabled:
  #   output = "wide", keep_geo_vars = TRUE, geometry = TRUE
  get_acs(
    table = table,
    geography = "tract",
    state = "WA",
    county = counties,
    survey = "acs5",
    year = 2020,
    cache_table = FALSE
  )
}


# Download each ACS table of interest with getCensusData().
# B-prefixed IDs are requested first, followed by the two S-prefixed IDs.
naturalization <- getCensusData("B05011")
nativity <- getCensusData("B05012")
transportation <- getCensusData("B08101")
type_computer <- getCensusData("B28001")
presence_computer <- getCensusData("B28003")
internet_subscription <- getCensusData("B28011")
age <- getCensusData("S0101")
household_income <- getCensusData("S1901")

# Stack every table into one long data frame with a single bind_rows() call
# instead of a chain of pairwise rbind() steps.
census_data <- bind_rows(
  naturalization,
  nativity,
  transportation,
  type_computer,
  presence_computer,
  internet_subscription,
  age,
  household_income
)

# Total number of rows across all downloaded tables.
nrow(census_data)


Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Getting data from the 2016-2020 5-year ACS

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables



In [14]:
# List the column names of census_data.
names(census_data)

In [8]:
# Attach the variable lookup to the ACS rows, then keep one row per
# variable code / label / dataset combination. After the join the ACS code
# stays in `variable`, and the label column coming from variable_data is
# selected under its suffixed name `variable.y`.
x <- census_data %>%
    inner_join(variable_data, by = c("variable" = "variable_key")) %>%
    select(variable, variable.y, dataset) %>%
    distinct()
# unique(x$variable.y)

# Export the lookup (write.csv's default row-name column is left in place).
write.csv(x, file = "../data/variables.csv")