In [1]:
library(tidycensus)
library(tidyverse)
library(tigris)
library(sf)

# Hand the Census API key to tidycensus for this session. The key is read from
# the CENSUS_API_KEY environment variable so it never appears in the notebook.
# NOTE(review): Sys.getenv() returns "" when the variable is unset — confirm
# the variable is defined before running the download cells.
census_api_key(Sys.getenv("CENSUS_API_KEY"))

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.5.0
✔ readr   2.1.2     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

To enable caching of data, set `options(tigris_use_cache = TRUE)`
in your R script or .Rprofile.

Linking to GEOS 3.10.2, GDAL 3.4.3, PROJ 8.2.0; sf_use_s2() is TRUE

To install your API key for use in future sessions, run this function with `install = TRUE`.



In [2]:
# Raw variable dictionary for the 2021 ACS 5-year detailed tables, kept
# unmodified for browsing (columns: name, label, concept, geography).
census_variables <- load_variables(
  year = 2021,
  dataset = "acs5",
  cache = TRUE
)
census_variables

name,label,concept,geography
<chr>,<chr>,<chr>,<chr>
B01003_001,Estimate!!Total,TOTAL POPULATION,


In [3]:
# Turn a tidycensus variable dictionary into a three-column lookup table.
#
# Keeps only the columns used downstream and renames them:
#   name    -> variable_key  (ACS variable code, e.g. "B01003_001")
#   label   -> variable      (lower-cased, "!!" and spaces -> "_", ":" removed)
#   concept -> dataset       (lower-cased, spaces -> "_")
# Any other column (such as `geography` in the detailed-table dictionary) is
# dropped, so dictionaries with different extra columns can be stacked safely.
clean_acs_variables <- function(acs_variables) {
  acs_variables %>%
    select(variable_key = name, variable = label, dataset = concept) %>%
    mutate(
      dataset = gsub(" ", "_", tolower(dataset), fixed = TRUE),
      variable = tolower(variable),
      variable = gsub("!!", "_", variable, fixed = TRUE),
      variable = gsub(" ", "_", variable, fixed = TRUE),
      variable = gsub(":", "", variable, fixed = TRUE)
    )
}

# Detailed tables (B-prefixed codes).
variable_data <- clean_acs_variables(
  load_variables(2021, "acs5", cache = TRUE)
)

# Subject tables (S-prefixed codes).
variable_data2 <- clean_acs_variables(
  load_variables(2021, "acs5/subject", cache = TRUE)
)

# Single lookup covering both dictionaries; detailed-table rows come first.
variable_data <- bind_rows(variable_data, variable_data2)

In [4]:
# ACS variable codes retained for the analysis. The trailing comments give the
# cleaned label each code resolves to in `variable_data`.
variables <- c(
  "B08101_049",    # estimate_total_worked_from_home
  "B28003_002",    # estimate_total_has_a_computer
  "B28003_006",    # estimate_total_no_computer
  "B28011_002",    # estimate_total_with_an_internet_subscription
  "B28011_008",    # estimate_total_no_internet_access
  "S0101_C01_030", # ..._selected_age_categories_65_years_and_over
  "S1901_C01_012", # estimate_households_median_income_(dollars)
  "B01003_001"     # estimate_total (total population)
)

# Shrink the lookup table to just those codes, then display it.
variable_data <- filter(variable_data, variable_key %in% variables)

variable_data

variable_key,variable,dataset
<chr>,<chr>,<chr>
B01003_001,estimate_total,total_population
B08101_049,estimate_total_worked_from_home,means_of_transportation_to_work_by_age
B28003_002,estimate_total_has_a_computer,presence_of_a_computer_and_type_of_internet_subscription_in_household
B28003_006,estimate_total_no_computer,presence_of_a_computer_and_type_of_internet_subscription_in_household
B28011_002,estimate_total_with_an_internet_subscription,internet_subscriptions_in_household
B28011_008,estimate_total_no_internet_access,internet_subscriptions_in_household
S0101_C01_030,estimate_total_total_population_selected_age_categories_65_years_and_over,age_and_sex
S1901_C01_012,estimate_households_median_income_(dollars),income_in_the_past_12_months_(in_2021_inflation-adjusted_dollars)


In [32]:
# Washington counties covered by the study; the names are passed as-is to the
# `county` argument of the tidycensus / tigris download calls below.
counties <- c(
  "adams",
  "asotin",
  "ferry",
  "garfield",
  "lincoln",
  "pend oreille",
  "spokane",
  "stevens",
  "whitman"
)

#' Download one ACS 5-year table at census-tract level.
#'
#' Thin wrapper around `get_acs()` that fixes the geography to tracts and the
#' survey to "acs5", and caches the table's variable list between calls.
#'
#' @param table ACS table ID, e.g. "B01003", or a subject table such as "S0101".
#' @param county County names to query. Defaults to the notebook-level
#'   `counties` vector (evaluated lazily, when the function is called).
#' @param year End year of the 5-year ACS sample.
#' @param state State to query.
#' @param ... Further arguments forwarded unchanged to `get_acs()`
#'   (for example `geometry = TRUE` or `output = "wide"`).
#' @return Whatever `get_acs()` returns; with the defaults this is a long data
#'   frame with columns GEOID, NAME, variable, estimate and moe.
getCensusData <- function(table, county = counties, year = 2021,
                          state = "WA", ...) {
  # No trailing comma after the last argument: an empty argument would be
  # captured by get_acs()'s `...`.
  get_acs(
    geography = "tract",
    table = table,
    year = year,
    state = state,
    survey = "acs5",
    cache_table = TRUE,
    county = county,
    show_call = FALSE,
    ...
  )
}

# One request per ACS table. Each result is kept under its own name so the
# individual tables stay available for inspection.
population <- getCensusData("B01003")            # total population
transportation <- getCensusData("B08101")        # means of transportation to work
# NOTE(review): no B28001 code appears in the `variables` list defined in the
# earlier cell, so these rows look unused once the data is joined to
# `variable_data` — confirm whether this download is still needed.
type_computer <- getCensusData("B28001")
presence_computer <- getCensusData("B28003")     # computer / internet presence
internet_subscription <- getCensusData("B28011") # internet subscriptions
age <- getCensusData("S0101")                    # age and sex (subject table)
household_income <- getCensusData("S1901")       # income (subject table)

# Stack every table into one long data frame, in download order.
census_data <- bind_rows(
  population,
  transportation,
  type_computer,
  presence_computer,
  internet_subscription,
  age,
  household_income
)

nrow(census_data)
head(census_data)

Getting data from the 2017-2021 5-year ACS

Loading ACS5 variables for 2021 from table B01003 and caching the dataset for faster future access.

Getting data from the 2017-2021 5-year ACS

Loading ACS5 variables for 2021 from table B08101 and caching the dataset for faster future access.

Getting data from the 2017-2021 5-year ACS

Loading ACS5 variables for 2021 from table B28001 and caching the dataset for faster future access.

Getting data from the 2017-2021 5-year ACS

Loading ACS5 variables for 2021 from table B28003 and caching the dataset for faster future access.

Getting data from the 2017-2021 5-year ACS

Loading ACS5 variables for 2021 from table B28011 and caching the dataset for faster future access.

Getting data from the 2017-2021 5-year ACS

Loading ACS5/SUBJECT variables for 2021 from table S0101 and caching the dataset for faster future access.

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Usi

GEOID,NAME,variable,estimate,moe
<chr>,<chr>,<chr>,<dbl>,<dbl>
53001950100,"Census Tract 9501, Adams County, Washington",B01003_001,2577,292
53001950200,"Census Tract 9502, Adams County, Washington",B01003_001,1794,456
53001950301,"Census Tract 9503.01, Adams County, Washington",B01003_001,1790,388
53001950302,"Census Tract 9503.02, Adams County, Washington",B01003_001,2738,544
53001950303,"Census Tract 9503.03, Adams County, Washington",B01003_001,2555,462
53001950400,"Census Tract 9504, Adams County, Washington",B01003_001,3100,432


In [33]:
# Attach the cleaned, human-readable label to every estimate. The label column
# is renamed before the join so the result does not depend on dplyr's
# automatic ".y" suffix; rows whose code is not in `variable_data` are dropped.
census_data <- census_data %>%
  inner_join(
    select(variable_data, variable_key, variable_label = variable),
    by = c("variable" = "variable_key")
  ) %>%
  select(GEOID, estimate, variable = variable_label)

# Tract boundaries for the study counties; only the ID and the tract name are
# needed here (the sf geometry column stays attached to `tract_data`).
tract_data <- tracts(
  state = "washington",
  county = counties,
  progress_bar = FALSE,
  cb = FALSE
) %>%
  select(GEOID, tract = NAME)

# Join on the tract ID using a geometry-free copy of the lookup, so the result
# stays a plain table and no geometry column has to be removed afterwards.
census_data <- census_data %>%
  inner_join(st_drop_geometry(tract_data), by = "GEOID")

# Long -> wide: one row per tract, one column per variable label.
census_data <- census_data %>%
  pivot_wider(
    id_cols = c(GEOID, tract),
    names_from = variable,
    values_from = estimate
  )

head(census_data)

Retrieving data for the year 2021

[1m[22mJoining, by = "GEOID"


GEOID,tract,estimate_total,estimate_total_worked_from_home,estimate_total_has_a_computer,estimate_total_no_computer,estimate_total_with_an_internet_subscription,estimate_total_no_internet_access,estimate_total_total_population_selected_age_categories_65_years_and_over,estimate_households_median_income_(dollars)
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
53001950100,9501.0,2577,96,943,97,855,136,500,56458
53001950200,9502.0,1794,39,497,67,436,121,233,62794
53001950301,9503.01,1790,0,367,202,412,141,251,48083
53001950302,9503.02,2738,3,684,69,620,123,309,47663
53001950303,9503.03,2555,110,658,24,583,90,270,53665
53001950400,9504.0,3100,26,799,96,760,116,305,67188


In [35]:
# Replace the long ACS-derived labels with short analysis names.
# The vector is written as new_name = "old_name".
census_data <- rename(
  census_data,
  all_of(c(
    total_population = "estimate_total",
    work_from_home = "estimate_total_worked_from_home",
    has_computer = "estimate_total_has_a_computer",
    no_computer = "estimate_total_no_computer",
    with_internet = "estimate_total_with_an_internet_subscription",
    no_internet_access = "estimate_total_no_internet_access",
    sixty_five_and_older =
      "estimate_total_total_population_selected_age_categories_65_years_and_over",
    median_income = "estimate_households_median_income_(dollars)"
  ))
)

In [36]:
# Preview the first rows to confirm the short column names were applied.
head(census_data)

GEOID,tract,total_population,work_from_home,has_computer,no_computer,with_internet,no_internet_access,sixty_five_and_older,median_income
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
53001950100,9501.0,2577,96,943,97,855,136,500,56458
53001950200,9502.0,1794,39,497,67,436,121,233,62794
53001950301,9503.01,1790,0,367,202,412,141,251,48083
53001950302,9503.02,2738,3,684,69,620,123,309,47663
53001950303,9503.03,2555,110,658,24,583,90,270,53665
53001950400,9504.0,3100,26,799,96,760,116,305,67188


In [37]:
# Per-column summary statistics of the wide table (one row per tract).
summary(census_data)

    GEOID              tract           total_population work_from_home 
 Length:183         Length:183         Min.   :1235     Min.   :  0.0  
 Class :character   Class :character   1st Qu.:2740     1st Qu.: 91.0  
 Mode  :character   Mode  :character   Median :3553     Median :142.0  
                                       Mean   :3836     Mean   :172.4  
                                       3rd Qu.:4640     3rd Qu.:227.5  
                                       Max.   :8591     Max.   :593.0  
                                                                       
  has_computer   no_computer     with_internet  no_internet_access
 Min.   : 207   Min.   :  0.00   Min.   : 140   Min.   :  0.0     
 1st Qu.:1005   1st Qu.: 37.50   1st Qu.: 920   1st Qu.: 66.5     
 Median :1315   Median : 77.00   Median :1234   Median :121.0     
 Mean   :1413   Mean   : 91.55   Mean   :1328   Mean   :136.3     
 3rd Qu.:1826   3rd Qu.:122.50   3rd Qu.:1694   3rd Qu.:200.5     
 Max.   :3611   Max.  

In [38]:
# Mean imputation: tracts with a missing median household income receive the
# average of the tracts that do report one.
census_data$median_income <- with(
  census_data,
  replace(
    median_income,
    is.na(median_income),
    mean(median_income, na.rm = TRUE)
  )
)


In [39]:
# Re-run the summary after filling the missing median_income values.
summary(census_data)

    GEOID              tract           total_population work_from_home 
 Length:183         Length:183         Min.   :1235     Min.   :  0.0  
 Class :character   Class :character   1st Qu.:2740     1st Qu.: 91.0  
 Mode  :character   Mode  :character   Median :3553     Median :142.0  
                                       Mean   :3836     Mean   :172.4  
                                       3rd Qu.:4640     3rd Qu.:227.5  
                                       Max.   :8591     Max.   :593.0  
  has_computer   no_computer     with_internet  no_internet_access
 Min.   : 207   Min.   :  0.00   Min.   : 140   Min.   :  0.0     
 1st Qu.:1005   1st Qu.: 37.50   1st Qu.: 920   1st Qu.: 66.5     
 Median :1315   Median : 77.00   Median :1234   Median :121.0     
 Mean   :1413   Mean   : 91.55   Mean   :1328   Mean   :136.3     
 3rd Qu.:1826   3rd Qu.:122.50   3rd Qu.:1694   3rd Qu.:200.5     
 Max.   :3611   Max.   :371.00   Max.   :3254   Max.   :457.0     
 sixty_five_and_older media

In [40]:
# Persist the cleaned tract-level table, relative to the notebook's directory.
# NOTE(review): write.csv() writes row names by default, so the file gets a
# leading unnamed index column — confirm downstream readers expect it.
write.csv(census_data, "../data/census_data.csv")