In [1]:
library(tidycensus)
library(tidyr)
library(stringr)
library(dplyr)
library(tigris)
library(sf)

# Register the Census API key for this session. The key is read from the
# CENSUS_API_KEY environment variable so it is never hard-coded in the notebook.
Sys.getenv("CENSUS_API_KEY") %>%
  census_api_key()


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


To enable caching of data, set `options(tigris_use_cache = TRUE)`
in your R script or .Rprofile.

Linking to GEOS 3.10.2, GDAL 3.4.3, PROJ 8.2.0; sf_use_s2() is TRUE

To install your API key for use in future sessions, run this function with `install = TRUE`.



In [2]:
# Scratch copy of the 2020 ACS 5-year variable dictionary, used only for a
# quick look at its structure.
test <- load_variables(year = 2020, dataset = "acs5")

In [3]:
# Peek at the first rows of the raw variable dictionary.
test %>%
  head()

name,label,concept,geography
<chr>,<chr>,<chr>,<chr>
B01001_001,Estimate!!Total:,SEX BY AGE,block group
B01001_002,Estimate!!Total:!!Male:,SEX BY AGE,block group
B01001_003,Estimate!!Total:!!Male:!!Under 5 years,SEX BY AGE,block group
B01001_004,Estimate!!Total:!!Male:!!5 to 9 years,SEX BY AGE,block group
B01001_005,Estimate!!Total:!!Male:!!10 to 14 years,SEX BY AGE,block group
B01001_006,Estimate!!Total:!!Male:!!15 to 17 years,SEX BY AGE,block group


In [2]:
# Load one ACS variable dictionary and normalise it for joining.
#
# Columns are renamed (name -> variable_key, label -> variable,
# concept -> dataset), the two text columns are lower-cased and turned into
# snake_case-like identifiers, and the `geography` column is dropped when it
# is present.
#
# @param acs_dataset Dataset identifier passed to load_variables(), e.g.
#   "acs5" or "acs5/subject".
# @param year Dictionary year passed to load_variables().
# @return A data frame with columns variable_key, variable, dataset.
load_variable_dictionary <- function(acs_dataset, year = 2020) {
  load_variables(year, acs_dataset, cache = FALSE) %>%
    rename(variable_key = name, variable = label, dataset = concept) %>%
    mutate(
      dataset = tolower(dataset),
      dataset = gsub(" ", "_", dataset, fixed = TRUE),
      variable = tolower(variable),
      # "!!" separates label levels and ":" trails each level in the raw labels.
      variable = gsub("!!", "_", variable, fixed = TRUE),
      variable = gsub(" ", "_", variable, fixed = TRUE),
      variable = gsub(":", "", variable, fixed = TRUE)
    ) %>%
    # any_of() keeps this safe whether or not the dictionary carries a
    # `geography` column, so both dictionaries always end up with the same
    # columns before they are stacked.
    select(-any_of("geography"))
}

# Detailed tables (B...) and subject tables (S...) live in separate
# dictionaries; clean each one the same way and stack them.
variable_data <- load_variable_dictionary("acs5")
variable_data2 <- load_variable_dictionary("acs5/subject")

variable_data <- bind_rows(variable_data, variable_data2)

In [3]:
# ACS variable IDs kept for the analysis, grouped by source table.
variables <- c(
  # B05011 (naturalization)
  "B05011_002", "B05011_003",
  # B05012 (nativity)
  "B05012_002", "B05012_003",
  # B08101 (transportation)
  "B08101_049",
  # B28001 (type of computer)
  "B28001_003", "B28001_004", "B28001_005", "B28001_006", "B28001_007",
  # B28003 (presence of computer)
  "B28003_002", "B28003_006",
  # B28011 (internet subscription)
  "B28011_002", "B28011_003", "B28011_004", "B28011_005", "B28011_006",
  "B28011_007", "B28011_008",
  # S0101 (age)
  "S0101_C01_030",
  # S1901 (household income)
  "S1901_C01_012", "S1901_C01_013"
)

# Trim the variable dictionary down to the selected IDs.
variable_data <- filter(variable_data, variable_key %in% variables)

In [4]:
# Washington counties covered by the analysis (lower-case names, passed as
# the `county` argument of the Census download helpers).
counties <- c(
  "adams",
  "asotin",
  "ferry",
  "garfield",
  "lincoln",
  "pend oreille",
  "spokane",
  "stevens",
  "whitman"
)

#' Download tract-level ACS estimates for one table.
#'
#' Thin wrapper around `get_acs()` that fixes the geography to census tracts
#' and disables table caching. Every other setting has a default matching the
#' values this notebook originally hard-coded, so `getCensusData("B05011")`
#' behaves as before.
#'
#' @param table ACS table ID, e.g. "B05011" or "S0101".
#' @param year Survey year passed to `get_acs()`.
#' @param state State passed to `get_acs()`.
#' @param county County names passed to `get_acs()`; defaults to the
#'   top-level `counties` vector, which must exist when the function is called
#'   without this argument.
#' @param survey ACS survey identifier passed to `get_acs()`.
#' @param ... Further arguments forwarded to `get_acs()`, for example
#'   `output = "wide"`, `keep_geo_vars = TRUE` or `geometry = TRUE`.
#' @return The data frame returned by `get_acs()`.
getCensusData <- function(table,
                          year = 2020,
                          state = "WA",
                          county = counties,
                          survey = "acs5",
                          ...) {
  get_acs(
    geography = "tract",
    table = table,
    year = year,
    state = state,
    county = county,
    survey = survey,
    cache_table = FALSE,
    ...
  )
}


# Download each ACS table of interest (one request per table).
naturalization <- getCensusData("B05011")
nativity <- getCensusData("B05012")
transportation <- getCensusData("B08101")
type_computer <- getCensusData("B28001")
presence_computer <- getCensusData("B28003")
internet_subscription <- getCensusData("B28011")
age <- getCensusData("S0101")
household_income <- getCensusData("S1901")

# Stack all tables into one long data frame. A single bind_rows() call
# replaces the chain of pairwise rbind() calls and keeps the same row order.
census_data <- bind_rows(
  naturalization,
  nativity,
  transportation,
  type_computer,
  presence_computer,
  internet_subscription,
  age,
  household_income
)

# Sanity check: row count and a preview of the combined data.
nrow(census_data)
head(census_data)

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Getting data from the 2016-2020 5-year ACS

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables

Getting data from the 2016-2020 5-year ACS

Using the ACS Subject Tables

Using the ACS Subject Tables

Using the ACS Subject Tables



GEOID,NAME,variable,estimate,moe
<chr>,<chr>,<chr>,<dbl>,<dbl>
53001950100,"Census Tract 9501, Adams County, Washington",B05011_001,122,52
53001950100,"Census Tract 9501, Adams County, Washington",B05011_002,28,27
53001950100,"Census Tract 9501, Adams County, Washington",B05011_003,94,43
53001950100,"Census Tract 9501, Adams County, Washington",B05011_004,6,10
53001950100,"Census Tract 9501, Adams County, Washington",B05011_005,0,13
53001950100,"Census Tract 9501, Adams County, Washington",B05011_006,10,10


In [5]:
# Attach the cleaned, human-readable label to every estimate and keep only
# the rows whose variable ID is in the filtered dictionary. The ID column is
# renamed before the join so the label can come through as plain `variable`.
census_data <- census_data %>%
  select(GEOID, estimate, variable_key = variable) %>%
  inner_join(variable_data, by = "variable_key") %>%
  select(GEOID, estimate, variable)

In [6]:
# Tract names for the study counties. The year is pinned to 2020 so the
# tract definitions match the 2016-2020 ACS estimates and do not drift with
# the tigris default year.
# Only GEOID and the tract name are selected; the geometry column printed
# later in the notebook travels along with them.
tract_data <- tracts(
  state = "washington",
  county = counties,
  year = 2020,
  cb = FALSE,
  progress_bar = FALSE
) %>%
  select(GEOID, tract = NAME)

Retrieving data for the year 2021



In [7]:
# Add the tract name to every estimate. The join key is stated explicitly so
# the join cannot silently pick up additional shared columns.
census_data <- inner_join(census_data, tract_data, by = "GEOID")

[1m[22mJoining, by = "GEOID"


In [8]:
# NOTE(review): the head() preview printed after this cell still lists a
# geometry column, so this call does not appear to strip it from the joined
# table; the column is removed later by select(-geometry). Confirm whether
# this step is still needed.
census_data <- census_data %>%
  st_drop_geometry()

In [9]:
# Preview the long-format table after the tract join.
census_data %>%
  head()

GEOID,estimate,variable,tract,geometry
<chr>,<dbl>,<chr>,<chr>,<MULTIPOLYGON [°]>
53001950100,28,estimate_total_not_a_u.s._citizen,9501.0,MULTIPOLYGON (((-118.9799 4...
53001950100,94,estimate_total_naturalized_citizens,9501.0,MULTIPOLYGON (((-118.9799 4...
53001950200,132,estimate_total_not_a_u.s._citizen,9502.0,MULTIPOLYGON (((-118.9826 4...
53001950200,8,estimate_total_naturalized_citizens,9502.0,MULTIPOLYGON (((-118.9826 4...
53001950301,486,estimate_total_not_a_u.s._citizen,9503.01,MULTIPOLYGON (((-119.1765 4...
53001950301,31,estimate_total_naturalized_citizens,9503.01,MULTIPOLYGON (((-119.1765 4...


In [10]:
# Reshape to one row per tract with one column per variable label.
# The geometry list-column is dropped before pivoting (tolerantly, in case it
# is already gone) so it is not used as a row identifier, and the identifying
# columns are declared explicitly.
census_data <- census_data %>%
  select(-any_of("geometry")) %>%
  pivot_wider(
    id_cols = c(GEOID, tract),
    names_from = variable,
    values_from = estimate
  )

In [11]:
# Preview the wide, one-row-per-tract table.
census_data %>%
  head()

GEOID,tract,estimate_total_not_a_u.s._citizen,estimate_total_naturalized_citizens,estimate_total_native,estimate_total_foreign-born,estimate_total_worked_from_home,estimate_total_has_one_or_more_types_of_computing_devices_desktop_or_laptop,estimate_total_has_one_or_more_types_of_computing_devices_desktop_or_laptop_desktop_or_laptop_with_no_other_type_of_computing_device,estimate_total_has_one_or_more_types_of_computing_devices_smartphone,⋯,estimate_total_with_an_internet_subscription,estimate_total_with_an_internet_subscription_dial-up_alone,"estimate_total_with_an_internet_subscription_broadband_such_as_cable,_fiber_optic,_or_dsl",estimate_total_with_an_internet_subscription_satellite_internet_service,estimate_total_with_an_internet_subscription_other_service,estimate_total_internet_access_without_a_subscription,estimate_total_no_internet_access,estimate_total_total_population_selected_age_categories_65_years_and_over,estimate_households_median_income_(dollars),estimate_households_mean_income_(dollars)
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
53001950100,9501.0,28,94,2484,122,82,754,88,867,⋯,910,5,597,96,20,52,137,505,52589,63888
53001950200,9502.0,132,8,1623,140,47,448,21,503,⋯,487,2,223,121,34,9,116,228,59886,61202
53001950301,9503.01,486,31,1064,517,3,242,17,301,⋯,327,0,80,132,5,11,163,259,39928,53032
53001950302,9503.02,571,194,1669,765,26,305,0,554,⋯,515,0,340,30,8,18,88,258,58884,70361
53001950303,9503.03,545,128,1841,673,110,396,6,581,⋯,557,0,436,39,16,9,66,271,50915,74863
53001950400,9504.0,729,141,2274,870,23,455,46,639,⋯,685,0,488,48,0,23,168,308,63750,69882


In [12]:
# List the generated column names (needed to write the rename mapping).
colnames(census_data)

In [13]:
# Replace the long label-derived column names with short analysis names.
# The mapping is written as new_name = "old_name"; all_of() makes the rename
# fail loudly if any expected column is missing.
census_data <- census_data %>%
  rename(all_of(c(
    not_citizen = "estimate_total_not_a_u.s._citizen",
    naturalized_citizen = "estimate_total_naturalized_citizens",
    native_citizen = "estimate_total_native",
    foreign_born = "estimate_total_foreign-born",
    work_from_home = "estimate_total_worked_from_home",
    desktop_or_laptop = "estimate_total_has_one_or_more_types_of_computing_devices_desktop_or_laptop",
    desktop_or_laptop_only = "estimate_total_has_one_or_more_types_of_computing_devices_desktop_or_laptop_desktop_or_laptop_with_no_other_type_of_computing_device",
    smartphone = "estimate_total_has_one_or_more_types_of_computing_devices_smartphone",
    smartphone_only = "estimate_total_has_one_or_more_types_of_computing_devices_smartphone_smartphone_with_no_other_type_of_computing_device",
    tablet_or_portable = "estimate_total_has_one_or_more_types_of_computing_devices_tablet_or_other_portable_wireless_computer",
    has_computer = "estimate_total_has_a_computer",
    no_computer = "estimate_total_no_computer",
    internet_subscription = "estimate_total_with_an_internet_subscription",
    dial_up = "estimate_total_with_an_internet_subscription_dial-up_alone",
    broadband = "estimate_total_with_an_internet_subscription_broadband_such_as_cable,_fiber_optic,_or_dsl",
    satellite = "estimate_total_with_an_internet_subscription_satellite_internet_service",
    other_internet_service = "estimate_total_with_an_internet_subscription_other_service",
    access_with_no_subscription = "estimate_total_internet_access_without_a_subscription",
    no_internet_access = "estimate_total_no_internet_access",
    sixty_five_and_older = "estimate_total_total_population_selected_age_categories_65_years_and_over",
    median_income = "estimate_households_median_income_(dollars)",
    mean_income = "estimate_households_mean_income_(dollars)"
  )))

In [14]:
# Preview the table with its shortened column names.
census_data %>%
  head()

GEOID,tract,not_citizen,naturalized_citizen,native_citizen,foreign_born,work_from_home,desktop_or_laptop,desktop_or_laptop_only,smartphone,⋯,internet_subscription,dial_up,broadband,satellite,other_internet_service,access_with_no_subscription,no_internet_access,sixty_five_and_older,median_income,mean_income
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
53001950100,9501.0,28,94,2484,122,82,754,88,867,⋯,910,5,597,96,20,52,137,505,52589,63888
53001950200,9502.0,132,8,1623,140,47,448,21,503,⋯,487,2,223,121,34,9,116,228,59886,61202
53001950301,9503.01,486,31,1064,517,3,242,17,301,⋯,327,0,80,132,5,11,163,259,39928,53032
53001950302,9503.02,571,194,1669,765,26,305,0,554,⋯,515,0,340,30,8,18,88,258,58884,70361
53001950303,9503.03,545,128,1841,673,110,396,6,581,⋯,557,0,436,39,16,9,66,271,50915,74863
53001950400,9504.0,729,141,2274,870,23,455,46,639,⋯,685,0,488,48,0,23,168,308,63750,69882


In [19]:
# Fill missing tract median incomes with the mean of the observed tract
# medians, so every tract has a value.
census_data <- census_data %>%
  mutate(
    median_income = replace(
      median_income,
      is.na(median_income),
      mean(median_income, na.rm = TRUE)
    )
  )


In [20]:
# Per-column summary statistics of the final tract-level table.
census_data %>%
  summary()

    GEOID              tract            not_citizen   naturalized_citizen
 Length:183         Length:183         Min.   :   0   Min.   :  0.0      
 Class :character   Class :character   1st Qu.:  23   1st Qu.: 37.5      
 Mode  :character   Mode  :character   Median :  51   Median : 81.0      
                                       Mean   : 116   Mean   :106.6      
                                       3rd Qu.: 120   3rd Qu.:151.0      
                                       Max.   :1358   Max.   :511.0      
 native_citizen  foreign_born    work_from_home  desktop_or_laptop
 Min.   :1036   Min.   :   0.0   Min.   :  0.0   Min.   : 186.0   
 1st Qu.:2545   1st Qu.:  82.5   1st Qu.: 55.0   1st Qu.: 820.5   
 Median :3326   Median : 141.0   Median :116.0   Median :1141.0   
 Mean   :3519   Mean   : 222.6   Mean   :132.3   Mean   :1201.2   
 3rd Qu.:4216   3rd Qu.: 280.0   3rd Qu.:175.0   3rd Qu.:1497.5   
 Max.   :7437   Max.   :1714.0   Max.   :518.0   Max.   :2843.0   
 desktop_or_l

In [21]:
# write.csv(census_data, "../data/combined_data.csv")