In [1]:
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

%matplotlib inline  
from matplotlib import rcParams
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings("ignore") # Ignore all warnings
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [2]:
%%javascript
// Disable auto-scrolling
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
%%R 

library(tidycensus)
library(tidyverse)
library(sf)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


Linking to GEOS 3.12.1, GDAL 3.8.3, PROJ 9.3.1; sf_use_s2() is TRUE


In [4]:
!touch .env

In [5]:
import os
from census import Census
import dotenv

dotenv.load_dotenv()

key = os.getenv('APIKEY')

c = Census(key)

In [6]:
%%R 

# load variables for table B05005
load_variables(2020, "acs5", cache = TRUE) %>% 
    # check if name contains B05005
    filter(str_detect(name, "B05006")) %>% 
    # but does not contain "PR"
    filter(!str_detect(name, "PR")) 

# A tibble: 168 × 4
   name       label                                            concept geography
   <chr>      <chr>                                            <chr>   <chr>    
 1 B05006_001 Estimate!!Total:                                 PLACE … tract    
 2 B05006_002 Estimate!!Total:!!Europe:                        PLACE … tract    
 3 B05006_003 Estimate!!Total:!!Europe:!!Northern Europe:      PLACE … tract    
 4 B05006_004 Estimate!!Total:!!Europe:!!Northern Europe:!!De… PLACE … tract    
 5 B05006_005 Estimate!!Total:!!Europe:!!Northern Europe:!!Ir… PLACE … tract    
 6 B05006_006 Estimate!!Total:!!Europe:!!Northern Europe:!!No… PLACE … tract    
 7 B05006_007 Estimate!!Total:!!Europe:!!Northern Europe:!!Sw… PLACE … tract    
 8 B05006_008 Estimate!!Total:!!Europe:!!Northern Europe:!!Un… PLACE … tract    
 9 B05006_009 Estimate!!Total:!!Europe:!!Northern Europe:!!Un… PLACE … tract    
10 B05006_010 Estimate!!Total:!!Europe:!!Northern Europe:!!Un… PLACE … tract    
# ℹ 158 

In [7]:
%%R

# load variables for table B05006
b05005_vars <- load_variables(2020, "acs5", cache = TRUE) %>%
    # check if name contains B05006
    filter(str_detect(name, "B05006")) %>%
    # but does not contain "PR"
    filter(!str_detect(name, "PR"))

# Print all rows of the filtered data
print(b05005_vars, n = Inf)

# A tibble: 168 × 4
    name       label                                           concept geography
    <chr>      <chr>                                           <chr>   <chr>    
  1 B05006_001 Estimate!!Total:                                PLACE … tract    
  2 B05006_002 Estimate!!Total:!!Europe:                       PLACE … tract    
  3 B05006_003 Estimate!!Total:!!Europe:!!Northern Europe:     PLACE … tract    
  4 B05006_004 Estimate!!Total:!!Europe:!!Northern Europe:!!D… PLACE … tract    
  5 B05006_005 Estimate!!Total:!!Europe:!!Northern Europe:!!I… PLACE … tract    
  6 B05006_006 Estimate!!Total:!!Europe:!!Northern Europe:!!N… PLACE … tract    
  7 B05006_007 Estimate!!Total:!!Europe:!!Northern Europe:!!S… PLACE … tract    
  8 B05006_008 Estimate!!Total:!!Europe:!!Northern Europe:!!U… PLACE … tract    
  9 B05006_009 Estimate!!Total:!!Europe:!!Northern Europe:!!U… PLACE … tract    
 10 B05006_010 Estimate!!Total:!!Europe:!!Northern Europe:!!U… PLACE … tract    
 11 B050

In [8]:
%%R -o b05005_vars

# Set the width option to a large number
options(width = 1000)

# load variables for table B05006
b05005_vars <- load_variables(2020, "acs5", cache = TRUE) %>%
    # check if name contains B05006
    filter(str_detect(name, "B05006")) %>%
    # but does not contain "PR"
    filter(!str_detect(name, "PR"))

# Print all rows of the filtered data
print(b05005_vars, n = Inf)

# A tibble: 168 × 4
    name       label                                                                                                                                  concept                                                             geography
    <chr>      <chr>                                                                                                                                  <chr>                                                               <chr>    
  1 B05006_001 Estimate!!Total:                                                                                                                       PLACE OF BIRTH FOR THE FOREIGN-BORN POPULATION IN THE UNITED STATES tract    
  2 B05006_002 Estimate!!Total:!!Europe:                                                                                                              PLACE OF BIRTH FOR THE FOREIGN-BORN POPULATION IN THE UNITED STATES tract    
  3 B05006_003 Estimate!!Total:!!Europe:!!Northern Europe:          

In [9]:
%%R 

nyc_census_data <- get_acs(
    geography = "state", 
    variables = c(
        Denmark = "B05006_004",
        Ireland = "B05006_005",
        Norway = "B05006_006",
        Sweden = "B05006_007",
        United_Kingdom = "B05006_008",
        Austria = "B05006_014",
        Belgium = "B05006_015",
        France = "B05006_016",
        Germany = "B05006_017",
        Netherlands = "B05006_018",
        Switzerland = "B05006_019",
        Greece = "B05006_022",
        Italy = "B05006_023",
        Portugal = "B05006_024",
        Spain = "B05006_026",
        Albania = "B05006_029",
        Belarus = "B05006_030",
        Bosnia_and_Herzegovina = "B05006_031",
        Bulgaria = "B05006_032",
        Croatia = "B05006_033",
        Czechoslovakia = "B05006_034",
        Hungary = "B05006_035",
        Latvia = "B05006_036",
        Lithuania = "B05006_037",
        Moldova = "B05006_038",
        North_Macedonia = "B05006_039",
        Poland = "B05006_040",
        Romania = "B05006_041",
        Russia = "B05006_042",
        Serbia = "B05006_043",
        Ukraine = "B05006_044",
        China = "B05006_049",
        Hong_Kong = "B05006_051",
        Taiwan = "B05006_052",
        Japan = "B05006_053",
        Korea = "B05006_054",
        Afghanistan = "B05006_057",
        Bangladesh = "B05006_058",
        India = "B05006_059",
        Iran = "B05006_060",
        Kazakhstan = "B05006_061",
        Nepal = "B05006_062",
        Pakistan = "B05006_063",
        Sri_Lanka = "B05006_064",
        Uzbekistan = "B05006_065",
        Burma = "B05006_068",
        Cambodia = "B05006_069",
        Indonesia = "B05006_070",
        Laos = "B05006_071",
        Malaysia = "B05006_072",
        Philippines = "B05006_073",
        Singapore = "B05006_074",
        Thailand = "B05006_075",
        Vietnam = "B05006_076",
        Armenia = "B05006_079",
        Iraq = "B05006_080",
        Israel = "B05006_081",
        Jordan = "B05006_082",
        Kuwait = "B05006_083",
        Lebanon = "B05006_084",
        Saudi_Arabia = "B05006_085",
        Syria = "B05006_086",
        Turkey = "B05006_087",
        Yemen = "B05006_088",
        Eritrea = "B05006_093",
        Ethiopia = "B05006_094",
        Kenya = "B05006_095",
        Somalia = "B05006_096",
        Uganda = "B05006_097",
        Zimbabwe = "B05006_098",
        Cameroon = "B05006_101",
        Congo = "B05006_102",
        Democratic_Republic_of_Congo = "B05006_103",
        Egypt = "B05006_106",
        Morocco = "B05006_107",
        Sudan = "B05006_108",
        South_Africa = "B05006_111",
        Cabo_Verde = "B05006_114",
        Ghana = "B05006_115",
        Liberia = "B05006_116",
        Nigeria = "B05006_117",
        Senegal = "B05006_118",
        Sierra_Leone = "B05006_119",
        Australia = "B05006_124",
        Fiji = "B05006_126",
        Bahamas = "B05006_132",
        Barbados = "B05006_133",
        Cuba = "B05006_134",
        Dominica = "B05006_135",
        Dominican_Republic = "B05006_136",
        Grenada = "B05006_137",
        Haiti = "B05006_138",
        Jamaica = "B05006_139",
        St_Vincent_and_the_Grenadines = "B05006_140",
        Trinidad_and_Tobago = "B05006_141",
        Belize = "B05006_145", 
        Costa_Rica = "B05006_146",
        El_Salvador = "B05006_147",
        Guatemala = "B05006_148",
        Honduras = "B05006_149",
        Mexico = "B05006_150",
        Nicaragua = "B05006_151",
        Panama = "B05006_152",
        Argentina = "B05006_155",
        Bolivia = "B05006_156",
        Brazil = "B05006_157",
        Chile = "B05006_158",
        Colombia = "B05006_159",
        Ecuador = "B05006_160",
        Guyana = "B05006_161",
        Peru = "B05006_162",
        Uruguay = "B05006_163",
        Venezuela = "B05006_164",
        Canada = "B05006_167"
    ), 
    year = 2020,
    survey = "acs5",
    geometry = TRUE
)

nyc_census_data


Simple feature collection with 5928 features and 5 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.1467 ymin: 17.88328 xmax: 179.7785 ymax: 71.38782
Geodetic CRS:  NAD83
First 10 features:
   GEOID       NAME       variable estimate moe                       geometry
1     35 New Mexico        Denmark       57  40 MULTIPOLYGON (((-109.0502 3...
2     35 New Mexico        Ireland      175 121 MULTIPOLYGON (((-109.0502 3...
3     35 New Mexico         Norway       99  68 MULTIPOLYGON (((-109.0502 3...
4     35 New Mexico         Sweden      101  65 MULTIPOLYGON (((-109.0502 3...
5     35 New Mexico United_Kingdom     3265 526 MULTIPOLYGON (((-109.0502 3...
6     35 New Mexico        Austria      218  93 MULTIPOLYGON (((-109.0502 3...
7     35 New Mexico        Belgium      163  85 MULTIPOLYGON (((-109.0502 3...
8     35 New Mexico         France      628 284 MULTIPOLYGON (((-109.0502 3...
9     35 New Mexico        Germany     4629 831 MULTIPOLYGON (((-109.

Getting data from the 2016-2020 5-year ACS
Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.


In [10]:
import pandas as pd

In [14]:
%%R

nyc_census_data_transformed <- nyc_census_data %>%
  group_by(NAME) %>%
  summarise(across(-geometry, max, na.rm = TRUE), .groups = 'drop') %>%
  arrange(desc(value)) %>%
  slice(1) %>%
  ungroup()

Error in `st_as_sf()`:
ℹ In argument: `..1 = value`.
Caused by error:
! object 'value' not found
Run `rlang::last_trace()` to see where the error occurred.
ℹ In argument: `across(-geometry, max, na.rm = TRUE)`.
ℹ In group 52: `GEOID = "72"`.
! no non-missing arguments to max; returning -Inf
Error in st_as_sf(NextMethod(), sf_column_name = sf_column_name) : 
Caused by error:
! object 'value' not found


RInterpreterError: Failed to parse and evaluate line "\nnyc_census_data_transformed <- nyc_census_data %>%\n  group_by(GEOID) %>%\n  summarise(across(-geometry, max, na.rm = TRUE), .groups = 'drop') %>%\n  arrange(desc(value)) %>%\n  slice(1) %>%\n  ungroup()\n".
R error message: "Error in st_as_sf(NextMethod(), sf_column_name = sf_column_name) : \nCaused by error:\n! object 'value' not found"
R stdout:
Warning message:
There were 2 warnings in `summarise()`.
The first warning was:
ℹ In argument: `across(-geometry, max, na.rm = TRUE)`.
ℹ In group 52: `GEOID = "72"`.
Caused by warning in `fn()`:
! no non-missing arguments to max; returning -Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

In [12]:
# %%R

# # Merge with state geometries
# map_data <- left_join(us_states_sf, nyc_census_data_transformed, by = "state")

# # Plot
# ggplot(map_data) +
#   geom_sf(aes(fill = largest_population_country)) +
#   scale_fill_viridis_d() +
#   labs(title = "Main Country of Foreign-Born Population by State")

In [None]:
%%R

mapped_data <- left_join(us_states_sf, nyc_census_data_transformed, by = "state_identifier")

# Now 'mapped_data' should have both the data and geometries needed for mapping
# Example map plotting
ggplot(data = mapped_data) +
  geom_sf(aes(fill = main_country), color = "white") + # Replace 'main_country' with actual column name
  labs(title = "Main Country of Foreign-Born Population by State", fill = "Country") +
  theme_minimal()
