In [29]:
# 2023 ACS 1-YR PUMS Dashboard
# Kathleen created 5/30/25
# Last edited 6/4/25

In [30]:
# Import packages
import pandas as pd
import streamlit as st
import plotly.express as px

In [31]:
# Load data
df_inter_state = pd.read_csv('df_inter_state.csv')
df_inter_state["current_state_code"] = df_inter_state["state_code"]

In [32]:
# Remove unnecessary vars
df_inter_state = df_inter_state[["current_state", "previous_state", "sex", "age_group", "education", "marital_status", "person_weight"]]
df_inter_state = df_inter_state.dropna()

In [33]:
# Get counts grouped by current_state, sex, education, etc
inbound = df_inter_state.groupby(["current_state", "sex", "age_group", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
print(inbound)

     current_state     sex age_group                 education marital_status  \
0          Alabama  Female      0-17  Elementary/Middle school  Never Married   
1          Alabama  Female      0-17              No schooling  Never Married   
2          Alabama  Female      0-17          Some high school  Never Married   
3          Alabama  Female     18-24        Associate's degree  Never Married   
4          Alabama  Female     18-24         Bachelor's degree        Married   
...            ...     ...       ...                       ...            ...   
9946       Wyoming    Male       65+           Graduate degree      Separated   
9947       Wyoming    Male       65+      High school graduate        Married   
9948       Wyoming    Male       65+      High school graduate        Widowed   
9949       Wyoming    Male       65+              Some college       Divorced   
9950       Wyoming    Male       65+              Some college        Married   

      count  
0      3202  

In [34]:
# Get "all" counts for each variable"
# AM I MISSING ANY COMBINATIONS BELOW???
inbound_sex_all = df_inter_state.groupby(["current_state","age_group", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_sex_all["sex"] = "All"

inbound_age_all = df_inter_state.groupby(["current_state","sex", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_age_all["age_group"] = "All"

inbound_ed_all = df_inter_state.groupby(["current_state","sex", "age_group", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_ed_all["education"] = "All"

inbound_mar_all = df_inter_state.groupby(["current_state","sex", "age_group", "education"])["person_weight"].sum().reset_index(name="count")
inbound_mar_all["marital_status"] = "All"

inbound_sa_all = df_inter_state.groupby(["current_state", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_sa_all["sex"] = "All"
inbound_sa_all["age_group"] = "All"

inbound_se_all = df_inter_state.groupby(["current_state", "age_group", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_se_all["sex"] = "All"
inbound_se_all["education"] = "All"

inbound_sm_all = df_inter_state.groupby(["current_state", "age_group", "education"])["person_weight"].sum().reset_index(name="count")
inbound_sm_all["sex"] = "All"
inbound_sm_all["marital_status"] = "All"

inbound_ae_all = df_inter_state.groupby(["current_state", "sex", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_ae_all["age_group"] = "All"
inbound_ae_all["education"] = "All"

inbound_am_all = df_inter_state.groupby(["current_state", "sex", "education"])["person_weight"].sum().reset_index(name="count")
inbound_am_all["age_group"] = "All"
inbound_am_all["marital_status"] = "All"

inbound_em_all = df_inter_state.groupby(["current_state", "sex", "age_group"])["person_weight"].sum().reset_index(name="count")
inbound_em_all["education"] = "All"
inbound_em_all["marital_status"] = "All"

inbound_sae_all = df_inter_state.groupby(["current_state", "marital_status"])["person_weight"].sum().reset_index(name="count")
inbound_sae_all["sex"] = "All"
inbound_sae_all["age_group"] = "All"
inbound_sae_all["education"] = "All"

inbound_sam_all = df_inter_state.groupby(["current_state", "education"])["person_weight"].sum().reset_index(name="count")
inbound_sam_all["sex"] = "All"
inbound_sam_all["age_group"] = "All"
inbound_sam_all["marital_status"] = "All"

inbound_sem_all = df_inter_state.groupby(["current_state", "age_group"])["person_weight"].sum().reset_index(name="count")
inbound_sem_all["sex"] = "All"
inbound_sem_all["education"] = "All"
inbound_sem_all["marital_status"] = "All"

inbound_aem_all = df_inter_state.groupby(["current_state", "sex"])["person_weight"].sum().reset_index(name="count")
inbound_aem_all["age_group"] = "All"
inbound_aem_all["education"] = "All"
inbound_aem_all["marital_status"] = "All"

inbound_saem_all = df_inter_state.groupby(["current_state"])["person_weight"].sum().reset_index(name="count")
inbound_saem_all["sex"] = "All"
inbound_saem_all["age_group"] = "All"
inbound_saem_all["education"] = "All"
inbound_saem_all["marital_status"] = "All"

inbound = pd.concat([inbound,inbound_sex_all, inbound_age_all, inbound_ed_all, inbound_mar_all, inbound_sa_all, inbound_se_all, inbound_sm_all, inbound_ae_all, inbound_em_all,
                      inbound_sae_all, inbound_sam_all, inbound_sem_all, inbound_aem_all, inbound_saem_all])

In [35]:
# Merge in state abbreviations
url ="https://www2.census.gov/geo/docs/reference/state.txt"
states = pd.read_csv(url, sep='|')

states["current_state"] = states["STATE_NAME"]
states["current_fips"] = states["STATE"]
states["current_state_code"] = states["STUSAB"]

inbound_states = states[["current_state", "current_state_code", "current_fips"]]

inbound = pd.merge(inbound,inbound_states,on="current_state",how="left")

inbound = inbound.dropna()
print(inbound)


       current_state     sex age_group                 education  \
0            Alabama  Female      0-17  Elementary/Middle school   
1            Alabama  Female      0-17              No schooling   
2            Alabama  Female      0-17          Some high school   
3            Alabama  Female     18-24        Associate's degree   
4            Alabama  Female     18-24         Bachelor's degree   
...              ...     ...       ...                       ...   
34120       Virginia     All       All                       All   
34121     Washington     All       All                       All   
34122  West Virginia     All       All                       All   
34123      Wisconsin     All       All                       All   
34124        Wyoming     All       All                       All   

      marital_status   count current_state_code  current_fips  
0      Never Married    3202                 AL             1  
1      Never Married    5595                 AL        

In [36]:
# Save to CSV
inbound.to_csv('inbound.csv', index=False)

In [37]:
# Test map
filtered_df = inbound[(inbound["sex"] == "Female") & (inbound["age_group"] == "0-17") & (inbound["education"] == "Elementary/Middle school") & (inbound["marital_status"] == "Never Married")]

choropleth = px.choropleth(filtered_df, locations="current_state_code", color="count", locationmode="USA-states",color_continuous_scale="reds",
                               scope="usa",
                              )

choropleth.show()

In [38]:
# Get counts grouped by current_state, sex, education, etc
outbound = df_inter_state.groupby(["previous_state", "sex", "age_group", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")

In [39]:
# Get "all" counts for each variable
# AM I MISSING ANY COMBINATIONS BELOW???
outbound_sex_all = df_inter_state.groupby(["previous_state","age_group", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_sex_all["sex"] = "All"

outbound_age_all = df_inter_state.groupby(["previous_state","sex", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_age_all["age_group"] = "All"

outbound_ed_all = df_inter_state.groupby(["previous_state","sex", "age_group", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_ed_all["education"] = "All"

outbound_mar_all = df_inter_state.groupby(["previous_state","sex", "age_group", "education"])["person_weight"].sum().reset_index(name="count")
outbound_mar_all["marital_status"] = "All"

outbound_sa_all = df_inter_state.groupby(["previous_state", "education", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_sa_all["sex"] = "All"
outbound_sa_all["age_group"] = "All"

outbound_se_all = df_inter_state.groupby(["previous_state", "age_group", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_se_all["sex"] = "All"
outbound_se_all["education"] = "All"

outbound_sm_all = df_inter_state.groupby(["previous_state", "age_group", "education"])["person_weight"].sum().reset_index(name="count")
outbound_sm_all["sex"] = "All"
outbound_sm_all["marital_status"] = "All"

outbound_ae_all = df_inter_state.groupby(["previous_state", "sex", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_ae_all["age_group"] = "All"
outbound_ae_all["education"] = "All"

outbound_am_all = df_inter_state.groupby(["previous_state", "sex", "education"])["person_weight"].sum().reset_index(name="count")
outbound_am_all["age_group"] = "All"
outbound_am_all["marital_status"] = "All"

outbound_em_all = df_inter_state.groupby(["previous_state", "sex", "age_group"])["person_weight"].sum().reset_index(name="count")
outbound_em_all["education"] = "All"
outbound_em_all["marital_status"] = "All"

outbound_sae_all = df_inter_state.groupby(["previous_state", "marital_status"])["person_weight"].sum().reset_index(name="count")
outbound_sae_all["sex"] = "All"
outbound_sae_all["age_group"] = "All"
outbound_sae_all["education"] = "All"

outbound_sam_all = df_inter_state.groupby(["previous_state", "education"])["person_weight"].sum().reset_index(name="count")
outbound_sam_all["sex"] = "All"
outbound_sam_all["age_group"] = "All"
outbound_sam_all["marital_status"] = "All"

outbound_sem_all = df_inter_state.groupby(["previous_state", "age_group"])["person_weight"].sum().reset_index(name="count")
outbound_sem_all["sex"] = "All"
outbound_sem_all["education"] = "All"
outbound_sem_all["marital_status"] = "All"

outbound_aem_all = df_inter_state.groupby(["previous_state", "sex"])["person_weight"].sum().reset_index(name="count")
outbound_aem_all["age_group"] = "All"
outbound_aem_all["education"] = "All"
outbound_aem_all["marital_status"] = "All"

outbound_saem_all = df_inter_state.groupby(["previous_state"])["person_weight"].sum().reset_index(name="count")
outbound_saem_all["sex"] = "All"
outbound_saem_all["age_group"] = "All"
outbound_saem_all["education"] = "All"
outbound_saem_all["marital_status"] = "All"

outbound = pd.concat([outbound,outbound_sex_all, outbound_age_all, outbound_ed_all, outbound_mar_all, outbound_sa_all, outbound_se_all, outbound_sm_all, outbound_ae_all, outbound_em_all,
                      outbound_sae_all, outbound_sam_all, outbound_sem_all, outbound_aem_all, outbound_saem_all])

In [40]:
# Merge in state abbreviations
url ="https://www2.census.gov/geo/docs/reference/state.txt"
states = pd.read_csv(url, sep='|')

states["previous_state"] = states["STATE_NAME"]
states["previous_fips"] = states["STATE"]
states["previous_state_code"] = states["STUSAB"]

outbound_states = states[["previous_state", "previous_state_code", "previous_fips"]]

outbound = pd.merge(outbound,outbound_states,on="previous_state",how="left")

outbound = outbound.dropna()
print(outbound)

      previous_state     sex age_group                 education  \
0            Alabama  Female      0-17  Elementary/Middle school   
1            Alabama  Female      0-17      High school graduate   
2            Alabama  Female      0-17              No schooling   
3            Alabama  Female      0-17          Some high school   
4            Alabama  Female      0-17          Some high school   
...              ...     ...       ...                       ...   
34650       Virginia     All       All                       All   
34651     Washington     All       All                       All   
34652  West Virginia     All       All                       All   
34653      Wisconsin     All       All                       All   
34654        Wyoming     All       All                       All   

      marital_status   count previous_state_code  previous_fips  
0      Never Married    2579                  AL              1  
1      Never Married      49                  AL   

In [41]:
# Save to CSV
outbound.to_csv('outbound.csv', index=False)

In [42]:
# Test map
filtered_df2 = outbound[(outbound["sex"] == "All") & (outbound["age_group"] == "All") & (outbound["education"] == "All") & (outbound["marital_status"] == "All")]

choropleth = px.choropleth(filtered_df2, locations="previous_state_code", color="count", locationmode="USA-states",color_continuous_scale="reds",
                               scope="usa",
                              )

choropleth.show()