In [1]:
# Use the census to count the dating pool in your town

In [2]:
# data science imports
import pandas as pd
import matplotlib.pyplot as plt

# census data imports
import censusdis.data as ced
import censusdis.maps as cem
from censusdis import states

# other imports
import os

In [3]:
# Load the API key
with open("../census_api_key.txt", "r") as f:
    CENSUS_API_KEY = f.read().strip()

# print(CENSUS_API_KEY)

## The censusdis.states module

In [4]:
# the states module is just a bunch of useful lists and lookup tables
states.ALL_STATES_AND_DC[0:5]

['01', '02', '04', '05', '06']

In [5]:
# Try it out
states.MN

'27'

In [6]:
# Convert from state name to abbreviation
states.IDS_FROM_ABBREVIATIONS["MN"]

'27'

In [7]:
# Also the other way
states.ABBREVIATIONS_FROM_IDS["27"]

'MN'

# Counting the dating pool
Inspired by Jonathan Soma's "New, Interactive Singles Map": http://www.jonathansoma.com/singles/

In [25]:
# Table for marital status by age
table = "B12002"

# set your preference
# My friend is in her early 40s, so we'll look at 35 to 49
variables = {
    "NAME": "name",
    "B12002_001E": "total",
    "B12002_002E": "total_males",
    "B12002_009E": "35_39",
    "B12002_010E": "40_44",
    "B12002_011E": "45_49",
}

In [26]:
# Explore the table
ced.variables.group_tree(
    "acs/acs5",
    2023,
    group_name = table,
)

+ Estimate
    + Total: (B12002_001E)
        + Male: (B12002_002E)
            + Never married: (B12002_003E)
                + 15 to 17 years (B12002_004E)
                + 18 and 19 years (B12002_005E)
                + 20 to 24 years (B12002_006E)
                + 25 to 29 years (B12002_007E)
                + 30 to 34 years (B12002_008E)
                + 35 to 39 years (B12002_009E)
                + 40 to 44 years (B12002_010E)
                + 45 to 49 years (B12002_011E)
                + 50 to 54 years (B12002_012E)
                + 55 to 59 years (B12002_013E)
                + 60 to 64 years (B12002_014E)
                + 65 to 74 years (B12002_015E)
                + 75 to 84 years (B12002_016E)
                + 85 years and over (B12002_017E)
            + Now married: (B12002_018E)
                + Married, spouse present: (B12002_019E)
                    + 15 to 17 years (B12002_020E)
                    + 18 and 19 years (B12002_021E)
                    + 20 t

## Finding Cities in the Census

In [None]:
# In the census, cities are called "Places"
# You can find a FIPS here: https://www.census.gov/geo/reference/codes/place.html

# Use CensusDis to find FIPS
# Tip: use ACS1 to naturally filter for places with populations > 65,000
ced.download(
    "acs/acs1",
    2023,
    download_variables=["NAME"],
    state=states.NC,
    place="*",
)

Unnamed: 0,STATE,PLACE,NAME
0,37,1520,"Apex town, North Carolina"
1,37,2140,"Asheville city, North Carolina"
2,37,10740,"Cary town, North Carolina"
3,37,12000,"Charlotte city, North Carolina"
4,37,14100,"Concord city, North Carolina"
5,37,19000,"Durham city, North Carolina"
6,37,22920,"Fayetteville city, North Carolina"
7,37,25580,"Gastonia city, North Carolina"
8,37,28000,"Greensboro city, North Carolina"
9,37,28080,"Greenville city, North Carolina"


## Analyze Asheville, NC

In [28]:
# Set your city
asheville = "02140"

In [35]:
# use what we know to download the dating pool for Asheville, NC
pool = ced.download(
    # Have to use five because not all tables are available in 1-year estimates
    "acs/acs5",
    2023,
    download_variables = variables.keys(),
    state = states.NC,
    place = asheville,
    api_key = CENSUS_API_KEY
).rename(
    columns = variables
)

pool.head()

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49
0,37,2140,"Asheville city, North Carolina",81494,38767,2036,1061,867


In [None]:
# add up the total eligibility
pool["eligible"] = pool["35_39"] + pool["40_44"] + pool["45_49"]
pool

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,eligible
0,37,2140,"Asheville city, North Carolina",81494,38767,2036,1061,867,3964


In [None]:
# calculate the percent
pool["eligible"]/pool["total_males"]

0    0.102252
dtype: float64

## Try another city

In [39]:
# try for another place

# same as above, find the city of interest
ced.download(
    "acs/acs1",
    2023,
    download_variables=["NAME"],
    state=states.MN,
    place="*",
    api_key = CENSUS_API_KEY
)

Unnamed: 0,STATE,PLACE,NAME
0,27,6382,"Blaine city, Minnesota"
1,27,6616,"Bloomington city, Minnesota"
2,27,7966,"Brooklyn Park city, Minnesota"
3,27,17000,"Duluth city, Minnesota"
4,27,17288,"Eagan city, Minnesota"
5,27,35180,"Lakeville city, Minnesota"
6,27,40166,"Maple Grove city, Minnesota"
7,27,43000,"Minneapolis city, Minnesota"
8,27,51730,"Plymouth city, Minnesota"
9,27,54880,"Rochester city, Minnesota"


In [None]:
# Locate Minneapolis from the table
minneapolis = "43000"

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49
0,27,43000,"Minneapolis city, Minnesota",358609,183837,8236,5334,3233


In [None]:
# download the Minneapolis data
mn_pool = ced.download(
    "acs/acs5",
    2023,
    download_variables = variables.keys(),
    state = states.MN,
    place = minneapolis,
    api_key = CENSUS_API_KEY
).rename(
    columns = variables
)

mn_pool.head()

In [40]:
# add up the total eligibility
mn_pool["eligible"] = mn_pool["35_39"] + mn_pool["40_44"] + mn_pool["45_49"]

mn_pool

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,eligible
0,27,43000,"Minneapolis city, Minnesota",358609,183837,8236,5334,3233,16803


In [54]:
# calculate the percent
mn_pool["eligible"]/mn_pool["total_males"] * 100

# see that MN is not any better

0    9.140162
dtype: float64

## Compare Across Major Cities

In [50]:
# what's actually normal?

# download the data for all cities
all_cities = ced.download(
    "acs/acs5",
    2023,
    download_variables = variables.keys(),
    state = "*",
    place = "*",
    api_key = CENSUS_API_KEY
).rename(
    columns = variables
)

# filter for city size
all_cities = all_cities[all_cities["total"] > 10000]

# take a look
all_cities.head()

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49
5,1,820,"Alabaster city, Alabama",26763,12646,63,99,41
6,1,988,"Albertville city, Alabama",16767,7930,13,70,64
7,1,1132,"Alexander City city, Alabama",11826,5826,122,204,65
14,1,1852,"Anniston city, Alabama",17883,8148,146,166,162
23,1,2956,"Athens city, Alabama",22820,10671,58,203,213


In [55]:
# build the stats

# total the eligibility
all_cities["eligible"] = all_cities["35_39"] + all_cities["40_44"] + all_cities["45_49"]

# calculate the percent
all_cities["percent_eligible"] = all_cities["eligible"]/all_cities["total_males"] * 100

# take a look by percent
all_cities.sort_values("percent_eligible", ascending = False).head()


Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,eligible,percent_eligible
3593,6,84410,"West Hollywood city, California",33778,18782,1721,892,1217,3830,20.391865
1613,5,24430,"Forrest City city, Arkansas",10403,6241,565,483,150,1198,19.195642
17209,34,7600,"Bridgeton city, New Jersey",19481,10708,924,643,386,1953,18.2387
2352,6,16224,"Corcoran city, California",18717,12982,1007,736,530,2273,17.508858
12128,24,41250,"Hyattsville city, Maryland",17053,9017,779,581,217,1577,17.489187


In [59]:
# use a shortcut method to check the distribution
all_cities["percent_eligible"].describe()

# The everage is pretty low
# Check the upper quartile

count    3543.000000
mean        5.944006
std         2.694864
min         0.000000
25%         4.024419
50%         5.718340
75%         7.496889
max        20.391865
Name: percent_eligible, dtype: float64

---
---
---