In [1]:
# Use the census to count the dating pool in your town

In [None]:
# data science imports
import pandas as pd
import matplotlib.pyplot as plt

# census data imports
import censusdis.data as ced
import censusdis.maps as cem
from censusdis import states

# other imports
import os

In [4]:
# Read the Census API key from a text file one directory above the notebook,
# so the secret itself never appears in the code.
# NOTE(review): no download call visible in this notebook passes
# CENSUS_API_KEY — confirm whether it is still needed.
with open("../census_api_key.txt") as key_file:
    CENSUS_API_KEY = key_file.read().strip()  # remove surrounding whitespace/newline

# Do not print or display the key: cell outputs are saved with the notebook.

## The censusdis.states module

In [19]:
# censusdis.states is a collection of handy lists and lookup tables;
# peek at the first five entries of the all-states-plus-DC list
states.ALL_STATES_AND_DC[:5]

['01', '02', '04', '05', '06']

In [20]:
# state abbreviations are attributes of the module (this notebook uses
# states.MN and states.NC); the value is the state's ID as a string
states.MN

'27'

In [21]:
# lookup table: two-letter abbreviation -> state ID string
states.IDS_FROM_ABBREVIATIONS["MN"]

'27'

In [22]:
# reverse lookup table: state ID string -> two-letter abbreviation
states.ABBREVIATIONS_FROM_IDS["27"]

'MN'

In [None]:
# Counting the dating pool
# Inspired by Jonathan Soma's "New, Interactive Singles Map" http://www.jonathansoma.com/singles/

In [27]:
# Census table (variable group) B12002: counts broken down by sex,
# marital status, and age band
table = "B12002"

In [28]:
# Show the table's variable hierarchy in the 2023 ACS 5-year dataset so we can
# look up the variable code for each sex / marital status / age combination
ced.variables.group_tree("acs/acs5", 2023, group_name=table)

+ Estimate
    + Total: (B12002_001E)
        + Male: (B12002_002E)
            + Never married: (B12002_003E)
                + 15 to 17 years (B12002_004E)
                + 18 and 19 years (B12002_005E)
                + 20 to 24 years (B12002_006E)
                + 25 to 29 years (B12002_007E)
                + 30 to 34 years (B12002_008E)
                + 35 to 39 years (B12002_009E)
                + 40 to 44 years (B12002_010E)
                + 45 to 49 years (B12002_011E)
                + 50 to 54 years (B12002_012E)
                + 55 to 59 years (B12002_013E)
                + 60 to 64 years (B12002_014E)
                + 65 to 74 years (B12002_015E)
                + 75 to 84 years (B12002_016E)
                + 85 years and over (B12002_017E)
            + Now married: (B12002_018E)
                + Married, spouse present: (B12002_019E)
                    + 15 to 17 years (B12002_020E)
                    + 18 and 19 years (B12002_021E)
                    + 20 t

In [87]:
# set your preference
# My friend is in her early 40s, so we'll look at 35 to 49
# Maps each Census variable code to the readable column name it is renamed to
# after download. The age-band codes below sit under Male > Never married in
# the B12002 tree printed above.
variables = {
    "NAME": "name",
    "B12002_001E": "total",  # table total (both sexes)
    "B12002_002E": "total_males",  # all males in the table, any marital status or age
    "B12002_009E": "35_39",  # never-married males, 35 to 39 years
    "B12002_010E": "40_44",  # never-married males, 40 to 44 years
    "B12002_011E": "45_49",  # never-married males, 45 to 49 years
}

In [88]:
# Divorced men in the same 35-49 age bands, to optionally widen the pool
# beyond never-married men.
# In B12002 the male half of the table runs from _002E up to (but not
# including) _095E "Female:"; the male "Divorced" age bands are _081E-_094E,
# so 35-39 / 40-44 / 45-49 are _086E / _087E / _088E. The previously listed
# _179E-_181E are the *female* divorced counts for those age bands, which
# contradicts the "total_males" denominator used here.
# NOTE: the readable names intentionally match those in `variables`; rename one
# set before combining both in a single frame to avoid duplicate column names.
plus_divorced = {
    "B12002_002E": "total_males",
    "B12002_086E": "35_39",
    "B12002_087E": "40_44",
    "B12002_088E": "45_49",
}

In [89]:
# The census calls cities "Places", and each one is identified by a FIPS code.
# Look your city's code up here: https://www.census.gov/geo/reference/codes/place.html
# ...or list every place in the state, as done below.

# Tip: use ACS1 to naturally filter for places with populations > 65,000
nc_places = ced.download("acs/acs1", 2023, download_variables=["NAME"], state=states.NC, place="*")
nc_places

Unnamed: 0,STATE,PLACE,NAME
0,37,1520,"Apex town, North Carolina"
1,37,2140,"Asheville city, North Carolina"
2,37,10740,"Cary town, North Carolina"
3,37,12000,"Charlotte city, North Carolina"
4,37,14100,"Concord city, North Carolina"
5,37,19000,"Durham city, North Carolina"
6,37,22920,"Fayetteville city, North Carolina"
7,37,25580,"Gastonia city, North Carolina"
8,37,28000,"Greensboro city, North Carolina"
9,37,28080,"Greenville city, North Carolina"


In [90]:
# Asheville's place code as a zero-padded string (the listing above displays
# it without the leading zero, as 2140)
asheville = "02140"

# Download the dating-pool variables for Asheville, NC from the ACS 5-year
# data, then swap the Census variable codes for readable column names.
asheville_raw = ced.download(
    "acs/acs5",
    2023,
    download_variables=variables.keys(),
    state=states.NC,
    place=asheville,
)
pool = asheville_raw.rename(columns=variables)

pool.head()

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49
0,37,2140,"Asheville city, North Carolina",81494,38767,2036,1061,867


In [91]:
# eligible = never-married men summed across the three chosen age bands (35 to 49);
# skipna=False keeps a missing band from being silently treated as zero
asheville_age_bands = ["35_39", "40_44", "45_49"]
pool["eligible"] = pool[asheville_age_bands].sum(axis="columns", skipna=False)
pool

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,eligible
0,37,2140,"Asheville city, North Carolina",81494,38767,2036,1061,867,3964


In [92]:
# fraction (0-1) of all males in the table who are in the eligible group
pool["eligible"].div(pool["total_males"])

0    0.102252
dtype: float64

In [93]:
# Repeat the exercise for a second city.
# First step is the same as before: list the state's places (ACS 1-year) to
# find the place code of the city of interest.
mn_places = ced.download("acs/acs1", 2023, download_variables=["NAME"], state=states.MN, place="*")
mn_places

Unnamed: 0,STATE,PLACE,NAME
0,27,6382,"Blaine city, Minnesota"
1,27,6616,"Bloomington city, Minnesota"
2,27,7966,"Brooklyn Park city, Minnesota"
3,27,17000,"Duluth city, Minnesota"
4,27,17288,"Eagan city, Minnesota"
5,27,35180,"Lakeville city, Minnesota"
6,27,40166,"Maple Grove city, Minnesota"
7,27,43000,"Minneapolis city, Minnesota"
8,27,51730,"Plymouth city, Minnesota"
9,27,54880,"Rochester city, Minnesota"


In [94]:
# Minneapolis' place code, taken from the Minnesota listing above
minneapolis = "43000"

# Same ACS 5-year download as for the first city, then rename the Census
# variable codes to readable column names.
minneapolis_raw = ced.download(
    "acs/acs5",
    2023,
    download_variables=variables.keys(),
    state=states.MN,
    place=minneapolis,
)
mn_pool = minneapolis_raw.rename(columns=variables)

mn_pool.head()

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49
0,27,43000,"Minneapolis city, Minnesota",358609,183837,8236,5334,3233


In [95]:
# eligible = never-married men summed across the three chosen age bands (35 to 49);
# skipna=False keeps a missing band from being silently treated as zero
minneapolis_age_bands = ["35_39", "40_44", "45_49"]
mn_pool["eligible"] = mn_pool[minneapolis_age_bands].sum(axis="columns", skipna=False)

mn_pool

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,eligible
0,27,43000,"Minneapolis city, Minnesota",358609,183837,8236,5334,3233,16803


In [114]:
# fraction (0-1) of all males in the table who are in the eligible group
mn_pool["eligible"].div(mn_pool["total_males"])

0    0.091402
dtype: float64

In [117]:
# what's actually normal?
# Download the same variables for every place in all states + DC (with
# boundary geometry) and keep only the large places as a comparison set.

# Threshold applied to the table's "total" column.
# NOTE(review): the youngest age band in the B12002 tree is "15 to 17 years",
# so "total" is presumably the 15-and-over population, not all residents — confirm.
MIN_TABLE_TOTAL = 100_000

all_cities = (
    ced.download(
        "acs/acs5",
        2023,
        download_variables=variables.keys(),
        state=states.ALL_STATES_AND_DC,
        place="*",
        with_geometry=True,
    )
    .rename(columns=variables)
    .loc[lambda places: places["total"] > MIN_TABLE_TOTAL]
    # The boolean filter returns a frame pandas tracks as a possible copy of the
    # full download; .copy() makes it independent so later cells can add
    # columns without triggering SettingWithCopyWarning.
    .copy()
)

all_cities.sort_values("geometry")

Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,geometry
6198,15,71550,"Urban Honolulu CDP, Hawaii",298726,148427,5935,4527,3416,"POLYGON ((-157.94837 21.31019, -157.94718 21.3..."
2311,06,13392,"Chula Vista city, California",220583,109381,3216,2051,1106,"POLYGON ((-117.12399 32.64636, -117.11707 32.6..."
3306,06,66000,"San Diego city, California",1168312,592144,21359,14600,9448,"MULTIPOLYGON (((-116.93192 32.64961, -116.9314..."
2511,06,22804,"Escondido city, California",121220,60601,2322,1503,718,"MULTIPOLYGON (((-117.02372 33.1806, -117.02199..."
2715,06,36770,"Irvine city, California",252521,122498,4384,1658,844,"POLYGON ((-117.86865 33.69005, -117.86763 33.6..."
...,...,...,...,...,...,...,...,...,...
4972,12,45975,"Miramar city, Florida",111260,53957,1927,2623,749,"POLYGON ((-80.44047 25.99318, -80.40816 25.993..."
5092,12,55775,"Pembroke Pines city, Florida",143055,67472,2168,1353,866,"MULTIPOLYGON (((-80.36538 26.03677, -80.3613 2..."
4779,12,32000,"Hollywood city, Florida",128339,65092,1842,1479,1245,"POLYGON ((-80.24852 26.03305, -80.24774 26.033..."
4683,12,24000,"Fort Lauderdale city, Florida",157006,82738,2738,2002,2186,"MULTIPOLYGON (((-80.14829 26.19879, -80.1474 2..."


In [None]:
# build the stats
# eligible = never-married men summed across the three chosen age bands;
# skipna=False keeps a missing band from being silently treated as zero
comparison_age_bands = ["35_39", "40_44", "45_49"]
all_cities["eligible"] = all_cities[comparison_age_bands].sum(axis="columns", skipna=False)

# despite the column name, this is a 0-1 fraction of total_males, not a percentage
all_cities["percent_eligible"] = all_cities["eligible"].div(all_cities["total_males"])

# rank cities from the largest eligible share to the smallest
all_cities.sort_values(by="percent_eligible", ascending=False)


Unnamed: 0,STATE,PLACE,name,total,total_males,35_39,40_44,45_49,eligible,percent_eligible,centroid
12830,26,22000,"Detroit city, Michigan",503237,233885,11917,9044,8549,29510,0.126173,POINT (-83.10358 42.38362)
21064,39,16000,"Cleveland city, Ohio",302969,145085,7262,5682,4435,17379,0.119785,POINT (-81.68053 41.47639)
11545,22,55000,"New Orleans city, Louisiana",313741,145635,8259,5110,4023,17392,0.119422,POINT (-89.92886 30.0687)
3310,06,67000,"San Francisco city, California",739303,380075,21329,14271,9545,45145,0.118779,POINT (-122.44503 37.75559)
4249,09,52000,"New Haven city, Connecticut",109134,51466,2367,2163,1414,5944,0.115494,POINT (-72.92458 41.31129)
...,...,...,...,...,...,...,...,...,...,...,...
1098,04,27400,"Gilbert town, Arizona",207483,102207,1657,1252,716,3625,0.035467,POINT (-111.74623 33.31001)
19774,37,10740,"Cary town, North Carolina",142887,69605,1378,712,336,2426,0.034854,POINT (-78.8204 35.78226)
7376,17,51622,"Naperville city, Illinois",119266,58558,780,436,561,1777,0.030346,POINT (-88.16584 41.74805)
27201,48,15976,"College Station city, Texas",104896,53864,915,358,277,1550,0.028776,POINT (-96.29595 30.58516)


In [151]:
# summary statistics of the eligible share across the comparison cities,
# to judge whether a single city's value is high or low
all_cities.loc[:, "percent_eligible"].describe()

count    243.000000
mean       0.071670
std        0.019904
min        0.018812
25%        0.058592
50%        0.068446
75%        0.084448
max        0.126173
Name: percent_eligible, dtype: float64

---
---
---