# U.S. Census apportionment data in 2010 and 2020

In [1]:
%load_ext lab_black

In [36]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt

%matplotlib inline

In [37]:
from vega_datasets import data

# Reference the "states" feature of the vega_datasets US 10m TopoJSON file by URL.
states = alt.topo_feature(url=data.us_10m.url, feature="states")

In [3]:
# pandas display limits for this notebook.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Register the theme supplied by altair_latimes under "latimes" and switch to it.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Turn off Altair's maximum-rows check for chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the data from the U.S. Census

In [4]:
# https://www.census.gov/data/tables/2010/dec/2010-apportionment-data.html

In [144]:
# Location of the 2010 apportionment "table1" workbook (.xls) on www2.census.gov.
url2010 = "https://www2.census.gov/programs-surveys/decennial/2010/data/apportionment/apport2010-table1.xls"

In [145]:
# Read the 2010 workbook, skipping 10 leading and 6 trailing sheet rows
# (presumably titles and footnotes -- confirm against the workbook).
# No `encoding` argument is passed: read_excel does not take one, and Excel
# workbooks are binary files, so a text encoding has no effect on parsing.
df_2010 = pd.read_excel(url2010, skiprows=10, skipfooter=6)

In [146]:
# Rename all six parsed columns by position. "drop1" and "drop2" are
# placeholder labels for columns that are removed right after this.
df_2010.columns = ["state", "population", "drop1", "seats", "drop2", "seats_change"]

In [147]:
# Remove the two placeholder columns, keeping state, population, seats and seats_change.
df_2010 = df_2010.drop(columns=["drop1", "drop2"])

In [148]:
# Show the last 10 rows of the 2010 table.
df_2010.tail(10)

Unnamed: 0,state,population,seats,seats_change
40,South Dakota,819761,1,0
41,Tennessee,6375431,9,0
42,Texas,25268418,36,4
43,Utah,2770765,4,1
44,Vermont,630337,1,0
45,Virginia,8037736,11,0
46,Washington,6753369,10,1
47,West Virginia,1859815,3,0
48,Wisconsin,5698230,8,0
49,Wyoming,568300,1,0


In [149]:
# Label every 2010 row with its census year (kept as a string).
df_2010 = df_2010.assign(year="2010")

In [150]:
# Location of the 2020 apportionment "table01" workbook (.xlsx) on www2.census.gov.
url2020 = "https://www2.census.gov/programs-surveys/decennial/2020/data/apportionment/apportionment-2020-table01.xlsx"

In [192]:
# Read the 2020 workbook, skipping 3 leading and 2 trailing sheet rows
# (presumably titles and footnotes -- confirm against the workbook).
# No `encoding` argument is passed: read_excel does not take one, and Excel
# workbooks are binary files, so a text encoding has no effect on parsing.
df_2020 = pd.read_excel(url2020, skiprows=3, skipfooter=2)

In [193]:
# Show the last rows of the 2020 table, still under the workbook's own column headers.
df_2020.tail()

Unnamed: 0,STATE,"APPORTIONMENT POPULATION \n(APRIL 1, 2020)",NUMBER OF APPORTIONED REPRESENTATIVES BASED ON \n2020 CENSUS2,CHANGE FROM \n2010 CENSUS APPORTIONMENT
45,Virginia,8654542,11,0
46,Washington,7715946,10,0
47,West Virginia,1795045,2,-1
48,Wisconsin,5897473,8,0
49,Wyoming,577719,1,0


In [194]:
# Replace the four workbook headers, by position, with the short names used for the 2010 table.
df_2020.columns = ["state", "population", "seats", "seats_change"]

In [195]:
# Label every 2020 row with its census year (kept as a string).
df_2020 = df_2020.assign(year="2020")

In [196]:
# Stack the 2010 and 2020 tables into one long frame. ignore_index=True gives
# the result a fresh 0..n-1 index instead of repeating each input's row labels,
# so label-based lookups on `df` are unambiguous.
df = pd.concat([df_2010, df_2020], ignore_index=True)

In [197]:
# Preview the first 10 rows rather than rendering the whole frame
# (display.max_rows is raised to 1000 in this notebook, so nothing is truncated).
df_2020.head(10)

Unnamed: 0,state,population,seats,seats_change,year
0,Alabama,5030053,7,0,2020
1,Alaska,736081,1,0,2020
2,Arizona,7158923,9,0,2020
3,Arkansas,3013756,4,0,2020
4,California,39576757,52,-1,2020
5,Colorado,5782171,8,1,2020
6,Connecticut,3608298,5,0,2020
7,Delaware,990837,1,0,2020
8,Florida,21570527,28,1,2020
9,Georgia,10725274,14,0,2020


---

### Historical apportionment

In [336]:
# Historical apportionment "tableC2" workbook; 4 leading and 3 trailing sheet rows are skipped.
url_hist = "https://www2.census.gov/programs-surveys/decennial/2020/data/apportionment/apportionment-2020-tableC2.xlsx"
hist_df = pd.read_excel(url_hist, skiprows=4, skipfooter=3)

In [337]:
# Show the last rows of the wide historical table under the workbook's own headers.
hist_df.tail()

Unnamed: 0.1,Unnamed: 0,2020: Apportionment population,2020: \nNumber of representatives,2020: \nSeat change,2020: Average persons per representative,2010: Apportionment population,2010: \nNumber of representatives,2010: \nSeat change,2010: Average persons per representative,Unnamed: 9,2000: Apportionment population,2000: \nNumber of representatives,2000: \nSeat change,2000: Average persons per representative,1990: Apportionment population,1990: \nNumber of representatives,1990: \nSeat change,1990: Average persons per representative,Unnamed: 18,1980: Apportionment population,1980: \nNumber of representatives,1980: \nSeat change,1980: Average persons per representative,1970: Apportionment population,1970: \nNumber of representatives,...,1960: Apportionment population,1960: \nNumber of representatives,1960: \nSeat change,1960: Average persons per representative,1950: Apportionment population,1950: \nNumber of representatives,1950: \nSeat change,1950: Average persons per representative,Unnamed: 36,1940: Apportionment population,1940: \nNumber of representatives,1940: \nSeat change,1940: Average persons per representative,1930: Apportionment population,1930: \nNumber of representatives,1930: \nSeat change,1930: Average persons per representative,Unnamed: 45,1920: Apportionment population,1920: \nNumber of representatives,1920: Average persons per representative,1910: Apportionment population,1910: \nNumber of representatives,1910: \nSeat change\n(since 1900),1910: Average persons per representative
45,Virginia,8654542,11,0,786777,8037736,11,0,730703,Virginia,7100702,11,0,645518,6216568,11,1,565143,Virginia,5346279,10,0,534628,4690742,10,...,3966949,10,0,396695,3318680,10,1,331868,Virginia,2677773,9,0,297530,2421829,9,-1,269092,Virginia,2309187,10,230919,2061612,10,0,206161
46,Washington,7715946,10,0,771595,6753369,10,1,675337,Washington,5908684,9,0,656520,4887941,9,1,543105,Washington,4130163,8,1,516270,3443487,7,...,2853214,7,0,407602,2378963,7,1,339852,Washington,1736191,6,0,289365,1552423,6,1,258737,Washington,1354596,5,270919,1140134,5,2,228027
47,West Virginia,1795045,2,-1,897523,1859815,3,0,619938,West Virginia,1813077,3,0,604359,1801625,3,-1,600542,West Virginia,1949644,4,0,487411,1763331,4,...,1860421,5,-1,372084,2005552,6,0,334259,West Virginia,1901974,6,0,316996,1729199,6,0,288200,West Virginia,1463701,6,243950,1221119,6,1,203520
48,Wisconsin,5897473,8,0,737184,5698230,8,0,712279,Wisconsin,5371210,8,-1,671401,4906745,9,0,545194,Wisconsin,4705335,9,0,522815,4447013,9,...,3951777,10,0,395178,3434575,10,0,343458,Wisconsin,3137587,10,0,313759,2931721,10,-1,293172,Wisconsin,2631305,11,239210,2332853,11,0,212078
49,Wyoming,577719,1,0,577719,568300,1,0,568300,Wyoming,495304,1,0,495304,455975,1,0,455975,Wyoming,470816,1,0,470816,335719,1,...,330066,1,0,330066,290529,1,0,290529,Wyoming,250742,1,0,250742,223630,1,0,223630,Wyoming,193487,1,193487,144658,1,0,144658


In [338]:
# Build the 53 positional column names programmatically.
# Layout, left to right: a state-name column comes before each pair of census
# decades, followed by the metrics for each decade from 2020 down to 1910.
# The 1920 decade has no "change" column.
_metrics = ("pop", "reps", "change", "pop_per_rep")
_columns = []
for _year in range(2020, 1900, -10):
    if _year % 20 == 0:
        # The first name column is plain "name"; the repeats carry the decade,
        # e.g. "name2000", "name1980".
        _columns.append("name" if _year == 2020 else f"name{_year}")
    _columns.extend(
        f"{_year}{_metric}"
        for _metric in _metrics
        if not (_year == 1920 and _metric == "change")
    )
hist_df.columns = _columns

In [339]:
# Keep "name" plus every metric column, leaving out the repeated state-name
# columns ("name2000", "name1980", ...). The remaining column order is unchanged.
hist_df = hist_df[
    [col for col in hist_df.columns if col == "name" or not col.startswith("name")]
]

In [340]:
# Reshape wide to long: one row per state and "<year><metric>" column, with the
# original column label in "variable" and the cell content in "value".
hist_df_melt = hist_df.melt(
    id_vars="name",
    value_vars=[col for col in hist_df.columns if col != "name"],
)

In [341]:
# The first four characters of each label are the census year, e.g. "2020pop" -> "2020".
hist_df_melt["year"] = hist_df_melt["variable"].str.slice(stop=4)

In [342]:
# Strip every run of digits so only the metric name is left, e.g. "2020pop" -> "pop".
# The pattern is a raw string: "\d" in a normal literal is an invalid escape
# sequence, which newer Python versions warn about.
hist_df_melt["category"] = hist_df_melt["variable"].str.replace(r"\d+", "", regex=True)

In [343]:
# "variable" has been split into "year" and "category", so it is removed.
hist_df_melt = hist_df_melt.drop(columns="variable")

In [344]:
# Show the first rows of the long table (name, value, year, category).
hist_df_melt.head()

Unnamed: 0,name,value,year,category
0,Alabama,5030053,2020,pop
1,Alaska,736081,2020,pop
2,Arizona,7158923,2020,pop
3,Arkansas,3013756,2020,pop
4,California,39576757,2020,pop


In [345]:
# Independent copy of only the rows whose state name is "California".
hist_df_ca = hist_df_melt.loc[hist_df_melt["name"].eq("California")].copy()

In [355]:
# Pivot back to one row per (name, year) with one column per category
# (change, pop, pop_per_rep, reps).
# aggfunc is the string "sum" rather than the builtin `sum`: pandas warns that
# builtin callables will stop being mapped to its own sum implementation.
# fill_value=0 fills year/category pairs with no source value (1920 has no
# "change" column), so they show 0 instead of NaN.
hist_df_ca_pivot = pd.pivot_table(
    hist_df_ca,
    values="value",
    index=["name", "year"],
    columns="category",
    aggfunc="sum",
    fill_value=0,
).reset_index()

In [359]:
# Show the first 10 census years of the California pivot.
hist_df_ca_pivot.head(10)

category,name,year,change,pop,pop_per_rep,reps
0,California,1910,3,2376561,216051,11
1,California,1920,0,3426031,311457,11
2,California,1930,9,5668241,283412,20
3,California,1940,3,6907387,300321,23
4,California,1950,7,10586223,352874,30
5,California,1960,8,15717204,413611,38
6,California,1970,5,20098863,467415,43
7,California,1980,2,23668562,525968,45
8,California,1990,7,29839250,573832,52
9,California,2000,1,33930798,640204,53


In [362]:
# One bar per census year; bar height is the "change" column.
bars = (
    alt.Chart(hist_df_ca_pivot)
    .mark_bar()
    .encode(y=alt.Y("change:Q"), x=alt.X("year:O"))
)

# Value labels drawn from the same encoding as the bars.
text = bars.mark_text(
    align="left",
    baseline="middle",
    dy=-10,  # vertical offset: moves the label 10px up from its anchor
    dx=-3,  # horizontal offset: moves the label 3px to the left
).encode(text=alt.Text("change:Q"))

(bars + text).properties(height=400)

In [363]:
# Line chart of the "pop" column by census year.
lines = (
    alt.Chart(hist_df_ca_pivot)
    .mark_line()
    .encode(x=alt.X("year:O"), y=alt.Y("pop:Q"))
)

lines.properties(height=400)

---

## Geography

### States map

In [172]:
# Load the state boundaries from the local GeoJSON file.
state_geo = gpd.read_file("raw/states.geojson")
# Lower-case every column label so later cells can use names such as "stusps".
state_geo.columns = state_geo.columns.str.lower()

### Add A.P. states

In [173]:
# Load the A.P. states lookup table from the local CSV file.
ap_states = pd.read_csv("raw/ap_states.csv")

In [174]:
# Inner join: match each geometry row's "stusps" code to the lookup table's
# "usps" code. Rows without a match on both sides are not kept.
states_merge = state_geo.merge(
    ap_states,
    how="inner",
    left_on="stusps",
    right_on="usps",
)

### Clean up

In [175]:
# Remove the columns that are not needed after the merge.
states_merge = states_merge.drop(
    columns=[
        "name_x",
        "stusps",
        "statefp",
        "statens",
        "affgeoid",
        "lsad",
        "aland",
        "awater",
    ]
)

In [176]:
# The surviving name column carries the merge suffix; restore the plain label.
states_merge = states_merge.rename(columns={"name_y": "name"})

In [177]:
# Keep only these five columns, in this order, with the geometry last.
states_merge = states_merge[["geoid", "name", "usps", "ap", "geometry"]]

In [178]:
# Show the first rows of the merged states table.
states_merge.head()

Unnamed: 0,geoid,name,usps,ap,geometry
0,23,Maine,ME,Maine,"MULTIPOLYGON (((-68.92401 43.88541, -68.87478 ..."
1,15,Hawaii,HI,Hawaii,"MULTIPOLYGON (((-156.04965 19.78045, -156.0062..."
2,4,Arizona,AZ,Ariz.,"MULTIPOLYGON (((-114.79968 32.59362, -114.8093..."
3,5,Arkansas,AR,Ark.,"MULTIPOLYGON (((-94.61792 36.49941, -94.36120 ..."
4,10,Delaware,DE,Del.,"MULTIPOLYGON (((-75.77379 39.72220, -75.75323 ..."


---

## Export 

In [179]:
# Write the 2010 table to CSV without the index column.
df_2010.to_csv("processed/apportionment/apportionment_2010.csv", index=False)

In [198]:
# Write the 2020 table to CSV without the index column.
df_2020.to_csv("processed/apportionment/apportionment_2020.csv", index=False)

In [364]:
# Write the combined 2010 + 2020 table to CSV without the index column.
df.to_csv("processed/apportionment/apportionment_2010_2020.csv", index=False)

In [365]:
# Write the California pivot (one row per census year) to CSV without the index column.
hist_df_ca_pivot.to_csv(
    "processed/apportionment/apportionment_in_ca_1910_2020.csv", index=False
)

In [92]:
# Write the merged state geometries as GeoJSON. Unlike the CSV exports, this
# path points three directory levels above the working directory.
states_merge.to_file("../../../data/GIS/states_geo_ap.geojson", driver="GeoJSON")