# US Senate representation, in context

#### Load Python tools and Jupyter config

In [2]:
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd

In [3]:
# Enable the jupyter_black extension, then raise pandas' display limits so
# wide frames and long cell values are shown without truncation.
jupyter_black.load()
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [4]:
# Current local date as a compact YYYYMMDD string.
today = f"{pd.Timestamp('today'):%Y%m%d}"

---

## Fetch

#### Get list of US Senate members

In [20]:
# US Senate member directory page on senate.gov.
senate_url = "https://www.senate.gov/senators/"

In [None]:
# Read the first HTML table on the Senate directory page and keep only the
# name / state / party columns, renamed to snake_case.
# The URL must come from `senate_url` (defined in the cell above); the
# previously referenced `url` is never defined, so this cell raised a
# NameError under Restart & Run All.
senators_df = (
    pd.read_html(senate_url)[0]
    .rename(
        columns={
            "Senator's Name": "name",
            "State": "state",
            "Party": "party",
        }
    )
    .drop(columns=["Office Room*", "Phone", "Class"])
)

In [19]:
# Preview the first rows of the cleaned senators table.
senators_df.head()

Unnamed: 0,name,state,party
0,"Baldwin, Tammy",Wisconsin,Democratic
1,"Barrasso, John",Wyoming,Republican
2,"Bennet, Michael F.",Colorado,Democratic
3,"Blackburn, Marsha",Tennessee,Republican
4,"Blumenthal, Richard",Connecticut,Democratic


---

#### Get population by state from Census API

In [42]:
# Census population-estimates endpoint (2021 vintage): requests density,
# population and name for every `state` geography.
states_url = (
    "https://api.census.gov/data/2021/pep/population"
    "?get=DENSITY_2021,POP_2021,NAME"
    "&for=state:*"
)

In [43]:
# Fetch the state-level population payload from the Census API.
# The timeout keeps Run All from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON decode failure on the next line.
states_response = requests.get(states_url, timeout=30)
states_response.raise_for_status()
states_data = states_response.json()

#### Convert the list of lists into a DataFrame

In [44]:
# The parsed payload is a list of lists: the first row holds the column
# names and the remaining rows hold the data.
# Read it from `states_data` (the parsed response from the previous cell);
# the previously referenced `data` is never defined, so this cell raised a
# NameError under Restart & Run All.
header, *rows = states_data

# Keep the four columns of interest and give them analysis-friendly names.
# The API's `state` field is the FIPS code; `NAME` is the state name.
states_df = pd.DataFrame(rows, columns=header)[
    ["state", "NAME", "DENSITY_2021", "POP_2021"]
].rename(
    columns={
        "state": "fips",
        "NAME": "state",
        "DENSITY_2021": "density",
        "POP_2021": "population",
    }
)

#### Convert numeric columns to appropriate data types

In [45]:
# Cast the numeric columns to numbers; values that cannot be parsed
# become NaN (errors="coerce").
for numeric_col in ("density", "population"):
    states_df[numeric_col] = pd.to_numeric(states_df[numeric_col], errors="coerce")

In [None]:
# Preview the first rows of the state population table.
states_df.head()

In [88]:
# Sum of the population column over every row returned by the Census API.
# NOTE(review): the `for=state:*` query presumably also returns rows that
# have no senators (e.g. District of Columbia, Puerto Rico); if so they are
# part of this total and of every share computed from it -- confirm intent.
national_pop = states_df["population"].sum()
national_pop

335157329

---

## Merge

#### Add population figures to US Senate directory

In [None]:
# Attach each senator's state-level figures by joining on the state name
# (default inner join: rows without a matching state name are dropped).
df = senators_df.merge(states_df, on="state")

In [59]:
# Credit each senator with half of the state's population, rounded to a
# whole number.
df["population_portion"] = df["population"].div(2).round()

In [60]:
# Preview the merged senators + population table.
df.head()

Unnamed: 0,name,state,party,fips,density,population,population_portion
0,"Baldwin, Tammy",Wisconsin,Democratic,55,108.846155,5895908,2947954.0
1,"Barrasso, John",Wyoming,Republican,56,5.961589,578803,289402.0
2,"Bennet, Michael F.",Colorado,Democratic,8,56.081421,5812069,2906034.0
3,"Blackburn, Marsha",Tennessee,Republican,47,169.167902,6975218,3487609.0
4,"Blumenthal, Richard",Connecticut,Democratic,9,744.556991,3605597,1802798.0


---

## Analyze

#### Population by party

In [None]:
# One row per party: number of senators and the total population credited
# to them (sum of each senator's population_portion).
pop_party = (
    df.groupby("party")
    .agg(
        count=("fips", "count"),
        pop_represented=("population_portion", "sum"),
    )
    .reset_index()
)

In [85]:
# Average population represented by each senator of the party, rounded to
# a whole number.
pop_party["pop_per_senator"] = (
    pop_party["pop_represented"].div(pop_party["count"]).round()
)

In [None]:
# Each party's represented population as a whole-number percentage of
# national_pop.
# Scale to percent *before* rounding: rounding the fraction to two decimals
# and then multiplying by 100 reintroduces binary float error
# (e.g. 0.56 * 100 == 56.00000000000001), which leaks into exports.
pop_party["pop_nation_share"] = (
    pop_party["pop_represented"] / national_pop * 100
).round()

In [None]:
# Display the per-party summary table.
pop_party

Unnamed: 0,party,count,pop_represented,pop_per_senator,pop_nation_share
0,Democratic,47,187269255.0,3984452.0,56.0
1,Independent,4,5538547.0,1384637.0,2.0
2,Republican,49,138415894.0,2824814.0,41.0


---

## Exports

#### JSON

In [96]:
# Write the merged senator-level table as a pretty-printed JSON array of
# records (one object per senator).
# The path is a plain string: the previous f-string had no placeholders.
# Assumes the data/processed/ directory already exists relative to the
# notebook's working directory.
df.to_json(
    "data/processed/senate_members_state_population.json",
    indent=4,
    orient="records",
    lines=False,
)