# 2022 Appropriations earmarks

## Python tools and notebook config

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import pytz

In [3]:
# Make the altair_stiles theme available under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme switched on here is "grid", not the "stiles" theme
# registered on the previous line — confirm which one is intended.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Raise pandas' display limits: up to 1000 columns, 5000 rows, and no
# truncation of cell contents.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 5000)
pd.set_option("display.max_colwidth", None)

---

## Party labels

In [5]:
# Member roster; the cells below filter it by chamber and party to build
# last-name lookup lists.
MEMBERS_CSV = "output/members_of_congress_117.csv"
members = pd.read_csv(MEMBERS_CSV)

In [6]:
# Last names of House members whose party is "R".
is_house_gop = members["chamber"].eq("house") & members["party"].eq("R")
house_gop = members.loc[is_house_gop, "last_name"].to_list()

In [7]:
# Last names of House members whose party is "D".
is_house_dem = members["chamber"].eq("house") & members["party"].eq("D")
house_dems = members.loc[is_house_dem, "last_name"].to_list()

In [8]:
# Last names of Senate members whose party is "R".
is_senate_gop = members["chamber"].eq("senate") & members["party"].eq("R")
senate_gop = members.loc[is_senate_gop, "last_name"].to_list()

In [9]:
# Last names of Senate members whose party is "D".
is_senate_dem = members["chamber"].eq("senate") & members["party"].eq("D")
senate_dems = members.loc[is_senate_dem, "last_name"].to_list()

---

## Read data

In [10]:
# Raw earmarks table; `amount` is parsed as an integer column.
EARMARKS_CSV = "data/raw/earmarks_raw.csv"
src = pd.read_csv(EARMARKS_CSV, dtype={"amount": int})

In [11]:
# Replace missing values with empty strings and work on a separate copy, so
# `src` keeps the data exactly as it was read.
df = src.fillna(value="").copy()

---

## Assign party labels

In [12]:
# Trim leading/trailing whitespace from every object-dtype column of `df`.
df_obj = df.select_dtypes(include=["object"])
stripped = df_obj.apply(lambda column: column.str.strip())
df[stripped.columns] = stripped

In [13]:
def asign_party(dataframe):
    """Return the party label for a single earmark row.

    Despite its name, ``dataframe`` is one row (anything indexable by column
    name) that provides ``house_requestor`` and ``senate_requestor``.

    The rules are tried in order and the first match wins, so the House
    requestor takes precedence over the Senate requestor:

    * House requestor in ``house_dems`` / ``house_gop``  -> "D" / "R"
    * Senate requestor in ``senate_dems`` / ``senate_gop`` -> "D" / "R"
    * Senate requestor is "King" or "Sanders" -> "I"
    * anything else -> "Unknown"

    Matching is by last name only, against the module-level roster lists.
    """
    # (column to read, names to match against, label to return), in priority order.
    rules = (
        ("house_requestor", house_dems, "D"),
        ("house_requestor", house_gop, "R"),
        ("senate_requestor", senate_dems, "D"),
        ("senate_requestor", senate_gop, "R"),
        ("senate_requestor", ("King", "Sanders"), "I"),
    )
    for column, roster, label in rules:
        # The column is only read when its rule is reached.
        if dataframe[column] in roster:
            return label
    return "Unknown"

In [14]:
# Run asign_party over every row and store the result as the `party` column.
party_labels = df.apply(asign_party, axis="columns")
df["party"] = party_labels

In [15]:
# Row counts per assigned party label, including "Unknown".
df["party"].value_counts()

D          3611
R          1125
Unknown     169
I            64
Name: party, dtype: int64

In [16]:
# Keep only the rows that received a real party label.
has_party = df["party"].ne("Unknown")
known_party = df.loc[has_party]

In [17]:
# NOTE(review): presumably the "Unknown" rows with party filled in by hand —
# the notebook does not show how this file was produced; confirm its origin.
UNKNOWN_PARTY_CSV = "data/raw/members_unknown_party.csv"
unknown_party = pd.read_csv(UNKNOWN_PARTY_CSV, encoding="Latin-1")

In [18]:
# Stack the rows labelled above with the rows loaded from the separate CSV.
# ignore_index=True gives the combined frame a fresh 0..n-1 index: the two
# inputs carry independent row labels (known_party keeps df's labels,
# unknown_party has its own from read_csv), so plain concatenation can leave
# duplicate labels that break later label-based selection or alignment.
clean_df = pd.concat([known_party, unknown_party], ignore_index=True)

In [19]:
# Row counts per party label in the combined frame.
clean_df["party"].value_counts()

D    3741
R    1164
I      64
Name: party, dtype: int64

In [20]:
# Write the combined frame to disk without the index column.
PROCESSED_CSV = "data/processed/house_senate_earmarks_2022.csv"
clean_df.to_csv(PROCESSED_CSV, index=False)