In [1]:
import os
import bln
import pandas as pd

In [2]:
bln.pandas.register(pd)

In [10]:
def get_payees(file_name: str) -> None:
    """Download a table from biglocalnews.org and save its distinct business payees.

    The table is read with every column as a string. Rows that have a value in
    ``payee_firstname`` are treated as people and discarded. The remaining
    ``payee_lastname`` values are uppercased, de-duplicated, sorted and written
    to a one-column CSV (column ``payee``) at the local path ``file_name``.

    Args:
        file_name: Name of the file to fetch from the project. The same value
            is used as the path of the CSV that is written out.

    Returns:
        None. Progress is printed and the result is written to disk.
    """
    # Get the table from biglocalnews.org via its API.
    # The API key is read from the BLN_API_KEY environment variable.
    print(f"Downloading {file_name}")
    df = pd.read_bln(
        "UHJvamVjdDo2MDVjNzdiYS0wODI4LTRlOTEtOGM3OC03ZjA4NGI2ZDEwZWE=",
        file_name,
        os.getenv("BLN_API_KEY"),
        dtype=str,
    )
    print(f"- {len(df)} records")

    # Cut out any records that have a first name. They will be people and not businesses. We don't want them.
    nopeople_df = df[pd.isnull(df.payee_firstname)]

    # Get a distinct list of payees. Records with no last name either are
    # dropped first; otherwise the missing value would be counted as a payee
    # and written out as a blank row.
    distinct_payees = nopeople_df.payee_lastname.dropna().str.upper().unique()

    # Convert that back into a DataFrame.
    payee_df = pd.DataFrame(distinct_payees, columns=["payee"]).sort_values("payee")
    print(f"- {len(payee_df)} distinct payees")

    # Write it out
    payee_df.to_csv(file_name, index=False)

In [11]:
# Form 460 Schedule E tables to pull payees from.
file_list = ["Form460ScheduleEItem.csv", "Form460ScheduleESubItem.csv"]

# Process each table in turn, in the order listed above.
for file_name in file_list:
    get_payees(file_name)

Downloading Form460ScheduleEItem.csv
- 4390182 records
- 216842 distinct payees
Downloading Form460ScheduleESubItem.csv
- 100491 records
- 16448 distinct payees
