In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import json


Let's walk through the input folder and load the JSON contents of every bill file.


In [2]:
def load_bills(input_dir):
    """Load every bill JSON document found beneath ``input_dir``.

    A file is loaded when its name ends with ``.json`` and the path of its
    directory, taken relative to ``input_dir``, contains the text ``bill``.

    Parameters
    ----------
    input_dir : str
        Root folder to search. A folder that does not exist yields no files.

    Returns
    -------
    list
        One parsed JSON document per matching file, in sorted traversal order.
    """
    loaded = []
    for dirpath, dirnames, filenames in os.walk(input_dir):
        # Sorting in place fixes the order os.walk descends in, so the result
        # (and therefore bills[0]) does not depend on the filesystem.
        dirnames.sort()
        # Compare against the path *below* input_dir; otherwise a working
        # directory that happens to contain "bill" would match every folder.
        if "bill" not in os.path.relpath(dirpath, input_dir):
            continue
        for filename in sorted(filenames):
            if not filename.endswith(".json"):
                continue
            # os.path.join keeps this portable instead of hard-coding "\\".
            with open(os.path.join(dirpath, filename), "r", encoding="utf-8") as handle:
                loaded.append(json.load(handle))
    return loaded


bills = load_bills(os.path.join(os.getcwd(), "input"))


Let's take a look at what one of our bill files looks like.


In [3]:
# Re-serialise the first loaded bill with 2-space indentation so its nested
# structure is readable, then show it.
first_bill_json = json.dumps(bills[0], indent=2)
print(first_bill_json)


{
  "bill": {
    "bill_id": 1299551,
    "change_hash": "3caacb77278d3e402301e8d93bd50962",
    "session_id": 1722,
    "session": {
      "session_id": 1722,
      "state_id": 6,
      "year_start": 2020,
      "year_end": 2020,
      "prefile": 0,
      "sine_die": 1,
      "prior": 1,
      "special": 0,
      "session_tag": "Regular Session",
      "session_title": "2020 Regular Session",
      "session_name": "2020 Regular Session"
    },
    "url": "https://legiscan.com/CO/bill/HB1001/2020",
    "state_link": "https://leg.colorado.gov/bills/hb20-1001",
    "completed": 1,
    "status": 4,
    "status_date": "2020-07-14",
    "progress": [
      {
        "date": "2020-01-08",
        "event": 1
      },
      {
        "date": "2020-01-08",
        "event": 9
      },
      {
        "date": "2020-02-12",
        "event": 10
      },
      {
        "date": "2020-02-12",
        "event": 9
      },
      {
        "date": "2020-03-02",
        "event": 10
      },
      {
      

OK, that's a lot of information, and most of it isn't needed here. Let's pull out the fields we care about.


In [4]:
# Status labels indexed directly by a bill's numeric "status" code.
# This is the same table the bulk-processing cell uses, so a status code is
# labelled identically in both places and codes above 5 no longer fail here.
statuses = [
    "N\\A",
    "Intro",
    "Engrossed",
    "Enrolled",
    "Passed",
    "Vetoed",
    "Failed",
    "Override",
    "Chaptered",
    "Refer",
    "Report Pass",
    "Report DNP",
    "Draft",
]

first_bill = bills[0]["bill"]
status = statuses[first_bill["status"]]
bill_name = first_bill["title"]
bill_number = first_bill["bill_number"]
bill_desc = first_bill["description"]

# One row per sponsor, indexed by ID. The ID is converted to a string
# explicitly: the previous np.vstack of mixed int/str arrays did this as a
# side effect, and code reading the frame may rely on string keys.
spon_data = pd.DataFrame(
    [
        {
            "ID": str(person["people_id"]),
            "Party": person["party"],
            "Name": person["first_name"] + " " + person["last_name"],
            "District": person["district"],
        }
        for person in first_bill["sponsors"]
    ],
    columns=["ID", "Party", "Name", "District"],
).set_index("ID")

# One row per published text version, indexed by its date.
text_data = pd.DataFrame(
    [
        {
            "Date": text["date"],
            "Type": text["type"],
            "Legiscan Link": text["url"],
            "State Link": text["state_link"],
        }
        for text in first_bill["texts"]
    ],
    columns=["Date", "Type", "Legiscan Link", "State Link"],
).set_index("Date")

roll_call_ids = [vote["roll_call_id"] for vote in first_bill["votes"]]

# Round-trip through json so the frames print as indented JSON keyed by index.
pd_json = spon_data.to_json(orient="index")
txt_json = text_data.to_json(orient="index")
print(f"{bill_number}: {bill_name} ({status})")
print(bill_desc)
print("Sponsors:\n" + json.dumps(json.loads(pd_json), indent=2))
print("Texts:\n" + json.dumps(json.loads(txt_json), indent=2))
print(f"Vote IDs: {roll_call_ids}")


HB1001: Nicotine Product Regulation (Passed)
Concerning nicotine product regulations, and, in connection therewith, making an appropriation.
Sponsors:
{
  "20136": {
    "Party": "D",
    "Name": "Kyle Mullica",
    "District": "HD-034"
  },
  "20135": {
    "Party": "R",
    "Name": "Colin Larson",
    "District": "HD-022"
  },
  "19048": {
    "Party": "D",
    "Name": "Jeff Bridges",
    "District": "SD-026"
  },
  "3323": {
    "Party": "R",
    "Name": "Kevin Priola",
    "District": "SD-025"
  }
}
Texts:
{
  "2020-01-08": {
    "Type": "Introduced",
    "Legiscan Link": "https://legiscan.com/CO/text/HB1001/id/2088069",
    "State Link": "https://leg.colorado.gov/sites/default/files/documents/2020A/bills/2020a_1001_01.pdf"
  },
  "2020-03-09": {
    "Type": "Engrossed",
    "Legiscan Link": "https://legiscan.com/CO/text/HB1001/id/2166371",
    "State Link": "https://leg.colorado.gov/sites/default/files/documents/2020A/bills/2020a_1001_eng.pdf"
  },
  "2020-03-10": {
    "Type": "E

Much better, but accessing all that every time is gross, so let's make a class to save these bills as objects.


In [5]:
class Bill:
    """Plain container holding the pieces of one bill that this notebook keeps.

    Every constructor argument is stored unchanged on the instance:

    * ``name``, ``number``, ``status`` -- stored under the same names.
    * ``desc`` -- stored as ``description``.
    * ``sponsors``, ``texts`` -- stored as given (this notebook passes
      DataFrames indexed by sponsor ID and by text date respectively).
    * ``votes`` -- stored as given (this notebook passes a list of roll-call IDs).
    """

    def __init__(self, name, number, status, desc, sponsors, texts, votes) -> None:
        # Identifying fields.
        self.name, self.number = name, number
        # Current status label and free-text description.
        self.status, self.description = status, desc
        # Related records, kept exactly as supplied by the caller.
        self.sponsors, self.texts, self.votes = sponsors, texts, votes


Cool, now let's save that bill into one of these objects and check to make sure everything is working correctly.


In [6]:
# Bundle the values extracted from the first bill into a Bill object, then
# print a few of its attributes as a quick sanity check.
example_bill = Bill(
    name=bill_name,
    number=bill_number,
    status=status,
    desc=bill_desc,
    sponsors=spon_data,
    texts=text_data,
    votes=roll_call_ids,
)
print(
    *(
        getattr(example_bill, attribute)
        for attribute in ("name", "number", "status", "votes")
    )
)


Nicotine Product Regulation HB1001 Passed [921854, 925968, 925969, 925970, 925971, 925972, 939781, 944787, 944943, 946767, 947762, 960290, 962073, 963123, 963235, 963236, 965916, 965917]


Much cleaner! Now let's do it to every single bill we have.


In [7]:
# Status labels indexed directly by a bill's numeric "status" code.
statuses = [
    "N\\A",
    "Intro",
    "Engrossed",
    "Enrolled",
    "Passed",
    "Vetoed",
    "Failed",
    "Override",
    "Chaptered",
    "Refer",
    "Report Pass",
    "Report DNP",
    "Draft",
]


def _sponsor_frame(sponsors):
    """Return a DataFrame of sponsors (Party, Name, District) indexed by ID."""
    rows = [
        {
            # Converted explicitly: the previous np.vstack of mixed int/str
            # arrays turned IDs into strings as a side effect.
            "ID": str(person["people_id"]),
            "Party": person["party"],
            "Name": person["first_name"] + " " + person["last_name"],
            "District": person["district"],
        }
        for person in sponsors
    ]
    # Passing `columns` keeps the frame well-formed when there are no sponsors.
    return pd.DataFrame(rows, columns=["ID", "Party", "Name", "District"]).set_index("ID")


def _text_frame(texts):
    """Return a DataFrame of text versions (Type and both links) indexed by Date."""
    rows = [
        {
            "Date": text["date"],
            "Type": text["type"],
            "Legiscan Link": text["url"],
            "State Link": text["state_link"],
        }
        for text in texts
    ]
    # An empty `texts` list needs no special case: the result is an empty
    # frame that still carries the expected columns.
    return pd.DataFrame(
        rows, columns=["Date", "Type", "Legiscan Link", "State Link"]
    ).set_index("Date")


def bill_from_json(raw):
    """Build a ``Bill`` from one loaded bill document.

    Parameters
    ----------
    raw : dict
        A parsed bill file; the fields are read from ``raw["bill"]``.

    Returns
    -------
    Bill
        Holds the title, bill number, status label, description, sponsor and
        text frames, and the list of roll-call IDs.

    Raises
    ------
    KeyError
        If an expected field is missing from the document.
    IndexError
        If the status code is beyond the end of ``statuses``.
    """
    record = raw["bill"]
    return Bill(
        record["title"],
        record["bill_number"],
        statuses[record["status"]],
        record["description"],
        _sponsor_frame(record["sponsors"]),
        _text_frame(record["texts"]),
        [vote["roll_call_id"] for vote in record["votes"]],
    )


# Building inside a function keeps the per-bill temporaries local, so this
# cell no longer overwrites the variables the single-bill example relies on.
bill_objects = [bill_from_json(raw) for raw in bills]


In [8]:
# Number of Bill objects built above (one is appended per loaded bill).
len(bill_objects)

710