In [1]:
import requests
import pandas as pd
import time
from datetime import datetime
import os
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# LegiScan API: key is read from the environment (None when unset).
LEGISCAN_API_KEY = os.environ.get("LEGISCAN_API_KEY")
LEGISCAN_BASE_URL = "https://api.legiscan.com/"

# OpenStates API: key is read from the environment (None when unset).
OPENSTATES_API_KEY = os.environ.get("OPENSTATES_API_KEY")
OPENSTATES_BASE_URL = "https://v3.openstates.org"

# Lower-case search terms used to query both APIs and to filter results.
KEYWORDS = [
    "energy",
    "grid",
    "resources",
    "electric",
    "power",
    "utility",
    "renewable",
    "transmission",
    "coal",
    "solar",
    "wind",
    "battery",
    "storage",
    "data center",
    "data centers",
    "quantum computing",
]

# Two-letter postal codes of the MISO states to query.
MISO_STATES = [
    "AR", "IL", "IN", "IA", "KY",
    "LA", "MI", "MN", "MS", "MO",
    "MT", "ND", "SD", "TX", "WI",
]

# OCD jurisdiction ids, one per entry of MISO_STATES and in the same order.
MISO_OCD_IDS = [
    f"ocd-jurisdiction/country:us/state:{abbr.lower()}/government"
    for abbr in MISO_STATES
]

# Request settings
SLEEP_LEGISCAN = 1              # seconds to pause between LegiScan detail requests
SLEEP_OPENSTATES = 6            # seconds to pause between OpenStates page requests
PER_PAGE = 20                   # "per_page" value sent to OpenStates
MAX_PAGES = 5                   # maximum OpenStates pages fetched per jurisdiction
CREATED_SINCE = "2023-01-01"    # "created_since" value sent to OpenStates


# Parses a string or numeric (epoch seconds) timestamp into a pandas datetime.
def parse_date(val):
    """Parse a date string or epoch-seconds number into a pandas Timestamp.

    Args:
        val: A date string, an int/float number of seconds since the Unix
            epoch, or None.

    Returns:
        A ``pandas.Timestamp``, or None when ``val`` is None, equal to 0,
        of an unsupported type, or cannot be parsed.
    """
    if val is None or val == 0:
        return None
    if isinstance(val, (int, float)):
        parsed = pd.to_datetime(val, unit="s", errors="coerce")
    elif isinstance(val, str):
        parsed = pd.to_datetime(val, errors="coerce")
    else:
        return None
    # errors="coerce" yields NaT for unparseable input. NaT is truthy and is
    # not None, so returning it would defeat callers' `a or b` / `is None`
    # fallbacks; report "no date" uniformly as None instead.
    if pd.isna(parsed):
        return None
    return parsed


# gathering info from legiscan
def get_legiscan_bill_details(bill_id, timeout=30):
    """Fetch one bill from LegiScan (``getBill``) and extract key fields.

    Args:
        bill_id: LegiScan bill id, sent as the ``id`` request parameter.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A dict with keys ``"date"``, ``"status"`` and ``"session"``, or None
        when the request fails or the API does not report status ``"OK"``.
        ``"date"`` is the first usable value among ``introduced_date``,
        ``last_action_date`` and the ``date`` of the ``progress`` /
        ``history`` records, or None when none of them parses.
    """
    params = {"key": LEGISCAN_API_KEY, "op": "getBill", "id": bill_id}
    try:
        resp = requests.get(LEGISCAN_BASE_URL, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
        if data.get("status") != "OK":
            print(f"Error fetching bill {bill_id}: {data.get('message')}")
            return None

        bill = data.get("bill") or {}

        # pd.notna() is False for both None and NaT, so an unparseable date
        # (NaT) no longer short-circuits the fallback chain below.
        date = None
        for field in ("introduced_date", "last_action_date"):
            candidate = parse_date(bill.get(field))
            if pd.notna(candidate):
                date = candidate
                break

        if date is None:
            # Fall back to the first parseable date in the bill's records.
            for source in ("progress", "history"):
                for record in bill.get(source) or []:
                    candidate = parse_date(record.get("date"))
                    if pd.notna(candidate):
                        date = candidate
                        break
                if date is not None:
                    break

        # `or {}` tolerates an explicit null session in the payload.
        session = bill.get("session") or {}
        return {
            "date": date,
            "status": bill.get("status"),
            "session": session.get("session_name") or bill.get("session_id"),
        }
    except Exception as e:
        # Deliberate best-effort boundary: one bad bill must not abort the
        # whole collection run, so report the problem and skip the bill.
        print(f"Exception fetching bill {bill_id}: {e}")
        return None


def search_legiscan_state_bills(state_code, timeout=30):
    """Search LegiScan (``getSearch``) for keyword-matching bills in a state.

    Every search hit is enriched with a follow-up call to
    ``get_legiscan_bill_details``; hits whose details cannot be fetched are
    skipped.

    Args:
        state_code: State abbreviation sent as the ``state`` parameter.
        timeout: Seconds to wait for the search response before giving up.

    Returns:
        A list of dicts with keys ``state``, ``bill_id``, ``bill_number``,
        ``title``, ``date``, ``status``, ``session`` and ``link``. Empty when
        the search fails; partial when an error interrupts processing.
    """
    all_bills = []
    # Quote multi-word keywords so they are searched as phrases.
    query = " OR ".join([f'"{kw}"' if " " in kw else kw for kw in KEYWORDS])
    params = {"key": LEGISCAN_API_KEY, "op": "getSearch", "state": state_code, "query": query}
    try:
        resp = requests.get(LEGISCAN_BASE_URL, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
        if data.get("status") != "OK":
            print(f"Search failed for {state_code}: {data.get('message')}")
            return []

        for item in (data.get("searchresult") or {}).values():
            if not isinstance(item, dict):
                continue
            bill_id = item.get("bill_id")
            if not bill_id:
                # Entries without a bill_id are not bills (presumably the
                # result-set summary entry; verify against the API manual).
                continue
            details = get_legiscan_bill_details(bill_id)
            # Throttle after every detail request, including failed ones, so
            # a run of errors cannot turn into an unthrottled burst.
            time.sleep(SLEEP_LEGISCAN)
            if not details:
                continue
            all_bills.append({
                "state": state_code,
                "bill_id": bill_id,
                "bill_number": item.get("bill_number"),
                "title": item.get("title"),
                "date": details["date"],
                "status": details["status"],
                "session": details["session"],
                # NOTE(review): prefers the URL carried by the search result
                # (assumes the API returns a "url" field; confirm) and falls
                # back to the previously constructed link when it is absent.
                "link": item.get("url") or f"https://legiscan.com/{state_code}/bill/{bill_id}",
            })
    except Exception as e:
        # Best-effort: report the failure and return whatever was collected.
        print(f"Exception for {state_code}: {e}")
    return all_bills


# gathering information from openstates
def contains_keyword(text: str, keywords=None) -> bool:
    """Return True if ``text`` contains any keyword, ignoring case.

    Matching is plain substring containment, so a keyword also matches
    inside longer words (e.g. ``"wind"`` matches ``"window"``).

    Args:
        text: Text to inspect. Non-string values (e.g. None) never match.
        keywords: Optional iterable of keywords to look for. Defaults to the
            module-level ``KEYWORDS`` list.

    Returns:
        True when at least one keyword occurs in ``text``, otherwise False.
    """
    if not isinstance(text, str):
        return False
    if keywords is None:
        keywords = KEYWORDS
    text_lower = text.lower()
    # Lower-case the keywords too so mixed-case entries still match.
    return any(k.lower() in text_lower for k in keywords)


def fetch_openstates_bills(jurisdiction_id, max_retries=5, timeout=30):
    """Fetch up to ``MAX_PAGES`` pages of keyword-matching OpenStates bills.

    Args:
        jurisdiction_id: Value sent as the ``jurisdiction`` query parameter.
        max_retries: How many times a rate-limited (HTTP 429) page request
            is retried before giving up on the jurisdiction. Previously the
            retry loop was unbounded and could spin forever.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of the raw ``results`` entries collected from the pages that
        were fetched successfully (possibly empty).
    """
    headers = {"X-API-KEY": OPENSTATES_API_KEY}
    url = f"{OPENSTATES_BASE_URL}/bills"
    query = " OR ".join(KEYWORDS)
    all_bills = []
    for page in range(1, MAX_PAGES + 1):
        params = {
            "jurisdiction": jurisdiction_id,
            "q": query,
            "sort": "latest_action_desc",
            "per_page": PER_PAGE,
            "page": page,
            "created_since": CREATED_SINCE
        }
        for _attempt in range(max_retries + 1):
            try:
                resp = requests.get(url, headers=headers, params=params, timeout=timeout)
            except requests.RequestException as e:
                # Network failure or timeout: skip this page instead of
                # aborting the whole collection run.
                print(f"Exception for {jurisdiction_id}: {e}")
                break
            if resp.status_code == 429:
                print("Rate limit hit. Sleeping 7 seconds...")
                time.sleep(7)
                continue
            if resp.status_code == 404:
                return all_bills
            if resp.status_code != 200:
                print(f"Failed ({resp.status_code}) for {jurisdiction_id}: {resp.text[:100]}")
                break
            try:
                data = resp.json().get("results", [])
            except ValueError as e:
                # A 200 response whose body is not valid JSON.
                print(f"Invalid JSON for {jurisdiction_id}: {e}")
                break
            if not data:
                # An empty page means there is nothing further to fetch.
                return all_bills
            all_bills.extend(data)
            break
        else:
            # The loop ended without `break`: every attempt was rate limited.
            print(f"Giving up on {jurisdiction_id} after {max_retries + 1} rate-limited attempts")
            return all_bills
        # Pause between page requests (also spaces out the first request of
        # the next jurisdiction).
        time.sleep(SLEEP_OPENSTATES)
    return all_bills


# collection of legiscan and openstates bills
def collect_legiscan_bills():
    """Gather LegiScan bills for every state in ``MISO_STATES``.

    Returns:
        A DataFrame with one row per bill, ordered by ``date`` from newest
        to oldest with a fresh 0..n-1 index. An empty DataFrame is returned
        unchanged when nothing was found.
    """
    rows = []
    for state_code in MISO_STATES:
        print(f"Fetching LegiScan bills for {state_code}...")
        rows += search_legiscan_state_bills(state_code)

    frame = pd.DataFrame(rows)
    if frame.empty:
        return frame
    newest_first = frame.sort_values(by="date", ascending=False)
    return newest_first.reset_index(drop=True)
def collect_openstates_bills():
    """Gather OpenStates bills for every jurisdiction in ``MISO_OCD_IDS``.

    Bills are kept only when their title or summary contains one of the
    configured keywords (see ``contains_keyword``).

    Returns:
        A DataFrame with columns ``jurisdiction_id``, ``bill_id``, ``title``,
        ``summary``, ``latest_action_date``, ``latest_action_description``
        and ``link``, ordered by ``latest_action_date`` from newest to oldest
        with a fresh 0..n-1 index. Empty when nothing matched.
    """
    matched = []
    for ocd_id in MISO_OCD_IDS:
        print(f"Fetching OpenStates bills for {ocd_id}...")
        for b in fetch_openstates_bills(ocd_id):
            title = b.get("title", "")
            # NOTE(review): the request sends no "include" parameter, so
            # "summary" may never be populated here; confirm against the
            # OpenStates bill schema.
            summary = b.get("summary", "")
            if not (contains_keyword(title) or contains_keyword(summary)):
                continue
            matched.append({
                "jurisdiction_id": ocd_id,
                "bill_id": b.get("identifier"),
                "title": title,
                "summary": summary,
                "latest_action_date": b.get("latest_action_date"),
                "latest_action_description": b.get("latest_action_description"),
                "link": b.get("openstates_url"),
            })
    df = pd.DataFrame(matched)
    if not df.empty:
        # Unparseable dates become NaT rather than raising.
        df["latest_action_date"] = pd.to_datetime(df["latest_action_date"], errors="coerce")
        # Reset the index after sorting, as collect_legiscan_bills does, so
        # row labels follow the sorted order.
        df = df.sort_values(by="latest_action_date", ascending=False).reset_index(drop=True)
    return df


# Date stamp (YYYYMMDD) embedded in the output file names.
timestamp = datetime.now().strftime("%Y%m%d")

# Create the output directory before collecting: the collection runs are
# slow, and a missing folder would otherwise only surface at save time and
# discard the results.
output_dir = "../data/bills"
os.makedirs(output_dir, exist_ok=True)

print("collecting LegiScan energy-related bills...")
legiscan_df = collect_legiscan_bills()
legiscan_path = f"{output_dir}/legiscan_energy_{timestamp}.csv"
legiscan_df.to_csv(legiscan_path, index=False)
# Report the path actually written (the old message printed "./data/...").
print(f"saved {len(legiscan_df)} bills to {legiscan_path}")

print("collecting OpenStates energy-related bills...")
openstates_df = collect_openstates_bills()
openstates_path = f"{output_dir}/openstates_energy_{timestamp}.csv"
openstates_df.to_csv(openstates_path, index=False)
print(f"saved {len(openstates_df)} bills to {openstates_path}")


collecting LegiScan energy-related bills...
Fetching LegiScan bills for AR...
Fetching LegiScan bills for IL...
Fetching LegiScan bills for IN...
Fetching LegiScan bills for IA...
Fetching LegiScan bills for KY...
Fetching LegiScan bills for LA...
Fetching LegiScan bills for MI...
Fetching LegiScan bills for MN...
Fetching LegiScan bills for MS...
Fetching LegiScan bills for MO...
Fetching LegiScan bills for MT...
Fetching LegiScan bills for ND...
Fetching LegiScan bills for SD...
Fetching LegiScan bills for TX...
Fetching LegiScan bills for WI...
saved 750 bills to ./data/bills/legiscan_energy_20251008.csv
collecting OpenStates energy-related bills...
Fetching OpenStates bills for ocd-jurisdiction/country:us/state:ar/government...
Fetching OpenStates bills for ocd-jurisdiction/country:us/state:il/government...
Fetching OpenStates bills for ocd-jurisdiction/country:us/state:in/government...
Fetching OpenStates bills for ocd-jurisdiction/country:us/state:ia/government...
Fetching OpenSt