# CaLPA Scratch Notebook

This is a scratch notebook used to test the code and functionality of the AI California Legislative Policy Analysis (CaLPA) system. It is not intended for production use and may contain incomplete or experimental code. Its purpose is to facilitate the development and testing of the CaLPA system, including its data processing, analysis, and visualization components. The notebook may include code snippets, comments, and notes related to the development process. Please refer to the official CaLPA documentation and user guides for more information on the system's usage and features.

In [None]:
# %reset

## Initialization

In [14]:
# Import required libraries
import os
import time
from datetime import date
from datetime import datetime
import json
import mimetypes
import glob
import base64
import zipfile
import io
import dotenv
import requests
import pandas as pd
import feedparser

In [15]:
# Load the Calpa module located in the scripts/python/calpa directory
from calpa import Calpa, LegiScan

# Resolve the current working directory once; it locates both the .env file and the project tree
projectRoot = os.getcwd()

# Read environment variables from the .env file in the working directory
dotenv.load_dotenv(os.path.join(projectRoot, '.env'))

# Create the Calpa and LegiScan helper objects used throughout the notebook
calpa = Calpa()
legiscan = LegiScan()

# Build the project metadata for the "AI" project (arguments passed exactly as before)
prjMetadata = calpa.projectMetadata("AI", "0")

# Build the dictionary of project directories, anchored at the working directory
prjDirs = calpa.projectDirectories(projectRoot)

Project Global Settings:
- Name: California Legislative Policy Analysis
- Title: AI Legislative Policy Analysis
- Version: 1.0
- Author: Dr. Kostas Alexandridis, GISP
Data Dates
- Start Date: 2010-12-02
- End Date: 2025-04-19
- Periods: 2009-2010, 2011-2012, 2013-2014, 2015-2016, 2017-2018, 2019-2020, 2021-2022, 2023-2024, 2025-2026
Directory Global Settings:

General:
- Project: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA
- Admin: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\admin
- Metadata: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\metadata
- Analysis: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\analysis
Scripts:
- Python Calpa Module: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\calpa
- Markdown Scripts: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\markdown
- RIS Scripts: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\ris
Data:
- Main Data: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\data
- Documents: c:\Users\ktale\OneDrive\Documents\GitHub\CaLPA\data\docs
- Le

## Session List

In [None]:
# Get the list of sessions from LegiScan
sessionList = legiscan.getSessionList()

# Convert the sessionList to a pandas DataFrame and preview its first rows
# NOTE(review): later cells iterate sessionList.items() and read value["session_id"], so this
# appears to be a dict of dicts; pd.DataFrame() then yields one COLUMN per session. Confirm whether
# pd.DataFrame.from_dict(sessionList, orient="index") (one row per session) was intended.
sessionDf = pd.DataFrame(sessionList)
sessionDf.head()

In [None]:
# Obtain the stored sessions list from JSON dictionary on disk (data/lookup directory).
# It is the baseline that the freshly downloaded sessionList is compared against.
sessionListStored = legiscan.getStoredSessions()

In [None]:
# Compare the sessionList and sessionListStored dictionaries on their "session_hash" values
unmatchedSessions = legiscan.matchHash(sessionList, sessionListStored, "session_hash", silent=True)

# If matchHash returned None, print "All sessions match" and delete the unmatchedSessions variable
# NOTE(review): only None counts as "no differences" here; an empty container would not trigger
# the message — confirm what matchHash returns when nothing differs.
if unmatchedSessions is None:
    print("All sessions match")
    del unmatchedSessions

In [None]:
# Write sessionList as indented UTF-8 JSON to <pathDataLegis>/json/sessionList.json
sessionListJson = os.path.join(prjDirs["pathDataLegis"], "json", "sessionList.json")
with open(sessionListJson, mode="w", encoding="utf-8") as jsonFile:
    json.dump(sessionList, jsonFile, ensure_ascii=False, indent=4)

## Session People

In [None]:
# Fetch the people of every session from LegiScan, keyed the same way as sessionList.
# A plain loop is used so that already-fetched sessions are kept if a later request fails.
sessionPeople = {}
for sessionKey, sessionInfo in sessionList.items():
    sessionPeople[sessionKey] = legiscan.getSessionPeople(sessionInfo["session_id"])

In [None]:
# Obtain the stored session people list from JSON dictionary on disk (data/lookup directory).
# It is the baseline that the freshly downloaded sessionPeople is compared against.
sessionPeopleStored = legiscan.getStoredPeople()

In [None]:
# Compare the sessionPeople and sessionPeopleStored dictionaries for any changes, session by session.
# unmatchedPeople maps each session key to whatever matchHash reports for that session's "people"
# (compared on "person_hash"); the result is stored as-is, including None.
# NOTE(review): sessionPeopleStored[key] raises KeyError for a session that is not yet stored on
# disk — confirm whether a brand-new session should be handled here.
unmatchedPeople = {}
for key, value in sessionPeople.items():
    unmatchedPeople[key] = legiscan.matchHash(value["people"], sessionPeopleStored[key]["people"], "person_hash", silent=True)

# If no session has a truthy (non-None, non-empty) result, print "All people match"
# and delete the unmatchedPeople variable
if not any(unmatchedPeople.values()):
    print("All people match")
    del unmatchedPeople

In [None]:
# Write sessionPeople as indented UTF-8 JSON to <pathDataLegis>/json/sessionPeople.json
sessionPeopleJson = os.path.join(prjDirs["pathDataLegis"], "json", "sessionPeople.json")
with open(sessionPeopleJson, mode="w", encoding="utf-8") as jsonFile:
    json.dump(sessionPeople, jsonFile, ensure_ascii=False, indent=4)

## Dataset List

In [None]:
# Get the list of datasets from LegiScan for each legislative session.
# This is the fresh copy that is later compared against the stored list and exported to JSON.
datasetList = legiscan.getDatasetList()

In [None]:
# Obtain the stored dataset list from JSON dictionary on disk (data/lookup directory).
# It is the baseline that the freshly downloaded datasetList is compared against.
datasetListStored = legiscan.getStoredDatasetList()

In [None]:
# Compare the datasetList and datasetListStored dictionaries on their "dataset_hash" values
unmatchedDatasets = legiscan.matchHash(datasetList, datasetListStored, "dataset_hash", silent=True)

# If matchHash returned None, print "All datasets match" and delete the unmatchedDatasets variable
# NOTE(review): only None counts as "no differences" here — confirm what matchHash returns
# when nothing differs.
if unmatchedDatasets is None:
    print("All datasets match")
    del unmatchedDatasets

In [None]:
# Write datasetList as indented UTF-8 JSON to <pathDataLegis>/json/datasetList.json
datasetListJson = os.path.join(prjDirs["pathDataLegis"], "json", "datasetList.json")
with open(datasetListJson, mode="w", encoding="utf-8") as jsonFile:
    json.dump(datasetList, jsonFile, ensure_ascii=False, indent=4)

## Misc

In [None]:
# Fetch the stored bill lists for the "AI" and "LC" projects
aiBills = legiscan.getStoredBills("AI")
lcBills = legiscan.getStoredBills("LC")

In [None]:
# Scratch check of updateStoredBills with project "AI", bill "XX999" and id 123555
# NOTE(review): "XX999" looks like a placeholder bill; if this call persists to disk, the dummy
# entry stays in the stored AI list — confirm and clean up before relying on that list.
legiscan.updateStoredBills("AI", "XX999", 123555)

In [None]:
# Display the stored "AI" bill list again to inspect the effect of the update call
legiscan.getStoredBills("AI")

In [None]:
# Load aiBills.json from the data/lookup directory straight into aiBills
# (this replaces whatever aiBills held before)
aiBillsJson = os.path.join(prjDirs["pathDataLookup"], "aiBills.json")
with open(aiBillsJson, encoding="utf-8") as jsonFile:
    aiBills = json.load(jsonFile)

In [None]:
# Convert the aiBills dictionary to a pandas DataFrame and preview its first rows
aiBillsDf = pd.DataFrame(aiBills)
aiBillsDf.head()

In [None]:
# URL of a LegiScan GAITS RSS feed
# NOTE(review): the URL embeds what looks like a private feed token — consider loading it from
# the .env file instead of committing it in the notebook.
ocealcrss = "https://legiscan.com/gaits/feed/eb2f0a37a652094577c62225fc31828c.rss"

In [None]:
# Parse the RSS feed at the URL stored in ocealcrss
ocealcfeed = feedparser.parse(ocealcrss)

In [None]:
# Show the title of the parsed feed
ocealcfeed.feed.title

In [None]:
# Show the link of the parsed feed
ocealcfeed.feed.link

In [None]:
# Show the first feed entry (dictionary-style access)
ocealcfeed["entries"][0]

In [None]:
# List the top-level keys of the parsed feed object
ocealcfeed.keys()

In [None]:
# Show the first feed entry again, this time via attribute-style access
ocealcfeed.entries[0]

In [None]:
"https://api.legiscan.com/?key=APIKEY&op=getMonitorList&record=current"

In [12]:
# From the metadata directory, import the "FieldDescriptions.xlsx" workbook into pandas DataFrames,
# one per LegiScan API operation sheet.
codebookPath = os.path.join(prjDirs["pathMetadata"], "FieldDescriptions.xlsx")
codebookSheets = [
    "getBill",
    "getRollCall",
    "getBillText",
    "getAmendment",
    "getSupplement",
    "getPerson",
    "getSessionList",
]

# Passing a list to sheet_name makes read_excel return a dict {sheet name: DataFrame}, so the
# workbook is opened and parsed once instead of once per sheet.
codebooks = pd.read_excel(codebookPath, sheet_name=codebookSheets)

# Keep the individual variable names that the following cells rely on
getBillCodebook = codebooks["getBill"]
getRollCallCodebook = codebooks["getRollCall"]
getBillTextCodebook = codebooks["getBillText"]
getAmendmentCodebook = codebooks["getAmendment"]
getSupplementCodebook = codebooks["getSupplement"]
getPersonCodebook = codebooks["getPerson"]
getSessionListCodebook = codebooks["getSessionList"]


In [13]:
# Save each codebook DataFrame as "<variable name>.pkl" in the data/lookup directory
codebookFrames = {
    "getBillCodebook": getBillCodebook,
    "getRollCallCodebook": getRollCallCodebook,
    "getBillTextCodebook": getBillTextCodebook,
    "getAmendmentCodebook": getAmendmentCodebook,
    "getSupplementCodebook": getSupplementCodebook,
    "getPersonCodebook": getPersonCodebook,
    "getSessionListCodebook": getSessionListCodebook,
}
for codebookName, codebookFrame in codebookFrames.items():
    codebookFrame.to_pickle(os.path.join(prjDirs["pathDataLookup"], f"{codebookName}.pkl"))

In [None]:
# Reload every codebook DataFrame from its pickle file in the data/lookup directory.
# The directory is looked up once and reused for all seven files.
lookupDir = prjDirs["pathDataLookup"]

getBillCodebook = pd.read_pickle(os.path.join(lookupDir, "getBillCodebook.pkl"))
getRollCallCodebook = pd.read_pickle(os.path.join(lookupDir, "getRollCallCodebook.pkl"))
getBillTextCodebook = pd.read_pickle(os.path.join(lookupDir, "getBillTextCodebook.pkl"))
getAmendmentCodebook = pd.read_pickle(os.path.join(lookupDir, "getAmendmentCodebook.pkl"))
getSupplementCodebook = pd.read_pickle(os.path.join(lookupDir, "getSupplementCodebook.pkl"))
getPersonCodebook = pd.read_pickle(os.path.join(lookupDir, "getPersonCodebook.pkl"))
getSessionListCodebook = pd.read_pickle(os.path.join(lookupDir, "getSessionListCodebook.pkl"))

In [39]:
# Get the AI monitoring list from disk (data/lookup directory)
# (overwrites any aiBills value left over from earlier cells)
aiBills = legiscan.getStoredBills("AI")

# Get the LC monitoring list from disk (data/lookup directory)
lcBills = legiscan.getStoredBills("LC")

In [32]:
# Fetch the full LegiScan record for every stored AI bill, mirroring the nesting of aiBills:
# test[<aiBills key>][<bill number>] -> result of getBill(<bill id>).
# A plain loop is used so that bills already fetched are kept if a later request fails.
test = {}
for period, periodBills in aiBills.items():
    periodResults = test.setdefault(period, {})
    for billNumber, billId in periodBills.items():
        periodResults[billNumber] = legiscan.getBill(billId)
    

In [18]:
# Fetch and display a single bill by its bill_id (581806 is AB1465 in the stored AI list)
legiscan.getBill(581806)

{'bill_id': 581806,
 'change_hash': '20bb8b3a704cdb43730e1ecee46b680d',
 'session_id': 993,
 'session': {'session_id': 993,
  'state_id': 5,
  'year_start': 2013,
  'year_end': 2014,
  'prefile': 0,
  'sine_die': 1,
  'prior': 1,
  'special': 0,
  'session_tag': 'Regular Session',
  'session_title': '2013-2014 Regular Session',
  'session_name': '2013-2014 Session'},
 'url': 'https://legiscan.com/CA/bill/AB1465/2013',
 'state_link': 'http://www.leginfo.ca.gov/cgi-bin/postquery?bill_number=ab_1465&sess=1314&house=A',
 'completed': 0,
 'status': 2,
 'status_date': '2014-05-23',
 'progress': [{'date': '2014-01-09', 'event': 1},
  {'date': '2014-05-23', 'event': 2},
  {'date': '2014-05-23', 'event': 9},
  {'date': '2014-06-05', 'event': 9},
  {'date': '2014-06-12', 'event': 10},
  {'date': '2014-06-12', 'event': 9},
  {'date': '2014-06-15', 'event': 10}],
 'state': 'CA',
 'state_id': 5,
 'bill_number': 'AB1465',
 'bill_type': 'B',
 'bill_type_id': '1',
 'body': 'A',
 'body_id': 19,
 'curre

In [38]:
# Display the stored AI bills dictionary
aiBills

{'2013-2024': {'AB1465': 581806, 'SB836': 577638, 'SB860': 581712},
 '2017-2018': {'AB1809': 1052898,
  'AB2662': 1090551,
  'ACR215': 1111231,
  'SB843': 1052926,
  'SB1470': 1092270},
 '2019-2020': {'AB156': 1140154,
  'AB459': 1199608,
  'AB485': 1200933,
  'AB594': 1205261,
  'AB976': 1214383,
  'AB1576': 1216111,
  'AB2269': 1341577,
  'AB3317': 1347660,
  'AB3339': 1347682,
  'ACR125': 1272951,
  'SB348': 1210745,
  'SB444': 1214166,
  'SB730': 1215535,
  'SB752': 1215850,
  'SCR13': 1205432,
  'SJR6': 1237237},
 '2021-2022': {'AB13': 1385509,
  'AB1400': 1458951,
  'AB1545': 1459096,
  'AB1651': 1559219,
  'AB178': 1398195,
  'AB179': 1398196,
  'AB2224': 1592488,
  'AB2826': 1594657,
  'AB587': 1450081,
  'SB54': 1385430,
  'SB178': 1398299,
  'SB179': 1398300,
  'SB1018': 1590455,
  'SB1216': 1593803,
  'SR11': 1453532},
 '2023-2024': {'AB100': 1649612,
  'AB103': 1649615,
  'AB104': 1649616,
  'AB106': 1649618,
  'AB107': 1649619,
  'AB108': 1649620,
  'AB158': 1649670,
  'AB