# CaLPA Scratch Folder

This is a scratch notebook used to test the code and functionality of the AI California Legislative Policy Analysis (CaLPA) system. It is not intended for production use and may contain incomplete or experimental code. The purpose of this notebook is to facilitate the development and testing of the CaLPA system, including its data processing, analysis, and visualization components. The notebook may include code snippets, comments, and notes related to the development process. Please refer to the official documentation and user guides for the CaLPA system for more information on its usage and features.

In [None]:
#%reset

## Initialization

In [1]:
# Import required libraries
import os
import time
from datetime import date
from datetime import datetime
import json
import mimetypes
import glob
import base64
import zipfile
import io
import dotenv
import requests
import pandas as pd
import feedparser
import webbrowser
from mrkdwn_analysis import MarkdownAnalyzer

In [2]:
# Load the Calpa module located in the scripts/python/calpa directory
# (the package must be importable from the kernel's path for this to work)
from calpa import Calpa, LegiScan

# Load environment variables from the .env file in the current working directory
# NOTE(review): presumably the classes below read their settings/API keys from
# these variables, which is why this runs before they are instantiated - confirm
dotenv.load_dotenv(os.path.join(os.getcwd(), '.env'))

# Instantiate the Calpa and LegiScan classes (shared by every cell below)
calpa = Calpa()
legiscan = LegiScan()

# Create project metadata for the AI project
prjMetadata = calpa.projectMetadata("AI", "0")

# Create the project directories dictionary, rooted at the current working directory
# (later cells index it with keys such as "pathDataLookup", "pathScriptsMd" and "pathObsidian")
prjDirs = calpa.projectDirectories(os.getcwd())


NameError: name 'aiBills' is not defined

In [None]:
# Load all codebook pickle files from the data/lookup directory in one pass.
# The file names are listed in the same order as the variables they are bound to.
(
    codebookBill,
    codebookRollCall,
    codebookBillText,
    codebookAmendment,
    codebookSupplement,
    codebookPerson,
    codebookSessionList,
) = [
    pd.read_pickle(os.path.join(prjDirs["pathDataLookup"], pklName))
    for pklName in (
        "codebookBill.pkl",
        "codebookRollCall.pkl",
        "codebookBillText.pkl",
        "codebookAmendment.pkl",
        "codebookSupplement.pkl",
        "codebookPerson.pkl",
        "codebookSessionList.pkl",
    )
]

In [None]:
# Obtain the stored sessions list from JSON dictionary on disk (data/lookup directory)
sessionListStored = legiscan.getStoredData(dataType = "session")

# Obtain the stored session People list from JSON dictionary on disk (data/lookup directory)
sessionPeopleStored = legiscan.getStoredData(dataType = "people")

# Obtain the stored dataset list from JSON dictionary on disk (data/lookup directory)
datasetListStored = legiscan.getStoredData(dataType = "dataset")

# Get the stored raw master list from JSON dictionary on disk (data/lookup directory)
masterListRawStored = legiscan.getStoredData(dataType = "master", raw = True)
# Get the stored master list from JSON dictionary on disk (data/lookup directory)
masterListStored = legiscan.getStoredData(dataType = "master", raw = False)

# Get the AI monitoring list from disk (data/lookup directory)
aiBillListStored = legiscan.getStoredData(dataType = "bills", project = "AI")

# Get the AI full list of bills from disk (data/legis/json directory)
# (indexed below as aiBills[<legislative period>][<bill id>])
aiBills = legiscan.getStoredData(dataType = "data", project = "AI")

# Get the AI bill summaries list from disk (data/lookup directory)
# (indexed below as aiBillsSummariesStored[<legislative period>][<bill id>]["summary" | "tags"])
aiBillsSummariesStored = legiscan.getStoredData(dataType = "summaries", project = "AI")

## End of Initialization

In [None]:
# Open test["status"] in a web browser
# NOTE(review): `test` is not defined in any visible cell - presumably a links
# dictionary left over from an earlier session; this cell fails on a fresh kernel
webbrowser.open(test["status"])

In [None]:
# Open the LegiScan URL of bill AB1465 (2013-2014 period); new=2 asks for a new browser tab
webbrowser.open(aiBills["2013-2014"]["AB1465"]["url"], new=2, autoraise=True)

In [None]:
# Open the "state_link" URL of bill AB1465 (2013-2014 period) in a new browser tab
# (presumably the bill's page on the state legislature site - confirm)
webbrowser.open(aiBills["2013-2014"]["AB1465"]["state_link"], new=2, autoraise=True)

In [None]:
# Display the markdown sponsor summary for bill SB836 (2013-2014 period)
legiscan.summarizeBillSponsors(aiBills["2013-2014"]["SB836"], output="md")

- Primary Sponsor(s): [Ellen Corbett (D, SD10)](https://ballotpedia.org/Ellen_Corbett)
- Co-Sponsor(s): [Joel Anderson (R, SD38)](https://ballotpedia.org/Joel_Anderson_(California)), [Jim Beall (D, SD15)](https://ballotpedia.org/James_Beall_Jr.), [Marty Block (D, SD39)](https://ballotpedia.org/Martin_Block), [Anthony Cannella (R, SD12)](https://ballotpedia.org/Anthony_Cannella), [Lou Correa (D, SD34)](https://ballotpedia.org/Lou_Correa), [Noreen Evans (D, SD02)](https://ballotpedia.org/Noreen_Evans), [Cathleen Galgiani (D, SD05)](https://ballotpedia.org/Cathleen_Galgiani), [Loni Hancock (D, SD09)](https://ballotpedia.org/Loni_Hancock), [Jerry Hill (D, SD13)](https://ballotpedia.org/Gerald_Hill), [Ricardo Lara (D, SD33)](https://ballotpedia.org/Ricardo_Lara), [Ted Lieu (D, SD28)](https://ballotpedia.org/Ted_Lieu), [Carol Liu (D, SD25)](https://ballotpedia.org/Carol_Liu), [Jim Nielsen (R, SD04)](https://ballotpedia.org/Jim_Nielsen), [Richard Roth (D, SD31)](https://ballotpedia.org/Richard_Roth), [Lois Wolk (D, SD03)](https://ballotpedia.org/Lois_Wolk)
- Joint Sponsor(s): [Nancy Skinner (D, SD09)](https://ballotpedia.org/Nancy_Skinner_(California)), [Marie Waldron (R, AD75)](https://ballotpedia.org/Marie_Waldron)

In [None]:

# Build a MarkdownAnalyzer for the sample markdown document
# NOTE(review): `samplePath` is not defined in any visible cell - set it to the
# markdown file to analyse before running this cell
analyzer = MarkdownAnalyzer(samplePath)

# Pull out each kind of markdown element the analyzer can identify
headers = analyzer.identify_headers()
paragraphs = analyzer.identify_paragraphs()
blockquotes = analyzer.identify_blockquotes()
links = analyzer.identify_links()
codeBlocks = analyzer.identify_code_blocks()

In [None]:
# Run the analyzer's overall analysis on the sample document and print the result
analysis = analyzer.analyse()

print(analysis)

In [None]:
# Display the headers identified in the sample document
headers

In [None]:
# Display the paragraphs identified in the sample document
paragraphs

In [None]:
# Display the links identified in the sample document
links

In [None]:
import io
import pypandoc
import panflute

def prepare(doc):
    """
    Attach empty collectors to the document before the filter walks it.
    Passed to panflute.run_filter as the `prepare` hook.
    """
    doc.images = []
    doc.links = []

def action(elem, doc):
    """
    Filter action: record every Image element in doc.images and every
    Link element in doc.links. Returns None, so the element is left unchanged.
    """
    if isinstance(elem, panflute.Image):
        doc.images.append(elem)
    elif isinstance(elem, panflute.Link):
        doc.links.append(elem)

if __name__ == '__main__':
    # Convert the markdown file to pandoc's JSON AST and load it as a panflute document
    data = pypandoc.convert_file('example.md', 'json')
    doc = panflute.load(io.StringIO(data))
    # `prepare` initialises doc.images / doc.links; because a doc is supplied,
    # run_filter returns the walked document instead of dumping it to stdout
    doc = panflute.run_filter(action, prepare=prepare, doc=doc)

    print("\nList of image URLs:")
    for image in doc.images:
        print(image.url)

In [None]:
# Root directory holding one notes markdown file per bill, grouped by legislative period
mdNotesPath = os.path.join(prjDirs["pathScriptsMd"], "notes")

for key in aiBills.keys():
    # Each legislative period gets its own sub-directory; create it when missing
    if not os.path.exists(os.path.join(mdNotesPath, key)):
        os.makedirs(os.path.join(mdNotesPath, key))
    for billId in aiBills[key].keys():
        # Mode 'w' creates the file when it is missing and truncates it when it
        # already exists, so a single write covers both the "new" and "replace" cases.
        # NOTE(review): this resets any existing notes file to just the header line.
        with open(os.path.join(mdNotesPath, key, f"{billId}.md"), 'w') as mdFile:
            # Seed the file with the section header for the bill's AI notes
            mdFile.write(f"## {billId} AI Notes\n\n")


In [None]:
def aiBillMarkdown(billPeriod, billId, billsDict = aiBills, billsSummariesDict = aiBillsSummariesStored, obsidianSync = False):
    """
    Create the markdown (Obsidian-style) file for a single AI bill.

    The file is written to <prjDirs["pathScriptsMd"]>/AI/<billPeriod>/<billId>.md and
    contains, in order: a YAML properties section, a TL;DR callout, a metadata
    callout, a citation callout, the bill's "AI Notes" section (read from
    <prjDirs["pathScriptsMd"]>/notes/<billPeriod>/<billId>.md) and an iframe of
    the state bill page.

    Relies on the notebook globals `legiscan`, `calpa` and `prjDirs`.

    Args:
        billPeriod: legislative period key used in both dictionaries (e.g. "2013-2014").
        billId: bill key inside the period (e.g. "AB1465").
        billsDict: nested dictionary billsDict[billPeriod][billId] -> bill data.
            The default is the `aiBills` object bound when this function was defined.
        billsSummariesDict: nested dictionary
            billsSummariesDict[billPeriod][billId] -> {"summary": ..., "tags": [...]}.
        obsidianSync: when True, the generated file is also copied to
            <prjDirs["pathObsidian"]>/AI Bills/<billPeriod>/<billId>.md.

    Returns:
        None. The function only writes files.

    Raises:
        KeyError: if the period, bill, or one of the expected bill fields is missing.
        FileNotFoundError: if the notes file or one of the target directories
            does not exist (directories are not created here).
    """
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Part 1: Define Variables and Input Data
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Get the bill data from the AI bills dictionary
    myBill = billsDict[billPeriod][billId]

    # Basic Bill Information
    myBillCode = f"{myBill['body']}{myBill['bill_type']}"
    myBillNumber = myBill['bill_number'].replace(f"{myBillCode}", "")
    myBillAlias1 = f"{myBillCode}-{myBillNumber}"
    myBillAlias2 = f"{myBillCode} {myBillNumber}"
    # The bill year is the year of the first history entry
    myBillYear = datetime.strptime(myBill['history'][0]['date'], "%Y-%m-%d").year

    # Summary, tags, keywords, and hash tags from the summaries dictionary
    mySummary = billsSummariesDict[billPeriod][billId]["summary"]
    myTags = billsSummariesDict[billPeriod][billId]["tags"]
    myKeywords = ", ".join(myTags)
    myHashTags = ", ".join([f"#{tag}" for tag in myTags])

    # Sponsors (dictionary and markdown)
    mySponsorsDict = legiscan.summarizeBillSponsors(myBill, output="dict")
    # NOTE(review): the "md" output is indexed like a mapping further down
    # (.keys() / ['Primary Sponsor']) - confirm that is what output="md" returns
    mySponsorsMd = legiscan.summarizeBillSponsors(myBill, output="md")

    # Session Status, Bill Action and Chaptered Information
    sineDie = myBill["session"]["sine_die"]
    if sineDie == 0:
        mySessionStatus = "Active"
    elif sineDie == 1:
        mySessionStatus = "Inactive"
    else:
        # Any other value would otherwise leave the variable unbound and crash below
        mySessionStatus = "Unknown"
    # Action status (colons are replaced so the value stays a plain YAML scalar)
    myBillAction = myBill['history'][-1]['action'].replace(":", " -")
    # If action is Chaptered, get the chapter number and year
    if myBillAction.startswith("Chaptered"):
        chaptered = True
        chapterNo = myBillAction.split("Chapter ")[1].split(",")[0]
        if chapterNo.isnumeric():
            chapterNo = int(chapterNo)
        chapterYear = myBillAction.split(" ")[-1].split(".")[0]
        if chapterYear.isnumeric():
            chapterYear = int(chapterYear)
    # Otherwise, set chaptered to False and chapterNo and chapterYear to empty strings
    else:
        chaptered = False
        chapterYear = ""
        chapterNo = ""

    # Get the legislative links for the bill
    myLinks = calpa.getCaLegisLinks(billPeriod, billId)

    # Get the bill notes
    myNotesPath = os.path.join(prjDirs["pathScriptsMd"], "notes", billPeriod, f"{billId}.md")
    # Read the notes markdown file and keep the lines of the "AI Notes" section.
    # A "## <billId> LC Notes" header ends that section; LC lines are not used here.
    aiNoteLines = []
    section = ""
    with open(myNotesPath, 'r', encoding="utf-8") as src:
        for line in src:
            # Switch section whenever one of the two section headers is met
            if line.startswith(f"## {billId} AI Notes"):
                section = "AI"
            elif line.startswith(f"## {billId} LC Notes"):
                section = "LC"
            if section == "AI":
                aiNoteLines.append(line)
    aiNotes = "".join(aiNoteLines)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Part 2: Create the YAML Properties Section of the Markdown File
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Create the markdown file (path)
    mdFile = os.path.join(prjDirs["pathScriptsMd"], "AI", billPeriod, f"{billId}.md")

    # Write as UTF-8 so non-ASCII bill text does not depend on the platform default encoding
    with open(mdFile, 'w', encoding="utf-8") as mdf:

        # Begin writing the YAML file
        mdf.write(f"---\n")

        # Aliases (vector)
        mdf.write(f"aliases:\n")
        mdf.write(f"  - {myBillAlias1}\n")
        mdf.write(f"  - {myBillAlias2}\n")

        # Type (vector)
        mdf.write(f"type:\n")
        mdf.write(f"  - {legiscan.billCode[myBillCode]}\n")
        mdf.write(f"  - California Legislature Bill\n")
        mdf.write(f"  - AI Legislation\n")

        # Tags (vector)
        mdf.write(f"tags:\n")
        mdf.write(f"  - Zotero\n")
        mdf.write(f"  - california-legislature\n")
        mdf.write(f"  - {myBill['session']['session_title'].lower().replace(' ', '-')}\n")
        for tag in myTags:
            mdf.write(f"  - {tag}\n")

        # Keywords
        mdf.write(f"keywords: {myKeywords}\n")

        # Hash tags
        mdf.write(f"hashTags: '{myHashTags}'\n")

        # Bill and Session Information
        mdf.write(f"billNumber: {myBillNumber}\n")
        mdf.write(f"billType: {legiscan.billCode[myBillCode]}\n")
        mdf.write(f"billYear: {myBillYear}\n")
        mdf.write(f"legislativeBody: California Legislature\n")
        mdf.write(f"legislativePeriod: {billPeriod}\n")
        mdf.write(f"session: {billPeriod} {myBill['session']['session_tag']}\n")
        mdf.write(f"""topic: "{myBillAlias1}: {myBill['title']}"\n""")
        mdf.write(f"""title: "{myBillAlias1}: {myBill['description']}"\n""")
        mdf.write(f"""summary: "{mySummary}"\n""")

        # Sponsors (a missing sponsor category is written as None)
        for yamlKey, sponsorKey in (
            ("sponsors", "Primary Sponsor"),
            ("coSponsors", "Co-Sponsor"),
            ("jointSponsors", "Joint Sponsor"),
        ):
            if sponsorKey not in mySponsorsDict.keys():
                mdf.write(f"{yamlKey}: None\n")
            else:
                mdf.write(f"{yamlKey}: {', '.join(mySponsorsDict[sponsorKey])}\n")

        # Bill and session status
        mdf.write(f"billStatus: {legiscan.statusType[myBill['status']]}\n")
        mdf.write(f"sessionStatus: {mySessionStatus}\n")

        # Dates
        mdf.write(f"dateStatus: {myBill['status_date']}\n")
        mdf.write(f"dateIntroduced: {myBill['history'][0]['date']}\n")
        mdf.write(f"dateAssessed: {myBill['history'][-1]['date']}\n")

        # Bill Actions
        mdf.write(f"lastAction: {myBillAction}\n")
        mdf.write(f"chaptered: {chaptered}\n")
        mdf.write(f"chapterNo: {chapterNo}\n")
        mdf.write(f"chapterYear: {chapterYear}\n")

        # Legiscan link
        mdf.write(f"linkLegiscan: {myBill['url']}\n")

        # California legislature bill links
        mdf.write(f"linkMain: {myLinks['main']}\n")
        mdf.write(f"linkText: {myLinks['text']}\n")
        mdf.write(f"linkVotes: {myLinks['votes']}\n")
        mdf.write(f"linkHistory: {myLinks['history']}\n")
        mdf.write(f"linkAnalysis: {myLinks['analysis']}\n")
        mdf.write(f"linkTodaysLaw: {myLinks['todaysLaw']}\n")
        mdf.write(f"linkCompare: {myLinks['compare']}\n")
        mdf.write(f"linkStatus: {myLinks['status']}\n")

        # Obsidian PDF link
        mdf.write(f"pdfLink: '[[Documents/CA Legislative Bills/{billPeriod}/{billId}.pdf]]'\n")

        # Legiscan IDs
        mdf.write(f"legiscanBillId: {myBill['bill_id']}\n")
        mdf.write(f"legiscanBillHash: {myBill['change_hash']}\n")
        mdf.write(f"legiscanSessionId: {myBill['session_id']}\n")

        # Related (vector)
        mdf.write(f"related:\n")
        mdf.write(f"  - '[[Artificial Intelligence]]'\n")
        mdf.write(f"  - '[[California Government]]'\n")

        # Dates for Obsidian (left empty here)
        mdf.write(f"generated: \n")
        mdf.write(f"modified: \n")

        # Close the YAML file
        mdf.write(f"---\n")

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Part 3: Create the Main Markdown Content Section of the File
        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        # Main Markdown Content
        #~~~~~~~~~~~~~~~~~~~~~~

        mdf.write("\n")
        # Session Title
        mdf.write(f"## {myBillAlias1}: {myBill['title']}\n\n")

        # Begin Summary Info Box
        #~~~~~~~~~~~~~~~~~~~~~~~

        mdf.write(f">[!tldr] **{billId} TL;DR Summary**\n")
        mdf.write(f"> {mySummary}\n\n")

        # Begin Metadata Info Box
        #~~~~~~~~~~~~~~~~~~~~~~~~

        mdf.write(f">[!legislative] **{billId} Metadata**\n")

        # Basic Bill Information
        mdf.write(f">- **Bill Number**: {myBillAlias1}\n")
        mdf.write(f">- **Year**: {myBillYear}\n")
        mdf.write(f">- **Legislative Period**: {billPeriod}\n")
        mdf.write(f">- **Legislative Body**: California Legislature, {billPeriod} {myBill['session']['session_tag']}\n")
        mdf.write(f">- **Bill Type**: {legiscan.billCode[myBillCode]}\n")

        # Titles and Summaries
        mdf.write(f">- **Topic**: {myBillAlias1}: {myBill['title']}\n")
        mdf.write(f">- **Title**: {myBillAlias1}: {myBill['description']}\n")
        mdf.write(f">- **TL;DR Summary**: {mySummary}\n")

        # Keywords and Hash Tags
        mdf.write(f">- **Keywords**: {myKeywords}\n")
        mdf.write(f">- **Hash Tags**: {myHashTags}\n")

        # Sponsors (a missing sponsor category is written as None)
        for label, sponsorKey in (
            ("Sponsor(s)", "Primary Sponsor"),
            ("Co-Sponsor(s)", "Co-Sponsor"),
            ("Joint Sponsor(s)", "Joint Sponsor"),
        ):
            if sponsorKey not in mySponsorsMd.keys():
                mdf.write(f">- **{label}**: None\n")
            else:
                mdf.write(f">- **{label}**: {mySponsorsMd[sponsorKey]}\n")

        # Dates and Status
        mdf.write(f">- **Introduced Date**: {calpa.convertStrToDate(myBill['history'][0]['date'])}\n")
        mdf.write(f">- **Bill Status**: {legiscan.statusType[myBill['status']]}\n")
        mdf.write(f">- **Session Status**: {mySessionStatus}\n")
        mdf.write(f">- **Status Date**: {calpa.convertStrToDate(myBill['status_date'])}\n")
        mdf.write(f">- **Last Action**: {myBillAction}\n")
        mdf.write(f">- **Last Action Date**: {calpa.convertStrToDate(myBill['history'][-1]['date'])}\n")
        # The chapter number and year are only listed for chaptered bills
        mdf.write(f">- **Chaptered**: {chaptered}\n")
        if chaptered:
            mdf.write(f">- **Chapter No**: {chapterNo}\n")
            mdf.write(f">- **Chapter Year**: {chapterYear}\n")

        # Legiscan IDs
        mdf.write(f">- **LegiScan Bill ID**: {myBill['bill_id']}\n")
        mdf.write(f">- **LegiScan Bill Hash**: {myBill['change_hash']}\n")
        mdf.write(f">- **LegiScan Session ID**: {myBill['session_id']}\n")

        # Bill Links (all on one list item)
        mdf.write(f">- **Bill Links**: ")
        mdf.write(f"[LegiScan]({myBill['url']}), ")
        mdf.write(f"[State Main]({myLinks['main']}), ")
        mdf.write(f"[State Text]({myLinks['text']}), ")
        mdf.write(f"[State Votes]({myLinks['votes']}), ")
        mdf.write(f"[State History]({myLinks['history']}), ")
        mdf.write(f"[State Analysis]({myLinks['analysis']}), ")
        mdf.write(f"[State Today's Law]({myLinks['todaysLaw']}), ")
        mdf.write(f"[State Compare]({myLinks['compare']}), ")
        mdf.write(f"[State Status]({myLinks['status']})\n")
        mdf.write(f">- **Obsidian PDF Link**: [[Documents/CA Legislative Bills/{billPeriod}/{billId}.pdf]]\n")
        mdf.write(f">- **Related**: [[Artificial Intelligence]], [[California Government]]\n\n")

        # Citation Info Box
        #~~~~~~~~~~~~~~~~~~
        mdf.write(f">[!cite] **{billId} Citation**\n")
        mdf.write(f"> {myBillAlias1}: {myBill['description']}, ")
        mdf.write(f"{legiscan.billCode[myBillCode]} {billId}, ")
        mdf.write(f"California Legislature, {billPeriod} {myBill['session']['session_tag']}. ")
        mdf.write(f"{legiscan.statusType[myBill['status']]} ")
        mdf.write(f"{legiscan.billType[int(myBill['bill_type_id'])]['type']}. ")
        # Chaptered bills cite the chaptering action; others cite the session status
        if chaptered:
            mdf.write(f"{myBillAction} ")
        else:
            mdf.write(f"{mySessionStatus} ")
        mdf.write(f"({myBillYear}). ")
        mdf.write(f"{myLinks['main']}\n\n\n")

        # Write the bill Notes
        mdf.write(f"{aiNotes}\n\n")

        # Webpage (iframe)
        #~~~~~~~~~~~~~~~~~

        mdf.write(f"## State Webpage\n\n")
        mdf.write(f"""<iframe src="{myLinks['main']}" allow="fullscreen" allowfullscreen="" style="height: 100%;width:100%;aspect-ratio: 16/ 10;"></iframe>\n""")

        mdf.write("\n")

    # Variables if the obsidianSync is True
    if obsidianSync:
        # Get the obsidian location for the markdown file
        obsidianPath = os.path.join(prjDirs["pathObsidian"], "AI Bills", billPeriod)

        # Copy the markdown file to the Obsidian vault
        destFile = os.path.join(obsidianPath, f"{billId}.md")
        with open(mdFile, 'r', encoding="utf-8") as src:
            with open(destFile, 'w', encoding="utf-8") as dest:
                dest.write(src.read())

    #~~~~~~~~~~~~~~~~~~~~~~~~~
    # End of the markdown file
    #~~~~~~~~~~~~~~~~~~~~~~~~~


In [None]:
# Generate the markdown file for every AI bill with the notebook-level
# aiBillMarkdown function, also copying each file to the Obsidian vault
for key, value in aiBills.items():
    # key is the legislative period; value maps bill ids to bill data
    print(f"{key} Legislative Session:")
    for billId, bill in value.items():
        aiBillMarkdown(key, billId, obsidianSync = True)
        # Report progress once the bill's file has been written
        print(f"- {billId}: {bill['title']}")

In [None]:
# Same run as the notebook-level function, but through the LegiScan class method
# NOTE(review): presumably legiscan.aiBillMarkdown is the packaged version of the
# function prototyped in this notebook - confirm it takes the same arguments
for key, value in aiBills.items():
    # key is the legislative period; value maps bill ids to bill data
    print(f"{key} Legislative Session:")
    for billId, bill in value.items():
        legiscan.aiBillMarkdown(key, billId, obsidianSync = True)
        # Report progress once the bill's file has been written
        print(f"- {billId}: {bill['title']}")