In [1]:
# TODO: move all of this into the ingestion script

In [2]:
%pip install -qU openai
%pip install -qU "psycopg[binary]"

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
import sys
sys.path.insert(0, '..')

In [4]:
# Establish a connection to the PostgreSQL database that holds the muni table

from psycopg import connect

def connection(dbname="regrag", host="localhost", port="5432"):
    """Open a new autocommit connection to the PostgreSQL server.

    The defaults reproduce the notebook's original hard-coded settings, so
    ``connection()`` behaves exactly as before; pass keyword arguments to
    point the notebook at a different database or server.

    Args:
        dbname: name of the database to connect to.
        host: host name of the PostgreSQL server.
        port: port of the PostgreSQL server.

    Returns:
        The object returned by ``connect`` for these settings.
    """
    return connect(
        dbname=dbname,
        host=host,
        port=port,
        # every statement is committed immediately; no explicit commit needed
        autocommit=True,
    )

In [5]:
# SQL statements and helpers used to identify definitions among the rows of the muni table

from muni.llm import definition, analyze_context

# Fetch every row of muni: the id, the (ref, heading) pair of each of the four
# hierarchy levels L1..L4, and the text.
# The column order matters: find_defs and find_associations unpack the
# returned tuples positionally in exactly this order.
sql_select = """
    SELECT  id,
        L1_ref, L1_heading,
        L2_ref, L2_heading,
        L3_ref, L3_heading,
        L4_ref, L4_heading,
        text
    FROM muni;
    """

# Idempotently add the UNIQUE constraint on muni_associations covering the
# same four columns that sql_assoc names in its ON CONFLICT clause.
# IF ... END IF is PL/pgSQL, which PostgreSQL only accepts inside a code
# block, so the logic is wrapped in an anonymous DO block; a bare
# "BEGIN ... END;" would be parsed as transaction control instead.
sql_unique = """
    DO $$
    BEGIN
        IF NOT EXISTS (
            SELECT FROM pg_constraint
            WHERE conname = 'unique_associations'
            AND   conrelid = 'muni_associations'::regclass
        )
        THEN
            ALTER TABLE muni_associations
            ADD CONSTRAINT unique_associations UNIQUE (jurisdiction, association, left_id, right_id);
        END IF;
    END
    $$;
    """

# Insert one association row. The four %s placeholders are bound, in order, to
# (jurisdiction, association, left_id, right_id); set_defs and set_associations
# pass the source row's id as left_id and the matched row's id as right_id.
# ON CONFLICT ... DO NOTHING makes re-runs skip pairs that already exist.
# NOTE(review): this presumably requires a unique constraint or index on these
# four columns (cf. sql_unique) -- confirm it exists before running.
sql_assoc = """
    INSERT INTO muni_associations (jurisdiction, association, left_id, right_id)
    VALUES (%s, %s, %s, %s)
    ON CONFLICT (jurisdiction, association, left_id, right_id) DO NOTHING;
    """

def scope_map(scope):
    """Return the muni columns on which two rows must agree to share ``scope``.

    Every scope corresponds to a prefix of the hierarchy
    jurisdiction > L1 > L2 > L3 > L4: 'global' needs only the jurisdiction,
    'section' needs all five columns. An unrecognised scope gives None.
    """
    hierarchy = ('jurisdiction', 'L1_ref', 'L2_ref', 'L3_ref', 'L4_ref')
    # how many leading hierarchy columns each scope compares
    depth_by_scope = {'global': 1, 'title': 2, 'chapter': 3, 'article': 4, 'section': 5}
    depth = depth_by_scope.get(scope)
    if depth is None:
        return None
    return list(hierarchy[:depth])

In [14]:

def set_defs(conn, id_, scope, association):
    """Associate one muni row with every other muni row inside its scope.

    The source row's jurisdiction and L1..L4 refs are looked up, every other
    row that agrees with it on the columns required by ``scope`` (see
    ``scope_map``) is selected, and one ``sql_assoc`` insert is issued per
    match with the source row as ``left_id`` and the match as ``right_id``.

    All values are passed to the database as bound parameters rather than
    being formatted into the SQL text, so refs containing quotes cannot break
    (or inject into) the query.

    Args:
        conn: a connection to the database
        id_: the id of the row to associate
        scope: the scope of the association (e.g. 'title', 'chapter', 'article', 'section')
        association: the type of association (e.g. 'definition')

    Returns:
        None. Nothing is written when ``scope`` is not recognised or when no
        row with id ``id_`` exists.
    """
    # get the columns that need to match; checked first so an unknown scope
    # costs no database round trip
    columns = scope_map(scope)
    if not columns:
        return
    ref_columns = ('jurisdiction', 'L1_ref', 'L2_ref', 'L3_ref', 'L4_ref')
    with conn.cursor() as cursor:
        # TODO: this should be done as a single query
        # get the jurisdiction and the references of the source row
        cursor.execute(
            f"SELECT {', '.join(ref_columns)} FROM muni WHERE id = %s",
            (id_,),
        )
        source_row = cursor.fetchone()
        if source_row is None:
            return
        source = dict(zip(ref_columns, source_row))
        # get the rows that match the scope. Only column names (constants from
        # scope_map) are formatted into the SQL; the values are bound.
        # A NULL ref in the source row compares as unknown and matches nothing.
        match_str = ' AND '.join(f"{col} = %s" for col in columns)
        cursor.execute(
            f"SELECT id FROM muni WHERE {match_str} AND id != %s",
            [source[col] for col in columns] + [id_],
        )
        rows = cursor.fetchall()
        # set the associations
        for row in rows:
            cursor.execute(sql_assoc, (source['jurisdiction'], association, id_, row[0]))

def find_defs(conn):
    """Find definition rows in muni by heading and record their associations.

    Every row returned by ``sql_select`` is examined; a row is a candidate
    when the word "definition" occurs (case-insensitively) in any of its four
    headings. For each candidate, ``definition(text, headings)`` is asked for
    the scope, and when it returns a truthy scope the row is associated with
    all rows in that scope through ``set_defs``.

    Args:
        conn: a connection to the database

    Returns:
        None. Progress is printed for every row that gets associations.
    """
    with conn.cursor() as cursor:
        cursor.execute(sql_select)
        rows = cursor.fetchall()
    # The rows are fully fetched, so the cursor is released before the
    # per-row calls to definition() and set_defs() (which opens its own).
    for row in rows:
        id_, L1_ref, L1_heading, L2_ref, L2_heading, L3_ref, L3_heading, L4_ref, L4_heading, text = row
        # A heading may be NULL (None) for rows that lack that level;
        # str.join would raise TypeError on None, so treat it as empty.
        heading_string = '; '.join(
            heading or '' for heading in (L1_heading, L2_heading, L3_heading, L4_heading)
        )
        # this is simplest, but could extend to more complicated matches for interpretation of language, etc.
        if 'definition' not in heading_string.lower():
            continue
        headings = {'title': L1_heading, 'chapter': L2_heading, 'article': L3_heading, 'section': L4_heading}
        r = definition(text, headings)
        if r:
            print("* Setting associations")
            print(f"  SCOPE: {r}\nHEADINGS: {heading_string}")
            print("  --> %s ..." % (text or '')[:60].replace('\n', ' '))
            set_defs(conn, id_, r, 'definition')

In [15]:

def set_associations(conn, id_, scope, context_type):
    """Associate one muni row with every other muni row inside its scope.

    The source row's jurisdiction and L1..L4 refs are looked up, every other
    row that agrees with it on the columns required by ``scope`` (see
    ``scope_map``) is selected, and one ``sql_assoc`` insert is issued per
    match with the source row as ``left_id`` and the match as ``right_id``.

    All values are passed to the database as bound parameters rather than
    being formatted into the SQL text, so refs containing quotes cannot break
    (or inject into) the query.

    Args:
        conn: a connection to the database
        id_: the id of the row to associate
        scope: the scope of the association (e.g. 'title', 'chapter', 'article', 'section')
        context_type: the type of association (e.g. 'definition')

    Returns:
        None. Nothing is written when ``scope`` is not recognised or when no
        row with id ``id_`` exists.
    """
    # get the columns that need to match; checked first so an unknown scope
    # costs no database round trip
    columns = scope_map(scope)
    if not columns:
        return
    ref_columns = ('jurisdiction', 'L1_ref', 'L2_ref', 'L3_ref', 'L4_ref')
    with conn.cursor() as cursor:
        # get the jurisdiction and the references of the source row
        cursor.execute(
            f"SELECT {', '.join(ref_columns)} FROM muni WHERE id = %s",
            (id_,),
        )
        source_row = cursor.fetchone()
        if source_row is None:
            return
        source = dict(zip(ref_columns, source_row))
        # get the rows that match the scope. Only column names (constants from
        # scope_map) are formatted into the SQL; the values are bound.
        # A NULL ref in the source row compares as unknown and matches nothing.
        match_str = ' AND '.join(f"{col} = %s" for col in columns)
        cursor.execute(
            f"SELECT id FROM muni WHERE {match_str} AND id != %s",
            [source[col] for col in columns] + [id_],
        )
        rows = cursor.fetchall()
        # set the associations
        for row in rows:
            cursor.execute(sql_assoc, (source['jurisdiction'], context_type, id_, row[0]))

def find_associations(conn, model='gpt-4',
                      allowed_types=('penalty', 'definition', 'interpretation', 'date')):
    """Classify every muni row with the LLM and record the resulting associations.

    Each row returned by ``sql_select`` is passed to
    ``analyze_context(text, headings, model=model)``. A truthy result is
    unpacked as ``(context_type, scope)``; when ``context_type`` is one of
    ``allowed_types`` the row is associated with all rows in that scope
    through ``set_associations``.

    Args:
        conn: a connection to the database
        model: model name forwarded to ``analyze_context``; defaults to the
            previously hard-coded 'gpt-4'
        allowed_types: context types for which associations are stored;
            defaults to the previously hard-coded list

    Returns:
        None. Progress is printed for every row that gets associations.
    """
    with conn.cursor() as cursor:
        cursor.execute(sql_select)
        rows = cursor.fetchall()
    # The rows are fully fetched, so the cursor is released here instead of
    # being held open across one analyze_context call per row.
    for row in rows:
        id_, L1_ref, L1_heading, L2_ref, L2_heading, L3_ref, L3_heading, L4_ref, L4_heading, text = row
        headings = {'title': L1_heading, 'chapter': L2_heading, 'article': L3_heading, 'section': L4_heading}
        r = analyze_context(text, headings, model=model)
        if not r:
            # falsy result: nothing to record for this row
            continue
        context_type, scope = r
        if context_type in allowed_types:
            print(f"* Setting associations for id {id_}")
            print(f"  Context type: {context_type}; Scope: {scope}")
            print("  --> %s ..." % text[:80].replace('\n', ' '))
            set_associations(conn, id_, scope, context_type)

In [16]:
# Run the association pass: find_associations sends every row returned by
# sql_select to analyze_context (one call per row) and stores the results.
# NOTE(review): presumably the connection is closed when the with block
# exits -- confirm against the psycopg connection context-manager docs.
with connection() as conn:
    find_associations(conn)

* Setting associations for id {id_}
  Context type: date; Scope: global
  --> This ordinance, consisting of Titles 1 through 18, inclusive, shall be known as  ...
* Setting associations for id {id_}
  Context type: date; Scope: global
  --> The Municipal Code of Chicago shall take effect and be in force from and after i ...
* Setting associations for id {id_}
  Context type: definition; Scope: global
  --> Unless the context requires other interpretations, the following words and terms ...
* Setting associations for id {id_}
  Context type: interpretation; Scope: global
  --> Whenever any words in any section of this Code import the plural number, the sin ...
* Setting associations for id {id_}
  Context type: interpretation; Scope: global
  --> Reference to any section of this Code shall be understood to refer to and includ ...
* Setting associations for id {id_}
  Context type: penalty; Scope: global
  --> Whenever in any section of this Code the doing of any act or the omission to d

  warn(f'LLM FAILED TO CLASSIFY SCOPE. Response: {response} not in {list(headings.keys())}')
  warn(f'LLM FAILED TO CLASSIFY SCOPE. Response: {response} not in {list(headings.keys())}')


* Setting associations for id {id_}
  Context type: penalty; Scope: article
  --> ARTICLE I. ALL FOOD ESTABLISHMENTS (7-38-001 et seq.) ...
* Setting associations for id {id_}
  Context type: definition; Scope: global
  --> For the purposes of this ordinance, the following definitions shall apply:     ( ...
* Setting associations for id {id_}
  Context type: definition; Scope: chapter
  --> For purposes of this chapter the following definitions apply:     "Collection co ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> Notwithstanding any other provision of this code to the contrary and in addition ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> Notwithstanding any other provision of this code to the contrary and in addition ...
* Setting associations for id {id_}
  Context type: definition; Scope: chapter
  --> For purposes of this chapter:     The word "costs" includes all costs of the cit ...
* Setting associa

  warn(f'LLM FAILED TO CLASSIFY SCOPE. Response: {response} not in {list(headings.keys())}')


* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> (a)   Except as otherwise provided in this chapter, the Commissioner of Health s ...
* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> The penalty for violations of this chapter is set forth in Section 7-42-090.  (A ...
* Setting associations for id {id_}
  Context type: penalty; Scope: article
  --> ARTICLE I. GENERAL PROVISIONS (7-40-005 et seq.) ...
* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> The penalty for violations of this chapter is set forth in Section 7-42-090.  (A ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> A $100.00 reinspection fee shall be assessed against the licensee of any establi ...
* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> Any person who shall violate any of the provisions of this chapter shall be fine ...
* Setting associations for id 

  warn(f'LLM FAILED TO CLASSIFY CONTEXT. Response: {response} not in {list(CONTEXT_TYPES.keys())}')


* Setting associations for id {id_}
  Context type: definition; Scope: section
  --> (a)   For the purposes of this section, the following definitions shall apply:   ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> Whoever, without reasonable cause, does any one or more of the following:        ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> (a)   Definition. For purposes of this section, "public transportation safety zo ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> (a)   Definitions. For purposes of this section, the following definitions apply ...


  warn(f'LLM FAILED TO CLASSIFY CONTEXT. Response: {response} not in {list(CONTEXT_TYPES.keys())}')


* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> Any person violating any of the provisions of this chapter, where no other penal ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> The license of any such person or employees thereof directly involved in the vio ...
* Setting associations for id {id_}
  Context type: definition; Scope: section
  --> A.   For the purposes of this section the following words shall be defined as fo ...
* Setting associations for id {id_}
  Context type: penalty; Scope: section
  --> Any person violating any of the provisions of Section 8-8-110 shall be deemed gu ...
* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> Any person violating any of the provisions of this chapter, where no other penal ...
* Setting associations for id {id_}
  Context type: penalty; Scope: chapter
  --> Any person violating any provision of this chapter, where no other penalty is sp

In [None]:
# Create associations between definitions and the rows in the muni database

In [None]:
# Pull out definitions along with their associated rows

