In [None]:
## Install the notebook's dependencies into the kernel's environment
## (-q: quiet output, -U: upgrade to the newest available release)
%pip install -qU openai marvin
%pip install -qU "psycopg[binary]"

## Getting ready

Begin by:
1. creating a new directory `data/<jurisdiction>` and populating it with one or more
docx files containing the jurisdiction's municipal code
2. running `scripts/convert_docx.sh` to convert those files into a single text file
3. copying `notebooks/template-workflow.ipynb` to `notebooks/<jurisdiction>.ipynb`
and continuing the processing in that new notebook

In [16]:
import sys
## Put the parent directory first on the import path; presumably this is what
## lets the `muni` package imported in later cells resolve -- confirm repo layout
sys.path.insert(0, '..')

## set up auto-reloading for development, so that edits to imported modules
## are picked up without restarting the kernel
%reload_ext autoreload
%autoreload 2

## Specify heading patterns

Replace the contents of the `heading_examples` dict below with example headings from your jurisdiction

In [17]:
from muni.code import Jurisdiction

In [None]:
## Smoke test: request a single embedding to confirm that the OpenAI client is
## configured correctly (OpenAI() picks up the API key from the OPENAI_API_KEY
## environment variable when none is passed explicitly).
from openai import OpenAI
client = OpenAI()

response = client.embeddings.create(
    input="It was the best of times, it was the worst of times.",
    model="text-embedding-3-small",
)

## Report the vector's length and a short preview instead of printing every
## component, which would flood the cell output.
embedding = response.data[0].embedding
print(f"{len(embedding)} dimensions; first 5: {embedding[:5]}")

In [19]:
## Sample headings for each outline level, keyed by level number.
## In this example level 1 holds titles, level 2 chapters and level 3
## numbered sections; the dict is passed to infer_heading_patterns() below.
heading_examples = {
    1: [
        "TITLE 1\nGENERAL PROVISION\n",
        "TITLE 2\nCITY GOVERNMENT AND ADMINISTRATION\n",
        "TITLE 3\nREVENUE AND FINANCE\n",
    ],
    2: [
        "CHAPTER 1-4\nCODE ADOPTION - ORGANIZATION\n",
        "CHAPTER 1-8\nCITY SEAL AND FLAG\n",
        "CHAPTER 1-12\nCITY EMBLEMS\n",
    ],
    3: [
        "1-4-010 Municipal Code of Chicago adopted.\n",
        "2-1-020 Code to be kept up-to-date.\n",
        "3-4-030 Official copy on file.\n",
    ],
}

In [20]:
from muni.code import infer_heading_patterns, infer_level_names

In [None]:
## Infer heading patterns from the examples above, then print the regular
## expression for each outline level so it can be checked by eye
heading_patterns = infer_heading_patterns(heading_examples)
for level, pattern in heading_patterns.items():
    print(f"{level}: r'{pattern.regex}'")

## Blank line to separate the two listings in the cell output
print()

## Derive a name for each level from the patterns and print them, to verify
## that the names of the sections look okay
level_names = infer_level_names(heading_patterns)
for level, name in level_names.items():
    print(f"{level}: {name}")

## Specify the parameters of the jurisdiction and parse the code

In [22]:
## Describe the jurisdiction: its name and title, the heading patterns and
## level names inferred in the cells above, the local text file to parse
## (presumably the output of scripts/convert_docx.sh) and the source URL
place = Jurisdiction(
    name="Chicago Mini",
    title="Municipal Code of Chicago",
    patterns=heading_patterns,
    level_names=level_names,
    source_local="../data/chicago-mini/code.txt",
    source_url="https://www.chicago.gov/city/en/depts/doit/supp_info/municipal_code.html",
)

## Parse the code, then split it into chunks.
## NOTE(review): 1000 is presumably the target chunk size; its units are not
## visible in this notebook -- confirm against muni.code
place.parse()
place.chunkify(1000)

In [None]:
## Verify that the distribution of paragraphs and chunks looks okay
## (run after the previous cell, which builds, parses and chunks `place`)
place.summarize()

## Upload data to the database

In [24]:
from muni.code import upload

## Connection settings for the database. Every value can be overridden through
## the standard libpq environment variable, so real credentials never have to
## be written into (and committed with) the notebook; the fallbacks are the
## local development defaults.
import os

db = {'dbname': os.environ.get('PGDATABASE', 'muni'),
      'user': os.environ.get('PGUSER', 'muni'),
      'password': os.environ.get('PGPASSWORD', ''),
      'host': os.environ.get('PGHOST', 'localhost'),
      ## environment values are strings; keep the port an int as before
      'port': int(os.environ.get('PGPORT', '5432'))}

## Upload the parsed jurisdiction using the connection settings in `db`
upload(db, place)

In [25]:
from muni.code import upload_embeddings

## Upload embeddings for `place` using the same connection settings.
## NOTE(review): whether the embeddings are computed inside this call is not
## visible in this notebook -- confirm against muni.code
upload_embeddings(db, place)

## Find associations among sections

In [26]:
from muni.code import find_associations

## Find associations among the sections of `place`, using the database
## described by `db`
find_associations(db, place)
# TODO: changing DB schema

## Queries & reports

In [27]:
from muni.code import connection, hybrid_query #, report

## Natural-language questions to ask of the municipal code, one string per query
queries = [
    'Does the municipal code contain provisions restricting the use of drug paraphernalia?',
]

## FIXME: changing DB schema
#with connection(db) as conn:
#    results = [hybrid_query(conn, query) for query in queries]
#    reports = [report(conn, query) for query in queries]

## Upload results to database

In [28]:
## TODO: upload the query results to the database (not implemented yet)

In [29]:
from IPython.display import Markdown, display

def print_markdown(text):
    """Render ``text`` as Markdown in the notebook's output area.

    Args:
        text: Markdown source to render.

    Returns:
        None; the rendered result is shown through IPython's ``display``.
    """
    rendered = Markdown(text)
    display(rendered)

# Example usage: renders a level-1 heading followed by a line with bold and italic text
print_markdown("# This is a heading\n\nThis is some **bold** text and this is *italic* text.")

# This is a heading

This is some **bold** text and this is *italic* text.