In [None]:
# Install the third-party dependencies into the kernel's environment with the
# %pip magic (-q: quiet output, -U: upgrade to the newest available release).
# NOTE(review): versions are unpinned, so a fresh install may differ between
# runs; consider pinning once the workflow stabilizes.
%pip install -qU openai marvin
# PostgreSQL driver with its "binary" extra; the requirement is quoted so the
# square brackets are not interpreted as a glob pattern.
%pip install -qU "psycopg[binary]"

## Getting ready

Begin by:
1. creating a new directory `data/<jurisdiction>` and populating it with one or more
docx files containing the jurisdiction's municipal code
2. running `scripts/convert_docx.sh` to convert those files into a single text file
3. making a copy of `notebooks/template-workflow.ipynb` at `notebooks/<jurisdiction>.ipynb`
and continuing the processing in that notebook

In [None]:
import sys
# Put the parent directory first on the import search path.
# NOTE(review): presumably this is the repository root, which makes the local
# `muni` package importable from the notebooks folder; confirm for your checkout.
sys.path.insert(0, '..')

## Development aid: (re)load the autoreload extension and select mode 2 so that
## edited modules are re-imported automatically before each cell executes.
%reload_ext autoreload
%autoreload 2

## Specify heading patterns

Replace the contents of the `heading_examples` dict below with examples from your jurisdiction.

In [None]:
from muni.code import Level, Jurisdiction

In [None]:
## Sample headings for each outline level, line breaks included.
## The heading patterns are inferred from these examples in a later cell.
heading_examples = {
    Level.H1: [
        "TITLE 1\nGENERAL PROVISION\n",
        "TITLE 2\nCITY GOVERNMENT AND ADMINISTRATION\n",
        "TITLE 3\nREVENUE AND FINANCE\n",
    ],
    Level.H2: [
        "CHAPTER 1-4\nCODE ADOPTION - ORGANIZATION\n",
        "CHAPTER 1-8\nCITY SEAL AND FLAG\n",
        "CHAPTER 1-12\nCITY EMBLEMS\n",
    ],
    Level.H3: [
        "1-4-010 Municipal Code of Chicago adopted.\n",
        "2-1-020 Code to be kept up-to-date.\n",
        "3-4-030 Official copy on file.\n",
    ],
}

In [None]:
from muni.code import infer_heading_patterns, infer_level_names

In [None]:
## Infer one regular expression per outline level and print each one for review
heading_patterns = infer_heading_patterns(heading_examples)
for level, pattern in heading_patterns.items():
    print("{}: r'{}'".format(level.name, pattern.regex))

print()  # blank line separating the two listings

## Derive the section names from the patterns and print them for review
level_names = infer_level_names(heading_patterns)
for level, name in level_names.items():
    print("{}: {}".format(level.name, name))

## Specify the parameters of the jurisdiction and parse the code

In [None]:
from muni.code import StateMachineParser

## Describe the jurisdiction: its display name, the inferred heading patterns
## and level names, and where its code text lives (local file and source URL).
place = Jurisdiction(
    name="Chicago Mini",
    patterns=heading_patterns,
    level_names=level_names,
    source_local="../data/chicago-mini/code.txt",
    source_url="https://www.chicago.gov/city/en/depts/doit/supp_info/municipal_code.html",
)

## Attach a parser that uses the jurisdiction's own heading patterns and is
## named after the jurisdiction.
place.parser = StateMachineParser(
    document_name=place.name + " Code",
    heading_patterns=place.patterns,
)

In [None]:
## Verify that the outline looks okay
# Runs the parser's match summary over the jurisdiction's raw text, before the
# full parse in the next cell.
# NOTE(review): `raw_text` is presumably read from `source_local`; confirm in muni.code.
place.parser.summarize_matches(place.raw_text)

In [None]:
# Parse the raw text with the parser attached above and keep the result on
# `place`, where the summary and chunking cells below read it.
place.document = place.parser.parse(place.raw_text)

In [None]:
## Verify that the distribution of paragraphs looks okay
from muni.code import summarize_document
# Inspect the parsed document before it is split into chunks.
summarize_document(place.document)

In [None]:
from muni.code import chunkify_document, summarize_chunks
# NOTE(review): 1000 is presumably the target or maximum chunk size; its unit
# (characters, words, tokens) is not visible here, so confirm in muni.code.
# The return value is discarded, so the chunks are presumably stored on the
# document itself; verify.
chunkify_document(place.document, 1000)
## Verify that the resulting chunks look okay
summarize_chunks(place.document)

## Upload data to the database

In [None]:
import os

from muni.code import upload

## Connection settings for the database (5432 is the PostgreSQL default port).
## Every value can be overridden with an environment variable so that real
## credentials never have to be committed with the notebook; the fallbacks are
## the local development defaults.
db = {'dbname': os.environ.get('MUNI_DB_NAME', 'muni'),
      'user': os.environ.get('MUNI_DB_USER', 'muni'),
      'password': os.environ.get('MUNI_DB_PASSWORD', 'muni'),
      'host': os.environ.get('MUNI_DB_HOST', 'localhost'),
      # environment values are strings; keep the port an int as before
      'port': int(os.environ.get('MUNI_DB_PORT', '5432'))}

## Send the jurisdiction to the database described by `db`.
upload(db, place)
# TODO: changing DB schema

## Find associations among sections

In [None]:
from muni.code import find_associations

# Called with the same `db` settings and `place` object as the upload cell.
# NOTE(review): presumably requires the upload cell to have run first; confirm.
find_associations(db, place)
# TODO: changing DB schema

## Queries & reports

In [None]:
from muni.code import connection, hybrid_query  # `report` is left out while the code below is disabled

## Natural-language questions to run against the uploaded code
queries = [
    'Does the municipal code contain provisions restricting the use of drug paraphernalia?',
]

## FIXME: disabled while the DB schema is changing
#with connection(db) as conn:
#    results = [hybrid_query(conn, query) for query in queries]
#    reports = [report(conn, query) for query in queries]

## Upload results to database

In [None]:
## TODO