In [None]:
from unstructured.partition.pdf import partition_pdf

# Parse the PDF into a list of Element objects (List[Element]) covering the
# pages of the document. The extract() step further down reads only each
# element's `.text` attribute.
elements = partition_pdf("wa_rules.pdf")

In [None]:
import json

def extract(elements):
    """Group element texts into ``(section id, section text)`` pairs.

    A new section starts at any element whose text begins with ``"RCW 46"``
    and splits on single spaces into at least three pieces (i.e. the id is
    followed by some further text). The section id is the first two pieces
    joined by a space, e.g. ``"RCW 46.61.290"``.

    Args:
        elements: iterable of objects exposing a ``text`` string attribute.

    Yields:
        Tuples ``(section_id, text)`` where ``text`` is the newline-joined
        text of the header element and every element up to the next header.
        Elements that appear before the first header are dropped.
    """
    section_id = None
    section_lines = []
    for element in elements:
        text = element.text
        pieces = text.split(" ", 2) if text.startswith("RCW 46") else []
        if len(pieces) == 3:
            # Header found: flush whatever the previous section accumulated.
            if section_lines:
                yield section_id, "\n".join(section_lines)
            section_lines = []
            section_id = " ".join(pieces[:2])
        if section_id is None:
            # Still in the preamble before the first section header.
            continue
        section_lines.append(text)
    if section_id is not None:
        yield section_id, "\n".join(section_lines)

# Build the mapping BEFORE opening the output file: open(..., "w") truncates
# rules.json immediately, so an exception raised while extract() is running
# would otherwise leave an empty or partial file for the next cell to load.
# dict() keeps the last value per key, so if the same section id is yielded
# more than once, the later occurrence wins.
rules = dict(extract(elements))
with open("rules.json", "w") as f:
    json.dump(rules, f)

In [None]:
import json 

# Read the saved section-id -> section-text mapping back into `data`.
with open("rules.json", "r") as rules_file:
    data = json.loads(rules_file.read())

In [None]:
from tecton_gen_ai.api import VectorDB

# Create the vector DB handle used by the ingest and search cells below.
# NOTE(review): the argument meanings noted here are inferred from their
# names — confirm against the tecton_gen_ai VectorDB documentation.
vdb = VectorDB(
    "lancedb",  # presumably the backing store/provider name
    embedding="openai/text-embedding-3-small",  # presumably needs OpenAI credentials at ingest/search time — TODO confirm
    uri="/tmp/lancedb",  # parent directory of the path removed by the final cleanup cell
    table_name="wa_rules",
    mode="overwrite",  # presumably replaces an existing table of the same name — TODO confirm
)

In [None]:
# Ingest one entry per rule: the rule text is passed in `texts`, and the same
# text is also kept in the metadata next to its id and a fixed state tag.
vdb.ingest(
    texts=list(data.values()),
    metadatas=[
        {"state": "wa", "rule_id": rule_id, "rule_text": rule_text}
        for rule_id, rule_text in data.items()
    ],
)

In [None]:
# Smoke test: query the ingested rules with a free-text phrase; the bare
# expression makes the notebook display the returned matches.
vdb.search("left turn")

[{'rule_id': 'RCW 46.61.290',
  'rule_text': 'RCW 46.61.290 Required position and method of turning at\nintersections. The driver of a vehicle intending to turn shall do so as follows:\n(1) Right turns. Both the approach for a right turn and a right turn shall be made as close as practicable to the right-hand curb or edge of the roadway.\n(2) Left turns. The driver of a vehicle intending to turn left\nshall approach the turn in the extreme left-hand lane lawfully available to traffic moving in the direction of travel of the vehicle. Whenever practicable the left turn shall be made to the left of the center of the intersection and so as to leave the intersection or other location in the extreme left-hand lane lawfully available to traffic moving in the same direction as the vehicle on the roadway being entered.\n(3) Two-way left turn lanes. (a) The department of transportation and local authorities in\ntheir respective jurisdictions may designate a two-way left turn lane on a roadway. A

In [None]:
# Cleanup: remove the on-disk wa_rules.lance path under the /tmp/lancedb uri
# used above (presumably the LanceDB table created by this notebook — verify).
# After running this, re-run the VectorDB creation and ingest cells before searching.
!rm -rf /tmp/lancedb/wa_rules.lance