#### Environment

In [None]:
# install stuff
! pip install -U langchain_community tiktoken langchain-openai langchain-cohere langchainhub langchain langgraph

In [4]:
# load environment
from dotenv import load_dotenv
import os

# Load secrets from the local env file when it exists; values in the file take
# precedence over anything already exported in the process environment.
if os.path.exists('ws.env'):
    load_dotenv('ws.env', override=True)

    # LangSmith tracing (only switched on alongside the local env file)
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# Models
# Defined unconditionally so later cells never hit a NameError when ws.env is absent.
# Alternative model: 'gpt-4o'
LLM = 'gpt-3.5-turbo'  # OpenAI model id (hyphen after "gpt" is required)

# API keys: None when the variable is not set in ws.env or the environment
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')

#### Index
My index will just be embeddings of the ordinance titles, so skip this section for now.

In [None]:
### Build Index

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

### from langchain_cohere import CohereEmbeddings

# Embedding model used to vectorise the document chunks
embd = OpenAIEmbeddings()

# Pages to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Fetch each page (one list of documents per URL), then flatten into a single list
docs = list(map(lambda page_url: WebBaseLoader(page_url).load(), urls))
docs_list = sum(docs, [])

# Chunk the documents: 500-token pieces with no overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=500,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks into a Chroma collection and expose it as a retriever
vectorstore = Chroma.from_documents(
    embedding=embd,
    collection_name="rag-chroma",
    documents=doc_splits,
)
retriever = vectorstore.as_retriever()

#### LLMs

In [7]:
### Router

from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

# Data model
# Schema for the router's answer: a single required field naming the datasource.
# NOTE(review): the class docstring and the Field description are runtime strings;
# presumably they are forwarded to the model as part of the structured-output
# schema, so rewording them may change routing behaviour — confirm before editing.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # Required field (`...` means no default); only these two literal values are accepted.
    datasource: Literal["vectorstore", "SQL_database"] = Field(
        ...,
        description="Given a user question choose to route it to a vectorstore or a SQL database.",
    )


# Chat model for routing (temperature 0), wrapped so its reply is returned
# as a RouteQuery instance instead of free text
llm = ChatOpenAI(
    temperature=0,
    model="gpt-3.5-turbo",
)
structured_llm_router = llm.with_structured_output(RouteQuery)

# Prompt
# System message for the router. It describes the two datasources (the SQL schema
# plus two sample ordinance records, and the vectorstore of ordinance titles) and
# lists routing guidelines with example questions.
# NOTE: this text is passed to ChatPromptTemplate below and currently contains no
# curly braces; any literal `{` / `}` added later would need escaping.
system = """
You are an expert at routing a user legal question to a vectorstore or SQL database.
The laws are ordinances passed in San Francisco in 2023.

The SQL database has the following schema:
- Table `legislation`:
  - Columns: id, file_number, type, introduced, on_agenda, enactment_date, name, status, in_control, final_action, enactment_number, title, sponsors, related_files, url
- Table `history`:
  - Columns: id, legislation_id, date, ver, action_by, action, result

Here are example ordinances:
[ordinance1]
file_number=231142
type=Ordinance
introduced=10/30/2023	
on_agenda=
enactment_date=
name=Planning Code - Housing Production
status=Killed
in_control=Land Use and Transportation Committee
final_action=12/5/2023
enactment_number=
title=Ordinance amending the Planning Code to encourage housing production by 1) exempting, under certain conditions, specified housing projects from the Conditional Use requirement of Section 317, in areas outside of Priority Equity Geographies, which are identified in the Housing Element as areas or neighborhoods with a high density of vulnerable populations 2) restoring the ownership eligibility requirement for density exceptions in residential districts, for lots that have two existing dwelling units or more; 3) sunsetting the Conditional Use requirements established by the Corona Heights Large Residence and the Central Neighborhoods Large Residence Special Use Districts at the end of 2024, and thereafter limiting the size of any Dwelling Units resulting from residential development in those Special Use Districts to 3,000 square feet of Gross Floor Area; affirming the Planning Department’s determination under the California Environmental Quality Act; and making public necessity, convenience, and welfare findings under Planning Code, Section 302, and findings of consistency with the General Plan and the eight priority policies of Planning Code, Section 101.1.
sponsors=Mayor, Joel Engardio
related_files=231258, 230446, 231219
url=
[/ordinance1]

[ordinance2]
file_number=230853
type=Ordinance
introduced=7/25/2023
on_agenda=
enactment_date=11/3/2023
name=Park Code - John F. Shelley Drive - Road Closure
status=Passed
in_control=Clerk of the Board
final_action=11/3/2023
enactment_number=222-23
title=Ordinance amending the Park Code to restrict private vehicles from a portion of John F. Shelley Drive, between the Upper Reservoir Parking Lot and Mansell Street, in McLaren Park.
sponsors=Mayor, Shamann Walton, Myrna Melgar, Dean Preston, Hillary Ronen, Ahsha Safai
related_files=
url=
[/ordinance2]

The vectorstore contains ordinance titles which describe what the ordinance does.

Use the following guidelines to route the query:
1. Route to SQL Database for factual questions that can be directly answered from the database schema.
   - Example: "Who sponsored the most legislation in 2023?"
   - Example: "When was Ordinance XYZ enacted?"

2. Route to Vectorstore for questions that require understanding the context or details from the ordinance titles.
   - Example: "Which laws changed housing policy?"
   - Example: "What ordinances were introduced to improve public transportation?"

Given the user question, determine the most relevant datasource.
"""

# Two-message chat prompt: the fixed system instructions, then the user's question
route_prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", "{question}")]
)

# Chain: fill in the prompt, then call the structured-output router model
question_router = route_prompt | structured_llm_router

# Smoke-test the router: print the routing decision for each sample question, in order
for _sample_question in (
    "Who was the primary sponsor on the most legislation in 2023?",
    "Which laws changed housing policy?",
    "How many laws affected housing policy?",
    "Who introduced the most legislation?",
    "Who missed the most votes?",
    "How long does law take to pass on average?",
    "How many ordinances failed to pass?",
    "How often is the Police Code changed?",
):
    print(question_router.invoke({"question": _sample_question}))

datasource='SQL_database'
datasource='vectorstore'
datasource='vectorstore'
datasource='SQL_database'
datasource='SQL_database'
datasource='SQL_database'
datasource='SQL_database'
datasource='vectorstore'
