# This notebook retrieves and generates information about Titles 8, 9, and 10 of the NYC Administrative Code.

In [1]:
# Snapshot the packages installed in the current environment into requirements.txt.
# NOTE(review): the bare `pip` presumably relies on IPython automagic (i.e. `%pip`) — confirm automagic is enabled.
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
#Import Dependencies
import pandas as pd
import chromadb
import json
import torch
from transformers import BartTokenizer, BartModel
import numpy as np

In [2]:
# Check that the persisted ChromaDB store under ./chromadb can be opened from this notebook
client = chromadb.PersistentClient(path="./chromadb")

# List all collections in the ChromaDB instance
collections = client.list_collections()

# Print the collections to confirm that "df_810_bart" (the collection queried below) exists
print("Available collections:", collections)

Available collections: ['df_810_bart']


In [3]:
# Smoke test: confirm that a query can be embedded and that documents come back from the collection.

# Open the persisted ChromaDB store and fetch the collection to search
chroma_path = "./chromadb"  # Adjust if the store lives somewhere else
chroma_client = chromadb.PersistentClient(path=chroma_path)
collection = chroma_client.get_collection(name="df_810_bart")  # Swap in your own collection name if it differs

# Load the BART tokenizer and encoder that turn text into embeddings
# NOTE(review): presumably this must be the same checkpoint that produced the stored vectors — confirm before changing it
model_name = "facebook/bart-large"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartModel.from_pretrained(model_name)

def encode_text_with_bart(text, target_dim=1024):
    """Embed text with BART by mean-pooling the final hidden states.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: A string, or a list of strings, to embed.
        target_dim: Width every returned embedding is forced to. Narrower
            embeddings are right-padded with zeros and wider ones are truncated.
            Defaults to 1024, the width this notebook reports for its query
            embedding.

    Returns:
        A flat list of ``target_dim`` floats for a single string, or a list of
        such lists for a batch of more than one string.
    """
    # max_length here is the *token* limit for the encoder input; it is unrelated
    # to target_dim even though both default to 1024.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=1024)
    # Keep the inputs on the same device as the model so the call still works
    # after another cell has moved `model` (e.g. to MPS).
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling across the token axis -> one vector per input text
    embeddings = outputs.last_hidden_state.mean(dim=1)

    current_dim = embeddings.shape[1]
    if current_dim < target_dim:
        # new_zeros matches the batch size, dtype and device of `embeddings`,
        # so the concatenation is valid for batched input and non-CPU models.
        padding = embeddings.new_zeros(embeddings.shape[0], target_dim - current_dim)
        embeddings = torch.cat([embeddings, padding], dim=1)
    elif current_dim > target_dim:
        embeddings = embeddings[:, :target_dim]

    # squeeze() drops the batch axis when only one text was embedded
    return embeddings.squeeze().tolist()

# Embed a sample question and report the size of the resulting vector
query_text = "What are the restrictions on carrying guns in NYC?"
query_embedding = encode_text_with_bart(query_text)
print(f"Query embedding dimension: {len(query_embedding)}")

# Fetch the five closest entries, together with their metadata and distances
results = collection.query(
    include=['documents', 'metadatas', 'distances'],
    n_results=5,
    query_embeddings=[query_embedding],
)
print(results)

Query embedding dimension: 1024
{'ids': [['4045', '2369', '2441', '2442', '4189']], 'embeddings': None, 'documents': [['11. Permit. The permit for purchase and possession of rifles and shotguns issued by the commissioner.Who issues permits for the purchase and possession of rifles and shotguns in NYC?According to NYC Administrative Code Title 10, Section 10.301(11), the commissioner issues permits for the purchase and possession of rifles and shotguns.', 'b. Air pistols and air rifles; selling or possessing.Are there age restrictions for purchasing air pistols and air rifles in NYC?The provided excerpt from NYC Administrative Code Section 10.131(b)', 'h. Rifles and shotguns; carrying or possessing.Under what conditions can a person legally carry or possess rifles and shotguns in NYC?According to NYC Administrative Code Title 10, Section 10.131(h), a person may legally carry or possess rifles and shotguns if they have obtained the proper permits and licenses as required by local law. Ri

# RAG #1: Open-source RAG application, embedded with Facebook/BART and generated with Google/Flan

In [3]:
#This code runs the model by embedding the query using Facebook/Bart and generating an answer using Google/Flan
import chromadb
from transformers import BartTokenizer, BartModel, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import torch.mps  # Ensures Metal is utilized on Mac
from langchain.prompts import PromptTemplate

import os

# Hugging Face token for authentication. It is read from the HF_TOKEN environment
# variable so a real token never has to be saved in the notebook; when the variable
# is unset the original placeholder value is kept.
# NOTE(review): nothing in this cell passes `token` to a from_pretrained call.
token = os.environ.get("HF_TOKEN", "your_token_here")

# Prompt sent to the generator: {context} receives the retrieved legal text and
# {question} receives the user's query.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are an AI assistant specializing in NYC legal regulations. 
    Your task is to provide legally accurate and precise responses for a non-legal expert, 
    based **only** on the provided legal text. 

    **Do not make assumptions** or provide information outside the given context.

    ---
    
    ### Legal Text:
    {context}

    ### User Question:
    {question}

    ### Answer:
    Provide a clear and concise response based strictly on the provided legal text.
    """
)

# Initialize ChromaDB client and collection
chroma_client = chromadb.PersistentClient(path="./chromadb")  # Ensure this points to your ChromaDB path
collection = chroma_client.get_collection("df_810_bart")  # Use your collection name

# Pick the device once: Metal (MPS) if available, otherwise CPU
device = "mps" if torch.backends.mps.is_available() else "cpu"

# Load the embedding model (BART) and move it to the device
model_name = "facebook/bart-large"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartModel.from_pretrained(model_name)
model.to(device)

# Load the generator model (Flan-T5-base)
llm_name = "google/flan-t5-base"
llm_tokenizer = AutoTokenizer.from_pretrained(llm_name)

# The model is placed explicitly with .to(device) below, so device_map="auto" is
# not used: letting accelerate dispatch the model and then calling .to() on it is
# redundant, and fails if the model has been split across several devices.
llm_model = AutoModelForSeq2SeqLM.from_pretrained(
    llm_name,
    torch_dtype=torch.float32,  # MPS prefers float32 over float16
)
llm_model.to(device)  # Move the LLM model to the device

def embed_text(text):
    """Return the mean-pooled BART embedding of ``text`` as plain Python floats.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=1024,
    )
    encoded = encoded.to(device)

    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # Average over the token axis, then drop any size-1 axes before converting
    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().tolist()

def retrieve_docs(query, top_k=3):
    """Look up the ``top_k`` closest documents to ``query`` in the ChromaDB collection.

    The query is embedded with ``embed_text`` and sent to the module-level
    ``collection``. Returns the first entry of the result's "documents" field,
    or an empty list when that field is empty or missing a value.
    """
    hits = collection.query(
        query_embeddings=[embed_text(query)],
        n_results=top_k,
    )
    documents = hits["documents"]
    if not documents:
        return []
    return documents[0]

def generate_answer(query):
    """Answer ``query`` with the local seq2seq LLM, grounded in retrieved documents.

    Retrieved documents are joined with newlines to form the prompt context; a
    fixed fallback sentence is used when nothing is retrieved. Returns the
    decoded text of the first generated sequence.
    """
    docs = retrieve_docs(query)
    if docs:
        context = "\n".join(docs)
    else:
        context = "No relevant documents found."

    # Fill the template, then tokenize on the same device as the model
    prompt = prompt_template.format(context=context, question=query)
    encoded_prompt = llm_tokenizer(prompt, return_tensors="pt").to(device)

    # Generation is inference only, so gradients are not tracked
    with torch.no_grad():
        generated = llm_model.generate(**encoded_prompt, max_length=512)

    first_sequence = generated[0]
    return llm_tokenizer.decode(first_sequence, skip_special_tokens=True)


Answer: Rifles and shotguns must be registered with the city, and the person must comply with all applicable storage, transportation, and usage regulations.


# RAG #1: QUERY THE OPEN SOURCE (GOOGLE/FLAN) GENERATED ANSWER HERE:

In [None]:
# Edit the query text, then run this cell to get an answer from the open-source (Flan) pipeline.
# NOTE: generate_answer is defined twice in this notebook (once per RAG section);
# this cell calls whichever definition was executed most recently.
query = "What are the restrictions on carrying guns in NYC?"
answer = generate_answer(query)
print("Answer:", answer)

# RAG #2: This version uses Facebook/BART for embedding and Anthropic's Claude for generating an answer. It requires an Anthropic API key. Queries cost about 1¢.

In [5]:
import chromadb
from transformers import BartTokenizer, BartModel
import torch
import torch.mps  # Ensures Metal is utilized on Mac
from langchain.prompts import PromptTemplate
import anthropic

import os

# Anthropic API key. It is read from the ANTHROPIC_API_KEY environment variable so
# a real key never has to be saved in the notebook; when the variable is unset the
# original placeholder value is kept.
api_key = os.environ.get("ANTHROPIC_API_KEY", "anthropic_api_key")

# Prompt sent to Claude: {context} receives the retrieved legal text and
# {question} receives the user's query.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an AI assistant specializing in NYC legal regulations. Your task is to provide legally accurate and precise responses for a non-legal expert, based **only** on the provided legal text. **Do not make assumptions** or provide information outside the given context.

Legal Text:
{context}

User Question:
{question}
"""
)

# Initialize ChromaDB client and collection
chroma_client = chromadb.PersistentClient(path="./chromadb")  # Ensure this points to your ChromaDB path
collection = chroma_client.get_collection("df_810_bart")  # Use your collection name

# Load the embedding model (BART)
model_name = "facebook/bart-large"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartModel.from_pretrained(model_name)

# Initialize Anthropic client for Claude.
# The client constructor accepts keyword arguments only, so the key has to be
# passed as api_key=...; passing it positionally raises a TypeError.
claude_client = anthropic.Anthropic(api_key=api_key)

# Use Metal (MPS) if available
device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)  # Move the embedding model to the device

def embed_text(text):
    """Return the mean-pooled BART embedding of ``text`` as plain Python floats.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.
    """
    tokenizer_options = {
        "return_tensors": "pt",
        "padding": True,
        "truncation": True,
        "max_length": 1024,
    }
    batch = tokenizer(text, **tokenizer_options).to(device)

    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        model_output = model(**batch)

    # Average over the token axis, then drop any size-1 axes before converting
    token_mean = model_output.last_hidden_state.mean(dim=1)
    return token_mean.squeeze().tolist()

def retrieve_docs(query, top_k=3):
    """Fetch the ``top_k`` documents closest to ``query`` from the ChromaDB collection.

    Embeds the query with ``embed_text`` and searches the module-level
    ``collection``. Returns the first entry of the result's "documents" field,
    or an empty list when that field is empty or missing a value.
    """
    vector = embed_text(query)
    response = collection.query(query_embeddings=[vector], n_results=top_k)

    matched = response["documents"]
    if matched:
        return matched[0]
    return []

def generate_answer(query):
    """Answer ``query`` with Claude, grounded in documents retrieved from ChromaDB.

    Retrieved documents are joined with newlines to form the prompt context; a
    fixed fallback sentence is used when nothing is retrieved. Returns the text
    of the first content block in Claude's reply.
    """
    docs = retrieve_docs(query)
    if docs:
        context = "\n".join(docs)
    else:
        context = "No relevant documents found."

    # Fill the template and send it as a single user turn
    user_turn = {
        "role": "user",
        "content": prompt_template.format(context=context, question=query),
    }
    reply = claude_client.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=1024,
        system="You are an AI assistant specializing in NYC legal regulations.",
        messages=[user_turn],
    )

    # Only the first content block of the reply is returned
    first_block = reply.content[0]
    return first_block.text

Answer: Based solely on the provided legal text, I can only share limited information about restrictions on carrying guns in NYC, specifically about rifles and shotguns:

According to NYC Administrative Code Title 10, Section 10.131(h), a person may legally carry or possess rifles and shotguns if they:
1. Have obtained the proper permits and licenses as required by local law
2. Have registered the rifles and shotguns with the city
3. Comply with all applicable storage, transportation, and usage regulations

The text also mentions that specific restrictions apply to certain locations such as schools, government buildings, and public gatherings, but does not provide details about these restrictions.

The legal text does not provide information about restrictions on carrying other types of firearms beyond rifles and shotguns, nor does it specify the exact nature of the storage, transportation, and usage regulations mentioned.


# RAG #2: QUERY THE ANTHROPIC GENERATED ANSWER HERE:

In [None]:
# Example query: edit the text, then run this cell to get an answer from the Claude pipeline.
# NOTE: generate_answer is defined twice in this notebook (once per RAG section);
# this cell calls whichever definition was executed most recently.
query = "What are the restrictions on carrying guns in NYC?"
answer = generate_answer(query)
print("Answer:", answer)