# Components

## Chat model

In [1]:
from langchain_ollama import ChatOllama

# Chat model that the RAG chain pipes the filled-in prompt into.
llm = ChatOllama(
    model="llama3.1",
)

## Embedding model

In [2]:
from langchain_ollama import OllamaEmbeddings

# Embedding model handed to the FAISS vector store as its embedding function.
embeddings = OllamaEmbeddings(
    model="mxbai-embed-large",
)

## Vector store

In [3]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# GPU resources handle; kept in `res` because the "Loading vectors" cell reuses it.
res = faiss.StandardGpuResources()

# Embed a throwaway query once just to learn the embedding dimensionality.
embedding_dim = len(embeddings.embed_query("hello world"))

# Build the flat L2 index on the CPU, then copy it to GPU 0.
cpu_index = faiss.IndexFlatL2(embedding_dim)
gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)

# Empty store: no documents and no index-to-docstore-id mappings yet.
vector_store = FAISS(
    index=gpu_index,
    embedding_function=embeddings,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Indexing

## Loading documents

In [4]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Prerequisite: combined_excel.xlsx must already have been converted to
# ./combined_excel.csv before this cell runs.
loader = CSVLoader(
    file_path="./combined_excel.csv",
    autodetect_encoding=True,
)
docs = loader.load()

# Report how many documents the loader produced.
print(f"Total number of documents: {len(docs)}")

Total number of documents: 3276


In [5]:
# Show the text content of the first loaded document.
print(docs[0].page_content)

Resources: cases
body: Problems and Purpose  The  Citizens' Assembly  on Electoral Reform was a body created by the government of British Columbia, Canada. The Assembly was charged with investigating and recommending changes to improve the electoral system of the province. The body was composed of 160 citizens selected at random from throughout the province. These members met approximately every other weekend for one year to deliberate about alternative voting arrangements. After issuing their recommendations, the question would be put to the electorate-at-large in a  referendum  held concurrently with the 2005 provincial election.  Background History and Context  In 2004, 160 randomly selected residents of the province of British Columbia, Canada, participated in a bold and innovative experiment in deliberative democracy. They formed a Citizens’ Assembly whose mandate was to analyze the electoral system and, if warranted, design a new electoral law for the province. For the first time

## Splitting documents

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings, both measured in characters.
CHUNK_SIZE = 2000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    add_start_index=True,  # track each chunk's index in its original document
)

# Break every loaded document into overlapping sub-documents.
all_splits = text_splitter.split_documents(docs)

print(f"Split documents into {len(all_splits)} sub-documents.")

Split documents into 20532 sub-documents.


## Storing documents

In [7]:
# Add every chunk to the vector store and keep the IDs it returns.
document_ids = vector_store.add_documents(documents=all_splits)

# Only the first three IDs are shown.
preview_ids = document_ids[:3]
print(preview_ids)

['e2e8be83-071c-4bf7-84e6-6b9c1b6cb31d', '6ef341cd-4826-4f79-ab08-8bab7ab6e1a8', '22997f4c-accf-4f60-b367-268c0d8535d3']


## Saving vectors

In [14]:
# Serialization is done from a CPU-based index: swap the store's GPU index
# for its CPU copy, then write the store to the "faiss_index" folder.
cpu_copy = faiss.index_gpu_to_cpu(vector_store.index)
vector_store.index = cpu_copy

vector_store.save_local("faiss_index")

## Loading vectors

In [15]:
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# serialized (presumably pickled) data -- only point this at an index folder
# produced by this notebook or another trusted source.
vector_store = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# The loaded index is CPU-based; move it back to the GPU for accelerated search.
loaded_cpu_index = vector_store.index
vector_store.index = faiss.index_cpu_to_gpu(res, 0, loaded_cpu_index)

# Retrieval and Generation

## Retriever

In [8]:
# Expose the vector store as a retriever; no search arguments are passed, so
# the library defaults apply.
retriever = vector_store.as_retriever()

## Prompt

In [9]:
from langchain.prompts import PromptTemplate

# Prompt text sent to the LLM. {question} and {documents} are filled in at
# invoke time; the leading spaces on continuation lines are part of the text.
RAG_PROMPT_TEMPLATE = """You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Question: {question}
    Documents: {documents}
    Answer:
    """

prompt = PromptTemplate(
    input_variables=["question", "documents"],
    template=RAG_PROMPT_TEMPLATE,
)

## Chain

In [10]:
from langchain_core.output_parsers import StrOutputParser

# Pipeline: fill the prompt -> call the chat model -> parse the reply to a string.
rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

## Integration

In [11]:
# Define the RAG application class
class RAGApplication:
    """Tie a retriever and a generation chain together into one question-answering call.

    Attributes:
        retriever: Object with an ``invoke(question)`` method that returns a
            sequence of documents, each exposing a ``page_content`` string.
        rag_chain: Object with an ``invoke(mapping)`` method that accepts the
            keys ``"question"`` and ``"documents"`` and returns the answer.
    """

    def __init__(self, retriever, rag_chain):
        """Store the retriever and the generation chain used by ``run``."""
        self.retriever = retriever
        self.rag_chain = rag_chain

    def run(self, question):
        """Answer ``question`` using the documents the retriever returns.

        Every retrieved document is also printed as ``Document <i>: <doc>``.

        Args:
            question: The user's question; passed unchanged to both the
                retriever and the chain.

        Returns:
            Whatever ``rag_chain.invoke`` returns for this question.
        """
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Join the document texts with a real newline. The previous "\\n"
        # literal inserted a backslash followed by "n" between documents.
        doc_texts = "\n".join(doc.page_content for doc in documents)
        for i, doc in enumerate(documents):
            print(f"Document {i}: {doc}")
        # Get the answer from the language model
        return self.rag_chain.invoke({"question": question, "documents": doc_texts})

## Test

In [12]:
# Wire the retriever and the generation chain into one application object.
rag_application = RAGApplication(
    retriever=retriever,
    rag_chain=rag_chain,
)

In [13]:
# Example usage 1: participation methods of the Metro Vancouver plebiscite.
question = (
    "What participation methods are used in the 2015 Metro Vancouver Transportation and Transit Plebiscite?"
)
# run() prints each retrieved document before returning the answer.
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Document 0: page_content='body: Comparative Perspectives on Planning History and Futures [PLAN 500]School of Community and Regional Planning | UBCPARTICIPATORY PLANNING ANALYSISAaron Li, Erin Grace, Charlotte Lemieux, Jamie Tseng, Emily (Young Eun) Park, Brandon ChowIntroductionThe 2015 Metro Vancouver Transportation and Transit Plebiscite was a significant event in the region's political history. It arose from a campaign promise by the British Columbia Liberal Party led by Premier Christy Clark. The decision to hold a public vote on transportation funding was an unusual case in the context of Canadian governance, as major public infrastructure projects are seldom decided by a plebiscite or referendum. This move was seen as controversial, generating debate over the appropriateness of placing complex infrastructure planning in the hands of the general public.This plebiscite is unique in the history of public participation and will be critically analyzed using Fung’s Democracy Cube (2006

In [16]:
# Example usage 2: purposes of the Metro Vancouver plebiscite.
question = (
    "What are the purposes of the 2015 Metro Vancouver Transportation and Transit Plebiscite event?"
)
# run() prints each retrieved document before returning the answer.
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Document 0: page_content='body: Comparative Perspectives on Planning History and Futures [PLAN 500]School of Community and Regional Planning | UBCPARTICIPATORY PLANNING ANALYSISAaron Li, Erin Grace, Charlotte Lemieux, Jamie Tseng, Emily (Young Eun) Park, Brandon ChowIntroductionThe 2015 Metro Vancouver Transportation and Transit Plebiscite was a significant event in the region's political history. It arose from a campaign promise by the British Columbia Liberal Party led by Premier Christy Clark. The decision to hold a public vote on transportation funding was an unusual case in the context of Canadian governance, as major public infrastructure projects are seldom decided by a plebiscite or referendum. This move was seen as controversial, generating debate over the appropriateness of placing complex infrastructure planning in the hands of the general public.This plebiscite is unique in the history of public participation and will be critically analyzed using Fung’s Democracy Cube (2006

In [17]:
# Example usage 3: outcomes of the Metro Vancouver plebiscite.
question = (
    "What are the outcomes of the 2015 Metro Vancouver Transportation and Transit Plebiscite event?"
)
# run() prints each retrieved document before returning the answer.
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Document 0: page_content='body: Comparative Perspectives on Planning History and Futures [PLAN 500]School of Community and Regional Planning | UBCPARTICIPATORY PLANNING ANALYSISAaron Li, Erin Grace, Charlotte Lemieux, Jamie Tseng, Emily (Young Eun) Park, Brandon ChowIntroductionThe 2015 Metro Vancouver Transportation and Transit Plebiscite was a significant event in the region's political history. It arose from a campaign promise by the British Columbia Liberal Party led by Premier Christy Clark. The decision to hold a public vote on transportation funding was an unusual case in the context of Canadian governance, as major public infrastructure projects are seldom decided by a plebiscite or referendum. This move was seen as controversial, generating debate over the appropriateness of placing complex infrastructure planning in the hands of the general public.This plebiscite is unique in the history of public participation and will be critically analyzed using Fung’s Democracy Cube (2006

In [18]:
# Example usage 4: participation methods of the Sidewalk Toronto project.
question = (
    "What participation methods are used in the Participatory Planning in the Sidewalk Toronto Project?"
)
# run() prints each retrieved document before returning the answer.
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Document 0: page_content='from different segments of Toronto's population. The Resident Reference Panels allowed for structured feedback on key aspects of the project, including data governance, public space design, and environmental sustainability. Design Jams were creative sessions aimed at collaboratively brainstorming innovative solutions for the project. The Fellows Program was an initiative aimed at engaging young professionals to contribute to the project’s development process. Neighbourhood Meetings provided a forum for local residents to engage directly with project leaders, ensuring that those most affected by the project had their voices heard [3].Public ConsultationsPublic Consultations were a critical component of the Sidewalk Toronto project, designed to capture a broad spectrum of community input. These consultations were structured to include presentations by project leaders, followed by breakout sessions where participants could discuss specific topics in smaller group

In [19]:
# Example usage 5: purposes of the Sidewalk Toronto project.
question = (
    "What are the purposes of the Participatory Planning in the Sidewalk Toronto Project?"
)
# run() prints each retrieved document before returning the answer.
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Document 0: page_content='from different segments of Toronto's population. The Resident Reference Panels allowed for structured feedback on key aspects of the project, including data governance, public space design, and environmental sustainability. Design Jams were creative sessions aimed at collaboratively brainstorming innovative solutions for the project. The Fellows Program was an initiative aimed at engaging young professionals to contribute to the project’s development process. Neighbourhood Meetings provided a forum for local residents to engage directly with project leaders, ensuring that those most affected by the project had their voices heard [3].Public ConsultationsPublic Consultations were a critical component of the Sidewalk Toronto project, designed to capture a broad spectrum of community input. These consultations were structured to include presentations by project leaders, followed by breakout sessions where participants could discuss specific topics in smaller group

In [20]:
# Example usage 6: outcomes of the Sidewalk Toronto project.
question = (
    "What are the outcomes of the Participatory Planning in the Sidewalk Toronto Project?"
)
# run() prints each retrieved document before returning the answer.
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Document 0: page_content='from different segments of Toronto's population. The Resident Reference Panels allowed for structured feedback on key aspects of the project, including data governance, public space design, and environmental sustainability. Design Jams were creative sessions aimed at collaboratively brainstorming innovative solutions for the project. The Fellows Program was an initiative aimed at engaging young professionals to contribute to the project’s development process. Neighbourhood Meetings provided a forum for local residents to engage directly with project leaders, ensuring that those most affected by the project had their voices heard [3].Public ConsultationsPublic Consultations were a critical component of the Sidewalk Toronto project, designed to capture a broad spectrum of community input. These consultations were structured to include presentations by project leaders, followed by breakout sessions where participants could discuss specific topics in smaller group