In [1]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# This project keeps the Gemini key under GG_API_KEY, while the Google GenAI
# integration looks it up as GOOGLE_API_KEY, so mirror it across.
# Assigning None to os.environ raises an opaque TypeError, so check explicitly
# and fail with an actionable message instead.
_gg_api_key = os.getenv("GG_API_KEY")
if _gg_api_key:
    os.environ["GOOGLE_API_KEY"] = _gg_api_key
elif not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError(
        "No Gemini API key found: set GG_API_KEY (e.g. in .env) "
        "or GOOGLE_API_KEY before running this notebook."
    )
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model shared by the chains in this notebook.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    max_retries=2,     # retry a failed request up to two times
    timeout=None,      # no explicit request timeout configured here
    max_tokens=None,   # no explicit output-length cap configured here
    temperature=0,     # lowest sampling temperature
)

In [7]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_postgres.vectorstores import PGVector
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
import uuid

# MultiVectorRetriever

In [5]:
# Postgres/pgvector connection string. It embeds credentials, so allow it to be
# supplied through the environment (PGVECTOR_CONNECTION, e.g. from .env);
# the fallback is the local development database this notebook was written against.
connection = os.getenv(
    "PGVECTOR_CONNECTION",
    "postgresql+psycopg://langchain:langchain@localhost:6024/langchain",
)
# Name of the PGVector collection that holds the summary embeddings.
collection_name = "summaries"
# Embedding model used for both indexing and querying (library default model).
embeddings_model = HuggingFaceEmbeddings()

In [9]:
# Load ./readme.md; the printed figure is the character count of the first
# loaded document.
loader = TextLoader("./readme.md", encoding="utf-8")
docs = loader.load()
print("length of loaded docs: ", len(docs[0].page_content))

# Markdown-aware split into chunks of at most 2000 characters with a
# 200-character overlap.
splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.MARKDOWN,
    chunk_size=2000,
    chunk_overlap=200,
)
chunks = splitter.split_documents(docs)

# Summarization chain: take a chunk's text, fill the prompt, call the LLM and
# return the reply as a plain string.
prompt_text = "Summarize the following document:\n\n{doc}"
prompt = ChatPromptTemplate.from_template(prompt_text)
summarize_chain = (
    {"doc": lambda chunk: chunk.page_content}
    | prompt
    | llm
    | StrOutputParser()
)

# One summary per chunk, processed one at a time (max_concurrency=1 --
# presumably to stay within API rate limits).
summaries = summarize_chain.batch(chunks, {"max_concurrency": 1})

length of loaded docs:  15157


In [12]:
# Metadata key under which each summary records the id of its source chunk.
id_key = "doc_id"

# Parent-document storage: holds the original chunks, in memory only.
store = InMemoryStore()

# Vector index over the summaries (the "child" documents), backed by Postgres.
vectorstore = PGVector(
    connection=connection,
    collection_name=collection_name,
    embeddings=embeddings_model,
    use_jsonb=True,
)

# Retriever that searches the summaries in the vector store and resolves the
# hits to the original chunks in the docstore through `id_key`.
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=store,
    id_key=id_key,
)
	
# Summaries are paired with chunks positionally, so the two lists must line up.
# Fail fast here rather than silently mislinking (or raising IndexError) below.
if len(summaries) != len(chunks):
    raise ValueError(
        f"Expected one summary per chunk, got {len(summaries)} summaries "
        f"for {len(chunks)} chunks."
    )

# Deterministic ids derived from each chunk's position and text, so re-running
# this cell on the same chunks yields the same ids instead of fresh random ones.
doc_ids = [
    str(uuid.uuid5(uuid.NAMESPACE_URL, f"{position}:{chunk.page_content}"))
    for position, chunk in enumerate(chunks)
]

# Each summary is linked to its original chunk through the id stored in metadata.
summary_docs = [
    Document(page_content=summary, metadata={id_key: doc_id})
    for doc_id, summary in zip(doc_ids, summaries)
]

# Index the summaries for similarity search. Passing explicit ids lets the
# persistent PGVector collection overwrite rows from a previous run of this
# cell rather than accumulating duplicates whose ids the (fresh, in-memory)
# docstore can no longer resolve.
# NOTE(review): rows written by earlier runs with random ids are not removed.
retriever.vectorstore.add_documents(summary_docs, ids=doc_ids)

# Keep the original chunks in the docstore under the same ids, so a summary hit
# can be resolved to the full chunk at query time.
retriever.docstore.mset(list(zip(doc_ids, chunks)))


In [16]:
# Query the vector store directly: this returns the two closest *summaries*,
# not the original chunks.
sub_docs = retriever.vectorstore.similarity_search("name of the project", k=2)

for doc in sub_docs:
    summary_text = doc.page_content
    print(summary_text)
    print('\n')

The document highlights that while the project successfully demonstrated the SR module's effectiveness, the overall pipeline's speed (FPS) was low due to unoptimized detection and recognition models (YOLOv5). Future work should focus on optimizing the entire pipeline for speed by: using lighter YOLOv5 versions, applying model optimization techniques like quantization and pruning, converting models to TensorRT, refactoring the codebase for better modularity and reduced bottlenecks, and implementing a multi-threaded/asynchronous pipeline to parallelize tasks and maximize GPU utilization.


This document acknowledges the contributions of two key resources to the project. First, it credits GitHub user **chequanghuy** for their work on license plate detection and OCR models based on the YOLOv5 framework, specifically referencing their repository "[chequanghuy/Character-Time-series-Matching](https://github.com/chequanghuy/Character-Time-series-Matching)". Second, it thanks AI assistants like

In [19]:
# Query through the retriever: matches are found via the summaries, but what
# comes back (and is printed) is whatever the docstore holds for those ids.
retrieved_docs = retriever.invoke("what is the methodology of the project")

for doc in retrieved_docs:
    chunk_text = doc.page_content
    print(chunk_text)
    print('===========\n')

3.  **Optimizing the Full Pipeline for Speed (FPS)**
    This project's primary focus was to prove the efficacy of the SR module. Consequently, the detection and recognition models (YOLOv5) were not optimized for inference speed, resulting in a low end-to-end FPS. Future work could focus on performance optimization, including:
    * **Utilizing lighter model backbones:** Employing more lightweight YOLOv5 versions (e.g., YOLOv5n, YOLOv5s) for the detection and OCR tasks.
    * **Applying model optimization techniques:** Using methods such as quantization and pruning to reduce the computational complexity of the models.
    * **Converting models to TensorRT:** Migrating the optimized models to NVIDIA's TensorRT engine to maximize inference throughput on target GPU hardware.
    * **Codebase Refactoring:** Refactoring the core pipeline for improved modularity, reducing I/O bottlenecks, and enhancing overall code quality for better maintainability and extensibility.
    * **Implementing a 