In [4]:
## PDF reader
from langchain_community.document_loaders import PyPDFLoader

# Relative path: resolved against the kernel's current working directory.
pdf_path = 'flow_matching.pdf'
loader = PyPDFLoader(pdf_path)
# `docs` is what the splitter cell consumes via `split_documents(docs)`.
docs = loader.load()

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking knobs kept together so they are easy to tune in one place.
# Presumably measured in characters (per the splitter's name) — confirm against the library docs.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded PDF documents into overlapping chunks, then preview the first five.
documents = text_splitter.split_documents(docs)
documents[:5]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-02-05T01:37:59+00:00', 'author': 'Ruiqi Feng, Tailin Wu, Chenglei Yu, Wenhao Deng, Peiyan Hu', 'keywords': 'Machine Learning, ICML', 'moddate': '2025-02-05T01:37:59+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': 'Proceedings of the International Conference on Machine Learning 2025', 'title': 'On the Guidance of Flow Matching', 'trapped': '/False', 'source': 'flow_matching.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='On the Guidance of Flow Matching\nRuiqi Feng 1 Tailin Wu1 Chenglei Yu1 Wenhao Deng1 Peiyan Hu 2\nAbstract\nFlow matching has shown state-of-the-art per-\nformance in various generative tasks, ranging\nfrom image generation to decision-making, where\nguided generation is pivotal. However, the guid-\nance of flow matching is more general than and\nthus substantially d

In [6]:
# Import the embeddings class from langchain_community (already used by this
# notebook for the PDF loader and FAISS) rather than through the deprecated
# `langchain.embeddings` re-export, which newer LangChain releases no longer provide.
# NOTE(review): the class itself is also marked deprecated in favour of the separate
# `langchain-huggingface` package; migrating requires installing that package.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed both the indexed chunks and the queries.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm





In [7]:
## FAISS Vector Database
from langchain_community.vectorstores import FAISS

# NOTE(review): only the first 10 chunks are embedded and indexed, so retrieval
# cannot surface anything from later parts of the PDF — confirm this cap is intentional.
indexed_chunks = documents[:10]
db = FAISS.from_documents(indexed_chunks, embedding_model)

In [8]:
# Display the vector store object created in the previous cell (repr only).
db

<langchain_community.vectorstores.faiss.FAISS at 0x222dc92e190>

In [9]:
# Query the vector store directly, before any LLM is involved, to sanity-check retrieval.
query = "Explain Flow matching "
result = db.similarity_search(query)

# Show the text of the first returned chunk.
first_hit = result[0]
first_hit.page_content

'2024; Chen & Lipman, 2024; Ben-Hamu et al., 2024). Flow\nmatching substantially extends diffusion models (Ho et al.,\n2020; Song et al., 2021). Most diffusion models leverage the\nscore matching process (Song & Ermon, 2019; Song et al.,\n2020; 2021), inherently limiting them to using the Gaussian\ndistribution as the source distribution to construct a special\nprobability path. Meanwhile, flow matching can learn the\nmapping between any source distribution and target distri-\nbutions (Lipman et al., 2023; 2024; Chen & Lipman, 2024;\nGat et al., 2024).\nGuiding flow matching models refers to steering the gen-\nerated samples toward desired properties, thus sampling\nfrom a distribution weighted with some objective function\n(Lu et al., 2023) or conditioned on class labels (Song et al.,\n2021). It is vital in many generative modeling applications\n(Song et al., 2023b; Zheng et al., 2023), but in contrary\nto well-studied guidance in diffusion models (Song et al.,'

In [10]:
from langchain_ollama import OllamaLLM

In [11]:
# LLM served through Ollama; the model tag selected here is qwen2.5:0.5b.
# NOTE(review): presumably the tag must already be pulled locally (`ollama pull`) — confirm.
llm=OllamaLLM(model="qwen2.5:0.5b")


In [12]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate
# The template has two placeholders:
#   {input}   - the user's question; matches the "input" key passed to `invoke` later on.
#   {context} - presumably filled with the retrieved documents by the document chain — confirm.
# The template text is sent to the model verbatim, so whitespace inside it is significant.
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}""")

In [13]:
## Chain Introduction
## Create Stuff Document Chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Bind the LLM and the prompt template together into a single document chain.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [14]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Wrap the FAISS store in a retriever with default settings (no arguments are passed),
# then display it; the repr shows the resulting `search_kwargs`.
retriever=db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000222DC92E190>, search_kwargs={})

In [15]:
"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain

# Compose retrieval and answer generation: the retriever supplies documents,
# the document chain turns them (plus the question) into an answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [16]:
# Run the full retrieve-then-answer pipeline for one question.
chain_inputs = {"input": "Flowmatching"}
response = retrieval_chain.invoke(chain_inputs)

In [18]:
# Show the whole chain result: the original 'input', the retrieved 'context'
# documents, and the generated 'answer'.
response

{'input': 'Flowmatching',
 'context': [Document(id='753ffd30-2dfb-47f8-aa19-d1af052e79a7', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-02-05T01:37:59+00:00', 'author': 'Ruiqi Feng, Tailin Wu, Chenglei Yu, Wenhao Deng, Peiyan Hu', 'keywords': 'Machine Learning, ICML', 'moddate': '2025-02-05T01:37:59+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': 'Proceedings of the International Conference on Machine Learning 2025', 'title': 'On the Guidance of Flow Matching', 'trapped': '/False', 'source': 'flow_matching.pdf', 'total_pages': 35, 'page': 0, 'page_label': '1'}, page_content='but flow matching’s power of generating more flexible prob-\nability paths than diffusion models (Tong et al., 2024; Chen\n& Lipman, 2024; Gat et al., 2024) is restricted. There have\nbeen other controlled generation methods for flow matching,\nwith a notable stream following the paradi

In [19]:
# Display only the generated answer text from the chain result.
response['answer']

"To answer this question, I'll follow the steps outlined in the task description:\n\n1. First, identify the context provided:\n   - The text discusses flow matching and its limitations.\n   - It mentions that diffusion models are more flexible than flow matching models (Tong et al., 2024; Chen & Lipman, 2024; Gat et al., 2024).\n\n2. Next, identify the main points in the text:\n   - Flow matching significantly extends diffusion models' abilities.\n   - Most diffusion models use score matching processes (Song & Ermon, 2019; Song et al., 2020; 2021).\n   - Flow matching is more versatile as it can learn to match any source distribution with target distributions.\n   - The text discusses guiding flow matching models and proposes a new training-free asymptotically exact guidance for flow matching.\n\n3. Look for specific examples or details that support the main points:\n   - There are no direct examples provided, but the text mentions the new training-free asymptotic exact guidance used i