In [None]:
from langchain.chains import RetrievalQA
from langchain.chains import retrieval_qa
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores import Chroma

# Chat model that answers questions over the retrieved chunks.
llm = ChatOpenAI()

# Byte store on local disk; used below as the backing store for cached embeddings.
cache_dir = LocalFileStore("./.cache/")

loader = UnstructuredFileLoader("./file_name.pdf")

# Split on newlines and measure chunk size in tiktoken tokens.
# `from_tiktoken_encoder` supplies its own token-counting `length_function`,
# so passing one here would raise a duplicate-keyword TypeError.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=200,
    chunk_overlap=50,
)

docs = loader.load_and_split(text_splitter=splitter)

embedding = OpenAIEmbeddings()

# Wrap the embedding model so vectors are looked up in `cache_dir` first.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embedding, cache_dir
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Quick sanity check of the vector store before building the chain.
result = vectorstore.similarity_search("question")

# `from_chain_type` is a constructor on the RetrievalQA class, not on the
# `retrieval_qa` module; "stuff" puts all retrieved documents into one prompt.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import retrieval_qa
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough 

# Low temperature keeps answers close to the retrieved context.
llm = ChatOpenAI(
    temperature=0.1,
)

# Byte store on local disk; used below as the backing store for cached embeddings.
cache_dir = LocalFileStore("./.cache/")

loader = UnstructuredFileLoader("./file_name.pdf")

# Split on newlines and measure chunk size in tiktoken tokens.
# `from_tiktoken_encoder` supplies its own token-counting `length_function`,
# so passing one here would raise a duplicate-keyword TypeError.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=200,
    chunk_overlap=50,
)

docs = loader.load_and_split(text_splitter=splitter)

embedding = OpenAIEmbeddings()

# Wrap the embedding model so vectors are looked up in `cache_dir` first.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embedding, cache_dir
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Quick sanity check of the vector store before building the chain.
result = vectorstore.similarity_search("question")

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant ..... \n\n{context}"),
    ("human", "{question}"),
])

# The input string goes to the retriever (to fill `context`) and, unchanged,
# to the `question` slot of the prompt.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

# Placeholder input: replace with the actual question text.
chain.invoke("{question}")

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import retrieval_qa
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Low temperature keeps answers close to the retrieved context.
llm = ChatOpenAI(
    temperature=0.1,
)

# Byte store on local disk; used below as the backing store for cached embeddings.
cache_dir = LocalFileStore("./.cache/")

loader = UnstructuredFileLoader("./file_name.pdf")

# Split on newlines and measure chunk size in tiktoken tokens.
# `from_tiktoken_encoder` supplies its own token-counting `length_function`,
# so passing one here would raise a duplicate-keyword TypeError.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=200,
    chunk_overlap=50,
)

docs = loader.load_and_split(text_splitter=splitter)

embedding = OpenAIEmbeddings()

# Wrap the embedding model so vectors are looked up in `cache_dir` first.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embedding, cache_dir
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Quick sanity check of the vector store before building the chains.
result = vectorstore.similarity_search("question")

retriever = vectorstore.as_retriever()

# "Map" step prompt: applied to one document at a time to pull out the text
# relevant to the question. Built with `from_messages` (the constructor);
# the human message must be the template string "{question}".
map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", 
     """Use the portion of a long document to see if any of the text is 
     relevant to answer the question. Return any relevant text verbatim.
     -----
     {context}"""),
    ("human", "{question}"),
])

map_docs_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Extract question-relevant text from each retrieved document.

    Invokes ``map_docs_chain`` once per document and joins the responses.
    This is the generator-expression form of a loop that appends each
    response's ``.content`` to a list and then joins that list.

    Args:
        inputs: Mapping with a ``"documents"`` entry (an iterable of objects
            exposing ``page_content``) and a ``"question"`` entry that is
            forwarded to the chain unchanged.

    Returns:
        str: The ``.content`` of every chain response, separated by blank
        lines; an empty string when there are no documents.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    return "\n\n".join(
        map_docs_chain.invoke({
            "context": doc.page_content,
            "question": question,
        }).content
        for doc in documents
    )
    

# Map step: retrieve documents for the question, then run `map_docs` over them
# to produce one string of extracted text.
map_chain = {"documents": retriever, "question": RunnablePassthrough()} | RunnableLambda(map_docs)

final_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant ..... \n\n{context}"),
    ("human", "{question}"),
])

# Reduce step: feed the extracted text as `context` into the prompt defined
# in this cell (`final_prompt`), so the cell does not depend on a `prompt`
# variable left over from another cell.
chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

# Placeholder input: replace with the actual question text.
chain.invoke("{question}")