In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

loader = TextLoader(r"./chapter_3.txt")

splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)

loader.load_and_split(text_splitter=splitter)



[Document(metadata={'source': './chapter_3.txt'}, page_content="Chapter 3\n\n'There are three stages in your reintegration,' said O'Brien. 'There is\nlearning, there is understanding, and there is acceptance. It is time for\nyou to enter upon the second stage.'"),
 Document(metadata={'source': './chapter_3.txt'}, page_content='As always, Winston was lying flat on his back. But of late his bonds were\nlooser. They still held him to the bed, but he could move his knees a'),
 Document(metadata={'source': './chapter_3.txt'}, page_content='little and could turn his head from side to side and raise his arms from\nthe elbow. The dial, also, had grown to be less of a terror. He could'),
 Document(metadata={'source': './chapter_3.txt'}, page_content="evade its pangs if he was quick-witted enough: it was chiefly when he\nshowed stupidity that O'Brien pulled the lever. Sometimes they got through"),
 Document(metadata={'source': './chapter_3.txt'}, page_content='a whole session without use of the 

In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

cache_dir=LocalFileStore("./.cache/")



loader = UnstructuredFileLoader("./chapter_3.txt")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=200,  
    chunk_overlap=50,
    splitter="\n",
    length_function=len,
)

docs = loader.load_and_split(text_splitter=splitter)

embedding = OpenAIEmbeddings()

cached_embeddings=CacheBackedEmbeddings.from_bytes_store(
    embedding, cache_dir
)

vectorstore = Chroma.from_documents(docs, cached_embeddings)



TypeError: langchain_text_splitters.character.CharacterTextSplitter() got multiple values for keyword argument 'length_function'

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma


embedder = OpenAIEmbeddings()

vector = embedder.embed_query("Hi")
vector = embedder.embed_documents(["Hi","how are you"])

len(vector)

  embedder = OpenAIEmbeddings()


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory

# Designate variables
llm = ChatOpenAI(
    temperature=0.1,
    model_name="gpt-4o"
)

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages= True,
)

loader = TextLoader(r"./files/chapter_3.txt")

splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=100,
)

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cache_dir = LocalFileStore("./cache/")

cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir    
)

vectorstore = FAISS.from_documents(docs, cache_embeddings)

retriever = vectorstore.as_retriever()

# List of docs generation
map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", """
    Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim
    ------------------
    {portion}
    """),
    ("human", "{question}"),
])
map_doc_chain = map_doc_prompt | llm

# Merge list of docs
def map_docs(inputs):
    documents = inputs["documents"]
    question = inputs["question"]
    return "\n\n".join(map_doc_chain.invoke({
        "portion": doc.page_content,
        "question": question
    }).content for doc in documents)

map_chain = {"documents": retriever, "question": RunnablePassthrough()} | RunnableLambda(map_docs)

# Final document | prompt | llm
final_prompt = ChatPromptTemplate.from_messages([
    ("system", """
    Give the following extracted parts of a long document and a question, create a final answer.
    If you don't know the answer, just say that you don't know. Don't try to make up the answer.
    ---------
    {context}
    """),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}")
])

def load_memory(input):
    return memory.load_memory_variables({})["chat_history"]

chain = {"context": map_chain, "question": RunnablePassthrough()} | RunnablePassthrough.assign(chat_history=load_memory) | final_prompt | llm

def invoke_chain(question):
    result=chain.invoke(question)
    memory.save_context(
        {"input": question},
        {"output": result.content}
    )
    print(result)

invoke_chain("Is Aaronson guilty?")
