In [26]:
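# One-time setup: uncomment and run to install the dependencies.
# In a notebook, prefer the %pip magic so the install targets the kernel's environment.
#pip install langchain openai faiss-cpu chromadb tiktoken sentence-transformers jupyter
#pip install -U langchain-community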


In [27]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the sample text file from disk.
loader = TextLoader("Sample.txt")
docs = loader.load()  # the loaded documents, passed to the splitter below

# Break the loaded documents into smaller, overlapping chunks
# (chunk_size=500, chunk_overlap=50) so each piece can be embedded separately.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

# Print every chunk, numbered from 1, to sanity-check the split.
for chunk_number, chunk in enumerate(chunks, start=1):
    print(f"\nChunk {chunk_number}:\n{chunk.page_content}")



Chunk 1:
"The Eiffel Tower is located in Paris and was completed in 1889.\n"
    "It was designed by Gustave Eiffel and is 300 meters tall.\n"
    "Today, it is one of the most visited monuments in the world.\n"
    "The tower has three levels accessible to visitors.\n"
    "Originally, it was criticized by some of France's leading artists and intellectuals.\n"
    "Now, it's a global cultural icon of France and one of the most recognizable structures in the world.\n"

Chunk 2:
"It was built for the 1889 Exposition Universelle (World's Fair) to celebrate the 100th anniversary of the French Revolution.\n"
    "The tower is made of wrought iron and weighs about 10,000 tons.\n"


In [28]:
import os
from getpass import getpass
# Prompt for the API key only when it is missing (or empty) in the environment.
# This keeps "Restart & Run All" usable when the key is already exported, and
# avoids overwriting a valid key on every re-run of this cell.
# getpass hides the typed key so it never appears in the notebook output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API Key: ")

In [29]:
# Embed the document chunks and index them for similarity search.
# Import from langchain_community (as the TextLoader import does): the
# langchain.embeddings / langchain.vectorstores paths are deprecated aliases.
from langchain_community.embeddings import OpenAIEmbeddings  # or HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Create the embedding model (expects OPENAI_API_KEY to be set in the environment).
embeddings = OpenAIEmbeddings()

# Create a FAISS vector store from the document chunks.
vectorstore = FAISS.from_documents(chunks, embeddings)

In [30]:
# Expose the vector store through a retriever object; the QA chain built in a
# later cell receives it via its `retriever=` argument.
retriever = vectorstore.as_retriever()

In [31]:
from langchain.chains import RetrievalQA
# ChatOpenAI is imported from langchain_community; the langchain.chat_models
# path is a deprecated alias.
from langchain_community.chat_models import ChatOpenAI  # or OpenAI, depending on use

# Build a question-answering chain that answers using the chunks returned by
# `retriever`.
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(),               # No model given: the library default is used; pass model="..." to pin one
    retriever=retriever,
    return_source_documents=True    # So you can see which chunk it used
)


In [35]:
# Run a query through the QA chain.
query = "Who designed the Eiffel Tower?"
# Use .invoke() rather than calling the chain object directly: Chain.__call__
# is deprecated. RetrievalQA reads the question from the "query" input key.
result = qa_chain.invoke({"query": query})

print("Answer", result["result"])

# Show the retrieved chunks returned alongside the answer
# (available because return_source_documents=True).
for i, doc in enumerate(result["source_documents"]):
    print(f"\nSource {i+1}:\n{doc.page_content}")

Answer The Eiffel Tower was designed by Gustave Eiffel.

Source 1:
"The Eiffel Tower is located in Paris and was completed in 1889.\n"
    "It was designed by Gustave Eiffel and is 300 meters tall.\n"
    "Today, it is one of the most visited monuments in the world.\n"
    "The tower has three levels accessible to visitors.\n"
    "Originally, it was criticized by some of France's leading artists and intellectuals.\n"
    "Now, it's a global cultural icon of France and one of the most recognizable structures in the world.\n"

Source 2:
"It was built for the 1889 Exposition Universelle (World's Fair) to celebrate the 100th anniversary of the French Revolution.\n"
    "The tower is made of wrought iron and weighs about 10,000 tons.\n"


In [36]:
# this is a test again