In [2]:
from langchain_community.document_loaders import TextLoader

# Read speech.txt (path relative to the working directory) through a TextLoader
loader = TextLoader("speech.txt")
documents = loader.load()

# Demonstration only: show the text and metadata of the first loaded document.
# Nothing is printed when the loader returned no documents.
if len(documents) > 0:
    first_document = documents[0]
    print(first_document.page_content)
    print(first_document.metadata)

A Tryst With Destiny
Indian Prime Minister Jawaharlal Nehru's Inaugural Address
14 August 1947
Long years ago we made a tryst with destiny, and now the time comes when we shall redeem our
pledge, not wholly or in full measure, but very substantially. At the stroke of the midnight hour,
when the world sleeps, India will awake to life and freedom. A moment comes, which comes but
rarely in history, when we step out from the old to the new, when an age ends, and when the soul of
a nation, long suppressed, finds utterance. It is fitting that at this solemn moment, we take the
pledge of dedication to the service of India and her people and to the still larger cause of humanity.
At the dawn of history, India started on her unending quest, and trackless centuries are filled with
her striving and grandeur of her success and failures. Through good and ill fortune alike, she has
never lost sight of that quest, forgotten the ideals which gave her strength. We end today a period of
misfortunes and 

In [None]:
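# # Optional alternative: load every PDF under a directory instead of a single text file (uncomment this cell to use it)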
# from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

# # Instantiate the DirectoryLoader, specifying the directory and the loader class for PDFs
# loader = DirectoryLoader("path/to/your/pdf_directory", glob="**/*.pdf", loader_cls=PyPDFLoader)

# # Load the documents
# documents = loader.load()

# # Print the number of loaded documents
# print(f"Loaded {len(documents)} documents.")

In [3]:
# Chunk the data.
# Import the splitter from the `langchain_text_splitters` package: the repr
# shown further down in this notebook reports the class as
# `langchain_text_splitters.character.RecursiveCharacterTextSplitter`, so this
# is the module it actually lives in. The older `langchain.text_splitter` path
# is a legacy alias and is not guaranteed to exist in newer LangChain releases.
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # target upper bound on chunk length; adjust as needed
    chunk_overlap=50,  # amount of text repeated between neighbouring chunks; adjust as needed
    # Separators are tried in order: paragraph break, line break, sentence end,
    # space, and finally the empty string. Customize if necessary.
    separators=["\n\n", "\n", ". ", " ", ""],
)

In [4]:
# Echo the splitter object so the cell output shows its repr (sanity check that it was created)
text_splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x273d5f16030>

In [5]:
# Split the loaded documents into chunks with the splitter configured earlier;
# `chunks` is what the later cells print, count and embed.
chunks = text_splitter.split_documents(documents)


In [7]:
# Number of chunks produced by the split (displayed as the cell output)
len(chunks)

14

In [8]:
# Print every chunk under a 1-based "Chunk N:" heading, followed by a blank line
for chunk_number, chunk in enumerate(chunks, start=1):
    print(f"Chunk {chunk_number}:\n{chunk.page_content}\n")
    # Uncomment to inspect each chunk's metadata as well
    # print(chunk.metadata)


Chunk 1:
A Tryst With Destiny
Indian Prime Minister Jawaharlal Nehru's Inaugural Address
14 August 1947
Long years ago we made a tryst with destiny, and now the time comes when we shall redeem our
pledge, not wholly or in full measure, but very substantially. At the stroke of the midnight hour,
when the world sleeps, India will awake to life and freedom. A moment comes, which comes but
rarely in history, when we step out from the old to the new, when an age ends, and when the soul of

Chunk 2:
a nation, long suppressed, finds utterance. It is fitting that at this solemn moment, we take the
pledge of dedication to the service of India and her people and to the still larger cause of humanity.
At the dawn of history, India started on her unending quest, and trackless centuries are filled with
her striving and grandeur of her success and failures. Through good and ill fortune alike, she has

Chunk 3:
never lost sight of that quest, forgotten the ideals which gave her strength. We end today

In [9]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Input: `chunks`, the list of split documents produced by the splitting step above.

# Embedding client backed by a local Ollama model. The `model` value must match
# the name of a model that has already been pulled into Ollama.
# NOTE(review): the line echoed beneath this cell's output looks like a
# deprecation warning pointing at this constructor — confirm, and consider the
# maintained replacement class if so.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Embed every chunk and build the FAISS vector store from the results
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)


  embeddings = OllamaEmbeddings(model="nomic-embed-text")


In [10]:
# Echo the vector store object so the cell output shows its repr (sanity check that it was built)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x273f4f26c30>

In [11]:
# Folder (relative to the working directory) where the index is written;
# the loading cell below reuses `db_path` to read it back.
db_path = "faiss_index_local_ollama"

vectorstore.save_local(folder_path=db_path)
print(f"FAISS vector store saved to: {db_path}")


FAISS vector store saved to: faiss_index_local_ollama


In [12]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings # Import again for loading

# The store must be loaded with the same embedding model that was used to build it
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# The saved index includes a pickle file, so loading must be opted into explicitly.
# Unpickling can execute arbitrary code: keep this flag only for index folders
# this notebook wrote itself, never for files from an untrusted source.
new_vectorstore = FAISS.load_local(
    folder_path=db_path,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)
print(f"FAISS vector store loaded from: {db_path}")


FAISS vector store loaded from: faiss_index_local_ollama


In [13]:
# Question to look up in the reloaded index, and how many matches to return
query_text = "What are the foundation of nation?"
k = 3

results = new_vectorstore.similarity_search(query=query_text, k=k)

# Report the query, then each hit (numbered from 1) with its text and metadata
print(f"\nQuery: {query_text}")
print(f"Top {k} similar chunks:")
for rank, doc in enumerate(results, start=1):
    print(f"Chunk {rank}:")
    print(doc.page_content)
    print(f"Metadata: {doc.metadata}\n")



Query: What are the foundation of nation?
Top 3 similar chunks:
Chunk 1:
a nation, long suppressed, finds utterance. It is fitting that at this solemn moment, we take the
pledge of dedication to the service of India and her people and to the still larger cause of humanity.
At the dawn of history, India started on her unending quest, and trackless centuries are filled with
her striving and grandeur of her success and failures. Through good and ill fortune alike, she has
Metadata: {'source': 'speech.txt'}

Chunk 2:
star never set and that hope never be betrayed!
We rejoice in that freedom, even though clouds surround us, and many of our people are sorrowstricken and difficult problems encompass us. But freedom brings responsibilities and burdens and
we have to face them in the spirit of a free and disciplined people.
On this day our first thoughts go to the architect of this freedom, the Father of our Nation, who,
Metadata: {'source': 'speech.txt'}

Chunk 3:
obligations. We cannot encou