In [1]:
#!pip install langchain langchain_community langchain_chroma pypdf ollama

In [2]:
import os
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

print("All libraries imported successfully!")

All libraries imported successfully!


# CONSTANTS

In [3]:
DATA_PATH = "documents"
CHROMA_PATH = "chroma"

# LOAD DOCUMENTS

In [4]:
loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()
if not documents:
    print("No PDF documents found. Please add your notes to the 'documents' folder.")
else:
    print(f"Loaded {len(documents)} document(s).")


Loaded 120 document(s).


In [5]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader


DATA_PATH = "documents"

loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()


print("--- List of Loaded Files ---")
for doc in documents:
    print(doc.metadata['source'])
print("--------------------------")
print(f"Total files found: {len(documents)}")

--- List of Loaded Files ---
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R17A0534).pdf
documents\MACHINE LEARNING(R

# SPILT DOCUMENTS INTO SMALLER CHUNKS , SO THAT IT CAN BE MANAGED EASILY

In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
print(f"Split documents into {len(chunks)} chunks.")

Split documents into 337 chunks.


# CREATING VECTOR DATABASE(CHROMA)

In [7]:
def ingest_documents():
    print("Creating embeddings and storing in Chroma DB... (This may take a moment)")
    embeddings = OllamaEmbeddings(model="llama3")
    db = Chroma.from_documents(
        documents=chunks, 
        embedding=embeddings, 
        persist_directory=CHROMA_PATH
    )
    print(f"Successfully saved {len(chunks)} chunks to Chroma DB at '{CHROMA_PATH}'.")


# COMPLETE THE INGESTION PROCESS

In [7]:
ingest_documents()

Successfully saved 337 chunks to Chroma DB at 'chroma'.


In [13]:
CHROMA_PATH = "chroma"

In [10]:

def query_rag(question_text):
    print("Preparing to query the knowledge base...")

  
    embeddings = OllamaEmbeddings(model='llama3')
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

    retriever = db.as_retriever()


    prompt_template = """
    Answer the question based only on the following context:

    {context}

    ---

    Answer the question based on the above context: {question}
    """
    prompt = ChatPromptTemplate.from_template(prompt_template)

 
    model = Ollama(model="llama3")

  
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    
    print("Querying the AI...")
    response = chain.invoke(question_text)
    return response

In [11]:

my_question = "What is Machine learning?" 


answer = query_rag(my_question)


print("\n--- AI Answer ---\n")
print(answer)

Preparing to query the knowledge base...
Querying the AI...


  model = Ollama(model="llama3")



--- AI Answer ---

There is no direct mention of what Machine Learning is in the provided context. However, it does mention "Classification and regression tree tutorials" and "What is a CART in Machine Learning?" which suggests that CART (Classification and Regression Tree) is a topic related to Machine Learning.


In [12]:
import os
print(os.getcwd())

C:\Users\Amit\MINORPROJECT
