In [62]:
!pip install langchain_community[all]
!pip install llama-parse
!pip install qdrant_client
!pip install langchain_qdrant
!pip install ollama







Import Library



In [63]:
import os
import uuid
from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredMarkdownLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient, models
from langchain_qdrant import FastEmbedSparse, Qdrant
from langchain_community.vectorstores import Qdrant
from langchain_core.documents import Document
import nest_asyncio
from llama_parse import LlamaParse
import ollama
from langchain_community.embeddings import OllamaEmbeddings

In [64]:
nest_asyncio.apply()

API

In [65]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [66]:
qdrant_url = os.environ["QDRANT_URL"]
qdrant_api_key = os.environ["QDRANT_API_KEY"]
collection_name = "Ollama-RAG"

qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

In [67]:
# --- Vectorstore Initialization (Langchain Qdrant) ---
vectorstore = Qdrant(
    client=qdrant_client,
    collection_name=collection_name,
    embeddings=OllamaEmbeddings,  # We're handling embeddings separately
)

Hybrid Search Function

In [68]:
def hybrid_search(query: str, collection: str, limit: int = 5):
    dense_vector = ollama.embeddings(model='rjmalagon/gte-qwen2-1.5b-instruct-embed-f16', prompt=query)

    results = qdrant_client.search(
            collection_name=collection_name,
            query_vector=models.NamedVector(
                name="gte-qwen1.5",
                vector=dense_vector['embedding']
            ),
            limit=limit,
            search_params=models.SearchParams(
                hnsw_ef=128
            )
    )
    return results

In [69]:
hybrid_search("What is machine learning?", "Ollama-RAG")

[ScoredPoint(id=8, version=0, score=0.6242708, payload={'text': '1 Introduction\n1.1 What is Machine Learning\nMachine learning is a subﬁeld of computer science that is concerned with building algorithms\nwhich, to be useful, rely on a collection of examples of some phenomenon. These examples\ncan come from nature, be handcrafted by humans or generated by another algorithm.\nMachine learning can also be deﬁned as the process of solving a practical problem by 1)\ngathering a dataset, and 2) algorithmically building a statistical model based on that dataset.', 'metadata': {'source': 'C:/Users/shaoe/OneDrive/Desktop/AAI3008LLM/The_Hundred_page_Machine_Learning_Book_Andriy_Burkov_Z_Library.pdf', 'page': 2}}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=4, version=0, score=0.5859206, payload={'text': 'As you can see, just like artiﬁcial intelligence is not intelligence, machine learning is not\nlearning. However, machine learning is a universally recognized term that usua

In [70]:
def retrieve_documents(query):
    results = hybrid_search(query, collection=collection_name, limit=3)  # Use hybrid search here
    list = "Database Context: "
    for i in range(len(results)):
        list += str(results[i].payload['text'])
    return list

retrieve_documents("What is machine learning?")

'Database Context: 1 Introduction\n1.1 What is Machine Learning\nMachine learning is a subﬁeld of computer science that is concerned with building algorithms\nwhich, to be useful, rely on a collection of examples of some phenomenon. These examples\ncan come from nature, be handcrafted by humans or generated by another algorithm.\nMachine learning can also be deﬁned as the process of solving a practical problem by 1)\ngathering a dataset, and 2) algorithmically building a statistical model based on that dataset.As you can see, just like artiﬁcial intelligence is not intelligence, machine learning is not\nlearning. However, machine learning is a universally recognized term that usually refers\nto the science and engineering of building machines capable of doing various useful things\nwithout being explicitly programmed to do so. So, the word “learning” in the term is used\nby analogy with the learning in animals rather than literally.\nWho This Book is ForPreface\nLet’s start by telling 

RAG Chain

In [71]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from langchain_community.utilities import GoogleSearchAPIWrapper

In [72]:
def get_rag_chain(enable_search=False):
    """Creates and returns the RAG chain with hybrid search and optional Google Search."""

    # Initialize the LLM
    llm = Ollama(model="phi3:mini")

    # Create a prompt template
    template = """Answer the question based on the following context:
    {context}
    {online_context}

    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    # Function to retrieve relevant documents using hybrid search
    def retrieve_documents(query):
        results = hybrid_search(query, collection=collection_name, limit=3)  # Upgrade to hybrid search here
        list = "Database Context: "
        for i in range(len(results)):
            list += str(results[i].payload['text'])
        #print(f"{list}") # This one maybe fix to add metadata content
        return list 

    # Add online searching, defaulting to empty string if search is disabled
    def online_search(query):
        """Searches Google and returns a summary of the results."""
        search = GoogleSearchAPIWrapper()
        search_results = search.run(query)
        if not search_results:
            search_results = "No relevant information found online."  # Handle empty results
        #print(f"Online Search Results: {search_results}")  # Debugging
        return f"Online search results: {search_results}"
    
    def filler(query):
        #print("Online Search Switched off")
        return f"Online Search Switched Off"
    
    if enable_search:
        rag_chain = (
        {"context": retrieve_documents, ### This need 
         "question": RunnablePassthrough(),
         "online_context": online_search} ### This is correct
        | prompt
        | llm
        | StrOutputParser()
    )
    else:
        rag_chain = (
        {"context": retrieve_documents, ### This need 
         "question": RunnablePassthrough(),
         "online_context": filler} ### This is correct
        | prompt
        | llm
        | StrOutputParser()
    )

    return rag_chain

Search Tool

In [73]:
def agent_search(query):
    """Search for relevant information in the document and online."""
    rag_chain = get_rag_chain(enable_search=False)  # Enable online search
    response = rag_chain.invoke(query)
    return response

In [74]:
from langchain.agents import Tool

In [75]:
tools = [
    Tool(
        name="document_search",
        func=agent_search,
        description="useful for when you need to answer questions about the document and online."
    )
]

In [76]:
# --- Agent Initialization ---
from langchain_community.llms import Ollama
from langchain.agents import initialize_agent
from langchain.agents import AgentType

In [77]:
llm = Ollama(model="phi3:mini")
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [78]:
from langchain_google_community import GoogleSearchAPIWrapper

In [79]:
## WAKE UP
try:
    agent.run("Hello")
except:
    pass



[1m> Entering new AgentExecutor chain...[0m


In [80]:
import warnings
warnings.filterwarnings('ignore')

query = "What is machine learning?"
response = agent.run(query)
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Question: What is machine learning?
Thought: Machine learning seems like a broad topic; it would be beneficial to find reliable information about its definition and principles. I will use the document_search tool for this purpose.
Action: document_search
Action Input: "What is machine learning?"[0m
Observation: [36;1m[1;3mMachine Learning, often abbreviated to ML, is an essential branch of computer science that focuses on creating algorithms capable of making accurate predictions or taking appropriate actions using collected examples and datasets related to a specific phenomenon. These examples can be naturally occurring instances, human-designed data points, or outputs generated by other automated systems (referred to as "supervised learning"). The goal is for the machine's performance to improve on new input based solely on past experience without explicit programming after an initial setup phase that involves gathering