In [None]:
import os
import sys
import json
from typing import List, Tuple

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
# from langchain_openai import ChatOpenAI # No longer using OpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import DuckDuckGoSearchResults
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_openai import OpenAIEmbeddings # No longer using OpenAI embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub

# Load environment variables from a .env file into the process environment
load_dotenv()

# Path of the PDF that is indexed further down via encode_pdf(path).
# It is a bare file name, so it is resolved relative to the current working directory.
path = "Understanding_Climate_Change.pdf" # Assuming the PDF is in the same directory

# Helper function to load and encode PDF
def encode_pdf(pdf_path: str, chunk_size: int = 500, chunk_overlap: int = 50, embeddings_model_name: str = "all-MiniLM-L6-v2"):
    """Load a PDF, split it into chunks, and index the chunks in a FAISS vector store.

    Args:
        pdf_path (str): Path of the PDF file to load.
        chunk_size (int): Chunk size passed to the text splitter. Defaults to 500.
        chunk_overlap (int): Overlap between consecutive chunks. Defaults to 50.
        embeddings_model_name (str): Hugging Face model name used to embed the chunks.

    Returns:
        The FAISS vector store built from the chunks, or None when the PDF
        cannot be loaded or produces no text chunks.
    """
    try:
        documents = PyPDFLoader(pdf_path).load()
    except FileNotFoundError:
        print(f"Error: File not found at path: {pdf_path}")
        return None
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # A PDF without extractable text (e.g. scanned images) yields no chunks;
    # report it here instead of letting the index construction fail on empty input.
    if not texts:
        print(f"Error: No text could be extracted from PDF: {pdf_path}")
        return None

    # Use Hugging Face Embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

    # Create a FAISS vector store
    return FAISS.from_documents(texts, embeddings)


# Initialize the HuggingFaceHub language model.
# os.environ only accepts string values, so a missing token is reported with a
# clear message instead of the opaque TypeError raised by assigning None.
hf_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
if not hf_api_token:
    raise EnvironmentError(
        "HUGGINGFACEHUB_API_TOKEN is not set. Add it to the environment or to the .env file."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_api_token
llm = HuggingFaceHub(repo_id="google/gemma-7b-it", model_kwargs={"max_length": 1000, "temperature": 0.1})


# Initialize search tool
search = DuckDuckGoSearchResults()

# Define retrieval evaluator, knowledge refinement, and query rewriter llm chains
# Retrieval Evaluator
class RetrievalEvaluatorInput(BaseModel):
    # Schema holding a single relevance score produced by the retrieval evaluator.
    relevance_score: float = Field(
        ...,
        description="The relevance score of the document to the query. the score should be between 0 and 1.",
    )

def retrieval_evaluator(query: str, document: str) -> float:
    """Ask the LLM how relevant ``document`` is to ``query``.

    Args:
        query (str): The user query.
        document (str): The document text to score.

    Returns:
        float: A relevance score clamped to the range [0, 1]. The neutral
        value 0.5 is returned when the LLM call fails or its output contains
        no usable number.
    """
    import re

    prompt = PromptTemplate(
        input_variables=["query", "document"],
        template="On a scale from 0 to 1, how relevant is the following document to the query? Query: {query}\nDocument: {document}\nRelevance score:"
    )
    chain = prompt | llm
    try:
        raw_output = chain.invoke({"query": query, "document": document})
    except Exception as e:
        # Best effort: a failed LLM call must not abort the whole pipeline
        print(f"Error in retrieval evaluator: {e}")
        return 0.5

    text = str(raw_output).strip()
    try:
        # Fast path: the model answered with nothing but a number
        score = float(text)
    except ValueError:
        # Otherwise take the first number embedded in the text
        match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", text)
        if match is None:
            return 0.5
        score = float(match.group(0))

    if score != score:  # NaN compares unequal to itself
        return 0.5
    # The prompt asks for a 0-1 scale; clamp answers such as "8" or "-1"
    return min(1.0, max(0.0, score))

# Knowledge Refinement
class KnowledgeRefinementInput(BaseModel):
    # Schema with a single string field for knowledge refinement.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    key_points: str = Field(
        ...,
        description="The document to extract key information from.",
    )

def knowledge_refinement(document: str) -> List[str]:
    """Extract key points from ``document`` using the LLM.

    The raw completion is split on newlines; blank lines are dropped and the
    remaining lines are returned with surrounding whitespace removed.

    Args:
        document (str): The text to extract key information from.

    Returns:
        List[str]: The non-empty lines of the LLM answer.
    """
    refinement_prompt = PromptTemplate(
        input_variables=["document"],
        template="Extract the key information from the following document in bullet points:\n{document}\nKey points:"
    )
    completion = (refinement_prompt | llm).invoke({"document": document})

    key_points = []
    for line in completion.split('\n'):
        cleaned = line.strip()
        if cleaned:
            key_points.append(cleaned)
    return key_points

# Web Search Query Rewriter
class QueryRewriterInput(BaseModel):
    # Schema with a single string field for the query rewriter.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    query: str = Field(
        ...,
        description="The query to rewrite.",
    )

def rewrite_query(query: str) -> str:
    """Ask the LLM to rephrase ``query`` for a web search.

    Args:
        query (str): The original query.

    Returns:
        str: The LLM answer with surrounding whitespace removed.
    """
    rewrite_prompt = PromptTemplate(
        input_variables=["query"],
        template="Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:"
    )
    rewritten = (rewrite_prompt | llm).invoke({"query": query})
    return rewritten.strip()

# Helper function to parse search results
def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
    """
    Parse a JSON string of search results into a list of title-link tuples.

    Args:
        results_string (str): A JSON-formatted string containing search results,
            either a list of result objects or a single result object.

    Returns:
        List[Tuple[str, str]]: A list of tuples, where each tuple contains the title and link of a search result.
                               Missing titles default to 'Untitled' and missing links to ''.
                               Entries that are not JSON objects are skipped.
                               If parsing fails or the JSON has an unexpected shape, an empty list is returned.
    """
    try:
        results = json.loads(results_string)
    except (json.JSONDecodeError, TypeError):  # TypeError covers non-string input such as None
        print("Error parsing search results. Returning empty list.")
        return []

    # Accept a single result object as a one-element result list
    if isinstance(results, dict):
        results = [results]

    # Scalars, strings, and null are valid JSON but not a result list
    if not isinstance(results, list):
        print("Error parsing search results. Returning empty list.")
        return []

    # Only JSON objects support .get(); skip anything else instead of raising AttributeError
    return [
        (item.get('title', 'Untitled'), item.get('link', ''))
        for item in results
        if isinstance(item, dict)
    ]

# Define sub functions for the CRAG process
def retrieve_documents(query: str, faiss_index: FAISS, k: int = 3) -> List[str]:
    """
    Run a similarity search on the FAISS index and return the text of the hits.

    Args:
        query (str): The query string to search for.
        faiss_index (FAISS): The FAISS index used for similarity search.
        k (int): The number of top documents to retrieve. Defaults to 3.

    Returns:
        List[str]: The page content of each retrieved document, in search order.
    """
    contents = []
    for hit in faiss_index.similarity_search(query, k=k):
        contents.append(hit.page_content)
    return contents

def evaluate_documents(query: str, documents: List[str]) -> List[float]:
    """
    Score every document against the query with retrieval_evaluator.

    Args:
        query (str): The query string.
        documents (List[str]): A list of document contents to evaluate.

    Returns:
        List[float]: One relevance score per document, in input order.
    """
    scores = []
    for document in documents:
        scores.append(retrieval_evaluator(query, document))
    return scores

def perform_web_search(query: str) -> Tuple[List[str], List[Tuple[str, str]]]:
    """
    Rewrite the query, run it through the search tool, and refine the results.

    Args:
        query (str): The query string to search for.

    Returns:
        Tuple[List[str], List[Tuple[str, str]]]:
            - Key points extracted from the raw search results.
            - (title, link) tuples parsed from the raw search results.
    """
    raw_results = search.run(rewrite_query(query))
    refined_points = knowledge_refinement(raw_results)
    return refined_points, parse_search_results(raw_results)

def generate_response(query: str, knowledge: str, sources: List[Tuple[str, str]]) -> str:
    """
    Ask the LLM to answer the query from the given knowledge and cite the sources.

    Args:
        query (str): The query string.
        knowledge (str): The refined knowledge to use in the response.
        sources (List[Tuple[str, str]]): (title, link) tuples; an empty link means only the title is listed.

    Returns:
        str: The generated response.
    """
    answer_prompt = PromptTemplate(
        input_variables=["query", "knowledge", "sources"],
        template="Based on the following knowledge, answer the query. Include the sources with their links (if available) at the end of your answer:\nQuery: {query}\nKnowledge: {knowledge}\nSources: {sources}\nAnswer:"
    )

    # One line per source: "title: link", or just the title when there is no link
    source_lines = []
    for title, link in sources:
        source_lines.append(f"{title}: {link}" if link else title)

    prompt_values = {
        "query": query,
        "knowledge": knowledge,
        "sources": "\n".join(source_lines),
    }
    return (answer_prompt | llm).invoke(prompt_values)

# CRAG process
def crag_process(query: str, faiss_index: FAISS) -> str:
    """
    Process a query by retrieving, evaluating, and using documents or performing a web search to generate a response.

    The best relevance score among the retrieved documents selects the action:
    above 0.7 the best document is used as is, below 0.3 only a web search is
    used, and otherwise refined document knowledge is combined with web results.

    Args:
        query (str): The query string to process.
        faiss_index (FAISS): The FAISS index used for document retrieval.

    Returns:
        str: The generated response based on the query, or an error message
        when ``faiss_index`` is None.
    """
    print(f"\nProcessing query: {query}")

    # Retrieve and evaluate documents
    if faiss_index is None:
        return "Error: FAISS index is not initialized. Please check if the PDF was loaded successfully."

    retrieved_docs = retrieve_documents(query, faiss_index)
    eval_scores = evaluate_documents(query, retrieved_docs)

    print(f"\nRetrieved {len(retrieved_docs)} documents")
    print(f"Evaluation scores: {eval_scores}")

    # Default to 0 when nothing was retrieved, which routes to the web search branch
    max_score = max(eval_scores) if eval_scores else 0

    sources = []

    if max_score > 0.7:
        print("\nAction: Correct - Using retrieved document")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        final_knowledge = best_doc
        sources.append(("Retrieved document", ""))
    elif max_score < 0.3:
        print("\nAction: Incorrect - Performing web search")
        web_knowledge, sources = perform_web_search(query)
        # perform_web_search returns a list of key points, while generate_response
        # expects one string; join them as the ambiguous branch does.
        final_knowledge = "\n".join(web_knowledge)
    else:
        print("\nAction: Ambiguous - Combining retrieved document and web search")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        # Refine the retrieved knowledge
        retrieved_knowledge = knowledge_refinement(best_doc)
        web_knowledge, web_sources = perform_web_search(query)
        final_knowledge = "\n".join(retrieved_knowledge + web_knowledge)
        sources = [("Retrieved document", "")] + web_sources

    print("\nFinal knowledge:")
    print(final_knowledge)

    print("\nSources:")
    for title, link in sources:
        print(f"{title}: {link}" if link else title)

    # Generate response
    print("\nGenerating response...")
    response = generate_response(query, final_knowledge, sources)

    print("\nResponse generated")
    return response

# Build the vector store from the PDF
vectorstore = encode_pdf(path)


# Two sample queries to run through the pipeline
query1 = "What are the main causes of climate change?"
query2 = "how did harry beat quirrell?"

# Run both queries through the CRAG pipeline, or report that the vector store is missing
if not vectorstore:
    print("Vectorstore was not initialized.  Please check the PDF loading process.")
else:
    result1 = crag_process(query1, vectorstore)
    print(f"Query: {query1}")
    print(f"Answer: {result1}")

    result2 = crag_process(query2, vectorstore)
    print(f"Query: {query2}")
    print(f"Answer: {result2}")

In [None]:
import os
import sys
import json
from typing import List, Tuple

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import DuckDuckGoSearchResults
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Load environment variables from a .env file into the process environment
load_dotenv()

# Path of the PDF that is indexed further down via encode_pdf(path).
# It is a bare file name, so it is resolved relative to the current working directory.
path = "Understanding_Climate_Change.pdf" # Assuming the PDF is in the same directory

# Helper function to load and encode PDF
def encode_pdf(pdf_path: str, chunk_size: int = 500, chunk_overlap: int = 50, embeddings_model_name: str = "all-MiniLM-L6-v2"):
    """Load a PDF, split it into chunks, and index the chunks in a FAISS vector store.

    Args:
        pdf_path (str): Path of the PDF file to load.
        chunk_size (int): Chunk size passed to the text splitter. Defaults to 500.
        chunk_overlap (int): Overlap between consecutive chunks. Defaults to 50.
        embeddings_model_name (str): Hugging Face model name used to embed the chunks.

    Returns:
        The FAISS vector store built from the chunks, or None when the PDF
        cannot be loaded or produces no text chunks.
    """
    try:
        documents = PyPDFLoader(pdf_path).load()
    except FileNotFoundError:
        print(f"Error: File not found at path: {pdf_path}")
        return None
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # A PDF without extractable text (e.g. scanned images) yields no chunks;
    # report it here instead of letting the index construction fail on empty input.
    if not texts:
        print(f"Error: No text could be extracted from PDF: {pdf_path}")
        return None

    # Use Hugging Face Embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

    # Create a FAISS vector store
    return FAISS.from_documents(texts, embeddings)


# Initialize the LlamaCpp language model
model_path = "path/to/your/llama3/model.gguf"  # Replace with the actual path to your downloaded model
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    n_ctx=2048,  # context size; adjust based on your model
    n_batch=512,
    n_gpu_layers=0,  # adjust based on your GPU; 0 means no GPU offloading
    f16_kv=True,  # kept at True; the original note warns of problems after a couple of calls otherwise
    # NOTE(review): the original comment said verbose is required for the callback
    # manager, yet it is set to False here — confirm which is intended.
    verbose=False,
)


# Initialize search tool
search = DuckDuckGoSearchResults()

# Define retrieval evaluator, knowledge refinement, and query rewriter llm chains
# Retrieval Evaluator
class RetrievalEvaluatorInput(BaseModel):
    # Schema holding a single relevance score for a document.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    relevance_score: float = Field(
        ...,
        description="The relevance score of the document to the query. the score should be between 0 and 1.",
    )

def retrieval_evaluator(query: str, document: str) -> float:
    """Ask the LLM how relevant ``document`` is to ``query``.

    Args:
        query (str): The user query.
        document (str): The document text to score.

    Returns:
        float: A relevance score clamped to the range [0, 1]. The neutral
        value 0.5 is returned when the LLM call fails or its output contains
        no usable number.
    """
    import re

    prompt = PromptTemplate(
        input_variables=["query", "document"],
        template="On a scale from 0 to 1, how relevant is the following document to the query? Query: {query}\nDocument: {document}\nRelevance score:"
    )
    chain = prompt | llm
    try:
        raw_output = chain.invoke({"query": query, "document": document})
    except Exception as e:
        # Best effort: a failed LLM call must not abort the whole pipeline
        print(f"Error in retrieval evaluator: {e}")
        return 0.5

    text = str(raw_output).strip()
    try:
        # Fast path: the model answered with nothing but a number
        score = float(text)
    except ValueError:
        # Otherwise take the first number embedded in the text; the sign is
        # grouped so it applies to integers as well as decimals
        match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", text)
        if match is None:
            return 0.5
        score = float(match.group(0))

    if score != score:  # NaN compares unequal to itself
        return 0.5
    # The prompt asks for a 0-1 scale; clamp answers such as "8" or "-1"
    return min(1.0, max(0.0, score))



# Knowledge Refinement
class KnowledgeRefinementInput(BaseModel):
    # Schema with a single string field for knowledge refinement.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    key_points: str = Field(
        ...,
        description="The document to extract key information from.",
    )

def knowledge_refinement(document: str) -> List[str]:
    """Extract key points from ``document`` using the LLM.

    The raw completion is split on newlines; blank lines are dropped and the
    remaining lines are returned with surrounding whitespace removed.

    Args:
        document (str): The text to extract key information from.

    Returns:
        List[str]: The non-empty lines of the LLM answer.
    """
    refinement_prompt = PromptTemplate(
        input_variables=["document"],
        template="Extract the key information from the following document in bullet points:\n{document}\nKey points:"
    )
    completion = (refinement_prompt | llm).invoke({"document": document})

    key_points = []
    for line in completion.split('\n'):
        cleaned = line.strip()
        if cleaned:
            key_points.append(cleaned)
    return key_points

# Web Search Query Rewriter
class QueryRewriterInput(BaseModel):
    # Schema with a single string field for the query rewriter.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    query: str = Field(
        ...,
        description="The query to rewrite.",
    )

def rewrite_query(query: str) -> str:
    """Ask the LLM to rephrase ``query`` for a web search.

    Args:
        query (str): The original query.

    Returns:
        str: The LLM answer with surrounding whitespace removed.
    """
    rewrite_prompt = PromptTemplate(
        input_variables=["query"],
        template="Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:"
    )
    rewritten = (rewrite_prompt | llm).invoke({"query": query})
    return rewritten.strip()

# Helper function to parse search results
def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
    """
    Parse a JSON string of search results into a list of title-link tuples.

    Args:
        results_string (str): A JSON-formatted string containing search results,
            either a list of result objects or a single result object.

    Returns:
        List[Tuple[str, str]]: A list of tuples, where each tuple contains the title and link of a search result.
                               Missing titles default to 'Untitled' and missing links to ''.
                               Entries that are not JSON objects are skipped.
                               If parsing fails or the JSON has an unexpected shape, an empty list is returned.
    """
    try:
        results = json.loads(results_string)
    except (json.JSONDecodeError, TypeError):  # TypeError covers non-string input such as None
        print("Error parsing search results. Returning empty list.")
        return []

    # Accept a single result object as a one-element result list
    if isinstance(results, dict):
        results = [results]

    # Scalars, strings, and null are valid JSON but not a result list
    if not isinstance(results, list):
        print("Error parsing search results. Returning empty list.")
        return []

    # Only JSON objects support .get(); skip anything else instead of raising AttributeError
    return [
        (item.get('title', 'Untitled'), item.get('link', ''))
        for item in results
        if isinstance(item, dict)
    ]

# Define sub functions for the CRAG process
def retrieve_documents(query: str, faiss_index: FAISS, k: int = 3) -> List[str]:
    """
    Run a similarity search on the FAISS index and return the text of the hits.

    Args:
        query (str): The query string to search for.
        faiss_index (FAISS): The FAISS index used for similarity search.
        k (int): The number of top documents to retrieve. Defaults to 3.

    Returns:
        List[str]: The page content of each retrieved document, in search order.
    """
    contents = []
    for hit in faiss_index.similarity_search(query, k=k):
        contents.append(hit.page_content)
    return contents

def evaluate_documents(query: str, documents: List[str]) -> List[float]:
    """
    Score every document against the query with retrieval_evaluator.

    Args:
        query (str): The query string.
        documents (List[str]): A list of document contents to evaluate.

    Returns:
        List[float]: One relevance score per document, in input order.
    """
    scores = []
    for document in documents:
        scores.append(retrieval_evaluator(query, document))
    return scores

def perform_web_search(query: str) -> Tuple[List[str], List[Tuple[str, str]]]:
    """
    Rewrite the query, run it through the search tool, and refine the results.

    Args:
        query (str): The query string to search for.

    Returns:
        Tuple[List[str], List[Tuple[str, str]]]:
            - Key points extracted from the raw search results.
            - (title, link) tuples parsed from the raw search results.
    """
    raw_results = search.run(rewrite_query(query))
    refined_points = knowledge_refinement(raw_results)
    return refined_points, parse_search_results(raw_results)

def generate_response(query: str, knowledge: str, sources: List[Tuple[str, str]]) -> str:
    """
    Ask the LLM to answer the query from the given knowledge and cite the sources.

    Args:
        query (str): The query string.
        knowledge (str): The refined knowledge to use in the response.
        sources (List[Tuple[str, str]]): (title, link) tuples; an empty link means only the title is listed.

    Returns:
        str: The generated response.
    """
    answer_prompt = PromptTemplate(
        input_variables=["query", "knowledge", "sources"],
        template="Based on the following knowledge, answer the query. Include the sources with their links (if available) at the end of your answer:\nQuery: {query}\nKnowledge: {knowledge}\nSources: {sources}\nAnswer:"
    )

    # One line per source: "title: link", or just the title when there is no link
    source_lines = []
    for title, link in sources:
        source_lines.append(f"{title}: {link}" if link else title)

    prompt_values = {
        "query": query,
        "knowledge": knowledge,
        "sources": "\n".join(source_lines),
    }
    return (answer_prompt | llm).invoke(prompt_values)

# CRAG process
def crag_process(query: str, faiss_index: FAISS) -> str:
    """
    Process a query by retrieving, evaluating, and using documents or performing a web search to generate a response.

    The best relevance score among the retrieved documents selects the action:
    above 0.7 the best document is used as is, below 0.3 only a web search is
    used, and otherwise refined document knowledge is combined with web results.

    Args:
        query (str): The query string to process.
        faiss_index (FAISS): The FAISS index used for document retrieval.

    Returns:
        str: The generated response based on the query, or an error message
        when ``faiss_index`` is None.
    """
    print(f"\nProcessing query: {query}")

    # Retrieve and evaluate documents
    if faiss_index is None:
        return "Error: FAISS index is not initialized. Please check if the PDF was loaded successfully."

    retrieved_docs = retrieve_documents(query, faiss_index)
    eval_scores = evaluate_documents(query, retrieved_docs)

    print(f"\nRetrieved {len(retrieved_docs)} documents")
    print(f"Evaluation scores: {eval_scores}")

    # Default to 0 when nothing was retrieved, which routes to the web search branch
    max_score = max(eval_scores) if eval_scores else 0

    sources = []

    if max_score > 0.7:
        print("\nAction: Correct - Using retrieved document")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        final_knowledge = best_doc
        sources.append(("Retrieved document", ""))
    elif max_score < 0.3:
        print("\nAction: Incorrect - Performing web search")
        web_knowledge, sources = perform_web_search(query)
        # perform_web_search returns a list of key points, while generate_response
        # expects one string; join them as the ambiguous branch does.
        final_knowledge = "\n".join(web_knowledge)
    else:
        print("\nAction: Ambiguous - Combining retrieved document and web search")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        # Refine the retrieved knowledge
        retrieved_knowledge = knowledge_refinement(best_doc)
        web_knowledge, web_sources = perform_web_search(query)
        final_knowledge = "\n".join(retrieved_knowledge + web_knowledge)
        sources = [("Retrieved document", "")] + web_sources

    print("\nFinal knowledge:")
    print(final_knowledge)

    print("\nSources:")
    for title, link in sources:
        print(f"{title}: {link}" if link else title)

    # Generate response
    print("\nGenerating response...")
    response = generate_response(query, final_knowledge, sources)

    print("\nResponse generated")
    return response

# Build the vector store from the PDF
vectorstore = encode_pdf(path)


# Two sample queries to run through the pipeline
query1 = "What are the main causes of climate change?"
query2 = "how did harry beat quirrell?"

# Run both queries through the CRAG pipeline, or report that the vector store is missing
if not vectorstore:
    print("Vectorstore was not initialized.  Please check the PDF loading process.")
else:
    result1 = crag_process(query1, vectorstore)
    print(f"Query: {query1}")
    print(f"Answer: {result1}")

    result2 = crag_process(query2, vectorstore)
    print(f"Query: {query2}")
    print(f"Answer: {result2}")

In [1]:
!wget https://huggingface.co/bartowski/Dolphin3.0-Llama3.2-3B-GGUF/resolve/main/Dolphin3.0-Llama3.2-3B-IQ2_M.gguf

--2025-03-30 00:15:33--  https://huggingface.co/bartowski/Dolphin3.0-Llama3.2-3B-GGUF/resolve/main/Dolphin3.0-Llama3.2-3B-IQ2_M.gguf
Resolving huggingface.co (huggingface.co)... 54.230.71.28, 54.230.71.2, 54.230.71.103, ...
Connecting to huggingface.co (huggingface.co)|54.230.71.28|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/ea/41/ea41d1c9c4625bf2ce1b727f47334106ae8b50891147f1cf6d91a6c6a4c284a7/dc07e2f0dfd4a0882aa5b3f1e6fba1fde504e14209623e3b38d296384bcd29e5?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27Dolphin3.0-Llama3.2-3B-IQ2_M.gguf%3B+filename%3D%22Dolphin3.0-Llama3.2-3B-IQ2_M.gguf%22%3B&Expires=1743297333&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MzI5NzMzM319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2VhLzQxL2VhNDFkMWM5YzQ2MjViZjJjZTFiNzI3ZjQ3MzM0MTA2YWU4YjUwODkxMTQ3ZjFjZjZkOTFhNmM2YTRjMjg0YTcvZGMwN2UyZjBkZmQ0YTA4ODJhYTViM2YxZTZmYm

In [2]:
!pip install langchain
!pip install faiss-cpu  # Or faiss-gpu if you have a GPU
!pip install python-dotenv
!pip install pypdf
!pip install duckduckgo-search
!pip install transformers
!pip install accelerate
!pip install sentence_transformers
import os

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m54.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0
Collecting pypdf
  Downloading pypdf-5.4.0-py3-none-any.whl.metadata (7.3 kB)
Downloading pypdf-5.4.0-py3-none-any.whl (302 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.3/302.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.4.0
Collecting duckduckgo-search
  Downloading du

In [3]:
!pip install llama-cpp-python

Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.8.tar.gz (67.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.8-cp311-cp311-linux_x86_64.whl size=5959615 sha256=7d0e99d019bc18f242d6f

https://github.com/NirDiamant/RAG_Techniques

https://github.com/NirDiamant/RAG_Techniques/blob/main/all_rag_techniques/crag.ipynb

In [5]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.20-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-no

شغال جيد

In [6]:

import sys
import json
from typing import List, Tuple

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import DuckDuckGoSearchResults
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Load environment variables from a .env file into the process environment
load_dotenv()

# Path of the PDF handed to encode_pdf below.
# It is a bare file name, so it is resolved relative to the current working directory.
path = "Understanding_Climate_Change.pdf" # Assuming the PDF is in the same directory

# Helper function to load and encode PDF
def encode_pdf(pdf_path: str, chunk_size: int = 500, chunk_overlap: int = 50, embeddings_model_name: str = "all-MiniLM-L6-v2"):
    """Load a PDF, split it into chunks, and index the chunks in a FAISS vector store.

    Args:
        pdf_path (str): Path of the PDF file to load.
        chunk_size (int): Chunk size passed to the text splitter. Defaults to 500.
        chunk_overlap (int): Overlap between consecutive chunks. Defaults to 50.
        embeddings_model_name (str): Hugging Face model name used to embed the chunks.

    Returns:
        The FAISS vector store built from the chunks, or None when the PDF
        cannot be loaded or produces no text chunks.
    """
    try:
        documents = PyPDFLoader(pdf_path).load()
    except FileNotFoundError:
        print(f"Error: File not found at path: {pdf_path}")
        return None
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # A PDF without extractable text (e.g. scanned images) yields no chunks;
    # report it here instead of letting the index construction fail on empty input.
    if not texts:
        print(f"Error: No text could be extracted from PDF: {pdf_path}")
        return None

    # Use Hugging Face Embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

    # Create a FAISS vector store
    return FAISS.from_documents(texts, embeddings)


# Initialize the LlamaCpp language model
model_path = "/content/Dolphin3.0-Llama3.2-3B-IQ2_M.gguf"  # Replace with the actual path to your downloaded model
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    n_ctx=2048,  # context size; adjust based on your model
    n_batch=512,
    n_gpu_layers=0,  # adjust based on your GPU; 0 means no GPU offloading
    f16_kv=True,  # kept at True; the original note warns of problems after a couple of calls otherwise
    # NOTE(review): the original comment said verbose is required for the callback
    # manager, yet it is set to False here — confirm which is intended.
    verbose=False,
)


# Initialize search tool
search = DuckDuckGoSearchResults()

# Define retrieval evaluator, knowledge refinement, and query rewriter llm chains
# Retrieval Evaluator
class RetrievalEvaluatorInput(BaseModel):
    # Schema holding a single relevance score for a document.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    relevance_score: float = Field(
        ...,
        description="The relevance score of the document to the query. the score should be between 0 and 1.",
    )

def retrieval_evaluator(query: str, document: str) -> float:
    """Ask the LLM how relevant ``document`` is to ``query``.

    Args:
        query (str): The user query.
        document (str): The document text to score.

    Returns:
        float: A relevance score clamped to the range [0, 1]. The neutral
        value 0.5 is returned when the LLM call fails or its output contains
        no usable number.
    """
    import re

    prompt = PromptTemplate(
        input_variables=["query", "document"],
        template="On a scale from 0 to 1, how relevant is the following document to the query? Query: {query}\nDocument: {document}\nRelevance score:"
    )
    chain = prompt | llm
    try:
        raw_output = chain.invoke({"query": query, "document": document})
    except Exception as e:
        # Best effort: a failed LLM call must not abort the whole pipeline
        print(f"Error in retrieval evaluator: {e}")
        return 0.5

    text = str(raw_output).strip()
    try:
        # Fast path: the model answered with nothing but a number
        score = float(text)
    except ValueError:
        # Otherwise take the first number embedded in the text; the sign is
        # grouped so it applies to integers as well as decimals
        match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", text)
        if match is None:
            return 0.5
        score = float(match.group(0))

    if score != score:  # NaN compares unequal to itself
        return 0.5
    # The prompt asks for a 0-1 scale; clamp answers such as "8" or "-1"
    return min(1.0, max(0.0, score))



# Knowledge Refinement
class KnowledgeRefinementInput(BaseModel):
    # Schema with a single string field for knowledge refinement.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    key_points: str = Field(
        ...,
        description="The document to extract key information from.",
    )

def knowledge_refinement(document: str) -> List[str]:
    """Extract key points from ``document`` using the LLM.

    The raw completion is split on newlines; blank lines are dropped and the
    remaining lines are returned with surrounding whitespace removed.

    Args:
        document (str): The text to extract key information from.

    Returns:
        List[str]: The non-empty lines of the LLM answer.
    """
    refinement_prompt = PromptTemplate(
        input_variables=["document"],
        template="Extract the key information from the following document in bullet points:\n{document}\nKey points:"
    )
    completion = (refinement_prompt | llm).invoke({"document": document})

    key_points = []
    for line in completion.split('\n'):
        cleaned = line.strip()
        if cleaned:
            key_points.append(cleaned)
    return key_points

# Web Search Query Rewriter
class QueryRewriterInput(BaseModel):
    # Schema with a single string field for the query rewriter.
    # NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
    query: str = Field(
        ...,
        description="The query to rewrite.",
    )

def rewrite_query(query: str) -> str:
    """Ask the LLM to rephrase ``query`` for a web search.

    Args:
        query (str): The original query.

    Returns:
        str: The LLM answer with surrounding whitespace removed.
    """
    rewrite_prompt = PromptTemplate(
        input_variables=["query"],
        template="Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:"
    )
    rewritten = (rewrite_prompt | llm).invoke({"query": query})
    return rewritten.strip()

# Helper function to parse search results
def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
    """
    Parse a JSON string of search results into a list of title-link tuples.

    The expected payload is a JSON array of objects. A single JSON object is
    treated as a one-element array, and array entries that are not objects are
    skipped, so valid JSON of an unexpected shape never raises.

    Args:
        results_string (str): A JSON-formatted string containing search results.

    Returns:
        List[Tuple[str, str]]: A list of tuples, where each tuple contains the title and link of a search result.
                               A missing title becomes 'Untitled' and a missing link becomes ''.
                               If parsing fails, an empty list is returned.
    """
    # NOTE(review): the caller in this file passes the raw output of
    # `search.run(...)`; depending on the search tool's version/configuration
    # that output may not be JSON at all, in which case this always returns []
    # - confirm the tool's output format.
    try:
        # Attempt to parse the JSON string
        results = json.loads(results_string)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string input such as None
        print("Error parsing search results. Returning empty list.")
        return []

    if isinstance(results, dict):
        # A lone result object: handle it like a one-element array
        results = [results]
    elif not isinstance(results, list):
        # Scalars (numbers, strings, null) cannot hold search results
        print("Error parsing search results. Returning empty list.")
        return []

    # Extract the title and link from each well-formed (object) entry
    return [
        (entry.get('title', 'Untitled'), entry.get('link', ''))
        for entry in results
        if isinstance(entry, dict)
    ]

# Define sub functions for the CRAG process
def retrieve_documents(query: str, faiss_index: FAISS, k: int = 3) -> List[str]:
    """
    Fetch the text of the chunks most similar to a query.

    Args:
        query (str): The query string to search for.
        faiss_index (FAISS): The FAISS index used for similarity search.
        k (int): How many top matches to request. Defaults to 3.

    Returns:
        List[str]: The `page_content` of each match, in the order the index
            returned them.
    """
    contents = []
    for match in faiss_index.similarity_search(query, k=k):
        contents.append(match.page_content)
    return contents

def evaluate_documents(query: str, documents: List[str]) -> List[float]:
    """
    Score every document for relevance to a query.

    Args:
        query (str): The query string.
        documents (List[str]): Document contents to evaluate.

    Returns:
        List[float]: One relevance score per document, in the same order as
            `documents`.
    """
    scores = []
    for doc in documents:
        # One evaluator call per document
        scores.append(retrieval_evaluator(query, doc))
    return scores

def perform_web_search(query: str) -> Tuple[List[str], List[Tuple[str, str]]]:
    """
    Run a web search for a query and distil the results.

    The query is first rewritten for web search, then the raw search output is
    both refined into key points and parsed for its sources.

    Args:
        query (str): The query string to search for.

    Returns:
        Tuple[List[str], List[Tuple[str, str]]]:
            - The key points extracted from the raw search output.
            - The (title, link) pairs parsed from the same raw output.
    """
    raw_results = search.run(rewrite_query(query))
    # Tuple elements evaluate left to right: refinement runs before parsing.
    return knowledge_refinement(raw_results), parse_search_results(raw_results)

def generate_response(query: str, knowledge: str, sources: List[Tuple[str, str]]) -> str:
    """
    Produce the final answer to a query from the gathered knowledge.

    Args:
        query (str): The query string.
        knowledge (str): The knowledge the answer should be based on.
        sources (List[Tuple[str, str]]): (title, link) pairs; a pair with an
            empty link is listed by title only.

    Returns:
        str: The generated response.
    """
    answer_prompt = PromptTemplate(
        template="Based on the following knowledge, answer the query. Include the sources with their links (if available) at the end of your answer:\nQuery: {query}\nKnowledge: {knowledge}\nSources: {sources}\nAnswer:",
        input_variables=["query", "knowledge", "sources"],
    )

    # One line per source: "title: link", or just the title when there is no link
    source_lines = []
    for title, link in sources:
        source_lines.append(f"{title}: {link}" if link else title)

    prompt_values = {
        "query": query,
        "knowledge": knowledge,
        "sources": "\n".join(source_lines),
    }
    return (answer_prompt | llm).invoke(prompt_values)

# CRAG process
def crag_process(query: str, faiss_index: FAISS) -> str:
    """
    Process a query by retrieving, evaluating, and using documents or performing a web search to generate a response.

    The highest relevance score among the retrieved documents selects the action:
      * above 0.7 -> "Correct": use the best retrieved document as-is;
      * below 0.3 -> "Incorrect": ignore retrieval and use web search only;
      * otherwise -> "Ambiguous": combine the refined best document with web search.

    Progress, the chosen knowledge and the sources are printed along the way.

    Args:
        query (str): The query string to process.
        faiss_index (FAISS): The FAISS index used for document retrieval.

    Returns:
        str: The generated response based on the query, or an error message
            when `faiss_index` is None.
    """
    print(f"\nProcessing query: {query}")

    # Retrieve and evaluate documents
    if faiss_index is None:
        return "Error: FAISS index is not initialized. Please check if the PDF was loaded successfully."

    retrieved_docs = retrieve_documents(query, faiss_index)
    eval_scores = evaluate_documents(query, retrieved_docs)

    print(f"\nRetrieved {len(retrieved_docs)} documents")
    print(f"Evaluation scores: {eval_scores}")

    # With no retrieved documents the score defaults to 0, which selects the
    # web-search branch below.
    max_score = max(eval_scores) if eval_scores else 0

    sources = []

    if max_score > 0.7:
        print("\nAction: Correct - Using retrieved document")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        final_knowledge = best_doc
        sources.append(("Retrieved document", ""))
    elif max_score < 0.3:
        print("\nAction: Incorrect - Performing web search")
        web_knowledge, sources = perform_web_search(query)
        # perform_web_search returns a list of key points; join it so that
        # final_knowledge is a string in every branch, as generate_response
        # expects (otherwise the prompt would contain a Python list repr).
        final_knowledge = "\n".join(web_knowledge)
    else:
        print("\nAction: Ambiguous - Combining retrieved document and web search")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        # Refine the retrieved knowledge
        retrieved_knowledge = knowledge_refinement(best_doc)
        web_knowledge, web_sources = perform_web_search(query)
        final_knowledge = "\n".join(retrieved_knowledge + web_knowledge)
        sources = [("Retrieved document", "")] + web_sources

    print("\nFinal knowledge:")
    print(final_knowledge)

    print("\nSources:")
    for title, link in sources:
        print(f"{title}: {link}" if link else title)

    # Generate response
    print("\nGenerating response...")
    response = generate_response(query, final_knowledge, sources)

    print("\nResponse generated")
    return response

# Initialize vector store
# encode_pdf returns None when the PDF could not be loaded
vectorstore = encode_pdf(path)


# Sample questions to run through the pipeline
query1 = "What are the main causes of climate change?"
query2 = "how did harry beat quirrell?"

# Run both queries, or report that the index is unavailable
if not vectorstore:
    print("Vectorstore was not initialized.  Please check the PDF loading process.")
else:
    result1 = crag_process(query1, vectorstore)
    print(f"Query: {query1}")
    print(f"Answer: {result1}")

    result2 = crag_process(query2, vectorstore)
    print(f"Query: {query2}")
    print(f"Answer: {result2}")

llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Processing query: What are the main causes of climate change?
 1 - 0 = 1

The score reflects the extent to which this document addresses the query and provides information about how recent changes are primarily driven by human activities, particularly the emission of greenhouse gases. 0.9
Relevance to the query: High 
- The main causes of climate change include, but not exclusively, human activities such as burning fossil fuels and deforestating plants for use in energy production. 0 (N/A) because there is no information about the document relevance to the query. 
Relevance score: 1 (Highly relevant, as it provides a comprehensive understanding of the causes and effects of climate change.)
The following document is relevant to the query regarding what are the main causes of climate change.
However, according to the provided document, Food and Water Security relates more closely to the cause and effect of climate change compared to respiratory and cardiovascular diseases.

Relevance sc

KeyboardInterrupt: 

In [1]:
# Download the Dolphin3.0-Llama3.2-3B Q8_0 GGUF model file from Hugging Face
!wget https://huggingface.co/bartowski/Dolphin3.0-Llama3.2-3B-GGUF/resolve/main/Dolphin3.0-Llama3.2-3B-Q8_0.gguf

--2025-03-30 00:33:09--  https://huggingface.co/bartowski/Dolphin3.0-Llama3.2-3B-GGUF/resolve/main/Dolphin3.0-Llama3.2-3B-Q8_0.gguf
Resolving huggingface.co (huggingface.co)... 54.230.71.56, 54.230.71.103, 54.230.71.2, ...
Connecting to huggingface.co (huggingface.co)|54.230.71.56|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/ea/41/ea41d1c9c4625bf2ce1b727f47334106ae8b50891147f1cf6d91a6c6a4c284a7/d7a51f65ebd35e7e7da5632274183c9b54f38330e015a6c73adca0bf3523fdae?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27Dolphin3.0-Llama3.2-3B-Q8_0.gguf%3B+filename%3D%22Dolphin3.0-Llama3.2-3B-Q8_0.gguf%22%3B&Expires=1743298389&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MzI5ODM4OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2VhLzQxL2VhNDFkMWM5YzQ2MjViZjJjZTFiNzI3ZjQ3MzM0MTA2YWU4YjUwODkxMTQ3ZjFjZjZkOTFhNmM2YTRjMjg0YTcvZDdhNTFmNjVlYmQzNWU3ZTdkYTU2MzIyNzQxODNjO

In [1]:

import sys
import json
from typing import List, Tuple

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import DuckDuckGoSearchResults
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Load environment variables from a .env file
load_dotenv()

# PDF to index; the path is relative, so it is resolved against the current
# working directory
path = "Understanding_Climate_Change.pdf" # Assuming the PDF is in the same directory

# Helper function to load and encode PDF
def encode_pdf(pdf_path: str, chunk_size: int = 500, chunk_overlap: int = 50, embeddings_model_name: str = "all-MiniLM-L6-v2"):
    """Loads a PDF, splits it into chunks, and encodes it into a FAISS vector store.

    Args:
        pdf_path (str): Path of the PDF file to load.
        chunk_size (int): Maximum size of each text chunk. Defaults to 500.
        chunk_overlap (int): Overlap between consecutive chunks. Defaults to 50.
        embeddings_model_name (str): Hugging Face embedding model to use.

    Returns:
        The FAISS vector store, or None (after printing an error) when the PDF
        cannot be loaded or yields no text chunks.
    """
    try:
        documents = PyPDFLoader(pdf_path).load()
    except FileNotFoundError:
        print(f"Error: File not found at path: {pdf_path}")
        return None
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # A PDF with no extractable text (e.g. scanned images) produces no chunks,
    # and an index cannot be built from zero documents; fail the same way as
    # for a load error so callers' None check still applies.
    if not texts:
        print(f"Error: No text could be extracted from PDF: {pdf_path}")
        return None

    # Use Hugging Face Embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

    # Create a FAISS vector store
    return FAISS.from_documents(texts, embeddings)


# Set up the local LlamaCpp model from a GGUF file
model_path = "/content/Dolphin3.0-Llama3.2-3B-Q8_0.gguf"  # Replace with the actual path to your downloaded model
# Callback handler that writes generated tokens to stdout
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    n_ctx=2048,  # context size; adjust based on your model
    n_batch=512,
    n_gpu_layers=0,  # adjust based on your GPU, 0 if no GPU
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    # NOTE(review): an earlier comment here claimed verbose is required for
    # the callback manager, yet it is disabled - confirm streaming still works.
    verbose=False,
)


# Web search tool used when retrieval is not relevant enough
search = DuckDuckGoSearchResults()

# Define retrieval evaluator, knowledge refinement, and query rewriter llm chains
# Retrieval Evaluator
# Schema with a single required float field, `relevance_score`. The 0-1 range
# appears only in the field description; no validator here enforces it.
# NOTE(review): nothing in the visible code references this class -
# retrieval_evaluator parses the raw LLM text itself - so it looks like a
# structured-output leftover; confirm before removing.
class RetrievalEvaluatorInput(BaseModel):
    relevance_score: float = Field(..., description="The relevance score of the document to the query. the score should be between 0 and 1.")

def retrieval_evaluator(query: str, document: str) -> float:
    """Ask the LLM how relevant ``document`` is to ``query``.

    The reply is expected to contain a number. If the whole reply is not a
    number, the first number found in it is used. The score is then clamped
    to the 0-1 scale the prompt asks for, so an off-scale reply (e.g. "8")
    cannot distort the thresholds applied by the caller.

    Args:
        query (str): The query string.
        document (str): The document content to grade.

    Returns:
        float: Relevance score in [0.0, 1.0]. A neutral 0.5 is returned when
            the LLM call fails or the reply holds no usable number.
    """
    import math
    import re

    prompt = PromptTemplate(
        input_variables=["query", "document"],
        template="On a scale from 0 to 1, how relevant is the following document to the query? Query: {query}\nDocument: {document}\nRelevance score:"
    )
    chain = prompt | llm
    input_variables = {"query": query, "document": document}
    try:
        result = chain.invoke(input_variables)
        try:
            score = float(result.strip())  # Reply is just a number
        except ValueError:
            # Otherwise take the first number in the text; the optional sign
            # applies to integers and decimals alike.
            match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", result)
            if match is None:
                return 0.5  # Return a neutral score if no number is found
            score = float(match.group(0))
    except Exception as e:
        # Best-effort: a failed LLM call must not abort the whole pipeline.
        print(f"Error in retrieval evaluator: {e}")
        return 0.5  # Return a neutral score in case of an exception

    if math.isnan(score):
        # float("nan") parses successfully but cannot be compared or ranked.
        return 0.5
    return min(1.0, max(0.0, score))



# Knowledge Refinement
# Schema with a single required string field, `key_points`.
# NOTE(review): nothing in the visible code references this class -
# knowledge_refinement parses the raw LLM text itself - so it looks like a
# leftover from a structured-output version; confirm before removing.
class KnowledgeRefinementInput(BaseModel):
    # NOTE(review): the field is named `key_points` but its description talks
    # about the input document; presumably it should describe the extracted
    # key points - confirm.
    key_points: str = Field(..., description="The document to extract key information from.")

def knowledge_refinement(document: str) -> List[str]:
    """Ask the LLM for the key points of ``document`` and return them as a list.

    Args:
        document (str): Text to extract key information from.

    Returns:
        List[str]: The non-blank lines of the model's reply, each stripped of
            surrounding whitespace, in the order the model produced them.
    """
    extraction_prompt = PromptTemplate(
        template="Extract the key information from the following document in bullet points:\n{document}\nKey points:",
        input_variables=["document"],
    )
    # The model is used for raw text; the reply is split into lines below.
    raw_reply = (extraction_prompt | llm).invoke({"document": document})

    key_points = []
    for line in raw_reply.split('\n'):
        cleaned = line.strip()
        if cleaned:
            key_points.append(cleaned)
    return key_points

# Web Search Query Rewriter
# Schema with a single required string field holding the query to rewrite.
# NOTE(review): nothing in the visible code references this class -
# rewrite_query takes and returns plain strings - so it looks like a
# structured-output leftover; confirm before removing.
class QueryRewriterInput(BaseModel):
    query: str = Field(..., description="The query to rewrite.")

def rewrite_query(query: str) -> str:
    """Have the LLM rephrase ``query`` so it suits a web search.

    Args:
        query (str): The original user query.

    Returns:
        str: The model's reply with leading and trailing whitespace removed.
    """
    rewrite_prompt = PromptTemplate(
        template="Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:",
        input_variables=["query"],
    )
    rewriter = rewrite_prompt | llm
    # The model is used for raw text, so the reply only needs trimming.
    return rewriter.invoke({"query": query}).strip()

# Helper function to parse search results
def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
    """
    Parse a JSON string of search results into a list of title-link tuples.

    The expected payload is a JSON array of objects. A single JSON object is
    treated as a one-element array, and array entries that are not objects are
    skipped, so valid JSON of an unexpected shape never raises.

    Args:
        results_string (str): A JSON-formatted string containing search results.

    Returns:
        List[Tuple[str, str]]: A list of tuples, where each tuple contains the title and link of a search result.
                               A missing title becomes 'Untitled' and a missing link becomes ''.
                               If parsing fails, an empty list is returned.
    """
    # NOTE(review): the caller in this file passes the raw output of
    # `search.run(...)`; depending on the search tool's version/configuration
    # that output may not be JSON at all, in which case this always returns []
    # - confirm the tool's output format.
    try:
        # Attempt to parse the JSON string
        results = json.loads(results_string)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string input such as None
        print("Error parsing search results. Returning empty list.")
        return []

    if isinstance(results, dict):
        # A lone result object: handle it like a one-element array
        results = [results]
    elif not isinstance(results, list):
        # Scalars (numbers, strings, null) cannot hold search results
        print("Error parsing search results. Returning empty list.")
        return []

    # Extract the title and link from each well-formed (object) entry
    return [
        (entry.get('title', 'Untitled'), entry.get('link', ''))
        for entry in results
        if isinstance(entry, dict)
    ]

# Define sub functions for the CRAG process
def retrieve_documents(query: str, faiss_index: FAISS, k: int = 3) -> List[str]:
    """
    Fetch the text of the chunks most similar to a query.

    Args:
        query (str): The query string to search for.
        faiss_index (FAISS): The FAISS index used for similarity search.
        k (int): How many top matches to request. Defaults to 3.

    Returns:
        List[str]: The `page_content` of each match, in the order the index
            returned them.
    """
    contents = []
    for match in faiss_index.similarity_search(query, k=k):
        contents.append(match.page_content)
    return contents

def evaluate_documents(query: str, documents: List[str]) -> List[float]:
    """
    Score every document for relevance to a query.

    Args:
        query (str): The query string.
        documents (List[str]): Document contents to evaluate.

    Returns:
        List[float]: One relevance score per document, in the same order as
            `documents`.
    """
    scores = []
    for doc in documents:
        # One evaluator call per document
        scores.append(retrieval_evaluator(query, doc))
    return scores

def perform_web_search(query: str) -> Tuple[List[str], List[Tuple[str, str]]]:
    """
    Run a web search for a query and distil the results.

    The query is first rewritten for web search, then the raw search output is
    both refined into key points and parsed for its sources.

    Args:
        query (str): The query string to search for.

    Returns:
        Tuple[List[str], List[Tuple[str, str]]]:
            - The key points extracted from the raw search output.
            - The (title, link) pairs parsed from the same raw output.
    """
    raw_results = search.run(rewrite_query(query))
    # Tuple elements evaluate left to right: refinement runs before parsing.
    return knowledge_refinement(raw_results), parse_search_results(raw_results)

def generate_response(query: str, knowledge: str, sources: List[Tuple[str, str]]) -> str:
    """
    Produce the final answer to a query from the gathered knowledge.

    Args:
        query (str): The query string.
        knowledge (str): The knowledge the answer should be based on.
        sources (List[Tuple[str, str]]): (title, link) pairs; a pair with an
            empty link is listed by title only.

    Returns:
        str: The generated response.
    """
    answer_prompt = PromptTemplate(
        template="Based on the following knowledge, answer the query. Include the sources with their links (if available) at the end of your answer:\nQuery: {query}\nKnowledge: {knowledge}\nSources: {sources}\nAnswer:",
        input_variables=["query", "knowledge", "sources"],
    )

    # One line per source: "title: link", or just the title when there is no link
    source_lines = []
    for title, link in sources:
        source_lines.append(f"{title}: {link}" if link else title)

    prompt_values = {
        "query": query,
        "knowledge": knowledge,
        "sources": "\n".join(source_lines),
    }
    return (answer_prompt | llm).invoke(prompt_values)

# CRAG process
def crag_process(query: str, faiss_index: FAISS) -> str:
    """
    Process a query by retrieving, evaluating, and using documents or performing a web search to generate a response.

    The highest relevance score among the retrieved documents selects the action:
      * above 0.7 -> "Correct": use the best retrieved document as-is;
      * below 0.3 -> "Incorrect": ignore retrieval and use web search only;
      * otherwise -> "Ambiguous": combine the refined best document with web search.

    Progress, the chosen knowledge and the sources are printed along the way.

    Args:
        query (str): The query string to process.
        faiss_index (FAISS): The FAISS index used for document retrieval.

    Returns:
        str: The generated response based on the query, or an error message
            when `faiss_index` is None.
    """
    print(f"\nProcessing query: {query}")

    # Retrieve and evaluate documents
    if faiss_index is None:
        return "Error: FAISS index is not initialized. Please check if the PDF was loaded successfully."

    retrieved_docs = retrieve_documents(query, faiss_index)
    eval_scores = evaluate_documents(query, retrieved_docs)

    print(f"\nRetrieved {len(retrieved_docs)} documents")
    print(f"Evaluation scores: {eval_scores}")

    # With no retrieved documents the score defaults to 0, which selects the
    # web-search branch below.
    max_score = max(eval_scores) if eval_scores else 0

    sources = []

    if max_score > 0.7:
        print("\nAction: Correct - Using retrieved document")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        final_knowledge = best_doc
        sources.append(("Retrieved document", ""))
    elif max_score < 0.3:
        print("\nAction: Incorrect - Performing web search")
        web_knowledge, sources = perform_web_search(query)
        # perform_web_search returns a list of key points; join it so that
        # final_knowledge is a string in every branch, as generate_response
        # expects (otherwise the prompt would contain a Python list repr).
        final_knowledge = "\n".join(web_knowledge)
    else:
        print("\nAction: Ambiguous - Combining retrieved document and web search")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        # Refine the retrieved knowledge
        retrieved_knowledge = knowledge_refinement(best_doc)
        web_knowledge, web_sources = perform_web_search(query)
        final_knowledge = "\n".join(retrieved_knowledge + web_knowledge)
        sources = [("Retrieved document", "")] + web_sources

    print("\nFinal knowledge:")
    print(final_knowledge)

    print("\nSources:")
    for title, link in sources:
        print(f"{title}: {link}" if link else title)

    # Generate response
    print("\nGenerating response...")
    response = generate_response(query, final_knowledge, sources)

    print("\nResponse generated")
    return response

# Initialize vector store
# encode_pdf returns None when the PDF could not be loaded
vectorstore = encode_pdf(path)


# Sample questions to run through the pipeline
query1 = "What are the main causes of climate change?"
query2 = "how did harry beat quirrell?"

# Run both queries, or report that the index is unavailable
if not vectorstore:
    print("Vectorstore was not initialized.  Please check the PDF loading process.")
else:
    result1 = crag_process(query1, vectorstore)
    print(f"Query: {query1}")
    print(f"Answer: {result1}")

    result2 = crag_process(query2, vectorstore)
    print(f"Query: {query2}")
    print(f"Answer: {result2}")


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)
llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



Processing query: What are the main causes of climate change?
 1 0.5 1
Retrieved 3 documents
Evaluation scores: [1.0, 0.5, 1.0]

Action: Correct - Using retrieved document

Final knowledge:
provide a historical record that scientists use to understand past climate conditions and 
predict future trends. The evidence overwhelmingly shows that recent changes are primarily 
driven by human activities, particularly the emission of greenhouse gases. 
Chapter 2: Causes of Climate Change 
Greenhouse Gases 
The primary cause of recent climate change is the increase in greenhouse gases in the 
atmosphere. Greenhouse gases, such as carbon dioxide (CO2), methane (CH4), and nitrous

Sources:
Retrieved document

Generating response...
 The main causes of climate change are primarily driven by human activities, particularly the emission of greenhouse gases. Greenhouse gases in the atmosphere include carbon dioxide (CO2), methane (CH4), and nitrous oxide sources. Retrieved document.

Response generat

In [1]:

import sys
import json
from typing import List, Tuple

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools import DuckDuckGoSearchResults
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Load environment variables from a .env file
load_dotenv()

# PDF to index; the path is relative, so it is resolved against the current
# working directory
path = "1.pdf" # Assuming the PDF is in the same directory

# Helper function to load and encode PDF
def encode_pdf(pdf_path: str, chunk_size: int = 500, chunk_overlap: int = 50, embeddings_model_name: str = "all-MiniLM-L6-v2"):
    """Loads a PDF, splits it into chunks, and encodes it into a FAISS vector store.

    Args:
        pdf_path (str): Path of the PDF file to load.
        chunk_size (int): Maximum size of each text chunk. Defaults to 500.
        chunk_overlap (int): Overlap between consecutive chunks. Defaults to 50.
        embeddings_model_name (str): Hugging Face embedding model to use.

    Returns:
        The FAISS vector store, or None (after printing an error) when the PDF
        cannot be loaded or yields no text chunks.
    """
    try:
        documents = PyPDFLoader(pdf_path).load()
    except FileNotFoundError:
        print(f"Error: File not found at path: {pdf_path}")
        return None
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # A PDF with no extractable text (e.g. scanned images) produces no chunks,
    # and an index cannot be built from zero documents; fail the same way as
    # for a load error so callers' None check still applies.
    if not texts:
        print(f"Error: No text could be extracted from PDF: {pdf_path}")
        return None

    # Use Hugging Face Embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

    # Create a FAISS vector store
    return FAISS.from_documents(texts, embeddings)


# Set up the local LlamaCpp model from a GGUF file
model_path = "/content/Dolphin3.0-Llama3.2-3B-Q8_0.gguf"  # Replace with the actual path to your downloaded model
# Callback handler that writes generated tokens to stdout
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    n_ctx=2048,  # context size; adjust based on your model
    n_batch=512,
    n_gpu_layers=0,  # adjust based on your GPU, 0 if no GPU
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    # NOTE(review): an earlier comment here claimed verbose is required for
    # the callback manager, yet it is disabled - confirm streaming still works.
    verbose=False,
)


# Web search tool used when retrieval is not relevant enough
search = DuckDuckGoSearchResults()

# Define retrieval evaluator, knowledge refinement, and query rewriter llm chains
# Retrieval Evaluator
# Schema with a single required float field, `relevance_score`. The 0-1 range
# appears only in the field description; no validator here enforces it.
# NOTE(review): nothing in the visible code references this class -
# retrieval_evaluator parses the raw LLM text itself - so it looks like a
# structured-output leftover; confirm before removing.
class RetrievalEvaluatorInput(BaseModel):
    relevance_score: float = Field(..., description="The relevance score of the document to the query. the score should be between 0 and 1.")

def retrieval_evaluator(query: str, document: str) -> float:
    """Ask the LLM how relevant ``document`` is to ``query``.

    The reply is expected to contain a number. If the whole reply is not a
    number, the first number found in it is used. The score is then clamped
    to the 0-1 scale the prompt asks for, so an off-scale reply (e.g. "8")
    cannot distort the thresholds applied by the caller.

    Args:
        query (str): The query string.
        document (str): The document content to grade.

    Returns:
        float: Relevance score in [0.0, 1.0]. A neutral 0.5 is returned when
            the LLM call fails or the reply holds no usable number.
    """
    import math
    import re

    prompt = PromptTemplate(
        input_variables=["query", "document"],
        template="On a scale from 0 to 1, how relevant is the following document to the query? Query: {query}\nDocument: {document}\nRelevance score:"
    )
    chain = prompt | llm
    input_variables = {"query": query, "document": document}
    try:
        result = chain.invoke(input_variables)
        try:
            score = float(result.strip())  # Reply is just a number
        except ValueError:
            # Otherwise take the first number in the text; the optional sign
            # applies to integers and decimals alike.
            match = re.search(r"[-+]?(?:\d*\.\d+|\d+)", result)
            if match is None:
                return 0.5  # Return a neutral score if no number is found
            score = float(match.group(0))
    except Exception as e:
        # Best-effort: a failed LLM call must not abort the whole pipeline.
        print(f"Error in retrieval evaluator: {e}")
        return 0.5  # Return a neutral score in case of an exception

    if math.isnan(score):
        # float("nan") parses successfully but cannot be compared or ranked.
        return 0.5
    return min(1.0, max(0.0, score))



# Knowledge Refinement
# Schema with a single required string field, `key_points`.
# NOTE(review): nothing in the visible code references this class -
# knowledge_refinement parses the raw LLM text itself - so it looks like a
# leftover from a structured-output version; confirm before removing.
class KnowledgeRefinementInput(BaseModel):
    # NOTE(review): the field is named `key_points` but its description talks
    # about the input document; presumably it should describe the extracted
    # key points - confirm.
    key_points: str = Field(..., description="The document to extract key information from.")

def knowledge_refinement(document: str) -> List[str]:
    """Ask the LLM for the key points of ``document`` and return them as a list.

    Args:
        document (str): Text to extract key information from.

    Returns:
        List[str]: The non-blank lines of the model's reply, each stripped of
            surrounding whitespace, in the order the model produced them.
    """
    extraction_prompt = PromptTemplate(
        template="Extract the key information from the following document in bullet points:\n{document}\nKey points:",
        input_variables=["document"],
    )
    # The model is used for raw text; the reply is split into lines below.
    raw_reply = (extraction_prompt | llm).invoke({"document": document})

    key_points = []
    for line in raw_reply.split('\n'):
        cleaned = line.strip()
        if cleaned:
            key_points.append(cleaned)
    return key_points

# Web Search Query Rewriter
# Schema with a single required string field holding the query to rewrite.
# NOTE(review): nothing in the visible code references this class -
# rewrite_query takes and returns plain strings - so it looks like a
# structured-output leftover; confirm before removing.
class QueryRewriterInput(BaseModel):
    query: str = Field(..., description="The query to rewrite.")

def rewrite_query(query: str) -> str:
    """Have the LLM rephrase ``query`` so it suits a web search.

    Args:
        query (str): The original user query.

    Returns:
        str: The model's reply with leading and trailing whitespace removed.
    """
    rewrite_prompt = PromptTemplate(
        template="Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:",
        input_variables=["query"],
    )
    rewriter = rewrite_prompt | llm
    # The model is used for raw text, so the reply only needs trimming.
    return rewriter.invoke({"query": query}).strip()

# Helper function to parse search results
def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
    """
    Parse a JSON string of search results into a list of title-link tuples.

    The expected payload is a JSON array of objects. A single JSON object is
    treated as a one-element array, and array entries that are not objects are
    skipped, so valid JSON of an unexpected shape never raises.

    Args:
        results_string (str): A JSON-formatted string containing search results.

    Returns:
        List[Tuple[str, str]]: A list of tuples, where each tuple contains the title and link of a search result.
                               A missing title becomes 'Untitled' and a missing link becomes ''.
                               If parsing fails, an empty list is returned.
    """
    # NOTE(review): the caller in this file passes the raw output of
    # `search.run(...)`; depending on the search tool's version/configuration
    # that output may not be JSON at all, in which case this always returns []
    # - confirm the tool's output format.
    try:
        # Attempt to parse the JSON string
        results = json.loads(results_string)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string input such as None
        print("Error parsing search results. Returning empty list.")
        return []

    if isinstance(results, dict):
        # A lone result object: handle it like a one-element array
        results = [results]
    elif not isinstance(results, list):
        # Scalars (numbers, strings, null) cannot hold search results
        print("Error parsing search results. Returning empty list.")
        return []

    # Extract the title and link from each well-formed (object) entry
    return [
        (entry.get('title', 'Untitled'), entry.get('link', ''))
        for entry in results
        if isinstance(entry, dict)
    ]

# Define sub functions for the CRAG process
def retrieve_documents(query: str, faiss_index: FAISS, k: int = 3) -> List[str]:
    """
    Fetch the text of the chunks most similar to a query.

    Args:
        query (str): The query string to search for.
        faiss_index (FAISS): The FAISS index used for similarity search.
        k (int): How many top matches to request. Defaults to 3.

    Returns:
        List[str]: The `page_content` of each match, in the order the index
            returned them.
    """
    contents = []
    for match in faiss_index.similarity_search(query, k=k):
        contents.append(match.page_content)
    return contents

def evaluate_documents(query: str, documents: List[str]) -> List[float]:
    """
    Score every document for relevance to a query.

    Args:
        query (str): The query string.
        documents (List[str]): Document contents to evaluate.

    Returns:
        List[float]: One relevance score per document, in the same order as
            `documents`.
    """
    scores = []
    for doc in documents:
        # One evaluator call per document
        scores.append(retrieval_evaluator(query, doc))
    return scores

def perform_web_search(query: str) -> Tuple[List[str], List[Tuple[str, str]]]:
    """
    Run a web search for a query and distil the results.

    The query is first rewritten for web search, then the raw search output is
    both refined into key points and parsed for its sources.

    Args:
        query (str): The query string to search for.

    Returns:
        Tuple[List[str], List[Tuple[str, str]]]:
            - The key points extracted from the raw search output.
            - The (title, link) pairs parsed from the same raw output.
    """
    raw_results = search.run(rewrite_query(query))
    # Tuple elements evaluate left to right: refinement runs before parsing.
    return knowledge_refinement(raw_results), parse_search_results(raw_results)

def generate_response(query: str, knowledge: str, sources: List[Tuple[str, str]]) -> str:
    """
    Produce the final answer to a query from the gathered knowledge.

    Args:
        query (str): The query string.
        knowledge (str): The knowledge the answer should be based on.
        sources (List[Tuple[str, str]]): (title, link) pairs; a pair with an
            empty link is listed by title only.

    Returns:
        str: The generated response.
    """
    answer_prompt = PromptTemplate(
        template="Based on the following knowledge, answer the query. Include the sources with their links (if available) at the end of your answer:\nQuery: {query}\nKnowledge: {knowledge}\nSources: {sources}\nAnswer:",
        input_variables=["query", "knowledge", "sources"],
    )

    # One line per source: "title: link", or just the title when there is no link
    source_lines = []
    for title, link in sources:
        source_lines.append(f"{title}: {link}" if link else title)

    prompt_values = {
        "query": query,
        "knowledge": knowledge,
        "sources": "\n".join(source_lines),
    }
    return (answer_prompt | llm).invoke(prompt_values)

# CRAG process
def crag_process(query: str, faiss_index: FAISS) -> str:
    """
    Process a query by retrieving, evaluating, and using documents or performing a web search to generate a response.

    The retrieved documents are scored with ``evaluate_documents`` and the
    highest score selects one of three actions:

    - score > 0.7 ("Correct"): use the best retrieved document as the knowledge.
    - score < 0.3 ("Incorrect"): ignore retrieval and use web search knowledge.
    - otherwise ("Ambiguous"): combine the refined best document with web
      search knowledge.

    In every branch the knowledge handed to ``generate_response`` is a single
    string.

    Args:
        query (str): The query string to process.
        faiss_index (FAISS): The FAISS index used for document retrieval.

    Returns:
        str: The generated response based on the query, or an error message
        when ``faiss_index`` is None.
    """
    print(f"\nProcessing query: {query}")

    # Retrieve and evaluate documents
    if faiss_index is None:
        return "Error: FAISS index is not initialized. Please check if the PDF was loaded successfully."

    retrieved_docs = retrieve_documents(query, faiss_index)
    eval_scores = evaluate_documents(query, retrieved_docs)

    print(f"\nRetrieved {len(retrieved_docs)} documents")
    print(f"Evaluation scores: {eval_scores}")

    # Determine action based on evaluation scores.
    # With no retrieved documents the score defaults to 0, which routes the
    # query to the web-search branch below.
    max_score = max(eval_scores) if eval_scores else 0

    sources = []

    if max_score > 0.7:
        print("\nAction: Correct - Using retrieved document")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        final_knowledge = best_doc
        sources.append(("Retrieved document", ""))
    elif max_score < 0.3:
        print("\nAction: Incorrect - Performing web search")
        web_knowledge, sources = perform_web_search(query)
        # perform_web_search returns the knowledge as a list of strings, while
        # generate_response expects one string. Join it the same way the
        # "Ambiguous" branch does so the prompt does not contain a list repr.
        if isinstance(web_knowledge, str):
            final_knowledge = web_knowledge
        else:
            final_knowledge = "\n".join(web_knowledge)
    else:
        print("\nAction: Ambiguous - Combining retrieved document and web search")
        best_doc = retrieved_docs[eval_scores.index(max_score)]
        # Refine the retrieved knowledge
        retrieved_knowledge = knowledge_refinement(best_doc)
        web_knowledge, web_sources = perform_web_search(query)
        final_knowledge = "\n".join(retrieved_knowledge + web_knowledge)
        sources = [("Retrieved document", "")] + web_sources

    print("\nFinal knowledge:")
    print(final_knowledge)

    print("\nSources:")
    for title, link in sources:
        print(f"{title}: {link}" if link else title)

    # Generate response
    print("\nGenerating response...")
    response = generate_response(query, final_knowledge, sources)

    print("\nResponse generated")
    return response

# Initialize vector store (encode_pdf returns None when the PDF cannot be loaded)
vectorstore = encode_pdf(path)


# Example queries
query1 = "What is the name of the author of the book?"
query2 = "What is the topic of the book?"

# Process queries and print results.
# The failure signal from encode_pdf is None, so test for it explicitly rather
# than relying on the truthiness of the vector store object; this mirrors the
# `faiss_index is None` guard inside crag_process.
if vectorstore is not None:
    result1 = crag_process(query1, vectorstore)
    print(f"Query: {query1}")
    print(f"Answer: {result1}")

    result2 = crag_process(query2, vectorstore)
    print(f"Query: {query2}")
    print(f"Answer: {result2}")
else:
    print("Vectorstore was not initialized.  Please check the PDF loading process.")


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet: `from pydantic.v1 import BaseModel`

  exec(code_obj, self.user_global_ns, self.user_ns)
llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



Processing query: What is the name of the author of the book?
 1 1 0 (not at all relevant) or 1 (completely irrelevant).
Retrieved 3 documents
Evaluation scores: [1.0, 1.0, 0.0]

Action: Correct - Using retrieved document

Final knowledge:
To my father, Rick Riordan, Sr., who read
me my first book of mythology
—R.R.
To my heroes of illustration: N. C. Wyeth,
Maxfield Parrish, Arthur Rackham, and
Frank Frazetta
—J.R.

Sources:
Retrieved document

Generating response...
 The name of the author of the book is Rick Riordan. The sources from where I got this information are: Retrieved document
Response generated
Query: What is the name of the author of the book?
Answer:  The name of the author of the book is Rick Riordan. The sources from where I got this information are: Retrieved document

Processing query: What is the topic of the book?
 0/1

Explanation: The document provided information about a book published by Disney Hyperion. The relevant information is the title of the book and it