# **CRAG: Corrective RAG process**

## Downloading necessary libraries

In [None]:
pip install -q faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
pip install -q pypdf

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/304.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m297.0/304.2 kB[0m [31m13.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.2/304.2 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
pip install -q python-dotenv langchain-cohere langchain-community langchain-groq langchain-tavily

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.5/259.5 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.5/12.5 MB[0m [31m91.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m69.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

## Importing necessary libraries

In [21]:
import os
from dotenv import load_dotenv
from langchain_cohere import CohereEmbeddings
from langchain_community.document_loaders import TextLoader, PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field
from langchain.chains import LLMChain
from typing import List
from langchain.prompts import PromptTemplate
import json
from typing import List, Tuple
import time
from langchain_tavily import TavilySearch
from langchain.prompts import PromptTemplate

# Embedding the documents


In [None]:
# Colab-only step: upload files into the runtime through google.colab's helper.
# NOTE(review): presumably the uploaded file is the data.zip unpacked in the
# next cell — confirm.
from google.colab import files
uploaded=files.upload()

In [22]:
import zipfile
import os

# Archive to unpack, resolved relative to the current working directory.
zip_path = "data.zip"
# Extract every member under ./data; the context manager closes the archive
# once extraction finishes.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("data")


# Show what was extracted (rendered as the cell's output).
os.listdir("data")


['data']

In [23]:
# List the nested data/data folder, the directory later handed to
# crag_pipeline's index builder as data_dir.
os.listdir("data/data")

['final_draft.pdf']

In [24]:
load_dotenv()
COHERE_API_KEY = os.getenv("COHERE_API_KEY")


def get_cohere_embedder():
    """Return a Cohere embeddings client for the ``embed-english-v3.0`` model.

    The API key is the module-level ``COHERE_API_KEY``, read from the
    environment after ``load_dotenv()`` has run.
    """
    embedder_options = {
        "cohere_api_key": COHERE_API_KEY,
        "model": "embed-english-v3.0",
    }
    return CohereEmbeddings(**embedder_options)

## Indexing the documents

In [25]:
def load_documents_from_dir(data_dir="data/"):
    """Load every supported document found directly inside ``data_dir``.

    ``.txt`` files are read with ``TextLoader`` and ``.pdf`` files with
    ``PyPDFLoader``; the extension match is case-insensitive. Entries that are
    not regular files, or that have no extension or an unsupported one, are
    skipped. Sub-directories are not searched.

    Args:
        data_dir: Directory to scan.

    Returns:
        A flat list of the documents produced by the loaders, in file-name
        order. Empty when the directory holds no supported files.
    """
    docs = []
    # Sort so the document order does not depend on os.listdir's arbitrary
    # ordering.
    for file_name in sorted(os.listdir(data_dir)):
        file_path = os.path.join(data_dir, file_name)
        if not os.path.isfile(file_path):
            continue

        # splitext yields "" for extension-less names, so a file literally
        # called "pdf" or "txt" is not mistaken for a document.
        ext = os.path.splitext(file_name)[1].lower()

        if ext == ".txt":
            docs.extend(TextLoader(file_path).load())
        elif ext == ".pdf":
            docs.extend(PyPDFLoader(file_path).load())
        # add more formats here if needed

    return docs

def create_or_load_faiss(index_path="faiss_index", data_dir="data/"):
    """Return a FAISS vector store, reusing a saved index when one exists.

    When ``index_path`` exists the store is loaded from disk and ``data_dir``
    is not read, so changes to the documents are only picked up after the
    saved index is removed. Otherwise the documents in ``data_dir`` are
    embedded with the Cohere embedder, indexed, and saved to ``index_path``.

    Args:
        index_path: Location of the saved index.
        data_dir: Directory holding the source documents.

    Returns:
        The loaded or newly built FAISS vector store.

    Raises:
        ValueError: If no saved index exists and ``data_dir`` yields no
            documents to index.
    """
    embedder = get_cohere_embedder()
    if os.path.exists(index_path):
        # SECURITY: the flag below opts in to pickle-based deserialization.
        # Only load index directories produced by this code or another
        # trusted source.
        return FAISS.load_local(
            index_path,
            embedder,
            allow_dangerous_deserialization=True,
        )

    docs = load_documents_from_dir(data_dir)
    if not docs:
        # Fail with a clear message instead of an obscure error from the
        # index builder when there is nothing to embed.
        raise ValueError(f"No documents found in {data_dir!r}; cannot build a FAISS index.")

    vectorstore = FAISS.from_documents(docs, embedder)
    vectorstore.save_local(index_path)
    return vectorstore

## LLM

In [26]:
load_dotenv()


def get_groq_llm(model="llama3-8b-8192"):
    """Create a Groq chat model client.

    Args:
        model: Groq model identifier, passed through as ``model_name``.

    Returns:
        A ``ChatGroq`` instance configured with the ``GROQ_API_KEY``
        environment variable.
    """
    api_key = os.getenv("GROQ_API_KEY")
    client = ChatGroq(groq_api_key=api_key, model_name=model)
    return client

## Some useful tools

In [27]:

# Shared chat model used by the helper functions defined in this cell.
llm = get_groq_llm()

# Schema for a single relevance judgement: one required float field, described
# as lying between 0 and 1.
# NOTE(review): nothing in the visible code instantiates this model — confirm
# whether it is still needed.
class RetrievalEvaluatorInput(BaseModel):
    relevance_score: float = Field(..., description="Relevance of the document to the query, from 0 to 1.")

import re

def retrieval_evaluator(query: str, document: str) -> float:
    """Ask the LLM how relevant ``document`` is to ``query``.

    Args:
        query: The user question.
        document: Text of the retrieved document.

    Returns:
        The first standalone number beginning with 0 or 1 found in the model's
        reply, clamped to at most 1.0. Returns 0.0 (after printing a warning)
        when the reply contains no such number.
    """
    prompt = PromptTemplate(
        input_variables=["query", "document"],
        template="On a scale from 0 to 1, how relevant is this document to the query?\nQuery: {query}\nDocument: {document}\nRelevance score:"
    )
    chain = prompt | llm
    result = chain.invoke({"query": query, "document": document}).content.strip()

    # Extract the first float from the response.
    match = re.search(r"\b([01](?:\.\d+)?)\b", result)
    if match:
        # The pattern also accepts replies such as "1.5"; clamp so the score
        # never leaves the 0-1 scale the prompt asks for.
        return min(float(match.group(1)), 1.0)

    print(f"[Warning] Could not parse relevance score from LLM output: {result}")
    return 0.0



# Schema for a refinement result: one required string field holding a
# bullet-point summary.
# NOTE(review): nothing in the visible code instantiates this model — confirm
# whether it is still needed.
class KnowledgeRefinementInput(BaseModel):
    key_points: str = Field(..., description="Bullet-point summary of the document.")

def knowledge_refinement(doc: str) -> List[str]:
    """Condense ``doc`` into a list of key points using the LLM.

    Args:
        doc: Raw text to summarise.

    Returns:
        One string per non-empty line of the model's reply, with surrounding
        ``-``, ``•``, spaces and newlines removed. An empty list when ``doc``
        is empty or whitespace-only (the LLM is not called in that case).
    """
    if not doc or not doc.strip():
        # Nothing to summarise: skip the LLM call rather than let the model
        # invent bullet points from an empty document.
        return []

    prompt = PromptTemplate(
        input_variables=["document"],
        template="Extract key bullet points from this document:\n{document}\nBullet Points:"
    )
    chain = prompt | llm
    output = chain.invoke({"document": doc}).content

    stripped_lines = (line.strip("-• \n") for line in output.splitlines())
    # Filter after stripping so lines made only of bullet markers (e.g. "-" or
    # "---") do not end up as empty entries.
    return [point for point in stripped_lines if point]



# Websearch Tools

In [28]:
# Re-create the shared chat model for the web-search helpers in this cell.
llm = get_groq_llm()

# Tavily search tool; the key is read from the TAVILY_API_KEY environment variable.
# NOTE(review): confirm that `api_key` is the keyword TavilySearch expects for
# the key.
search = TavilySearch(api_key=os.getenv("TAVILY_API_KEY"))

def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
    """Parse a JSON search payload into ``(title, link)`` pairs.

    Two payload shapes are accepted: a JSON array of result objects, or a JSON
    object carrying that array under a ``"results"`` key. The link is taken
    from ``"link"`` and falls back to ``"url"``; a missing title becomes
    ``"Untitled"``. Array items that are not objects are ignored.

    Args:
        results_string: JSON text describing the search results.

    Returns:
        A list of ``(title, link)`` tuples; empty when the input is not valid
        JSON, is not a string, or has an unsupported shape.
    """
    try:
        payload = json.loads(results_string)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers non-string input such as None.
        return []

    if isinstance(payload, dict):
        payload = payload.get("results", [])
    if not isinstance(payload, list):
        return []

    return [
        (item.get("title", "Untitled"), item.get("link") or item.get("url") or "")
        for item in payload
        if isinstance(item, dict)
    ]

def rewrite_query(query: str) -> str:
    """Have the LLM rephrase ``query`` for a web search.

    Returns the model's reply with surrounding whitespace removed.
    """
    template_text = "Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:"
    rewriter = PromptTemplate(input_variables=["query"], template=template_text) | llm
    reply = rewriter.invoke({"query": query})
    return reply.content.strip()

def perform_web_search(query: str):
    """Search the web for ``query`` and distil the hits into key points.

    The query is first rewritten for web search, then sent to the search tool.
    The text of all hits is concatenated and summarised into bullet points.

    Args:
        query: The user's original question.

    Returns:
        A ``(refined_knowledge, sources)`` tuple: a list of key-point strings
        and a list of ``(title, url)`` tuples. ``refined_knowledge`` is empty
        when the search yields no usable text.
    """
    print("Performing web search...")
    rewritten_query = rewrite_query(query)

    results = search.invoke(rewritten_query)  # expected: a dict with a "results" list

    hits = []
    if isinstance(results, dict):
        hits = [hit for hit in (results.get("results") or []) if isinstance(hit, dict)]
    else:
        print(f"[Warning] Unexpected web search response: {results}")

    sources = [(hit.get("title", "Untitled"), hit.get("url", "")) for hit in hits]
    # "content" may be missing or None; drop such hits from the text so the
    # join cannot fail.
    snippets = [hit.get("content") or "" for hit in hits]
    all_content = "\n\n".join(snippet for snippet in snippets if snippet)

    if not all_content.strip():
        # Nothing to summarise: avoid asking the LLM to extract points from an
        # empty document.
        return [], sources

    refined_knowledge = knowledge_refinement(all_content)

    return refined_knowledge, sources

## CRAG

In [30]:
def crag_pipeline(
    query: str,
    *,
    k: int = 3,
    upper_threshold: float = 0.7,
    lower_threshold: float = 0.3,
):
    """Answer ``query`` with the corrective-RAG flow.

    The top ``k`` documents are retrieved from the FAISS index and each is
    scored for relevance by the LLM. The best score selects the knowledge
    source:

    * above ``upper_threshold``: refined key points from the retrieved
      documents only;
    * below ``lower_threshold`` (or nothing retrieved): web search only;
    * otherwise: retrieved text and web findings refined together.

    Args:
        query: The user's question.
        k: Number of documents to retrieve from the index.
        upper_threshold: Best score must exceed this to rely on local documents.
        lower_threshold: Best score below this triggers a web-only answer.

    Returns:
        The text of the LLM's final answer.
    """
    vs = create_or_load_faiss(index_path="faiss_index", data_dir=r"data/data")
    llm = get_groq_llm()
    docs = vs.similarity_search(query, k=k)

    # Evaluate relevance
    scores = [retrieval_evaluator(query, doc.page_content) for doc in docs]
    # default=0.0 routes an empty retrieval to the web-search branch instead of
    # raising on max() of an empty list.
    max_score = max(scores, default=0.0)
    print(f"Relevance scores: {scores}")

    if max_score > upper_threshold:
        knowledge = "\n".join([pt for doc in docs for pt in knowledge_refinement(doc.page_content)])
        sources = [("Local Document", "")]

    elif max_score < lower_threshold:
        web_knowledge, sources = perform_web_search(query)
        # perform_web_search returns a list of points; join it so the prompt
        # receives plain text like the other branches, not a list repr.
        knowledge = "\n".join(web_knowledge)

    else:
        retrieved = "\n".join([doc.page_content for doc in docs])
        web_knowledge, web_sources = perform_web_search(query)
        refined = knowledge_refinement(retrieved + "\n\n" + "\n".join(web_knowledge))
        knowledge = "\n".join(refined)
        # This branch draws on the local documents too, so credit them.
        sources = [("Local Document", "")] + list(web_sources)

    # Final Answer
    response_prompt = PromptTemplate(
        input_variables=["query", "knowledge", "sources"],
        template="""Use the following knowledge to answer the query.
          Query: {query}
          Knowledge:
          {knowledge}
          Sources:
          {sources}
          Answer:"""
    )

    formatted_sources = "\n".join([f"{title}: {link}" if link else title for title, link in sources])
    final = response_prompt | llm
    return final.invoke({"query": query, "knowledge": knowledge, "sources": formatted_sources}).content

if __name__ == "__main__":
    # Interactive loop: answer queries until the user types "exit".
    while True:
        try:
            query = input("Query (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the user interrupted: leave the loop
            # instead of surfacing a traceback.
            break
        if not query:
            # Ignore blank input rather than running the whole pipeline on it.
            continue
        if query.lower() == "exit":
            break
        print(crag_pipeline(query))




Query (or type 'exit' to quit): What is ScyllaAgent?
Relevance scores: [0.7, 0.0, 0.0]
Performing web search...
According to the provided knowledge, ScyllaAgent is a "Scalable and low-latency agentic chatbot system" that is built using modular LLM workflows and includes core modules such as NLP processing, transformer models, RAGs, LangGraph, and API-based chatbot deployment using platforms like Groq.
Query (or type 'exit' to quit): What are APIs?
Relevance scores: [1.0, 0.85, 0.8]
According to the provided knowledge, an API (Application Programming Interface) is a set of protocols (rules) that enables different software components to communicate and transfer data. It consists of a request and response cycle, where an API client sends a request to an API server, which retrieves data and returns it to the user. The API request includes an endpoint, method, parameters, request headers, and request body, while the API response includes a status code, response headers, and response body.

