In [10]:
import os
import time
import logging
import torch
import pandas as pd
from typing import List, Dict
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType, PeftModel, PeftConfig
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.retrievers import BaseRetriever
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_core.output_parsers import StrOutputParser
#import streamlit as st
from dotenv import load_dotenv
from langgraph.graph import END, StateGraph, START
from typing_extensions import TypedDict
from pydantic import BaseModel, Field

In [14]:
# Read a local .env file (if any) into the process environment, then pick up the API keys.
load_dotenv()
openai_api_key, pinecone_api_key = (
    os.environ.get(key_name) for key_name in ("OPENAI_API_KEY", "PINECONE_API_KEY")
)

# Expose only physical GPU 1 to this process; inside the process it is then addressed as cuda:0.
# NOTE(review): presumably this must run before CUDA is first initialised — confirm.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [15]:
# Load the news export, keeping only the columns used below and dropping rows with any gap.
_ARTICLE_COLUMNS = ['Title', 'Authors', 'Article Text', 'Date Published']
df = pd.read_csv('news.csv')[_ARTICLE_COLUMNS].dropna()

# One Document per article: the body becomes the content, the other columns become metadata.
docs = [
    Document(
        page_content=article["Article Text"],
        metadata={
            "title": article["Title"],
            "authors": article["Authors"],
            "date_published": article["Date Published"],
        },
    )
    for _, article in df.iterrows()
]


In [16]:
# Split every article into 250-character chunks that overlap by 50 characters.
CHUNK_SIZE = 250
CHUNK_OVERLAP = 50
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
doc_splits = text_splitter.split_documents(docs)

In [49]:
# Define utility functions
def format_articles(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    article_texts = [doc.page_content for doc in docs]
    return "\n\n".join(article_texts)

def process_documents(docs, summary_length=200):
    """Return cleaned copies of ``docs`` with normalised metadata.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict-like with ``.get``).
        summary_length: Maximum number of content characters copied into the
            ``summary`` metadata field. Defaults to 200.

    Returns:
        list: New ``Document`` objects, one per input, in the same order. Each
        carries the stripped content and a metadata dict with the keys
        ``raw_text_index``, ``title``, ``authors``, ``date_published`` and
        ``summary``.
    """
    processed_docs = []
    for position, doc in enumerate(docs):
        # Clean and format the content
        content = doc.page_content.strip()

        # Only mark the summary as truncated when text was actually cut off;
        # short (or empty) content is kept as-is without a trailing ellipsis.
        summary = content[:summary_length]
        if len(content) > summary_length:
            summary += "..."

        metadata = {
            # Position of this document in the output list, stored as a string.
            'raw_text_index': str(position),
            'title': doc.metadata.get('title', ''),
            'authors': doc.metadata.get('authors', ''),
            'date_published': str(doc.metadata.get('date_published', '')),
            'summary': summary,
        }

        processed_docs.append(Document(page_content=content, metadata=metadata))
    return processed_docs

## Basic LLM Model 

In [18]:
# Initialize LLM, embeddings, and vector store
def initialize_components():
    """Create the chat model, the sentence-embedding model and the Pinecone store.

    Reads the module-level ``openai_api_key`` and ``pinecone_api_key``.

    Returns:
        tuple: ``(llm, embeddings, vectorstore)`` in that order.
    """
    chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=openai_api_key)
    sentence_embedder = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")

    # The store is attached to the "env-news" index and reads document text
    # from the "raw_text_index" field of each record.
    store_options = dict(
        index_name="env-news",
        embedding=sentence_embedder,
        pinecone_api_key=pinecone_api_key,
        text_key="raw_text_index",
    )
    pinecone_store = PineconeVectorStore(**store_options)

    return chat_model, sentence_embedder, pinecone_store

# Build the shared components once; later cells use `llm` and `vectorstore` as module-level globals.
llm, embeddings, vectorstore = initialize_components()

  embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")


## LoRA model

In [19]:
def load_lora_model(model_path="./lora_model"):
    """Load the LoRA-adapted seq2seq model, falling back to stock BART on any error.

    Args:
        model_path: Directory holding the saved PEFT adapter and its config.

    Returns:
        tuple: ``(model, tokenizer)``, with the model moved to the module-level
        ``device``. On the normal path the adapter weights are merged into the
        base model named by the adapter config; if anything in that path
        raises, the error is printed and ``facebook/bart-large-cnn`` is
        returned instead.
    """
    try:
        adapter_config = PeftConfig.from_pretrained(model_path)
        base_name = adapter_config.base_model_name_or_path
        adapted_model = PeftModel.from_pretrained(
            AutoModelForSeq2SeqLM.from_pretrained(base_name),
            model_path,
        )
        # Fold the adapter into the base weights so a plain model is returned.
        merged_model = adapted_model.merge_and_unload()
        tokenizer = AutoTokenizer.from_pretrained(base_name)
        merged_model.to(device)
    except Exception as e:
        print(f"Error loading LoRA model: {str(e)}")
        # Fallback to base model if LoRA model fails to load
        fallback_name = "facebook/bart-large-cnn"
        tokenizer = AutoTokenizer.from_pretrained(fallback_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(fallback_name).to(device)
        return model, tokenizer
    return merged_model, tokenizer
    
# Load the summarisation model once; summarize_with_lora and generate_fallback_response read these two globals.
lora_model, lora_tokenizer = load_lora_model()

In [20]:
# Function to generate summaries using LoRA model
def generate_summary(model, tokenizer, article_text, max_length=130, min_length=30):
    """Summarise ``article_text`` with a seq2seq model using 3-beam search.

    Args:
        model: Model exposing ``generate`` (and, normally, a ``device`` attribute).
        tokenizer: Matching tokenizer; called to encode and used to decode.
        article_text: Text to summarise. It is prefixed with ``"Summarize: "``
            and truncated to 512 tokens by the tokenizer call.
        max_length: ``max_length`` passed to ``generate``.
        min_length: ``min_length`` passed to ``generate``.

    Returns:
        str: The first generated sequence, decoded without special tokens.
    """
    input_text = "Summarize: " + article_text

    # Put the inputs on the same device as the model; fall back to the
    # module-level `device` only when the model does not expose one.
    target_device = getattr(model, "device", None)
    if target_device is None:
        target_device = device
    inputs = tokenizer(
        input_text, return_tensors="pt", truncation=True, max_length=512
    ).to(target_device)

    with torch.no_grad():
        summary_ids = model.generate(
            input_ids=inputs["input_ids"],
            # Forward the tokenizer's mask instead of dropping it, so generate()
            # does not have to infer which positions are real tokens.
            attention_mask=inputs.get("attention_mask"),
            max_length=max_length,
            min_length=min_length,
            num_beams=3,
            early_stopping=True
        )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

## Agentic RAG

In [23]:
# Index the chunked articles in a Chroma collection embedded with OpenAI's
# "text-embedding-3-small", then expose it as a top-5 retriever for the graph nodes.
openai_embedder = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore2 = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=openai_embedder,
)
retriever = vectorstore2.as_retriever(search_kwargs={"k": 5})

In [24]:
# Define data models for grading
# Structured-output schema used by the document relevance grader.
# NOTE(review): the docstring and Field description are presumably sent to the
# LLM as part of the tool schema — confirm before rewording either of them.
class RelevanceGrade(BaseModel):
    """Binary score for relevance check on retrieved documents."""
    # Expected to be 'yes' or 'no'; grade_retrieved_articles lower-cases it before comparing.
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# Structured-output schema used by the grounding (hallucination) grader.
# NOTE(review): the docstring and Field description are presumably sent to the
# LLM as part of the tool schema — confirm before rewording either of them.
class HallucinationGrade(BaseModel):
    """Binary score for hallucination present in generation answer."""
    # 'yes' means the answer IS grounded; evaluate_final_response treats 'yes' as a pass.
    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

# Structured-output schema used by the answer-quality grader.
# NOTE(review): the docstring and Field description are presumably sent to the
# LLM as part of the tool schema — confirm before rewording either of them.
class AnswerQualityGrade(BaseModel):
    """Binary score to assess if the answer addresses the question."""
    # Expected to be 'yes' or 'no'; evaluate_final_response lower-cases it before comparing.
    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

# One structured-output grader per schema, all bound to the shared chat model
# and all using the "function_calling" method.
doc_relevance_grader, hallucination_check_grader, answer_quality_grader = (
    llm.with_structured_output(grade_schema, method="function_calling")
    for grade_schema in (RelevanceGrade, HallucinationGrade, AnswerQualityGrade)
)

In [25]:
# Prompt templates. Each one is piped into a model in the "Create chains" cell;
# the {placeholders} are the keys the calling node must supply to `.invoke`.

# Grades one retrieved document against the question ({document}, {question}).
relevance_check_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a grader assessing document relevance to a user question. "
     "If the document contains related keywords or semantic meaning, grade it as 'yes'."),
    ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
])

# Checks whether a generated answer is supported by the source text ({documents}, {generation}).
hallucination_check_prompt = ChatPromptTemplate.from_messages([
    ("system", "You assess if an LLM-generated response is grounded in facts."),
    ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
])

# Checks whether a generated answer addresses the question ({question}, {generation}).
answer_quality_prompt = ChatPromptTemplate.from_messages([
    ("system", "You assess if an LLM-generated response sufficiently answers a given question."),
    ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
])

# Asks for a reformulated question better suited to vector search ({question}).
query_rewrite_prompt = ChatPromptTemplate.from_messages([
    ("system", "Rewrite input questions for better vectorstore retrieval."),
    ("human", "Initial question: \n\n {question} \n Reformulate an improved question."),
])

# Answers directly from retrieved context ({question}, {context}).
contextual_rag_prompt = ChatPromptTemplate.from_messages([
    ("system", "Use retrieved context to generate an answer in three concise sentences."),
    ("human", "Question: {question}\nContext: {context}"),
])

# Answers from per-document summaries rather than raw text ({question}, {summaries}).
summary_fusion_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an assistant that integrates document summaries to provide comprehensive answers. "
     "Use the following summaries to answer the question. Focus on being accurate and concise."),
    ("human", "Question: {question}\nSummaries: {summaries}"),
])

In [26]:
# Create chains.
# The three grading chains end in a structured-output grader, so callers read
# `.binary_score` from the result; the other three end in StrOutputParser and
# return plain strings.
relevance_grading_chain = relevance_check_prompt | doc_relevance_grader
hallucination_grading_chain = hallucination_check_prompt | hallucination_check_grader
answer_quality_chain = answer_quality_prompt | answer_quality_grader
query_rewriting_chain = query_rewrite_prompt | llm | StrOutputParser()
# NOTE(review): rag_response_chain is not referenced by any node visible in this notebook.
rag_response_chain = contextual_rag_prompt | llm | StrOutputParser()
summary_fusion_chain = summary_fusion_prompt | llm | StrOutputParser()

In [28]:
# Define graph state
class AgentWorkflowState(TypedDict):
    """
    Represents the state of our multi-agent RAG workflow.

    Every node function in this notebook returns a dict carrying all six keys.
    """
    # Query text used for retrieval; replaced by rewrite_user_query on a retry.
    rewritten_question: str
    # The user's question as first asked; used for grading and final answers.
    original_question: str
    # Answer text produced by the generation nodes ("" until one has run).
    final_response: str
    # Documents from the latest retrieval, filtered by grade_retrieved_articles.
    retrieved_documents: List
    # One summary string per retrieved document.
    generated_summaries: List
    # Number of query rewrites so far; incremented by rewrite_user_query.
    retry_count: int

In [29]:
# Define nodes (agents)

def retrieve_articles(state):
    """Graph node: fetch documents for ``state["rewritten_question"]``.

    Queries the module-level ``retriever``. If that yields nothing, it retries
    with a direct ``similarity_search(k=5)`` on the retriever's own vector
    store. All other state keys are passed through, with defaults filled in
    for any that are absent.
    """
    print("---RETRIEVE---")
    query = state["rewritten_question"]

    hits = retriever.invoke(query)
    if not hits:
        print("No documents retrieved. Trying vectorstore fallback.")
        hits = retriever.vectorstore.similarity_search(query, k=5)

    update = {
        "retrieved_documents": hits,
        "rewritten_question": query,
        "original_question": state["original_question"],
    }
    # Carry the remaining keys forward, defaulting the ones not set yet.
    for key, default in (
        ("retry_count", 0),
        ("generated_summaries", []),
        ("final_response", ""),
    ):
        update[key] = state.get(key, default)
    return update


def grade_retrieved_articles(state: Dict) -> Dict:
    """Graph node: keep only the retrieved documents graded as relevant.

    Each document is graded against the *original* question with
    ``relevance_grading_chain``. Once ``retry_count`` has reached 3, grading
    is skipped and every retrieved document is kept.

    Args:
        state: Workflow state; must contain ``rewritten_question``,
            ``original_question`` and ``retrieved_documents``.

    Returns:
        Dict: The full state with ``retrieved_documents`` replaced by the
        filtered list.
    """
    question = state["rewritten_question"]
    original_question = state["original_question"]
    documents = state["retrieved_documents"]
    retry_count = state.get("retry_count", 0)

    if retry_count >= 3:
        # Retry budget used up: accept everything without spending grader calls.
        filtered_documents = documents
    else:
        filtered_documents = []
        for doc in documents:
            score = relevance_grading_chain.invoke(
                {"question": original_question, "document": doc.page_content}
            )
            # A grader call that produced no structured result counts as
            # "not relevant" instead of raising; surrounding whitespace and
            # letter case in the verdict are ignored.
            verdict = getattr(score, "binary_score", None) or ""
            if str(verdict).strip().lower() == "yes":
                filtered_documents.append(doc)

    return {
        "retrieved_documents": filtered_documents,
        "rewritten_question": question,
        "original_question": original_question,
        "retry_count": retry_count,
        "final_response": state.get("final_response", ""),
        "generated_summaries": state.get("generated_summaries", [])
    }


def summarize_with_lora(state: Dict) -> Dict:
    """Graph node: summarise each retrieved document with the LoRA model.

    Calls ``generate_summary`` with the module-level ``lora_model`` and
    ``lora_tokenizer`` once per document and stores the results, in document
    order, under ``generated_summaries``.
    """
    query = state["rewritten_question"]
    documents = state["retrieved_documents"]

    summaries = []
    for doc in documents:
        summaries.append(generate_summary(lora_model, lora_tokenizer, doc.page_content))

    return dict(
        retrieved_documents=documents,
        rewritten_question=query,
        original_question=state["original_question"],
        generated_summaries=summaries,
        retry_count=state.get("retry_count", 0),
        final_response=state.get("final_response", ""),
    )


def generate_final_response(state: Dict) -> Dict:
    """Graph node: fuse the document summaries into an answer.

    Joins ``generated_summaries`` with blank lines, sends them with the
    original question through ``summary_fusion_chain``, and stores the result
    under ``final_response``. Every other key is copied from ``state`` as-is,
    so all of them must already be present.
    """
    fused_answer = summary_fusion_chain.invoke({
        "summaries": "\n\n".join(state["generated_summaries"]),
        "question": state["original_question"],
    })

    carried_keys = (
        "retrieved_documents",
        "rewritten_question",
        "original_question",
        "generated_summaries",
        "retry_count",
    )
    result = {key: state[key] for key in carried_keys}
    result["final_response"] = fused_answer
    return result


def rewrite_user_query(state: Dict) -> Dict:
    """Graph node: count a retry and reformulate the question for retrieval.

    ``retry_count`` is incremented on every call. Below 5, the *original*
    question is sent through ``query_rewriting_chain`` and the result becomes
    the new ``rewritten_question``. At 5 or more, no rewrite happens and a
    canned apology is stored in ``final_response`` instead.

    NOTE(review): in the graphs built in this notebook the grading step forces
    the fallback path once retry_count reaches 3, so the >= 5 branch looks
    unreachable from there — confirm before relying on it.
    """
    attempt = state.get("retry_count", 0) + 1
    asked = state["original_question"]

    if attempt >= 5:
        response_out = (
            f"I've searched for information about '{asked}', "
            f"but couldn't find highly relevant documents. Here's a limited response."
        )
        question_out = state["rewritten_question"]
    else:
        question_out = query_rewriting_chain.invoke({"question": asked})
        response_out = state.get("final_response", "")

    return {
        "retrieved_documents": state["retrieved_documents"],
        "rewritten_question": question_out,
        "original_question": asked,
        "retry_count": attempt,
        "final_response": response_out,
        "generated_summaries": state.get("generated_summaries", []),
    }


def generate_fallback_response(state: Dict) -> Dict:
    """Graph node: last-resort answer built from a fresh top-3 search.

    Ignores the documents already in ``state``: it runs
    ``vectorstore.similarity_search(k=3)`` on the rewritten question,
    summarises each hit with the LoRA model, and fuses the summaries into
    ``final_response`` against the original question.

    NOTE(review): this searches the module-level ``vectorstore`` rather than
    the ``retriever`` used by retrieve_articles, and always summarises with
    the LoRA model even in the base-LLM workflow — confirm both are intended.
    """
    query = state["rewritten_question"]
    asked = state["original_question"]
    attempts = state.get("retry_count", 0)

    hits = vectorstore.similarity_search(query, k=3)
    hit_summaries = []
    for hit in hits:
        hit_summaries.append(generate_summary(lora_model, lora_tokenizer, hit.page_content))

    answer = summary_fusion_chain.invoke({
        "summaries": "\n\n".join(hit_summaries),
        "question": asked,
    })

    return dict(
        retrieved_documents=hits,
        rewritten_question=query,
        original_question=asked,
        retry_count=attempts,
        final_response=answer,
        generated_summaries=hit_summaries,
    )


In [30]:
# Define edge functions

def decide_to_generate_response(state: Dict) -> str:
    """Route after grading.

    Returns:
        str: ``"force_generate"`` once ``retry_count`` has reached 3;
        otherwise ``"not_relevant"`` when no documents survived grading and
        ``"relevant"`` when at least one did.
    """
    surviving_documents = state["retrieved_documents"]

    # The retry budget takes priority over whether any documents survived.
    if state.get("retry_count", 0) >= 3:
        return "force_generate"
    return "relevant" if surviving_documents else "not_relevant"


def evaluate_final_response(state: Dict) -> str:
    """Route after answer generation, based on two grader verdicts.

    Returns:
        str: ``"not_supported"`` when the grounding grader does not say
        'yes'; otherwise ``"useful"`` or ``"not_useful"`` depending on whether
        the answer-quality grader says 'yes'. The second grader is only
        consulted when the first one passes.
    """
    asked = state["original_question"]
    source_docs = state["retrieved_documents"]
    answer = state["final_response"]

    # The grounding grader receives the raw document text as one string.
    source_text = "\n\n".join(doc.page_content for doc in source_docs)

    grounding = hallucination_grading_chain.invoke({
        "documents": source_text,
        "generation": answer,
    })
    if grounding.binary_score.lower() != "yes":
        return "not_supported"

    quality = answer_quality_chain.invoke({
        "question": asked,
        "generation": answer,
    })
    return "useful" if quality.binary_score.lower() == "yes" else "not_useful"


### With LoRA

In [31]:
# Build the graph
def build_agentic_rag_workflow():
    """Assemble and compile the agentic RAG graph that summarises with LoRA.

    Flow: retrieve -> grade -> (summarise -> generate -> evaluate) with two
    escape routes from grading: a query rewrite when nothing relevant was
    found, and a forced fallback answer once the retry budget is spent.

    Every path that loops goes through ``rewrite_user_query``, which
    increments ``retry_count``; the grading router then forces the fallback
    (and END) once that count reaches 3, so the graph always terminates.

    Returns:
        The compiled graph, ready for ``.invoke(initial_state)``.
    """
    workflow = StateGraph(AgentWorkflowState)

    # Add nodes
    workflow.add_node("retrieve_articles", retrieve_articles)
    workflow.add_node("grade_retrieved_articles", grade_retrieved_articles)
    workflow.add_node("summarize_with_lora", summarize_with_lora)
    workflow.add_node("generate_final_response", generate_final_response)
    workflow.add_node("rewrite_user_query", rewrite_user_query)
    workflow.add_node("generate_fallback_response", generate_fallback_response)

    # Add edges
    workflow.add_edge(START, "retrieve_articles")
    workflow.add_edge("retrieve_articles", "grade_retrieved_articles")

    workflow.add_conditional_edges(
        "grade_retrieved_articles",
        decide_to_generate_response,
        {
            "not_relevant": "rewrite_user_query",
            "relevant": "summarize_with_lora",
            "force_generate": "generate_fallback_response"
        }
    )

    workflow.add_edge("summarize_with_lora", "generate_final_response")
    workflow.add_edge("generate_fallback_response", END)
    workflow.add_edge("rewrite_user_query", "retrieve_articles")

    workflow.add_conditional_edges(
        "generate_final_response",
        evaluate_final_response,
        {
            # An ungrounded answer used to route straight back to
            # generate_final_response. That node would see the same summaries
            # and question again (the chat model runs at temperature 0), and
            # nothing on that edge advances retry_count, so the loop had no
            # exit until LangGraph's recursion limit aborted the run. Going
            # through the rewrite step instead makes the retry bounded.
            "not_supported": "rewrite_user_query",
            "useful": END,
            "not_useful": "rewrite_user_query"
        }
    )

    # Compile
    return workflow.compile()


In [41]:
# Compile the LoRA-summarisation graph; evaluate_system invokes it as `lora_rag_app`.
lora_rag_app = build_agentic_rag_workflow()

### With Base LLM

In [36]:
def summarize_with_base_llm(state: Dict) -> Dict:
    """Graph node: summarise each retrieved document with the chat LLM.

    Drop-in alternative to ``summarize_with_lora``: one ``llm.invoke`` call
    per document, with the reply text collected in document order under
    ``generated_summaries``.
    """
    query = state["rewritten_question"]
    asked = state["original_question"]
    documents = state["retrieved_documents"]
    attempts = state.get("retry_count", 0)

    summaries = [
        llm.invoke(f"Summarize the following text in a concise manner: {doc.page_content}").content
        for doc in documents
    ]

    return dict(
        retrieved_documents=documents,
        rewritten_question=query,
        original_question=asked,
        generated_summaries=summaries,
        retry_count=attempts,
        final_response=state.get("final_response", ""),
    )

In [37]:
# Create a modified workflow using base LLM for summarization
def build_base_llm_workflow():
    """Assemble and compile the agentic RAG graph that summarises with the chat LLM.

    Same topology as the LoRA workflow, except that the summarisation node is
    ``summarize_with_base_llm``.

    Every path that loops goes through ``rewrite_user_query``, which
    increments ``retry_count``; the grading router then forces the fallback
    (and END) once that count reaches 3, so the graph always terminates.

    Returns:
        The compiled graph, ready for ``.invoke(initial_state)``.
    """
    workflow = StateGraph(AgentWorkflowState)

    # Add nodes (same logic, base LLM replaces LoRA for summarization)
    workflow.add_node("retrieve_articles", retrieve_articles)
    workflow.add_node("grade_retrieved_articles", grade_retrieved_articles)
    workflow.add_node("summarize_with_base_llm", summarize_with_base_llm)  # <--- base model agent
    workflow.add_node("generate_final_response", generate_final_response)
    workflow.add_node("rewrite_user_query", rewrite_user_query)
    workflow.add_node("generate_fallback_response", generate_fallback_response)

    # Add edges
    workflow.add_edge(START, "retrieve_articles")
    workflow.add_edge("retrieve_articles", "grade_retrieved_articles")

    workflow.add_conditional_edges(
        "grade_retrieved_articles",
        decide_to_generate_response,
        {
            "not_relevant": "rewrite_user_query",
            "relevant": "summarize_with_base_llm",
            "force_generate": "generate_fallback_response"
        }
    )

    workflow.add_edge("summarize_with_base_llm", "generate_final_response")
    workflow.add_edge("generate_fallback_response", END)
    workflow.add_edge("rewrite_user_query", "retrieve_articles")

    workflow.add_conditional_edges(
        "generate_final_response",
        evaluate_final_response,
        {
            # An ungrounded answer used to route straight back to
            # generate_final_response. That node would see the same summaries
            # and question again (the chat model runs at temperature 0), and
            # nothing on that edge advances retry_count, so the loop had no
            # exit until LangGraph's recursion limit aborted the run. Going
            # through the rewrite step instead makes the retry bounded.
            "not_supported": "rewrite_user_query",
            "useful": END,
            "not_useful": "rewrite_user_query"
        }
    )

    # Compile
    return workflow.compile()


In [40]:
# Compile the base workflow (summaries come from the chat LLM); evaluate_system invokes it as `base_rag_app`.
base_rag_app = build_base_llm_workflow()

In [47]:
# Create basic RAG chain for comparison
def build_basic_rag_chain():
    """Build the plain retrieve-then-answer chain used as a comparison baseline.

    The chain takes the question as its input, retrieves the top 5 documents
    from the module-level ``vectorstore``, formats them with
    ``format_articles``, fills the "rlm/rag-prompt" template pulled from the
    hub, and returns the chat model's reply as a string.
    """
    basic_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    rag_prompt_template = hub.pull("rlm/rag-prompt")

    # The same input question feeds both branches: one retrieves and formats
    # context, the other passes the question through untouched.
    prompt_inputs = {
        "context": basic_retriever | format_articles,
        "question": RunnablePassthrough(),
    }
    return prompt_inputs | rag_prompt_template | llm | StrOutputParser()


## Evaluate models

In [45]:
# Evaluation function
def evaluate_system(question):
    """Answer ``question`` with all four systems and return the answers side by side.

    The systems are run in this order: the bare chat LLM, the basic RAG chain
    (rebuilt on every call), the agentic graph with base-LLM summaries, and
    the agentic graph with LoRA summaries.

    Returns:
        dict: Answer strings keyed by ``base_llm``, ``basic_rag``,
        ``advanced_rag_base`` and ``advanced_rag_lora``.
    """
    def initial_state():
        # A fresh dict per graph run so the two runs never share state.
        return {
            "rewritten_question": question,
            "original_question": question,
            "retry_count": 0,
            "final_response": "",
            "generated_summaries": [],
        }

    answers = {}
    # Base LLM (no RAG)
    answers["base_llm"] = llm.invoke(question).content
    # Basic RAG
    answers["basic_rag"] = build_basic_rag_chain().invoke(question)
    # Advanced RAG with base model (no fine-tuning)
    answers["advanced_rag_base"] = base_rag_app.invoke(initial_state())["final_response"]
    # Advanced RAG with LoRA model
    answers["advanced_rag_lora"] = lora_rag_app.invoke(initial_state())["final_response"]
    return answers


## Examples:

In [52]:
ques1 = "What are the latest developments in renewable energy covered in the news?"
result1 = evaluate_system(ques1)

# Report layout: the question between two heavy rules, then one section per
# system (blank line + heading, a light rule, the answer), then a closing rule.
heavy_rule = "=" * 100
light_rule = "-" * 100
report_sections = (
    ("BASE LLM (No RAG)", "base_llm"),
    ("BASIC RAG", "basic_rag"),
    ("ADVANCED RAG (Base Model)", "advanced_rag_base"),
    ("ADVANCED RAG (LoRA Fine-Tuned Model)", "advanced_rag_lora"),
)

print("\n" + heavy_rule)
print(f"QUESTION:\n{ques1}")
print(heavy_rule)

for section_heading, result_key in report_sections:
    print("\n" + section_heading)
    print(light_rule)
    print(result1[result_key])

print("\n" + heavy_rule + "\n")



---RETRIEVE---
---RETRIEVE---

QUESTION:
What are the latest developments in renewable energy covered in the news?

BASE LLM (No RAG)
----------------------------------------------------------------------------------------------------
1. Offshore wind farms are becoming increasingly popular as a source of renewable energy. The world's largest offshore wind farm, Hornsea One, recently opened off the coast of England, capable of powering over 1 million homes.

2. Solar power continues to grow in popularity, with new advancements in solar panel technology making it more efficient and cost-effective. In India, the government recently announced plans to build the world's largest solar power plant in the state of Gujarat.

3. The use of hydrogen as a clean energy source is gaining traction, with countries like Japan investing in hydrogen fuel cell technology for transportation and power generation. Toyota recently announced plans to build a hydrogen-powered city of the future in Japan.

4. G