## Installing required Libraries

In [None]:
!pip install -U -q langchain            # Main orchestrator
!pip install -U -q langchain-core      # Core components (Document, BaseRetriever, LLMChain)
!pip install -U -q langchain-community # All community loaders, vectorstores
!pip install -U -q langchain-openai    # For OpenAI
!pip install -U -q langchain-huggingface
!pip install -U -q langchain-text-splitters
!pip install -U -q langchain-groq
!pip install -U -q langchain-together

In [None]:
# !pip uninstall -y \
#   langchain langchain-core langchain-community langchain-openai \
#   langchain-text-splitters langchain-huggingface langchain-groq \
#   langchain-together langchain-classic langgraph langgraph-prebuilt

In [None]:
!pip install -U \
  "langchain==1.1.0" \
  "langchain-core==1.1.0" \
  "langchain-community==0.4.1" \
  "langchain-text-splitters==1.0.0" \
  "langchain-huggingface==1.1.0" \
  "langchain-openai==1.0.0" \
  "langchain-groq==1.1.0"

!pip install -U chromadb faiss-cpu sentence-transformers langchain-chroma

## Importing required Libraries

In [None]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_community.document_loaders import WebBaseLoader
# from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI # Import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
import pandas as pd
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv # Corrected import statement
from pprint import pprint
from google.colab import drive
import pandas as pd

## Connect to Google Drive

In [None]:
# Mount Google Drive at /content/drive so files stored on Drive can be read
# through the local filesystem.
drive.mount('/content/drive')

In [None]:
# Read KEY=value pairs from the ".env" file into the process environment.
load_dotenv(".env")

# Pull the provider credentials out of the environment; each is None when the
# corresponding variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")
groq_api_key = os.environ.get("GROQ_API_KEY")

## Define the embedding model

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Hugging Face model used to embed queries for the vector store.
# NOTE(review): presumably the same model that was used when the persisted
# Chroma index was built - confirm before changing it.
embedding_model_name = "BAAI/bge-base-en-v1.5"
embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

## Load vector store from disk

In [None]:

# Directory on the mounted Drive that holds the persisted Chroma index.
persist_directory = "/content/drive/MyDrive/chroma_db"

# Chroma creates a new, empty store when the directory does not exist, which
# would make every later query retrieve nothing. Fail fast with a clear
# message instead (e.g. Drive not mounted or the path is wrong).
if not os.path.isdir(persist_directory):
    raise FileNotFoundError(
        f"Chroma persist directory not found: {persist_directory!r}. "
        "Mount Google Drive and check the path before loading the store."
    )

# Open the persisted store; `embedding` is used to embed incoming queries.
db = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

## Initialize the retriever

In [None]:
# Build the retriever on top of the vector store.
#
# search_type="mmr": Maximal Marginal Relevance, a retrieval strategy that
#   balances relevance and diversity to avoid redundant results.
# k: number of documents returned per query.
# lambda_mult: trade-off between relevance and diversity - lower values favor
#   diversity, higher values favor relevance.
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "lambda_mult": 0.25},
)

## Load the LLM model


In [None]:
# Chat model used to generate answers. The API key is the value read from the
# environment earlier in the notebook (the argument was previously left blank,
# which is a syntax error). temperature=0.0 keeps the output as deterministic
# as the model allows.
gpt_llm = ChatOpenAI(
    model="gpt-4o-mini",
    openai_api_key=openai_api_key,
    temperature=0.0,
)

## Define the chat prompt template
Define a strict, context-aware prompt for the Retrieval-Augmented Generation (RAG) pipeline.

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Prompt text for the RAG chain. It has two placeholders, {context} and
# {query}, which must match the keys of the input mapping fed to the prompt.
# The model is told to answer only from the context and to reply with the
# exact sentence "I do not know" when the context lacks the answer.
template = """
You are a precise assistant.
Use ONLY the context below to answer the question.
If the context does not contain the answer, reply EXACTLY with:
I do not know

Do NOT include any extra text, instructions, or explanations.

Context:
{context}

Question:
{query}

Answer:
"""

In [None]:
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate


In [None]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with a blank line between
        consecutive documents; an empty string when ``docs`` is empty.
    """
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)

# Prompt and parser stages of the RAG chain.
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

# The incoming question is sent to both entries of the mapping:
#   "context": the retriever fetches documents and format_docs joins their
#              page_content, so the prompt receives plain text rather than
#              the repr of a list of Document objects.
#   "query":   the question itself, passed through unchanged.
# The filled prompt goes to the chat model and the reply is reduced to a str.
gpt_llm_chain = (
    {
        "context": retriever | format_docs,
        "query": RunnablePassthrough(),
    }
    | prompt
    | gpt_llm
    | output_parser
)

## Define tests

In [None]:
# Sample questions for exercising the RAG chain, grouped by source document
# (two questions per document).
test_questions = [
    # Elements of Crimes (ICC)
    "What are the elements required to establish genocide by killing under Article 6(a) of the Elements of Crimes?",
    "According to the general introduction of the Elements of Crimes, how can the existence of intent and knowledge be inferred?",

    # Genocide Convention
    "According to Article 2 of the Genocide Convention, what acts constitute genocide?",
    "Who can be punished for committing genocide or related acts under Article 4 of the Convention?",

    # Declaration on Protection of Women and Children in Armed Conflict
    "According to the Declaration, what acts committed by belligerents against women and children in military operations or occupied territories are considered criminal?",
    "What obligations do States have under the Geneva Protocol of 1925 and the Geneva Conventions of 1949 to protect women and children in armed conflicts?",

    # Optional Protocol to CEDAW
    "Under Article 4 of the Protocol, in what circumstances will the Committee declare a communication inadmissible?",
    "What powers does the Committee have under Article 8 when it receives reliable information about grave or systematic violations of women’s rights?",

    # Convention on the Suppression of Unlawful Acts against Maritime Navigation (Rome, 1988)
    "According to Article 6, under what circumstances must a State Party establish jurisdiction over offences committed against maritime navigation?",
    "What obligations does Article 10 impose on a State Party if it does not extradite an alleged offender found in its territory?"
]


## Invoking the model

## Gradio UI

In [None]:
!pip install gradio -q

In [None]:
import gradio as gr

def ask_rag_model(question):
    """Answer a user question with the RAG chain.

    Args:
        question: Text submitted through the Gradio textbox.

    Returns:
        The string produced by ``gpt_llm_chain`` for the question, or a short
        prompt asking for input when the question is empty or whitespace-only.
    """
    # Skip retrieval and the LLM call when nothing was actually asked.
    if not question or not question.strip():
        return "Please enter a question."
    return gpt_llm_chain.invoke(question)

# Input and output widgets: a two-line box for the question and a ten-line
# box labelled "Answer" for the reply.
question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")
answer_box = gr.Textbox(lines=10, label="Answer")

# Wire the widgets to ask_rag_model: each submission calls it with the
# question text and shows the returned string.
iface = gr.Interface(
    fn=ask_rag_model,
    inputs=question_box,
    outputs=answer_box,
    title="Human Rights Library",
    description="""Welcome to the Human Rights Library!\n\nAsk any question about human rights documents. Our intelligent assistant will provide answers based *only* on the available documentation. If the information isn't found within our library, it will simply state 'I do not know' to ensure accuracy and prevent hallucination.""",
    theme="soft",
)

# Start the Gradio app.
iface.launch()