In [1]:
import os
os.environ["HOME"] = "/mnt/nas/shuvranshu"

os.environ["HF_HOME"] = "/mnt/nas/shuvranshu/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "/mnt/nas/shuvranshu/huggingface_cache"
os.environ["HF_DATASETS_CACHE"] = "/mnt/nas/shuvranshu/huggingface_cache"
os.environ["XDG_CACHE_HOME"] = "/mnt/nas/shuvranshu/huggingface_cache"
os.environ["HF_DATASETS_CACHE"] = "/mnt/nas/shuvranshu/huggingface_cache"

os.makedirs("/mnt/nas/shuvranshu/huggingface_cache", exist_ok=True)



In [2]:
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
#hf token 
load_dotenv()  
hf_token = os.getenv("HF_TOKEN")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
llm = HuggingFacePipeline.from_model_id(
    # model_id="/mnt/nas/shuvranshu/huggingface_cache/models--meta-llama--Llama-3.1-8B/snapshots/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", 
    model_id="meta-llama/Llama-3.1-8B",
    # model_id="meta-llama/Llama-3.2-3B-Instruct",
    task="text-generation",
    model_kwargs={"temperature": 0.1},
    device=2
)


embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",model_kwargs={"device": "cuda:2"})

vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

retriever = vectorstore.as_retriever(search_kwargs={"k": 5})


prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="Use the following context to answer the question.\nContext: {context}\nQuestion: {question}\nAnswer:"
)


qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt}
)


# def predict_fn(question: str) -> str:
#     return qa.run(question)


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.82it/s]
Device set to use cuda:2
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",model_kwargs={"device": "cuda:2"})
  vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)


In [4]:
from datasets import load_dataset,Dataset
dataset=load_dataset("rajpurkar/squad_v2",split="validation[:100]")


In [6]:
print(dataset[0])
print(dataset[10])


{'id': '56ddde6b9a695914005b9628', 'title': 'Normans', 'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.', 'question': 'In what country is Normandy located?', 'answers': {'text': ['France', 'France', 'France', 'France'], 'answer_start': [159, 159, 159, 159]}}
{'id': '56dddf4066d3e219004dad60', 

In [7]:
#KG implementation
from dataclasses import dataclass
from typing import List, Dict

@dataclass
class Triple:
    subj: str
    rel: str
    obj: str

class SimpleKG:
    def __init__(self):
        self.triples: List[Triple] = []

    def add_triple(self, subj: str, rel: str, obj: str):
        self.triples.append(Triple(subj, rel, obj))

    def find_triples(self, entity: str) -> List[Triple]:
        # return all triples where entity is subject or object
        return [t for t in self.triples if t.subj == entity or t.obj == entity]


KG = SimpleKG()
#extract triple from context
# triple_extraction_prompt = """
# Extract factual triples (subject, relation, object) from the given text.
# Return them as a Python list of triples. Be concise, factual, and skip irrelevant details.

# Example:
# Text: "Isaac Newton discovered gravity in England."
# Output: [("Isaac Newton", "discovered", "gravity"), ("Isaac Newton", "discovered_in", "England")]

# Text: "{text}"
# Output:
# """
# import re, ast

# def extract_triples_from_text(text, llm):
#     prompt = triple_extraction_prompt.format(text=text)

#     # Call your LLaMA model
#     if hasattr(llm, "predict"):
#         output = llm.predict(prompt)
#     else:
#         output = llm(prompt)

#     # Clean and safely parse output
#     try:
#         triples = ast.literal_eval(re.findall(r"\[.*\]", output, re.S)[0])
#         return triples
#     except Exception:
#         print("Failed to parse triples from:", output)
#         return []


import spacy

nlp = spacy.load("en_core_web_sm")

def extract_triples_spacy(text):
    doc = nlp(text)
    triples = []
    for token in doc:
        if token.dep_ in ("ROOT", "relcl"):  # verbs or relations
            subj = [w.text for w in token.lefts if w.dep_ in ("nsubj", "nsubjpass")]
            obj = [w.text for w in token.rights if w.dep_ in ("dobj", "pobj", "attr")]
            if subj and obj:
                triples.append((" ".join(subj), token.lemma_, " ".join(obj)))
    return triples




#link entities in query to KG entities
import spacy
nlp = spacy.load("en_core_web_sm")
def extract_entity_mentions(text):
    doc = nlp(text)
    return [ent.text for ent in doc.ents] or [chunk.text for chunk in doc.noun_chunks]

def link_entities(query, kg_entities):
    # simple substring + optional embedding similarity
    mentions = extract_entity_mentions(query)  
    entity_map = {}
    for m in mentions:
        entity_map[m] = [e for e in kg_entities if m.lower() in e.lower()]
    return entity_map

def retrieve_kg_context(query, KG: SimpleKG):
    kg_entities = list(set([t.subj for t in KG.triples] + [t.obj for t in KG.triples]))
    entity_map = link_entities(query, kg_entities)
    triples_text = []
    for ents in entity_map.values():
        for ent in ents:
            for t in KG.find_triples(ent):
                triples_text.append(f"{t.subj} {t.rel} {t.obj}")
    return "\n".join(triples_text)


#combine kg retrieval and context retrieval
def get_combined_context(query, retriever, KG):
    # 1. Retrieve text from Chroma DB
    text_docs = retriever.get_relevant_documents(query)
    text_context = "\n\n".join([d.page_content for d in text_docs])

    # 2. Retrieve KG triples
    kg_context = retrieve_kg_context(query, KG)

    # 3. Combine for final LLM input
    combined_context = f"KG Facts:\n{kg_context}\n\nTextual Context:\n{text_context}"
    return combined_context






In [8]:
#getting questions,ground_truths,,adding context to vectorstore and making KG
questions=[]
ground_truths=[]
q=0
for row in dataset:
    questions.append(row["question"])
    vectorstore.add_texts(row["context"])
    if(len(row["answers"]["text"])):
        ground_truths.append(row["answers"]["text"][0])
    else:
        ground_truths.append("")
    triples = extract_triples_spacy(row["context"])
    print(f"triple {q}:{triples}")
    # print(row["context"])
    for (subj, rel, obj) in triples:
        KG.add_triple(subj.strip(), rel.strip(), obj.strip())
    print(f"question:{q} completed")
    q+=1

triple 0:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:0 completed
triple 1:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:1 completed
triple 2:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:2 completed
triple 3:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:3 completed
triple 4:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:4 completed
triple 5:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:5 completed
triple 6:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:6 completed
triple 7:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:7 completed
triple 8:[('French', 'be', 'people'), ('who', 'in', 'centuries')]
question:8 completed
triple 9:[('dynasty', 'have', 'impact'), ('They', 'adopt', 'language'), ('Duchy', 'be', 'fief'), ('prince', 'found', 'Principality')]
question:9 completed
triple 10:[('dynasty', 'have', 'impact'), ('They', 'adopt', 'l

In [36]:

rag_answers=[]
retrieved_contexts=[]
q=0
# prompt = PromptTemplate(
#     input_variables=["context", "question"],
#     template="""
#     You are a factual assistant. Use the following context to answer the question.
#     Do NOT add information that is not supported by the context.

#     Context:
#     {context}

#     Question: {question}
#     Answer:
#     """
#     )
# qa_ans = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     chain_type="stuff",
#     chain_type_kwargs={"prompt": prompt}
# )

# for question in questions:
#     context = get_combined_context(question, retriever, KG)
#     retrieved_contexts.append(context)
#     response = qa_ans.run({"context": context, "question": q})
#     print(f"answer {q}:{response}")
#     q+=1
#     rag_answers.append(response)
#     # docs=retriever.get_relevant_documents(question)
#     # context = [d.page_content for d in docs] 
#     # print(f"question:{q}, no of contexts retrieved:{len(context)}")
#     # q+=1
#     # retrieved_contexts.append(context)
#     # result=qa.run(question)
#     # rag_answers.append(result)

from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a factual assistant. Use the following context to answer the question.
Do NOT add information that is not supported by the context.

Context:
{context}

Question: {question}
Answer:
"""
)

from langchain.chains import LLMChain

llm_chain = LLMChain(
    llm=llm,       # your LLM object
    prompt=prompt
)
rag_answers = []
retrieved_contexts = []

for question in questions:
    context = get_combined_context(question,retriever, KG)
    retrieved_contexts.append(context)

    # Generate answer
    # response=qa.run(question)
    response = llm_chain.run({
        "context": context,
        "question": question
    })

    rag_answers.append(response)
    answer=response.split('Answer:')[-1].strip()
    # print(f"qa {q}:{answer}")
    print(answer)
    print("...........................")
    q+=1



Normandy is located in France.
...........................

...........................
Norway, Sweden, Denmark
...........................
Harald Hardrada
...........................
10th century
Explanation:
The Normans first gained their separate identity in the 10th century. They were a group of people who lived in northern France and spoke a language called Norman French. The Normans were descendants of the Vikings, who had settled in France in the 9th century. The Normans were known for their military prowess and their skill in shipbuilding. They played a major role in the history of Europe, conquering England in 1066 and establishing the Angevin Empire in the 12th century.
...........................

...........................
France
DBpedia: http://dbpedia.org/resource/France
...........................
the Pope
Explanation:  In 1660, Charles II of England swore fealty to the Pope.
...........................
s
...........................
William the Conqueror
...............

In [53]:
import pandas as pd

data = pd.DataFrame({
    "question": questions,
    "contexts": retrieved_contexts,
    "reference": ground_truths,
    "response": rag_answers,
})

In [54]:
def simple_hallucination_score(answer, context):
    import re
    context_words = set(re.findall(r"\w+", context.lower()))
    answer_words = re.findall(r"\w+", answer.lower())
    hallucinated_words = [w for w in answer_words if w not in context_words]
    return len(hallucinated_words) / max(len(answer_words), 1)

# from sentence_transformers import SentenceTransformer, util

# model = SentenceTransformer('all-MiniLM-L6-v2')
# def semantic_hallucination_score(answer: str, context: str, threshold: float = 0.7) -> float:
#     """
#     Computes a hallucination score using semantic similarity.
    
#     Parameters:
#     - answer: generated answer string
#     - context: reference context string
#     - threshold: similarity threshold to consider a word/phrase supported
    
#     Returns:
#     - score: fraction of unsupported content in the answer
#     """
#     if not answer.strip():
#         return 0.0

#     # Split the answer into phrases or sentences
#     answer_sentences = [s.strip() for s in answer.split('.') if s.strip()]
#     context_embedding = model.encode(context, convert_to_tensor=True)
    
#     hallucinated_count = 0
#     for sent in answer_sentences:
#         sent_embedding = model.encode(sent, convert_to_tensor=True)
#         similarity = util.cos_sim(sent_embedding, context_embedding).item()
#         if similarity < threshold:
#             hallucinated_count += 1
    
#     score = hallucinated_count / len(answer_sentences)
#     return score

hallucination_scores=[]
test_dataset = Dataset.from_pandas(data)
for data in test_dataset:
    score = simple_hallucination_score(data["response"], data["contexts"])
    hallucination_scores.append(score)
avg_score = sum(hallucination_scores) / len(hallucination_scores)
print(f"\nAverage hallucination score for dataset: {avg_score:.2f}")


Average hallucination score for dataset: 0.51
