In [None]:
!pip install langchain-community faiss-cpu sentence-transformers





In [None]:
import nltk
# Download the Punkt tokenizer resources. They are presumably what
# nltk's sent_tokenize (used later for sentence splitting) looks up at
# runtime — confirm against the installed NLTK version.
nltk.download("punkt")
# NOTE(review): "punkt_tab" looks like the resource name used by newer NLTK
# releases; both are fetched so either lookup succeeds — confirm.
nltk.download("punkt_tab")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
import numpy
import transformers
import os, glob, json
from typing import List
from langchain_community.document_loaders import TextLoader
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from nltk.tokenize import sent_tokenize
from transformers import pipeline
from sentence_transformers import CrossEncoder


In [None]:
from google.colab import files

# Ask the user to pick a file through the Colab upload widget. The result is a
# mapping whose keys are the names the uploaded files were saved under.
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded (for example the dialog was cancelled): fail with a
    # clear message instead of an opaque IndexError on the lookup below.
    raise RuntimeError("No file was uploaded. Re-run this cell and choose a .txt, .csv, or .pdf file.")
# Only the first uploaded file is used as the dataset.
DATA_PATH = list(uploaded.keys())[0]
print("Dataset uploaded:", DATA_PATH)


Saving OtherQA.csv to OtherQA (1).csv
Dataset uploaded: OtherQA (1).csv


In [None]:
# Path of the dataset to index. Despite the name this is the single uploaded
# file (DATA_PATH), not a directory.
DATA_DIR = DATA_PATH   # use uploaded file
# Hugging Face model id used to embed text for the FAISS index.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Number of candidates requested from the vector store before re-ranking.
TOPK_SENTENCE = 20


In [None]:


from langchain_community.document_loaders import TextLoader, CSVLoader, PyPDFLoader

def load_documents(filepath):
    """Load one file into a list of documents, choosing the loader by extension.

    Args:
        filepath: Path to a ``.txt``, ``.csv`` or ``.pdf`` file. The extension
            is matched case-insensitively, so ``DATA.CSV`` is accepted too.

    Returns:
        A new list holding whatever the matching loader's ``load()`` returned.

    Raises:
        RuntimeError: If the extension is not one of the three supported ones.
    """
    # Compare against a lower-cased copy only; the loader still receives the
    # path exactly as given so the file is found on case-sensitive filesystems.
    lowered = filepath.lower()
    if lowered.endswith(".txt"):
        loader = TextLoader(filepath, autodetect_encoding=True)
    elif lowered.endswith(".csv"):
        loader = CSVLoader(filepath)
    elif lowered.endswith(".pdf"):
        loader = PyPDFLoader(filepath)
    else:
        raise RuntimeError("Unsupported file format. Use .txt, .csv, or .pdf")
    return list(loader.load())


def to_sentence_docs(docs: List[Document]) -> List[Document]:
    """Explode every document into one Document per sentence.

    Each produced Document carries the stripped sentence text and its own copy
    of the source metadata, extended with ``sent_index`` (position of the
    sentence inside its source) and ``total_sents`` (sentence count of the
    source).
    """
    result = []
    for source in docs:
        sentences = sent_tokenize(source.page_content)
        count = len(sentences)
        # Falsy metadata (None or an empty dict) starts from an empty mapping.
        base_meta = dict(source.metadata) if source.metadata else {}
        result.extend(
            Document(
                page_content=text.strip(),
                metadata={**base_meta, "sent_index": position, "total_sents": count},
            )
            for position, text in enumerate(sentences)
        )
    return result

def build_sentence_store(sentence_docs: List[Document]) -> FAISS:
    """Embed the given documents and index them in a FAISS vector store.

    The embedding model is the one named by the module-level EMBEDDING_MODEL.
    """
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    store = FAISS.from_documents(sentence_docs, embedder)
    return store


In [None]:
class HyDERetriever:
    """Retrieve documents by searching with a generated answer instead of the raw query.

    A text2text model first writes a hypothetical answer to the query; that
    text is then used as the search string against the vector store.
    """

    def __init__(self, store: FAISS):
        """Keep the vector store and load the flan-t5-large generation pipeline."""
        self.store = store
        self.generator = pipeline("text2text-generation", model="google/flan-t5-large")

    def generate_hypothetical_answer(self, query: str) -> str:
        """Return the model's greedy (non-sampled) completion for `query`.

        The length cap is passed as ``max_new_tokens``: when ``max_length`` is
        supplied as well as the pipeline's own ``max_new_tokens`` default, the
        library warns and ignores ``max_length``, so the intended 128-token
        limit never took effect.
        """
        result = self.generator(query, max_new_tokens=128, do_sample=False)
        return result[0]["generated_text"]

    def retrieve(self, query: str, k: int):
        """Search the store with the hypothetical answer for `query`.

        Args:
            query: The user's question.
            k: Number of hits to request from the store.

        Returns:
            A ``(hits, hypo)`` tuple: the store's
            ``similarity_search_with_score`` result and the generated
            hypothetical answer (returned unchanged, even when blank).
        """
        hypo = self.generate_hypothetical_answer(query)
        # A blank generation carries no signal; search with the query itself
        # rather than with an empty string.
        search_text = hypo if hypo.strip() else query
        return self.store.similarity_search_with_score(search_text, k=k), hypo


In [None]:
class Reranker:
    """Re-score retrieved documents against the query with a cross-encoder."""

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"):
        """Load the cross-encoder checkpoint named by `model_name`."""
        self.model = CrossEncoder(model_name)

    def rerank(self, query: str, docs: List[Document]):
        """Order `docs` by their cross-encoder score for `query`, best first.

        Args:
            query: The question each document is scored against.
            docs: Candidate documents; only `page_content` is read.

        Returns:
            A list of ``(document, score)`` tuples sorted by descending score.
            Documents with equal scores keep their input order. An empty
            input yields an empty list.
        """
        if not docs:
            # Nothing to score. This also avoids handing the model an empty
            # batch, which it may not accept — TODO confirm for the installed
            # sentence-transformers version.
            return []
        pairs = [(query, d.page_content) for d in docs]
        scores = self.model.predict(pairs)
        return sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)


In [None]:
# 1. Load dataset
# DATA_DIR holds the path of the single uploaded file, so this reads that file
# with whichever loader load_documents selects for its extension.
full_docs = load_documents(DATA_DIR)
def to_chunks(docs: List[Document], chunk_size: int = 3) -> List[Document]:
    """Split each document into chunks of up to `chunk_size` consecutive sentences.

    Args:
        docs: Documents whose `page_content` is sentence-tokenized.
        chunk_size: Maximum number of sentences per chunk; must be at least 1.

    Returns:
        One Document per chunk. The sentences of a chunk are joined with a
        single space, and the metadata is a copy of the source metadata plus
        ``chunk_start`` (index of the first sentence) and ``chunk_end``
        (index one past the last sentence actually included).

    Raises:
        ValueError: If `chunk_size` is smaller than 1.
    """
    if chunk_size < 1:
        # Zero would make range() fail obscurely and a negative step would
        # silently produce no chunks at all.
        raise ValueError("chunk_size must be a positive integer")
    chunk_docs = []
    for d in docs:
        sents = sent_tokenize(d.page_content)
        total = len(sents)
        for start in range(0, total, chunk_size):
            # Clamp so the final, shorter chunk does not report an end index
            # beyond the number of sentences.
            end = min(start + chunk_size, total)
            chunk = " ".join(sents[start:end])
            meta = dict(d.metadata) if d.metadata else {}
            meta.update({"chunk_start": start, "chunk_end": end})
            chunk_docs.append(Document(page_content=chunk.strip(), metadata=meta))
    return chunk_docs

# Group the loaded documents into 5-sentence chunks and index those chunks.
# Note that `sentence_store` therefore holds chunk embeddings, not
# single-sentence embeddings, despite its name.
chunk_docs = to_chunks(full_docs, chunk_size=5)
sentence_store = build_sentence_store(chunk_docs)


# 2. Define query
query = "Explain in detail what Skin Pigmentation Disorders are, including definition, causes, and examples."


# 3. HyDE retrieve
# retrieve() returns the (document, score) hits together with the hypothetical
# answer that was used as the search text; the scores are dropped here because
# the re-ranker computes its own.
hyde = HyDERetriever(sentence_store)
hits_with_scores, hypo = hyde.retrieve(query, TOPK_SENTENCE)
hyde_docs = [d for d, _ in hits_with_scores]

# 4. Re-rank
# The candidates are scored against the original query (not the hypothetical
# answer). `reranked` is a list of (document, score) pairs, best first.
reranker = Reranker(model_name="cross-encoder/ms-marco-electra-base")
reranked = reranker.rerank(query, hyde_docs)
from transformers import pipeline
# Reuse the flan-t5-large pipeline that HyDERetriever already loaded: it is the
# same task and checkpoint, so building a second copy only costs load time and
# memory without changing the (greedy, deterministic) output.
answer_generator = hyde.generator

# Use top 5 docs to generate a clean final answer
context = "\n".join([doc.page_content for doc, _ in reranked[:5]])
# NOTE(review): the instruction text below hard-codes the topic of the current
# `query`; keep the two in sync if the query is changed.
# The length cap is passed as `max_new_tokens` because `max_length` is ignored
# (with a warning) when the pipeline's own `max_new_tokens` default is present.
final_answer = answer_generator(
    f"Question: {query}\n\nUsing the context below, write a detailed answer that directly defines Skin Pigmentation Disorders, their causes, symptoms, and examples. Make sure to restate key terms from the question.\n\nContext:\n{context}",
    max_new_tokens=300,
    do_sample=False
)[0]['generated_text']


print("\n[Final Generated Answer]")
print(final_answer)

# 5. Show output
print("\n[Hypothetical Answer from HyDE]")
print(hypo)


def strip_question_lines(text):
    """Return `text` without lines that start with "question:" (any case), trimmed."""
    kept = [
        line for line in text.splitlines()
        if not line.strip().lower().startswith("question:")
    ]
    return "\n".join(kept).strip()


# Clean the five best re-ranked passages once and reuse them for both listings.
# Passages that are empty after cleaning are dropped, so the numbering in each
# listing stays consecutive.
top_passages = []
for doc, score in reranked[:5]:
    cleaned = strip_question_lines(doc.page_content)
    if cleaned:
        top_passages.append((cleaned, score))

print("\n[explanation] ")
for exp_id, (cleaned, _) in enumerate(top_passages, start=1):
    print(f"Explanation {exp_id}:")
    print(cleaned)


print("\n[Top 5 AFTER ReRank]")
for ans_id, (cleaned, score) in enumerate(top_passages, start=1):
    print(f"Answer {ans_id} (score={score:.4f}):")
    print(cleaned)
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sentence_transformers import SentenceTransformer, util
import torch

# -----------------------------
# 1. Load models (all free)
# -----------------------------
# For relevance / entailment
# Text-classification pipeline over an MNLI checkpoint; get_relevance() reads
# the top label and its score from the result.
nli_model = pipeline("text-classification", model="roberta-large-mnli")

# For semantic similarity (faithfulness & completeness)
# Same model id as EMBEDDING_MODEL, loaded here directly through
# sentence-transformers so get_similarity() can call encode() on it.
sbert_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# -----------------------------
# 2. Helper functions
# -----------------------------
def get_relevance(query, answer):
    """Score how relevant `answer` is to `query` using the NLI classifier.

    The query and the answer are joined with the ``</s></s>`` separator and
    classified. Only the top prediction is used.

    Returns:
        1.0 when the top label is ``ENTAILMENT`` (whatever its confidence);
        otherwise ``1 - score`` of the top (non-entailment) label.
    """
    # Truncate over-long inputs so a lengthy answer cannot exceed the model's
    # maximum sequence length and abort the evaluation.
    result = nli_model(f"{query} </s></s> {answer}", truncation=True)
    top = result[0]
    if top['label'] == "ENTAILMENT":
        return 1.0
    return 1 - top['score']

def get_similarity(a, b):
    """Return the cosine similarity between the sentence embeddings of `a` and `b`.

    Both texts are encoded separately with `sbert_model`; the single value in
    the similarity result is extracted with ``.item()``.
    """
    vec_a, vec_b = (
        sbert_model.encode(text, convert_to_tensor=True) for text in (a, b)
    )
    return util.cos_sim(vec_a, vec_b).item()

def evaluate(query, answer, retrieved_docs):
    """Return Faithfulness, Completeness and Relevance scores for an answer.

    Args:
        query: The question that was asked.
        answer: The text being scored.
        retrieved_docs: Iterable of evidence texts the answer is compared with.

    Returns:
        A dict with three values rounded to 3 decimals:
        ``Faithfulness`` is the highest embedding similarity between the answer
        and any retrieved text (0.0 when there are none), ``Completeness`` is
        the embedding similarity between the answer and the query, and
        ``Relevance`` is the NLI-based score from get_relevance().
    """
    # Faithfulness: answer vs retrieved docs. `default` keeps an empty
    # retrieval result from raising ValueError inside max().
    faithfulness = max(
        (get_similarity(answer, doc) for doc in retrieved_docs),
        default=0.0,
    )

    # Completeness: answer vs query
    # NOTE(review): this measures closeness to the query text, which is a proxy
    # at best for how complete the answer is.
    completeness = get_similarity(answer, query)

    # Relevance: query vs answer (NLI)
    relevance = get_relevance(query, answer)

    return {
        "Faithfulness": round(faithfulness, 3),
        "Completeness": round(completeness, 3),
        "Relevance": round(relevance, 3)
    }
# Texts of the five best re-ranked passages: the evidence set for every
# evaluate() call below, built once instead of on each iteration.
top_texts = [d.page_content for d, _ in reranked[:5]]

# Score each cleaned passage on its own. The results are only collected in
# `all_scores`; nothing is printed for them.
# NOTE(review): each passage is itself part of `top_texts`, so its Faithfulness
# is presumably close to 1.0 — confirm these per-passage scores are still wanted.
all_scores = []

for doc, score in reranked[:5]:
    cleaned = "\n".join(
        line for line in doc.page_content.splitlines()
        if not line.strip().lower().startswith("question:")
    ).strip()

    if cleaned:
        all_scores.append(evaluate(query, cleaned, top_texts))

# Printed once; this header used to be emitted both before and after the loop.
print("\n[Overall Evaluation Scores]")

eval_scores = evaluate(query, final_answer, top_texts)

print(f"Faithfulness: {eval_scores['Faithfulness']:.3f}")
print(f"Completeness: {eval_scores['Completeness']:.3f}")
print(f"Relevance:    {eval_scores['Relevance']:.3f}")



Device set to use cuda:0
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Device set to use cuda:0
Both `max_new_tokens` (=256) and `max_length`(=300) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



[Final Generated Answer]
Skin pigmentation disorders affect the color of your skin. Your skin gets its color from a pigment called melanin. Special cells in the skin make melanin. When these cells become damaged or unhealthy, it affects melanin production. Some pigmentation disorders affect just patches of skin. Others affect your entire body. If your body makes too little melanin, your skin gets lighter. Vitiligo is a condition that causes patches of light skin. Albinism is a genetic condition affecting a person's skin. A person with albinism may have no color, lighter than normal skin color, or patchy missing skin color.

[Hypothetical Answer from HyDE]
Skin Pigmentation Disorders Skin Pigmentation Disorders may refer to:

[explanation] 
Explanation 1:
When these cells become damaged or unhealthy, it affects melanin production. Some pigmentation disorders affect just patches of skin. Others affect your entire body. If your body makes too much melanin, your skin gets darker. Pregnanc

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0



[Overall Evaluation Scores]

[Overall Evaluation Scores]
Faithfulness: 0.852
Completeness: 0.612
Relevance:    0.300
