In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
! pip install torch
! pip install transformers
! pip install langchain_community
! pip install langchain
! pip install langchain-huggingface
! pip install langchain_experimental
! pip install langchain_chroma
! pip install langchainhub
! pip install unstructured

In [None]:
#!pip install flask flask-cors pyngrok

In [None]:
#!ngrok config add-authtoken "$NGROK_AUTHTOKEN"  # SECURITY: literal token redacted - revoke the leaked one and load it from an environment variable / Kaggle secret

In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader

# Source pages for the knowledge base, one URL per line so changes diff cleanly.
urls = [
    'https://en.wikipedia.org/wiki/Menstruation',
    'https://kidshealth.org/en/teens/menstruation.html',
    'https://www.nhs.uk/conditions/periods/',
    'https://my.clevelandclinic.org/health/articles/10132-menstrual-cycle',
    'https://www.thewomens.org.au/health-information/periods/periods-overview/about-periods',
    'https://www.mayoclinic.org/healthy-lifestyle/womens-health/in-depth/menstrual-cycle/art-20047186',
    'https://www.mayoclinic.org/diseases-conditions/menopause/symptoms-causes/syc-20353397',
    'https://en.wikipedia.org/wiki/Menopause',
    'https://www.nia.nih.gov/health/menopause/what-menopause',
    'https://en.wikipedia.org/wiki/Ovulation',
    'https://www.westsuburbanmc.com/the-role-of-hormones-in-the-menstrual-cycle',
    'https://www.wikihow.com/Enjoy-Periods',
    'https://www.healthline.com/health/womens-health/what-to-eat-during-period',
    'https://www.webmd.com/women/ss/slideshow-women-superfoods',
    'https://my.clevelandclinic.org/health/articles/23439-ovulation',
    'https://en.wikipedia.org/wiki/Pregnancy',
    'https://my.clevelandclinic.org/health/articles/pregnancy',
    'https://www.nhs.uk/start-for-life/pregnancy/',
    'https://my.clevelandclinic.org/health/articles/9677-fetal-positions-for-birth',
]

# Fetch and parse every page into LangChain documents (requires network access).
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

# Print a compact per-document summary instead of dumping the full text of every page.
# NOTE(review): fewer documents than URLs presumably means some pages failed to load - confirm
# against the loader's log output.
print(f"Loaded {len(data)} document(s) from {len(urls)} URL(s)")
for loaded_doc in data:
    print(f"{len(loaded_doc.page_content):>8} chars  {loaded_doc.metadata.get('source', '<unknown source>')}")

In [None]:
! pip install langchain langchain_chroma sentence-transformers


In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from sentence_transformers import CrossEncoder

## Text splitting 

In [None]:

# Break the loaded pages into overlapping chunks (chunk_size=1000, chunk_overlap=100).
splitter_options = {"chunk_size": 1000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(data)
chunk_count = len(docs)
print("Total number of documents:", chunk_count)

## Embedding

In [None]:
# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no model name is passed, so the library's default embedding model is used -
# consider naming it explicitly so results do not drift between library versions.
embedding_model = HuggingFaceEmbeddings()
# NOTE(review): no persist directory is given, so the index presumably lives in memory only;
# re-running this cell in the same kernel may index the same chunks again - confirm.
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding_model)
# Similarity retriever configured to return 10 chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":10})

## Cross-Encoder Re-Ranking

In [None]:
# Cross-encoder that scores (query, passage) pairs; cross_encoder_rerank reads it as a module-level global.
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def cross_encoder_rerank(query, retrieved_docs, top_n=3):
    """Re-rank retrieved documents with the module-level cross-encoder ``reranker``.

    Args:
        query: The question every document is scored against.
        retrieved_docs: Iterable of documents exposing a ``page_content`` string
            attribute (for example the output of ``retriever.invoke``).
        top_n: Maximum number of documents to return.

    Returns:
        A list of at most ``top_n`` documents, ordered from the highest to the
        lowest cross-encoder score. Empty input yields an empty list.
    """
    # Materialise once: the documents are needed both for scoring and for pairing
    # with their scores, which would silently fail for a one-shot iterator.
    candidates = list(retrieved_docs)
    if not candidates:
        # Nothing to score, so skip the model call entirely.
        return []

    # One (query, passage) pair per candidate, scored in a single batch.
    pairs = [(query, doc.page_content) for doc in candidates]
    scores = reranker.predict(pairs)

    # Higher score means more relevant.
    ranked = sorted(zip(candidates, scores), key=lambda item: item[1], reverse=True)
    return [doc for doc, _ in ranked[:top_n]]

# Smoke test: fetch candidates for one sample question, then re-rank them.
query = "What is menstruation?"
retrieved_docs = retriever.invoke(query)
reranked_docs = cross_encoder_rerank(query, retrieved_docs)

# Show a 500-character preview of every re-ranked passage.
for i, doc in enumerate(reranked_docs):
    heading = f"\n### Document {i+1} ###\n"
    preview = doc.page_content[:500]
    print(heading)
    print(preview, "...")


## RAG Pipeline

In [None]:
'''
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFacePipeline
from langchain_core.output_parsers import StrOutputParser
from transformers import pipeline
import torch
model_id = "microsoft/phi-2"

text_generation_pipeline = pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    max_new_tokens=200,
    device=0,
    return_full_text=False,
    do_sample=False,
    eos_token_id=50256
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Structured prompt to enforce concise Q&A format
prompt_template = """
You are an expert on menstruation. Based on the following context, answer the question accurately and concisely. Do NOT include extra information. Do NOT generate unrelated texts.


Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create LLM chain
llm_chain = prompt | llm | StrOutputParser()

# RAG Pipeline Function
def retrieve_and_answer(question):
    # Retrieve top documents from Chroma
    retrieved_docs = retriever.invoke(question)
    reranked_docs = cross_encoder_rerank(question, retrieved_docs)
    context = "\n".join([doc.page_content for doc in reranked_docs])

    # Invoke LLM
    response = llm_chain.invoke({"context": context, "question": question})

    return response

context_1 = """
Menstruation is the shedding of the uterine lining when pregnancy does not occur. It is regulated by estrogen and progesterone.
"""
question_1 = "How do I relive menstrual cramps?"

context_2 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_2 = "Why do some people experience pain during menstruation?"

# Run the chain with Menstruation-related input
response_1 = llm_chain.invoke({"context": context_1, "question": question_1})
response_2 = llm_chain.invoke({"context": context_2, "question": question_2})

# Print Outputs
print("Answer 1:", response_1)
print("Answer 2:", response_2)
'''


In [None]:
!pip install transformers peft bitsandbytes accelerate

In [None]:
!pip install git+https://github.com/PanQiWei/AutoGPTQ.git
!pip install optimum

In [None]:
#!pip install --upgrade pip
#!pip install git+https://github.com/PanQiWei/AutoGPTQ.git
#!pip install optimum

In [None]:
!pip install --upgrade transformers accelerate
!pip install --upgrade optimum auto-gptq

In [None]:
#!pip uninstall transformers -y
#!pip uninstall huggingface-hub -y

In [None]:
from langchain_core.output_parsers import StrOutputParser

In [None]:
from langchain.prompts import PromptTemplate

In [None]:
!pip install auto-gptq --quiet  # Run only once

In [None]:
import torch

In [None]:
print(torch.cuda.is_available())  # should be True
print(torch.cuda.device_count())

In [None]:
pip install --upgrade transformers

In [None]:
!pip uninstall -y transformers huggingface_hub
!pip install transformers==4.37.2 huggingface_hub

In [None]:
!pip install tensorflow

In [None]:
pip install -U transformers

In [None]:
'''
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import HuggingFacePipeline

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# QLoRA model - use a quantized fine-tuned model
model_id = "TheBloke/phi-2-GPTQ"  # Example QLoRA model (change to your fine-tuned one)

# Load model and tokenizer (QLoRA is quantized, so use `AutoModelForCausalLM`)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Text generation pipeline
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=200,
    do_sample=False,
    eos_token_id=50256,
)

# Wrap pipeline into LangChain
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Prompt template remains unchanged
prompt_template = """
### Instruction:
Explain what causes menstruation in humans in a concise and factual way.

### Response:
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
'''
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# `langchain.llms` is a deprecated re-export; import the wrapper from langchain_community,
# which this notebook already depends on.
from langchain_community.llms import HuggingFacePipeline
from auto_gptq import AutoGPTQForCausalLM
import torch

# GPTQ-quantized Mistral-7B-Instruct checkpoint. The organisation is spelled "TheBloke",
# as in the other model ids in this notebook; another GPTQ repo such as
# 'TheBloke/phi-2-GPTQ' can be substituted here.
model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# Load the tokenizer and the quantized weights.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    device_map={"": 0},  # place the whole model on GPU 0 (a T4 per the original author's note)
    use_safetensors=True,
    trust_remote_code=True,  # NOTE(review): allows code shipped with the model repo to run - keep only for trusted repos
    torch_dtype=torch.float16,
    revision="main"
)

# Text-generation pipeline. Sampling is enabled (temperature 0.8, top_p 0.95) and no seed
# is set in this cell, so answers can differ between runs.
text_gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.8,
    top_p=0.95,
    repetition_penalty=1.1,
    return_full_text=False
)

# Expose the pipeline as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

# Question-answering prompt with two variables: the grounding context and the question.
prompt_template = """
You are an expert on menstruation. Based on the following context, answer the question accurately and concisely. Do NOT include extra information. Do NOT generate unrelated texts.
Context:
{context}
Question:
{question}
Answer:
"""
# PromptTemplate and StrOutputParser are imported in earlier cells of this notebook.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# prompt -> LLM -> plain string output.
llm_chain = prompt | llm | StrOutputParser()
def retrieve_and_answer(question):
    """Answer ``question`` with retrieval-augmented generation.

    Fetches candidate chunks from the module-level ``retriever``, re-ranks them
    with ``cross_encoder_rerank`` and passes the newline-joined passages to
    ``llm_chain`` as the prompt context.

    Returns:
        Whatever ``llm_chain.invoke`` returns for the assembled inputs.
    """
    candidates = retriever.invoke(question)
    best_docs = cross_encoder_rerank(question, candidates)
    passages = (doc.page_content for doc in best_docs)
    chain_inputs = {"context": "\n".join(passages), "question": question}
    return llm_chain.invoke(chain_inputs)
# Sanity check of the prompt + LLM chain using hand-written contexts (retrieval is bypassed).
context_1 = """
Menstruation is the shedding of the uterine lining when pregnancy does not occur. It is regulated by estrogen and progesterone.
"""
# Typo fixed ("relive" -> "relieve") so the model is asked the intended question.
question_1 = "How do I relieve menstrual cramps?"
context_2 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_2 = "Why do some people experience pain during menstruation?"
# Run the chain on both context/question pairs.
response_1 = llm_chain.invoke({"context": context_1, "question": question_1})
response_2 = llm_chain.invoke({"context": context_2, "question": question_2})
# Print the answers.
print("Answer 1:", response_1)
print("Answer 2:", response_2)

In [None]:
# Probe 3: the hand-written context below does not cover the question, so the answer is not
# grounded in it (retrieve_and_answer(question) gives a retrieval-backed answer instead).
# Dedicated *_3 names keep the earlier context_2 / question_2 pair intact.
context_3 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_3 = "How do I insert a menstrual cup?"

response_3 = llm_chain.invoke({"context": context_3, "question": question_3})
# Label now matches the response being printed.
print("Answer 3:", response_3)

In [None]:
# Probe 4: the fixed context is about menstrual pain and does not cover the question.
context_4 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_4 = "Why do some pregnant people experience back pain in the second trimester?"

response_4 = llm_chain.invoke({"context": context_4, "question": question_4})
# Label now matches the response being printed.
print("Answer 4:", response_4)

In [None]:
# Probe 4 (second variant): reuses the context_4 / question_4 / response_4 names, replacing
# the values set by the previous cell. The context does not cover the question.
context_4 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_4 = "What are the symptoms of pre menopause symptoms?"

response_4 = llm_chain.invoke({"context": context_4, "question": question_4})
# Label now matches the response being printed.
print("Answer 4:", response_4)

In [None]:
# Probe 5: the fixed context is about menstrual pain and does not cover the question.
context_5 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_5 = "How to reduce hormonal acne?"

response_5 = llm_chain.invoke({"context": context_5, "question": question_5})
# Label now matches the response being printed.
print("Answer 5:", response_5)

In [None]:
# Probe 5 (second variant): reuses the context_5 / question_5 / response_5 names, replacing
# the values set by the previous cell. The context does not cover the question.
context_5 = """
Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it.
"""
question_5 = "What are some premenopause symptoms?"

response_5 = llm_chain.invoke({"context": context_5, "question": question_5})
# Label now matches the response being printed.
print("Answer 5:", response_5)

In [None]:
'''
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)

# Handle preflight requests
@app.before_request
def handle_preflight():
    if request.method == "OPTIONS":
        return jsonify({"message": "Preflight OK"}), 200

@app.route('/ask', methods=['POST'])
def ask():
    try:
        data = request.get_json()
        question = data.get("question")

        if not question:
            return jsonify({"error": "No question provided"}), 400

        # Placeholder context
        context = """
        Menstruation is the shedding of the uterine lining when pregnancy does not occur.
        It is regulated by estrogen and progesterone.
        """

        # Get response from LLM
        response = llm_chain.invoke({"context": context, "question": question, "stop": ["\n\n"]})

        return jsonify({"answer": response}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# Start ngrok tunnel
public_url = ngrok.connect(5000).public_url
print(f"Public API URL: {public_url}")

# Run Flask server
app.run(port=5000)
'''

In [None]:
# Ad-hoc probe with a fixed context that does not cover the question.
context = "Menstruation is regulated by hormones like estrogen and progesterone, which control the shedding of the uterine lining."
# Typo fixed ("mensturate" -> "menstruate") so the model is asked the intended question.
question = "Which are the other mammals that menstruate?"

# NOTE(review): "stop" is passed as a chain input, but the prompt template in this notebook
# only declares "context" and "question"; it presumably never reaches the LLM as a stop
# sequence - confirm, and bind stop sequences on the LLM if truncation is intended.
output = llm_chain.invoke({"context": context, "question": question,"stop": ["\n\n"] })
print(output)

In [None]:
# Ad-hoc probe with a fixed context that does not cover the question.
context = "Menstruation is regulated by hormones like estrogen and progesterone, which control the shedding of the uterine lining."
question="Recommend some foods to eat during pregnancy?"
# NOTE(review): "stop" is passed as a chain input, but the prompt template in this notebook
# only declares "context" and "question"; it presumably never reaches the LLM as a stop
# sequence - confirm.
output = llm_chain.invoke({"context": context, "question": question,"stop": ["\n\n"] })
print(output)

In [None]:
# Ad-hoc probe with a fixed context that does not cover the question.
context = "Menstruation is regulated by hormones like estrogen and progesterone, which control the shedding of the uterine lining."
question="What is cervical cancer and how can I be more aware of it?"
# NOTE(review): "stop" is passed as a chain input, but the prompt template in this notebook
# only declares "context" and "question"; it presumably never reaches the LLM as a stop
# sequence - confirm.
output = llm_chain.invoke({"context": context, "question": question,"stop": ["\n\n"] })
print(output)

In [None]:
# Ad-hoc probe with a fixed context that does not cover the question.
context = "Menstruation is regulated by hormones like estrogen and progesterone, which control the shedding of the uterine lining."
question="Can hormone replacement therapy (HRT) relieve menopausal symptoms?"
# NOTE(review): "stop" is passed as a chain input, but the prompt template in this notebook
# only declares "context" and "question"; it presumably never reaches the LLM as a stop
# sequence - confirm.
output = llm_chain.invoke({"context": context, "question": question,"stop": ["\n\n"] })
print(output)

In [None]:
# Ad-hoc probe with a fixed context that does not cover the question.
context = "Menstruation is regulated by hormones like estrogen and progesterone, which control the shedding of the uterine lining."
question="how to get rid of acne?"
# NOTE(review): "stop" is passed as a chain input, but the prompt template in this notebook
# only declares "context" and "question"; it presumably never reaches the LLM as a stop
# sequence - confirm.
output = llm_chain.invoke({"context": context, "question": question, "stop": ["\n\n"]})
print(output)

In [None]:
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu

In [None]:

def evaluate_ranking_metrics(query, retrieved_docs, relevant_docs, top_k=3):
    """Score the re-ranked top-``top_k`` documents against exact-match ground truth.

    A retrieved document counts as relevant only when its ``page_content`` is
    exactly equal to one of the strings in ``relevant_docs``.

    NOTE: this function is defined again, identically, in a later cell of the
    notebook; whichever cell ran last is the definition in effect.

    Args:
        query: Question used for re-ranking.
        retrieved_docs: Candidate documents exposing ``page_content``.
        relevant_docs: Collection of ground-truth passage strings.
        top_k: Number of re-ranked documents to evaluate.

    Returns:
        ``(mrr, precision, recall)`` where
        * ``mrr`` is the reciprocal rank of the first relevant document (0.0 if none),
        * ``precision`` is relevant retrieved documents / ``top_k``,
        * ``recall`` is distinct relevant passages retrieved / distinct relevant passages.
        A zero denominator yields 0.0 instead of raising ``ZeroDivisionError``.
    """
    reranked_docs = cross_encoder_rerank(query, retrieved_docs, top_n=top_k)
    reranked_texts = [doc.page_content for doc in reranked_docs]

    # Set membership is O(1); the flags are computed once and reused by all three metrics.
    relevant_set = set(relevant_docs)
    hit_flags = [text in relevant_set for text in reranked_texts]

    # Reciprocal rank of the first hit.
    mrr = next((1 / rank for rank, hit in enumerate(hit_flags, start=1) if hit), 0.0)

    # Precision@k
    precision = sum(hit_flags) / top_k if top_k > 0 else 0.0

    # Recall@k counts each relevant passage once, so duplicate chunks cannot push it above 1.
    matched = {text for text, hit in zip(reranked_texts, hit_flags) if hit}
    recall = len(matched) / len(relevant_set) if relevant_set else 0.0

    print(f"MRR: {mrr:.4f}, Precision@{top_k}: {precision:.4f}, Recall@{top_k}: {recall:.4f}")
    return mrr, precision, recall

# BLEU Score Calculation
def evaluate_bleu(generated_answer, reference_answer):
    """Print and return the sentence-level BLEU of an answer against one reference.

    Both strings are tokenized with ``nltk.word_tokenize``; no smoothing
    function is passed to ``sentence_bleu``.
    """
    bleu_score = sentence_bleu(
        [nltk.word_tokenize(reference_answer)],  # a list holding the single reference
        nltk.word_tokenize(generated_answer),
    )
    print(f"BLEU Score: {bleu_score:.4f}")
    return bleu_score

# Example questions scored with BLEU against hand-written reference answers.
# Each retrieve_and_answer call receives only its own question, so the follow-up's
# "they" / "the pain" is not linked to the first question.
answer = retrieve_and_answer("Why do some people experience pain during menstruation?")
evaluate_bleu(answer, "Menstrual cramps are caused by uterine contractions triggered by prostaglandins.")

answer = retrieve_and_answer("How can they reduce the pain?")
evaluate_bleu(answer, "Pain relief can be achieved through medications, heat therapy, and lifestyle changes.")


In [None]:
# BLEU Score Calculation
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def evaluate_ranking_metrics(query, retrieved_docs, relevant_docs, top_k=3):
    """Score the re-ranked top-``top_k`` documents against exact-match ground truth.

    A retrieved document counts as relevant only when its ``page_content`` is
    exactly equal to one of the strings in ``relevant_docs``.

    NOTE: an identical definition also exists in an earlier cell of the
    notebook; whichever cell ran last is the definition in effect.

    Args:
        query: Question used for re-ranking.
        retrieved_docs: Candidate documents exposing ``page_content``.
        relevant_docs: Collection of ground-truth passage strings.
        top_k: Number of re-ranked documents to evaluate.

    Returns:
        ``(mrr, precision, recall)`` where
        * ``mrr`` is the reciprocal rank of the first relevant document (0.0 if none),
        * ``precision`` is relevant retrieved documents / ``top_k``,
        * ``recall`` is distinct relevant passages retrieved / distinct relevant passages.
        A zero denominator yields 0.0 instead of raising ``ZeroDivisionError``.
    """
    reranked_docs = cross_encoder_rerank(query, retrieved_docs, top_n=top_k)
    reranked_texts = [doc.page_content for doc in reranked_docs]

    # Set membership is O(1); the flags are computed once and reused by all three metrics.
    relevant_set = set(relevant_docs)
    hit_flags = [text in relevant_set for text in reranked_texts]

    # Reciprocal rank of the first hit.
    mrr = next((1 / rank for rank, hit in enumerate(hit_flags, start=1) if hit), 0.0)

    # Precision@k
    precision = sum(hit_flags) / top_k if top_k > 0 else 0.0

    # Recall@k counts each relevant passage once, so duplicate chunks cannot push it above 1.
    matched = {text for text, hit in zip(reranked_texts, hit_flags) if hit}
    recall = len(matched) / len(relevant_set) if relevant_set else 0.0

    print(f"MRR: {mrr:.4f}, Precision@{top_k}: {precision:.4f}, Recall@{top_k}: {recall:.4f}")
    return mrr, precision, recall

def evaluate_bleu(generated_answer, reference_answer):
    """Print and return the smoothed sentence-level BLEU of an answer against one reference.

    Both strings are tokenized with ``nltk.word_tokenize``; ``SmoothingFunction().method4``
    is passed to ``sentence_bleu`` as the smoothing function.
    """
    bleu_score = sentence_bleu(
        [nltk.word_tokenize(reference_answer)],  # a list holding the single reference
        nltk.word_tokenize(generated_answer),
        smoothing_function=SmoothingFunction().method4,
    )
    print(f"BLEU Score (smoothed): {bleu_score:.4f}")
    return bleu_score

# Example questions scored with smoothed BLEU against hand-written reference answers.
# Each retrieve_and_answer call receives only its own question, so the follow-up's
# "they" / "the pain" is not linked to the first question.
answer = retrieve_and_answer("Why do some people experience pain during menstruation?")
evaluate_bleu(answer, "Menstrual cramps are caused by uterine contractions triggered by prostaglandins.")

answer = retrieve_and_answer("How can they reduce the pain?")
evaluate_bleu(answer, "Pain relief can be achieved through medications, heat therapy, and lifestyle changes.")


In [None]:

from difflib import SequenceMatcher

def similar(a, b):
    """Return difflib's ``SequenceMatcher`` similarity ratio of ``a`` and ``b`` (1.0 = identical)."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()

def evaluate_ranking_metrics_debug(query, retrieved_docs, relevant_docs, top_k=3, threshold=0.12):
    """Fuzzy-match variant of the ranking metrics that prints its intermediate data.

    Texts are stripped and lower-cased, then compared with ``similar`` (difflib
    ratio). A retrieved text is a hit when its similarity to any relevant text
    exceeds ``threshold``.

    Args:
        query: Question used for re-ranking.
        retrieved_docs: Candidate documents exposing ``page_content``.
        relevant_docs: Ground-truth passage strings.
        top_k: Number of re-ranked documents to evaluate.
        threshold: Similarity a pair must exceed to count as a match. The default
            0.12 is the value previously hard-coded in the function.

    Returns:
        ``(mrr, precision, recall)``. Recall is the share of relevant texts matched
        by at least one retrieved text, so it never exceeds 1. A zero denominator
        yields 0.0 instead of raising ``ZeroDivisionError``.
    """
    reranked_docs = cross_encoder_rerank(query, retrieved_docs, top_n=top_k)

    # Normalise both sides the same way before comparing.
    reranked_texts = [doc.page_content.strip().lower() for doc in reranked_docs]
    relevant_texts = [doc.strip().lower() for doc in relevant_docs]

    print("\n--- Debug: Retrieved Docs ---")
    for doc in reranked_texts:
        print(f"Retrieved: {doc}")

    print("\n--- Debug: Relevant Docs ---")
    for doc in relevant_texts:
        print(f"Relevant: {doc}")

    # Score every (retrieved, relevant) pair exactly once; the matrix feeds both the
    # debug print-out and all three metrics.
    scores = [[similar(ret_doc, rel_doc) for rel_doc in relevant_texts] for ret_doc in reranked_texts]

    print("\n--- Debug: Similarity Scores ---")
    for ret_doc, row in zip(reranked_texts, scores):
        for rel_doc, score in zip(relevant_texts, row):
            print(f"Similarity({ret_doc[:50]}..., {rel_doc[:50]}...) = {score:.4f}")

    # Row-wise: does this retrieved text match anything? Column-wise: was this relevant text found?
    retrieved_hit = [any(score > threshold for score in row) for row in scores]
    relevant_hit = [any(row[col] > threshold for row in scores) for col in range(len(relevant_texts))]

    # Reciprocal rank of the first hit.
    mrr = next((1 / rank for rank, hit in enumerate(retrieved_hit, start=1) if hit), 0.0)

    # Precision@k
    precision = sum(retrieved_hit) / top_k if top_k > 0 else 0.0

    # Recall@k
    recall = sum(relevant_hit) / len(relevant_texts) if relevant_texts else 0.0

    print(f"\nMRR: {mrr:.4f}, Precision@{top_k}: {precision:.4f}, Recall@{top_k}: {recall:.4f}")
    return mrr, precision, recall


In [None]:
from sentence_transformers import SentenceTransformer, util

# NOTE(review): this rebinds the global name `model`, which an earlier cell assigns to the
# GPTQ language model. A distinct name would avoid the clash, but evaluate_ranking_metrics_3
# reads this global and would need the same rename.
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")  # Small, fast embedding model

def evaluate_ranking_metrics_3(query, retrieved_docs, relevant_docs, top_k=3, threshold=0.7):
    """Embedding-based variant of the ranking metrics.

    Texts are stripped and lower-cased, embedded with the module-level ``model``
    and compared with ``util.pytorch_cos_sim``. A retrieved text is a hit when its
    similarity to any relevant text exceeds ``threshold``.

    Args:
        query: Question used for re-ranking.
        retrieved_docs: Candidate documents exposing ``page_content``.
        relevant_docs: Ground-truth passage strings.
        top_k: Number of re-ranked documents to evaluate.
        threshold: Similarity a pair must exceed to count as a match. The default
            0.7 is the value previously hard-coded in the function.

    Returns:
        ``(mrr, precision, recall)``. Recall is the share of relevant texts matched
        by at least one retrieved text, so it never exceeds 1. Empty inputs or a
        zero denominator yield 0.0 instead of raising.
    """
    reranked_docs = cross_encoder_rerank(query, retrieved_docs, top_n=top_k)

    # Normalise both sides the same way before embedding.
    reranked_texts = [doc.page_content.strip().lower() for doc in reranked_docs]
    relevant_texts = [doc.strip().lower() for doc in relevant_docs]

    if not reranked_texts or not relevant_texts:
        # Nothing to compare: report zeros rather than encoding empty batches.
        mrr, precision, recall = 0.0, 0.0, 0.0
        print(f"\nMRR: {mrr:.4f}, Precision@{top_k}: {precision:.4f}, Recall@{top_k}: {recall:.4f}")
        return mrr, precision, recall

    # Encode the texts as vectors.
    retrieved_embeddings = model.encode(reranked_texts, convert_to_tensor=True)
    relevant_embeddings = model.encode(relevant_texts, convert_to_tensor=True)

    # Similarity matrix: one row per retrieved text, one column per relevant text.
    similarity_scores = util.pytorch_cos_sim(retrieved_embeddings, relevant_embeddings)

    # Threshold the small matrix once and reuse it for every metric.
    above = [[float(score) > threshold for score in row] for row in similarity_scores]
    retrieved_hit = [any(row) for row in above]
    relevant_hit = [any(column) for column in zip(*above)]

    # Reciprocal rank of the first hit.
    mrr = next((1 / rank for rank, hit in enumerate(retrieved_hit, start=1) if hit), 0.0)

    # Precision@k
    precision = sum(retrieved_hit) / top_k if top_k > 0 else 0.0

    # Recall@k
    recall = sum(relevant_hit) / len(relevant_texts)

    print(f"\nMRR: {mrr:.4f}, Precision@{top_k}: {precision:.4f}, Recall@{top_k}: {recall:.4f}")
    return mrr, precision, recall

In [None]:
'''
def retrieve_documents(question):
    """Retrieve and rerank documents without invoking the LLM."""
    retrieved_docs = retriever.vectorstore.similarity_search(question, k=10)
    reranked_docs = cross_encoder_rerank(question, retrieved_docs)  # Rerank with cross-encoder
    return reranked_docs  # Return only the documents, not the LLM response

# Test Setup
query = "What causes period pain?"

# Get retrieved and reranked documents (BEFORE calling LLM)
retrieved_docs = retrieve_documents(query)

# Define relevant documents (ground truth)
relevant_docs = [
    "Menstrual pain is caused by uterine contractions due to prostaglandins.",
    "Some women feel cramps due to high levels of inflammation."
]

# Run the ranking evaluation
evaluate_ranking_metrics_2(query, retrieved_docs, relevant_docs)
'''

In [None]:
'''
def retrieve_documents(question):
    """Retrieve and rerank documents without invoking the LLM."""
    retrieved_docs = retriever.vectorstore.similarity_search(question, k=10)
    reranked_docs = cross_encoder_rerank(question, retrieved_docs)  # Rerank with cross-encoder
    return reranked_docs  # Return only the documents, not the LLM response

# Test Setup
query = "What are common symptoms of PMS?"

# Get retrieved and reranked documents (BEFORE calling LLM)
retrieved_docs = retrieve_documents(query)

# Define relevant documents (ground truth)
relevant_docs = [
    "Premenstrual syndrome (PMS) can cause mood swings, bloating, and fatigue.",
    "Common PMS symptoms include headaches, breast tenderness, and irritability.",
    "Many people experience cramps, food cravings, and difficulty sleeping before their period."
]

# Run the ranking evaluation
evaluate_ranking_metrics_debug(query, retrieved_docs, relevant_docs)
'''

In [None]:
def retrieve_documents(question):
    """Return the cross-encoder re-ranked passages for ``question`` without calling the LLM.

    Pulls 10 similar chunks straight from the retriever's vector store and hands
    them to ``cross_encoder_rerank`` with its default ``top_n``.
    """
    candidates = retriever.vectorstore.similarity_search(question, k=10)
    return cross_encoder_rerank(question, candidates)

# Ranking evaluation for a PMS question.
query = "What are common symptoms of PMS?"

# retrieve_documents already re-ranks, so `retrieved_docs` holds the re-ranked passages
# (no LLM call is involved).
retrieved_docs = retrieve_documents(query)

# Hand-written reference sentences used as ground truth.
relevant_docs = [
    "Premenstrual syndrome (PMS) can cause mood swings, bloating, and fatigue.",
    "Common PMS symptoms include headaches, breast tenderness, and irritability.",
    "Many people experience cramps, food cravings, and difficulty sleeping before their period."
]

# Run the embedding-based evaluation. evaluate_ranking_metrics_3 re-ranks its input again,
# so the passages go through the cross-encoder a second time here.
evaluate_ranking_metrics_3(query, retrieved_docs, relevant_docs)

In [None]:
def retrieve_documents(question):
    """Return the cross-encoder re-ranked passages for ``question`` without calling the LLM.

    Pulls 10 similar chunks straight from the retriever's vector store and hands
    them to ``cross_encoder_rerank`` with its default ``top_n``.
    """
    candidates = retriever.vectorstore.similarity_search(question, k=10)
    return cross_encoder_rerank(question, candidates)

# Ranking evaluation for a question about foods and period cramps.
query = "What foods help with period cramps?"

# retrieve_documents already re-ranks, so `retrieved_docs` holds the re-ranked passages
# (no LLM call is involved).
retrieved_docs = retrieve_documents(query)

# Hand-written reference sentences used as ground truth.
relevant_docs = [
    "Foods rich in magnesium, like bananas and spinach, can help relax muscles and reduce cramps.",
    "Drinking ginger tea may help relieve menstrual pain due to its anti-inflammatory properties.",
    "Dark chocolate contains antioxidants that can help reduce period discomfort."
]

# Run the embedding-based evaluation. evaluate_ranking_metrics_3 re-ranks its input again,
# so the passages go through the cross-encoder a second time here.
evaluate_ranking_metrics_3(query, retrieved_docs, relevant_docs)

In [None]:
import torch

# Report torch.cuda.memory_allocated() converted from bytes to MB (divided by 1e6).
print(torch.cuda.memory_allocated() / 1e6, "MB")

In [None]:
# Explicit imports make it clear where each NVML name comes from.
from pynvml import nvmlDeviceGetHandleByIndex, nvmlDeviceGetPowerUsage, nvmlInit, nvmlShutdown

# One-off reading of the current power draw of GPU 0.
nvmlInit()
try:
    handle = nvmlDeviceGetHandleByIndex(0)
    power = nvmlDeviceGetPowerUsage(handle) / 1000  # mW to W
    print(f"Power: {power} Watts")
finally:
    # Always release NVML, even when the query above fails.
    nvmlShutdown()

In [None]:
import time

context = "Menstruation is regulated by hormones like estrogen and progesterone, which control the shedding of the uterine lining."
question = "How to reduce hormonal acne?"

# perf_counter is a monotonic, high-resolution clock meant for measuring intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start_time = time.perf_counter()

# Model inference
# NOTE(review): "stop" is passed as a chain input, but the prompt template in this notebook
# only declares "context" and "question"; it presumably never reaches the LLM - confirm.
output = llm_chain.invoke({"context": context, "question": question, "stop": ["\n\n"]})

end_time = time.perf_counter()

# Elapsed time of the single inference call.
inference_time = end_time - start_time
print("Output:", output)
print(f"Inference time: {inference_time:.4f} seconds")

In [None]:
import time
from pynvml import *

# Initialise NVML and grab the handle of GPU 0; benchmark_model reads `handle` as a global
# and the last line of this cell shuts NVML down again.
nvmlInit()
handle = nvmlDeviceGetHandleByIndex(0)

# Benchmark prompts; benchmark_model iterates over this global list.
questions = [
    "how to get rid of hormonal acne?",
    "what are symptoms of menstruation?",
    "can periods affect mood?",
    "how to relieve menstrual cramps?",
    "why do periods happen?"
]

# Fixed context sent with every benchmark prompt (also read as a global by benchmark_model).
# It rebinds the `context` name used by earlier cells.
context = "Menstrual pain (dysmenorrhea) is caused by prostaglandins triggering uterine contractions. Conditions like endometriosis can worsen it."

# This will store benchmarking results
def benchmark_model(llm_chain, model_name):
    """Time ``llm_chain`` on every benchmark question and sample GPU power around each call.

    Reads three module-level globals: ``questions`` (the prompts), ``context``
    (sent with every prompt) and ``handle`` (the NVML device handle, so NVML must
    already be initialised).

    Args:
        llm_chain: Object whose ``invoke(dict)`` runs one inference.
        model_name: Label used in the printed report and the returned dict.

    Returns:
        A dict with ``model``, ``avg_time`` (seconds), ``avg_power`` (watts) and
        ``efficiency_score`` = 1 / (avg_time * avg_power); the score is ``inf``
        when that product is zero.

    Raises:
        ValueError: If ``questions`` is empty.
    """
    if not questions:
        raise ValueError("benchmark_model needs at least one question to run")

    times, powers = [], []

    print(f"\n--- Benchmarking {model_name} ---")
    for q in questions:
        power_start = nvmlDeviceGetPowerUsage(handle) / 1000  # in watts

        # perf_counter is monotonic and high-resolution, unlike the wall clock.
        start = time.perf_counter()
        output = llm_chain.invoke({"context": context, "question": q, "stop": ["\n\n"]})
        end = time.perf_counter()

        power_end = nvmlDeviceGetPowerUsage(handle) / 1000

        # Rough estimate only: the mean of the two readings taken just before and just
        # after the call, not an integral over the inference.
        avg_power = (power_start + power_end) / 2
        inference_time = end - start

        print(f"Q: {q}")
        print(f"Output: {output}")
        print(f"Inference Time: {inference_time:.4f} s | Avg Power: {avg_power:.2f} W")
        print("-" * 50)

        times.append(inference_time)
        powers.append(avg_power)

    avg_time = sum(times) / len(times)
    avg_power = sum(powers) / len(powers)
    # avg_time * avg_power approximates the energy per answer; guard the division so a
    # zero reading does not abort the benchmark.
    energy_per_answer = avg_time * avg_power
    efficiency_score = 1 / energy_per_answer if energy_per_answer > 0 else float("inf")

    print(f"\n✅ {model_name} Summary:")
    print(f"Avg Time: {avg_time:.4f}s | Avg Power: {avg_power:.2f}W")
    print(f"Efficiency Score: {efficiency_score:.5f}\n")

    return {
        "model": model_name,
        "avg_time": avg_time,
        "avg_power": avg_power,
        "efficiency_score": efficiency_score
    }

# Benchmark the chain built earlier in the notebook (call benchmark_model again with another
# chain to compare models). The label names the quantization the loaded checkpoint uses
# (GPTQ). NVML is released even if the benchmark raises.
try:
    result1 = benchmark_model(llm_chain, "Mistral-7B-Instruct (GPTQ)")
finally:
    nvmlShutdown()