<a href="https://colab.research.google.com/github/keduog/LLM/blob/main/examevaluatiowithrag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- Biology Exam Evaluation with RAG + Falcon-7B-Instruct (Fully Public) ---

# STEP 1: Install Required Libraries
!pip install -q sentence-transformers faiss-cpu transformers accelerate bitsandbytes

# STEP 2: Import Libraries
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from google.colab import files

# STEP 3: Upload CSV Files (grade9Q.csv, biologyg9.csv)
uploaded = files.upload()

# STEP 4: Load Data
exam_df = pd.read_csv("grade9Q.csv", encoding="utf-8")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")  # Columns: Unit, Section, Text

# STEP 5: Encode Textbook Content for RAG
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

# STEP 6: Match Questions to Textbook Sections
matched_units = []
matched_sections = []
"""
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    matched_units.append(textbook_df.iloc[best_idx]["Unit"])
    matched_sections.append(textbook_df.iloc[best_idx]["Section"])
"""
threshold = 0.9
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()

    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")
exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# STEP 7: Pedagogical Check - All Questions Should Have 4 Options
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)),
    axis=1
)

# STEP 8: Load Falcon-7B-Instruct Model (No API Key Required)
model_id = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto"
)

llama_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16
)

# STEP 9: Define Difficulty Classifier
def classify_difficulty(question):
    prompt = (
        "You are a biology teacher.\n"
        "Classify the following multiple-choice question into one of three categories: Easy, Medium, or Difficult.\n"
        f"Question: {question}\n\n"
        "Only answer with one word: Easy, Medium, or Difficult."
    )
    try:
        output = llama_pipeline(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
        for word in ["Easy", "Medium", "Difficult"]:
            if word in output:
                return word
        return "Unclear"
    except Exception as e:
        print(f"❌ Error: {e}")
        return "Error"

# STEP 10: Classify Difficulty for All Questions
exam_df["difficulty"] = exam_df["question_text"].apply(classify_difficulty)

# STEP 11: Print Summary
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())

print("\n=== Difficulty Level Distribution ===")
print(exam_df["difficulty"].value_counts())

print("\n=== Pedagogical Violations (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# STEP 12: Save and Download Results
exam_df.to_csv("evaluated_exam_falcon.csv", index=False)
files.download("evaluated_exam_falcon.csv")


Saving biologyg9.csv to biologyg9 (3).csv
Saving grade9Q.csv to grade9Q (3).csv


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_i


=== Coverage by Section ===
matched_section
No Match    20
Name: count, dtype: int64

=== Difficulty Level Distribution ===
difficulty
Easy    20
Name: count, dtype: int64

=== Pedagogical Violations (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>