<a href="https://colab.research.google.com/github/keduog/LLM/blob/main/Biology_Final_Exam_Assessement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx+1}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology multiple-choice question (MCQ) using Bloom's Revised Taxonomy.

Use only one of the following levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond strictly in the format:
Q1: [Level]
Q2: [Level]
...
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx+1}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification (Batched) ===
difficulty_levels = []
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        lines = response.text.strip().splitlines()
        for line in lines:
            try:
                diff = line.split(":")[1].strip()
                difficulty_levels.append(diff if diff in ["Easy", "Medium", "Difficult"] else "Unclear")
            except:
                difficulty_levels.append("Unclear")
        # In case of mismatch in count, pad with "Unclear"
        while len(difficulty_levels) < len(exam_df):
            difficulty_levels.append("Unclear")
    except Exception as e:
        print(f"Difficulty API Error: {e}")
        difficulty_levels += ["API Error"] * len(batch_df)

exam_df["difficulty"] = difficulty_levels[:len(exam_df)]

# === Bloom’s Taxonomy Classification (Batched) ===
bloom_levels = []
valid_levels = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_blooms_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        lines = response.text.strip().splitlines()
        for line in lines:
            try:
                bloom = line.split(":")[1].strip()
                bloom_levels.append(bloom if bloom in valid_levels else "Unclear")
            except:
                bloom_levels.append("Unclear")
        while len(bloom_levels) < len(exam_df):
            bloom_levels.append("Unclear")
    except Exception as e:
        print(f"Bloom API Error: {e}")
        bloom_levels += ["API Error"] * len(batch_df)

exam_df["bloom_level"] = bloom_levels[:len(exam_df)]

# === Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Result ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (4).csv
Saving grade9Q1.csv to grade9Q1 (4).csv

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Unclear      90
Easy          4
Medium        4
Difficult     2
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Unclear       90
Remember       4
Understand     3
Apply          2
Analyze        1
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification (Batched + Robust Parsing) ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Batched + Robust Parsing) ===
bloom_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_blooms_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"])
        for idx, label in parsed.items():
            bloom_levels[idx] = label
    except Exception as e:
        print(f"Bloom API Error: {e}")

exam_df["bloom_level"] = bloom_levels

# === Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Result ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (5).csv
Saving grade9Q1.csv to grade9Q1 (5).csv




Bloom API Error: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.




Bloom API Error: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Understand    32
Remember      32
Unclear       20
Apply         11
Analyze        5
Name: count, dtype: int64

=== Invalid 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [8]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification (Batched + Robust Parsing) ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Batched + Robust Parsing) ===
bloom_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_blooms_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"])
        for idx, label in parsed.items():
            bloom_levels[idx] = label
    except Exception as e:
        print(f"Bloom API Error: {e}")

exam_df["bloom_level"] = bloom_levels

# === Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Result ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (6).csv
Saving grade9Q1.csv to grade9Q1 (6).csv




Bloom API Error: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.




Bloom API Error: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.




Bloom API Error: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      31
Unclear       30
Understand    26
Apply          9
Analyze        4
Name: count, dtype: int64

=== Invalid 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification (Batched + Robust Parsing) ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Batched + Retry on 429 Errors) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):  # Smaller batch
    success = False
    retries = 3
    wait_seconds = 15
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):  # Rate limit
                print("Quota limit reached. Retrying after delay...")
                time.sleep(wait_seconds)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Result ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (7).csv
Saving grade9Q1.csv to grade9Q1 (7).csv




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      45
Understand    34
Analyze       11
Apply          9
Evaluate       1
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification (Batched + Robust Parsing) ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Batched + Retry on 429 Errors) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):  # Smaller batch
    success = False
    retries = 3
    wait_seconds = 15
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):  # Rate limit
                print("Quota limit reached. Retrying after delay...")
                time.sleep(wait_seconds)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Result ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (8).csv
Saving grade9Q1.csv to grade9Q1 (8).csv




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      42
Understand    35
Analyze       10
Apply          8
Unclear        5
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Initial Pass) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):  # Smaller batch to reduce 429 errors
    success = False
    retries = 3
    wait_seconds = 15
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):  # Rate limit
                print("Quota limit reached. Retrying after delay...")
                time.sleep(wait_seconds)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Retry Bloom Classification for Unclear ===
unclear_bloom_df = exam_df[exam_df["bloom_level"] == "Unclear"]
if not unclear_bloom_df.empty:
    print(f"\nRetrying Bloom classification for {len(unclear_bloom_df)} unclear items...")
    for batch_df in split_df_into_batches(unclear_bloom_df, 5):
        success = False
        retries = 2
        while not success and retries > 0:
            try:
                prompt = build_blooms_batch_prompt(batch_df)
                response = gemini_model.generate_content(prompt, generation_config=config)
                parsed = extract_labels(response.text, valid_bloom)
                for idx, label in parsed.items():
                    exam_df.at[idx, "bloom_level"] = label
                success = True
            except Exception as e:
                if "429" in str(e):
                    print("Quota limit reached during retry. Waiting...")
                    time.sleep(15)
                    retries -= 1
                else:
                    print(f"Retry Bloom API Error: {e}")
                    break

# === Summary Output ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (9).csv
Saving grade9Q1.csv to grade9Q1 (9).csv




Quota limit reached. Retrying after delay...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      45
Understand    34
Analyze       12
Apply          9
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [12]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Initial Pass) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):  # Smaller batch to reduce 429 errors
    success = False
    retries = 3
    wait_seconds = 15
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):  # Rate limit
                print("Quota limit reached. Retrying after delay...")
                time.sleep(wait_seconds)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Retry Bloom Classification for Unclear ===
unclear_bloom_df = exam_df[exam_df["bloom_level"] == "Unclear"]
if not unclear_bloom_df.empty:
    print(f"\nRetrying Bloom classification for {len(unclear_bloom_df)} unclear items...")
    for batch_df in split_df_into_batches(unclear_bloom_df, 5):
        success = False
        retries = 2
        while not success and retries > 0:
            try:
                prompt = build_blooms_batch_prompt(batch_df)
                response = gemini_model.generate_content(prompt, generation_config=config)
                parsed = extract_labels(response.text, valid_bloom)
                for idx, label in parsed.items():
                    exam_df.at[idx, "bloom_level"] = label
                success = True
            except Exception as e:
                if "429" in str(e):
                    print("Quota limit reached during retry. Waiting...")
                    time.sleep(15)
                    retries -= 1
                else:
                    print(f"Retry Bloom API Error: {e}")
                    break

# === Summary Output ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (10).csv
Saving grade9Q1.csv to grade9Q1 (10).csv




Quota limit reached. Retrying after delay...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      44
Understand    35
Analyze       12
Apply          9
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Bloom’s Taxonomy Classification (Initial Pass) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):  # Smaller batch to reduce 429 errors
    success = False
    retries = 3
    wait_seconds = 15
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):  # Rate limit
                print("Quota limit reached. Retrying after delay...")
                time.sleep(wait_seconds)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Retry Bloom Classification for Unclear ===
unclear_bloom_df = exam_df[exam_df["bloom_level"] == "Unclear"]
if not unclear_bloom_df.empty:
    print(f"\nRetrying Bloom classification for {len(unclear_bloom_df)} unclear items...")
    for batch_df in split_df_into_batches(unclear_bloom_df, 5):
        success = False
        retries = 2
        while not success and retries > 0:
            try:
                prompt = build_blooms_batch_prompt(batch_df)
                response = gemini_model.generate_content(prompt, generation_config=config)
                parsed = extract_labels(response.text, valid_bloom)
                for idx, label in parsed.items():
                    exam_df.at[idx, "bloom_level"] = label
                success = True
            except Exception as e:
                if "429" in str(e):
                    print("Quota limit reached during retry. Waiting...")
                    time.sleep(15)
                    retries -= 1
                else:
                    print(f"Retry Bloom API Error: {e}")
                    break

# === Summary Output ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (11).csv
Saving grade9Q1.csv to grade9Q1 (11).csv




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...

Retrying Bloom classification for 5 unclear items...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      46
Understand    33
Analyze       12
Apply          9
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Delay to Prevent Quota Hit ===
print("\nWaiting for 45 seconds before starting Bloom's Taxonomy classification...")
time.sleep(45)

# === Bloom’s Taxonomy Classification (Initial Pass) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):
    success = False
    retries = 3
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):
                print("Quota limit reached. Retrying after delay...")
                time.sleep(15)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Retry Bloom Classification for Unclear ===
unclear_bloom_df = exam_df[exam_df["bloom_level"] == "Unclear"]
if not unclear_bloom_df.empty:
    print(f"\nRetrying Bloom classification for {len(unclear_bloom_df)} unclear items...")
    for batch_df in split_df_into_batches(unclear_bloom_df, 5):
        success = False
        retries = 2
        while not success and retries > 0:
            try:
                prompt = build_blooms_batch_prompt(batch_df)
                response = gemini_model.generate_content(prompt, generation_config=config)
                parsed = extract_labels(response.text, valid_bloom)
                for idx, label in parsed.items():
                    exam_df.at[idx, "bloom_level"] = label
                success = True
            except Exception as e:
                if "429" in str(e):
                    print("Quota limit reached during retry. Waiting...")
                    time.sleep(15)
                    retries -= 1
                else:
                    print(f"Retry Bloom API Error: {e}")
                    break

# === Summary Output ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (12).csv
Saving grade9Q1.csv to grade9Q1 (12).csv

Waiting for 45 seconds before starting Bloom's Taxonomy classification...




Quota limit reached. Retrying after delay...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      46
Understand    33
Analyze       11
Apply          9
Evaluate       1
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Delay to Prevent Quota Hit ===
print("\nWaiting for 45 seconds before starting Bloom's Taxonomy classification...")
time.sleep(45)

# === Bloom’s Taxonomy Classification (Initial Pass) ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):
    success = False
    retries = 3
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):
                print("Quota limit reached. Retrying after delay...")
                time.sleep(15)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Retry Bloom Classification for Unclear ===
unclear_bloom_df = exam_df[exam_df["bloom_level"] == "Unclear"]
if not unclear_bloom_df.empty:
    print(f"\nRetrying Bloom classification for {len(unclear_bloom_df)} unclear items...")
    for batch_df in split_df_into_batches(unclear_bloom_df, 5):
        success = False
        retries = 2
        while not success and retries > 0:
            try:
                prompt = build_blooms_batch_prompt(batch_df)
                response = gemini_model.generate_content(prompt, generation_config=config)
                parsed = extract_labels(response.text, valid_bloom)
                for idx, label in parsed.items():
                    exam_df.at[idx, "bloom_level"] = label
                success = True
            except Exception as e:
                if "429" in str(e):
                    print("Quota limit reached during retry. Waiting...")
                    time.sleep(15)
                    retries -= 1
                else:
                    print(f"Retry Bloom API Error: {e}")
                    break

# === Summary Output ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (14).csv
Saving grade9Q1.csv to grade9Q1 (14).csv

Waiting for 45 seconds before starting Bloom's Taxonomy classification...




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...




Quota limit reached. Retrying after delay...

Retrying Bloom classification for 5 unclear items...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      44
Understand    35
Analyze       12
Apply          9
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Step 1: Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Step 2: WAIT before Bloom Classification ===
print("\n✅ Difficulty classification complete.")
print("⏳ Waiting 120 seconds before starting Bloom's taxonomy classification...")
time.sleep(120)

# === Step 3: Bloom's Taxonomy Classification ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):
    success = False
    retries = 3
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):
                print("⚠️ Quota limit reached. Retrying after 15s...")
                time.sleep(15)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Final Output Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


Saving biologyg9.csv to biologyg9 (15).csv
Saving grade9Q1.csv to grade9Q1 (15).csv

✅ Difficulty classification complete.
⏳ Waiting 120 seconds before starting Bloom's taxonomy classification...




⚠️ Quota limit reached. Retrying after 15s...




⚠️ Quota limit reached. Retrying after 15s...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      47
Understand    32
Analyze       11
Apply          9
Evaluate       1
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
# === Install Required Libraries ===
!pip install -q -U google-generativeai sentence-transformers faiss-cpu pandas

# === Imports ===
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from google.colab import files
import re
import time

# === Upload CSVs ===
uploaded = files.upload()
exam_df = pd.read_csv("grade9Q1.csv", encoding="ISO-8859-1")
textbook_df = pd.read_csv("biologyg9.csv", encoding="ISO-8859-1")

# === Setup Configuration ===
config = GenerationConfig(
    temperature=0.0,
    top_p=1.0,
    top_k=1
)

# === Semantic Search Setup ===
embedder = SentenceTransformer("all-MiniLM-L6-v2")
text_chunks = textbook_df["Text"].tolist()
chunk_embeddings = embedder.encode(text_chunks, convert_to_tensor=True)

matched_units, matched_sections = [], []
threshold = 0.6
for question in exam_df["question_text"]:
    q_embed = embedder.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_embed, chunk_embeddings)
    best_idx = similarities.argmax().item()
    best_score = similarities[0][best_idx].item()
    if best_score >= threshold:
        matched_units.append(textbook_df.iloc[best_idx]["Unit"])
        matched_sections.append(textbook_df.iloc[best_idx]["Section"])
    else:
        matched_units.append("No Match")
        matched_sections.append("No Match")

exam_df["matched_unit"] = matched_units
exam_df["matched_section"] = matched_sections

# === MCQ Format Validation ===
exam_df["option_count_valid"] = exam_df.apply(
    lambda row: all(pd.notnull(row[f"option_{i}"]) for i in range(1, 5)), axis=1
)

# === Gemini Setup ===
genai.configure(api_key="AIzaSyCGgwigTnwX7CSJZfdvlCAdZ46KQTyD6XI")  # Replace with your actual Gemini API key
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# === Helper: Split DataFrame into Batches ===
def split_df_into_batches(df, batch_size):
    for i in range(0, len(df), batch_size):
        yield df.iloc[i:i + batch_size]

# === Robust Output Parser ===
def extract_labels(text, valid_labels):
    lines = text.strip().splitlines()
    results = {}
    for line in lines:
        match = re.match(r"Q(\d+):?\s*(\w+)", line.strip())
        if match:
            idx, label = int(match.group(1)), match.group(2)
            results[idx] = label if label in valid_labels else "Unclear"
    return results

# === Prompt Builders ===
def build_difficulty_batch_prompt(df):
    header = """
You are a biology teacher assessing the difficulty of multiple-choice questions.

Difficulty Levels:
- Easy: Factual recall or basic recognition
- Medium: Requires explanation or understanding
- Difficult: Requires analysis, synthesis, or reasoning

Classify each MCQ with one word: Easy, Medium, or Difficult.

Respond in this format:
Q[ID]: [Level]

Example:
Q23: Medium
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

def build_blooms_batch_prompt(df):
    header = """
You are an educational evaluator. Classify each biology MCQ using Bloom's Revised Taxonomy.

Levels:
- Remember
- Understand
- Apply
- Analyze
- Evaluate
- Create

Respond in the format:
Q[ID]: [Level]

Example:
Q23: Apply
"""
    body = ""
    for idx, row in df.iterrows():
        body += f"\nQ{idx}: {row['question_text']}\n"
        body += f"A. {row['option_1']}\nB. {row['option_2']}\nC. {row['option_3']}\nD. {row['option_4']}\n"
    return header + body

# === Step 1: Difficulty Classification ===
difficulty_levels = ["Unclear"] * len(exam_df)
for batch_df in split_df_into_batches(exam_df, 10):
    try:
        prompt = build_difficulty_batch_prompt(batch_df)
        response = gemini_model.generate_content(prompt, generation_config=config)
        parsed = extract_labels(response.text, ["Easy", "Medium", "Difficult"])
        for idx, label in parsed.items():
            difficulty_levels[idx] = label
    except Exception as e:
        print(f"Difficulty API Error: {e}")

exam_df["difficulty"] = difficulty_levels

# === Step 2: WAIT before Bloom Classification ===
print("\n✅ Difficulty classification complete.")
print("⏳ Waiting 120 seconds before starting Bloom's taxonomy classification...")
time.sleep(120)

# === Step 3: Bloom's Taxonomy Classification ===
bloom_levels = ["Unclear"] * len(exam_df)
valid_bloom = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

for batch_df in split_df_into_batches(exam_df, 5):
    success = False
    retries = 3
    while not success and retries > 0:
        try:
            prompt = build_blooms_batch_prompt(batch_df)
            response = gemini_model.generate_content(prompt, generation_config=config)
            parsed = extract_labels(response.text, valid_bloom)
            for idx, label in parsed.items():
                bloom_levels[idx] = label
            success = True
        except Exception as e:
            if "429" in str(e):
                print("⚠️ Quota limit reached. Retrying after 15s...")
                time.sleep(15)
                retries -= 1
            else:
                print(f"Bloom API Error: {e}")
                break

exam_df["bloom_level"] = bloom_levels

# === Final Output Summary ===
print("\n=== Coverage by Section ===")
print(exam_df["matched_section"].value_counts())
print("\n=== Difficulty Distribution ===")
print(exam_df["difficulty"].value_counts())
print("\n=== Bloom’s Distribution ===")
print(exam_df["bloom_level"].value_counts())
print("\n=== Invalid MCQs (≠ 4 options) ===")
print(len(exam_df[~exam_df["option_count_valid"]]))

# === Save Final Output ===
exam_df.to_csv("evaluated_exam_gemini_revised.csv", index=False)
files.download("evaluated_exam_gemini_revised.csv")


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.4/155.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.7/345.7 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m83.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m95.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m86.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Saving biologyg9.csv to biologyg9.csv
Saving grade9Q1.csv to grade9Q1.csv


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


✅ Difficulty classification complete.
⏳ Waiting 120 seconds before starting Bloom's taxonomy classification...




⚠️ Quota limit reached. Retrying after 15s...




⚠️ Quota limit reached. Retrying after 15s...

=== Coverage by Section ===
matched_section
No Match                                   91
3.4 Types of cells                          3
2.5 Common Ethiopian animals and plants     2
3.2. Cell theory                            1
1.4 . Tools of a Biologist                  1
2.4 Linnaean system of nomenclature         1
2.2 Taxonomy of living things               1
Name: count, dtype: int64

=== Difficulty Distribution ===
difficulty
Medium       59
Easy         31
Difficult    10
Name: count, dtype: int64

=== Bloom’s Distribution ===
bloom_level
Remember      44
Understand    35
Analyze       12
Apply          9
Name: count, dtype: int64

=== Invalid MCQs (≠ 4 options) ===
0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>