<a href="https://colab.research.google.com/github/keduog/LLM/blob/main/biogpt3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
# --- Biology Exam Evaluation with RAG and OpenAI GPT-3.5 (Colab-Compatible, v1 SDK Secure Version) ---

# STEP 1: Install Required Libraries
!pip install -q sentence-transformers faiss-cpu openai

# STEP 2: Import Libraries
import io
import os
import re
from getpass import getpass

import matplotlib.pyplot as plt
import pandas as pd
import torch
from google.colab import files
from openai import OpenAI
from sentence_transformers import SentenceTransformer, util

# STEP 3: Upload Files to Colab
uploaded = files.upload()  # mapping of file name -> raw bytes for this upload


def _read_uploaded_csv(uploaded_files, filename, **read_csv_kwargs):
    """Load `filename` from the bytes of the current upload.

    When a file with the same name already exists in the working directory,
    Colab stores the new upload under a suffixed name such as
    "grade9Q (12).csv". Reading the plain name from disk would then silently
    load an older copy, so the freshly uploaded bytes are parsed instead.

    Args:
        uploaded_files: dict returned by files.upload().
        filename: the expected (un-suffixed) CSV name.
        **read_csv_kwargs: forwarded unchanged to pd.read_csv.

    Returns:
        A DataFrame parsed from the uploaded bytes, or from the file on disk
        when `filename` was not part of this upload.
    """
    stem, ext = os.path.splitext(filename)
    # Accept both the original name and Colab's "name (N).ext" variant.
    renamed = re.compile(rf"{re.escape(stem)} \(\d+\){re.escape(ext)}")
    for name, payload in uploaded_files.items():
        if name == filename or renamed.fullmatch(name):
            return pd.read_csv(io.BytesIO(payload), **read_csv_kwargs)
    # Not uploaded in this run: fall back to whatever is already on disk.
    return pd.read_csv(filename, **read_csv_kwargs)


# STEP 4: Load Data with Encoding Fix
exam_df = _read_uploaded_csv(uploaded, "grade9Q.csv", encoding='utf-8')
textbook_df = _read_uploaded_csv(uploaded, "biologyg9.csv", encoding='ISO-8859-1')  # Columns: Unit, Section, Text

# STEP 5: Embedding Textbook Content
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Blank Text cells are read by pandas as NaN (a float) and would be handed to
# encode() as non-strings. They are replaced with empty strings rather than
# dropped, so that position i in text_embeddings still corresponds to
# textbook_df.iloc[i] when matches are looked up later.
text_chunks = textbook_df['Text'].fillna('').astype(str).tolist()
text_embeddings = embedding_model.encode(text_chunks, convert_to_tensor=True)

# STEP 6: RAG-Based Retrieval - Match Questions to Textbook
# All questions are embedded in a single batched encode() call and compared
# with every textbook chunk at once, instead of one model call per question.
question_texts = exam_df['question_text'].tolist()
if question_texts:
    question_embeddings = embedding_model.encode(question_texts, convert_to_tensor=True)
    # similarity[i][j] is the cosine similarity of question i and chunk j.
    similarity = util.cos_sim(question_embeddings, text_embeddings)
    # Position of the best-scoring textbook chunk for each question.
    best_chunk_positions = similarity.argmax(dim=1).tolist()
else:
    # No questions: skip the model call and produce empty result columns.
    best_chunk_positions = []

best_chunks = textbook_df.iloc[best_chunk_positions]
# Plain lists drop the textbook index, so the column assignment below is
# positional rather than aligned on index labels.
matched_units = best_chunks['Unit'].tolist()
matched_sections = best_chunks['Section'].tolist()

exam_df['matched_unit'] = matched_units
exam_df['matched_section'] = matched_sections

# STEP 7: Validate Pedagogical Rule - Four Options
# A question passes only when option_1 .. option_4 are all non-null.
# The check is column-wise (no per-row Python lambda) and yields a boolean
# Series for any number of rows, including zero.
option_columns = [f'option_{i}' for i in range(1, 5)]
exam_df['option_count_valid'] = exam_df[option_columns].notna().all(axis=1)
##################3
!curl https://api.openai.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer " \
  -d '{ "model": "gpt-4o-mini", "messages": [{"role": "user", "content": "write a haiku about ai"}] }'


#########3
# STEP 8: Set Your OpenAI Key Securely
client = OpenAI(api_key=""

# STEP 9: Classify Difficulty Using GPT-3.5 (v1 SDK Secure Syntax)

# Canonical labels the model is asked to choose from.
DIFFICULTY_LABELS = ("Easy", "Medium", "Difficult")


def classify_difficulty_openai(question):
    """Ask the chat model to rate one exam question as Easy, Medium or Difficult.

    Args:
        question: the question text inserted into the prompt.

    Returns:
        One of DIFFICULTY_LABELS when the reply matches a label after ignoring
        case, surrounding whitespace and surrounding punctuation; otherwise the
        reply text stripped of whitespace (an empty string if the reply has no
        content).

    Raises:
        Whatever the module-level `client` raises for a failed request; errors
        are not caught here.
    """
    prompt = [
        {"role": "system", "content": "You are a biology teacher."},
        {"role": "user", "content": f"Classify the following biology exam question into one of three categories: Easy, Medium, or Difficult.\n\nQuestion: {question}\n\nOnly answer with one word: Easy, Medium, or Difficult."}
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=prompt,
        # Lowest sampling temperature, to keep labels as stable as possible
        # between runs.
        temperature=0,
    )
    # content can be None; treat that as an empty reply instead of crashing.
    raw_answer = (response.choices[0].message.content or "").strip()
    # Replies like "Medium." or "easy" should fall into the same bucket as
    # "Medium" / "Easy" when the labels are counted later.
    normalized = raw_answer.strip(" .,;:!?\"'`*").casefold()
    for label in DIFFICULTY_LABELS:
        if normalized == label.casefold():
            return label
    # Unrecognized reply: return it unchanged so it stays visible in the data.
    return raw_answer

# Classify question by question so that one failed request does not discard
# the labels already obtained for the others.
MAX_CONSECUTIVE_FAILURES = 3  # after this many failures in a row, stop calling the API

difficulty_labels = []
classification_errors = []
consecutive_failures = 0
for question in exam_df['question_text']:
    if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
        # Repeated failures point to a systemic problem (bad key, no network);
        # mark the remaining questions without issuing more requests.
        difficulty_labels.append("Error")
        continue
    try:
        label = classify_difficulty_openai(question)
    except Exception as e:
        # Deliberately broad: a classification failure must not stop the notebook.
        classification_errors.append(e)
        difficulty_labels.append("Error")
        consecutive_failures += 1
    else:
        difficulty_labels.append(label)
        consecutive_failures = 0

exam_df['difficulty'] = difficulty_labels

if classification_errors:
    unclassified = difficulty_labels.count("Error")
    print("\nERROR: Difficulty classification failed.")
    print(f"{unclassified} of {len(difficulty_labels)} questions could not be classified.")
    print(str(classification_errors[0]))

# STEP 10: Summary Statistics
coverage_summary = exam_df['matched_section'].value_counts()

# The 'difficulty' column is filled with "Error" when classification fails, so
# the presence of the column alone does not mean usable labels exist.
classification_failed = (
    'difficulty' not in exam_df
    or (len(exam_df) > 0 and (exam_df['difficulty'] == "Error").all())
)
if classification_failed:
    difficulty_summary = "Not available due to OpenAI error"
else:
    difficulty_summary = exam_df['difficulty'].value_counts()

# Number of questions that do not have all four options filled in.
pedagogical_violations = int((~exam_df['option_count_valid']).sum())

print("=== Coverage by Section ===")
print(coverage_summary)
print("\n=== Difficulty Level Distribution ===")
print(difficulty_summary)
print(f"\nQuestions with Option Count Violation: {pedagogical_violations}")

# Optional: write the annotated exam to CSV (without the index column) and
# pass the same file to files.download.
annotated_exam_path = "evaluated_exam_questions.csv"
exam_df.to_csv(annotated_exam_path, index=False)
files.download(annotated_exam_path)


Saving biologyg9.csv to biologyg9 (11).csv
Saving grade9Q.csv to grade9Q (12).csv
{
  "id": "chatcmpl-BbOTobPKzEdZL7kpF1Ej7RoYxi5wg",
  "object": "chat.completion",
  "created": 1748251936,
  "model": "gpt-4o-mini-2024-07-18",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Silent circuits hum,  \nWhispers of thought intertwine—  \nDreams in code take flight.",
        "refusal": null,
        "annotations": []
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 13,
    "completion_tokens": 20,
    "total_tokens": 33,
    "prompt_tokens_details": {
      "cached_tokens": 0,
      "audio_tokens": 0
    },
    "completion_tokens_details": {
      "reasoning_tokens": 0,
      "audio_tokens": 0,
      "accepted_prediction_tokens": 0,
      "rejected_prediction_tokens": 0
    }
  },
  "service_tier": "default",
  "system_fingerprint": "fp_34a54ae93c"
}

ERROR: Difficulty classi

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>