In [2]:
## Adaptive Testing 

import openai
from openai import OpenAI
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import random
import time

In [5]:

# SECURITY: never hardcode API keys in a notebook. A literal key used to be
# embedded on this line; it has been exposed to everyone with access to this
# file and must be revoked. Supply the key through the OPENAI_API_KEY
# environment variable (for example, export it in the shell that launches
# Jupyter) before running the cells below.
if not os.getenv("OPENAI_API_KEY"):
    print("OPENAI_API_KEY is not set; generate_questions() will return None until it is provided.")
def generate_questions(prompt, num_questions=10, max_attempts=None):
    """Generate unique multiple-choice questions about ``prompt`` via the OpenAI API.

    Each completion is expected to contain six non-empty lines starting with
    ``Question``, ``Option A``, ``Option B``, ``Option C``, ``Option D`` and
    ``Correct``. Completions that are duplicates, too short, or do not follow
    that layout are discarded and another completion is requested.

    Args:
        prompt: Topic of the questions; also stored in the ``Category`` column.
        num_questions: Number of well-formed, unique questions to collect.
        max_attempts: Upper bound on API calls. Defaults to ``5 * num_questions``
            so that a model which keeps returning duplicate or malformed text
            cannot make the loop run (and bill) forever.

    Returns:
        A ``pandas.DataFrame`` with one row per accepted question (possibly
        fewer than ``num_questions`` rows when ``max_attempts`` is exhausted),
        or ``None`` when the API key is missing or the API call raises.
    """
    # Required leading text of the six lines of a well-formed completion.
    expected_prefixes = ('Question', 'Option A', 'Option B', 'Option C', 'Option D', 'Correct')

    if max_attempts is None:
        max_attempts = max(num_questions, 0) * 5

    try:
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set.")

        client = OpenAI(api_key=api_key)
        generated_texts = set()
        questions = []
        attempts = 0

        while len(questions) < num_questions and attempts < max_attempts:
            attempts += 1
            # The suffix counts accepted questions, so a retry reuses the same number.
            variation_prompt = f"{prompt} - Question {len(questions) + 1}"

            response = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt=f"Generate a multiple-choice question about {variation_prompt} strictly in the format:\n\n" \
                       f"Question:\n" \
                       f"Option A:\n" \
                       f"Option B:\n" \
                       f"Option C:\n" \
                       f"Option D:\n" \
                       f"Correct Option: Option A or Option B or Option C or Option D",
                max_tokens=150,
                n=1,
                stop=None,
                temperature=0.9
            )

            question_text = response.choices[0].text.strip()

            # Skip exact repeats of an earlier completion.
            if question_text in generated_texts:
                continue
            generated_texts.add(question_text)

            lines = [line.strip() for line in question_text.splitlines() if line.strip()]

            # Check the length first: an empty or truncated completion must be
            # skipped, not raise IndexError and discard every question so far.
            if len(lines) < len(expected_prefixes):
                continue
            if not all(line.startswith(prefix) for line, prefix in zip(lines, expected_prefixes)):
                continue

            questions.append({
                "Question": lines[0].replace("Question:", "").strip(),
                "Category": prompt,
                "Option A": lines[1].replace("Option A:", "").strip(),
                "Option B": lines[2].replace("Option B:", "").strip(),
                "Option C": lines[3].replace("Option C:", "").strip(),
                "Option D": lines[4].replace("Option D:", "").strip(),
                # Keep only the first 8 characters, i.e. the length of "Option X".
                "Correct Option": lines[5].replace("Correct Option:", "").strip()[0:8],
                # Item parameters are random placeholders, not estimated from data.
                'difficulty_index': np.random.uniform(0.1, 1.0),
                'discriminatory_index': np.random.uniform(0.1, 1.0),
            })

        if len(questions) < num_questions:
            print(f"Only generated {len(questions)} of {num_questions} questions "
                  f"for '{prompt}' after {attempts} attempts.")

        return pd.DataFrame(questions)

    except ValueError as ve:
        print(f"ValueError: {ve}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None



In [6]:
# Topics to generate questions for; each topic becomes a value of the 'Category' column.
prompts = ["Biology", "Psychology", "Gynaecology", "Orthopedic","Oncology"]
questions_df_list = [generate_questions(prompt, num_questions=20) for prompt in prompts]

# generate_questions returns None on failure. pd.concat silently drops None
# entries and raises when every entry is None, so filter explicitly and only
# continue when at least one topic produced questions.
generated_frames = [df for df in questions_df_list if df is not None and not df.empty]

if generated_frames:
    all_questions_df = pd.concat(generated_frames, ignore_index=True)
    print("Generated Questions:")
    print(all_questions_df)

    # Expose the positional index as a stable 'question_id' column.
    all_questions_df=all_questions_df.reset_index().rename(columns={'index':'question_id'})
    # index=False: 'question_id' already carries the row number; writing the
    # index as well would reload as a spurious 'Unnamed: 0' column.
    all_questions_df.to_csv('Questions.csv',header=True,index=False)
else:
    all_questions_df = None
    print("Failed to generate questions.")


Generated Questions:
                                             Question  Category  \
0   What is the process by which green plants conv...   Biology   
1   What process is responsible for converting lig...   Biology   
2   What is the process by which plants convert su...   Biology   
3   Which process involves the conversion of light...   Biology   
4   What is the function of the mitochondria in a ...   Biology   
..                                                ...       ...   
95  What is the most common form of cancer in wome...  Oncology   
96  Which of the following is a common symptom of ...  Oncology   
97            What is the most common form of cancer?  Oncology   
98  Which of the following is NOT a common treatme...  Oncology   
99  What is the most common type of cancer in men ...  Oncology   

                                Option A                      Option B  \
0                            Respiration                Photosynthesis   
1                         

In [3]:
# Reload the persisted question bank so the test cells can run without regenerating it.
all_questions_df = pd.read_csv('Questions.csv')
# A CSV written with its index reloads with an extra 'Unnamed: 0' column; drop
# any such column so each record only carries real question fields.
all_questions_df = all_questions_df.loc[:, ~all_questions_df.columns.str.startswith('Unnamed')]

In [4]:
# One dict per question row, keyed by column name, as consumed by the test classes.
questions_list = all_questions_df.to_dict("records")

In [8]:
import gurobipy as gp
from gurobipy import GRB
import ipywidgets as widgets
from IPython.display import display, clear_output
from functools import partial

class AdaptiveTest:
    """Interactive adaptive quiz that raises or lowers difficulty after each answer.

    Questions are dicts with the keys ``question_id``, ``Question``,
    ``Category``, ``Option A``..``Option D``, ``Correct Option``,
    ``difficulty_index`` and ``discriminatory_index``. The next question is
    chosen by a small Gurobi MILP among unasked questions whose difficulty is
    within ``difficulty_tolerance`` of the current target difficulty. Answers
    are collected through ipywidgets buttons.

    Args:
        questions: List of question dicts.
        total_questions: Maximum number of questions to ask.
        difficulty_tolerance: Half-width of the difficulty window used when
            filtering candidate questions.
    """

    def __init__(self, questions, total_questions=5, difficulty_tolerance=0.05):
        self.questions = questions
        self.total_questions = total_questions
        self.difficulty_tolerance = difficulty_tolerance
        self.asked_questions = []  # question_id values, in the order asked
        self.answered_questions = set()  # question_id values already scored
        self.correct_answers = 0
        self.correct_difficulty_indices = []
        self.current_difficulty = 0.5  # Starting difficulty index
        self.categories = list(set(q['Category'] for q in questions))
        self.past_categories = []  # List to track past question categories
        self.category_counts = {cat: 0 for cat in self.categories}  # Track category counts

    def adjust_difficulty(self, previous_difficulty, correct):
        """Return the difficulty moved by 0.1 (up if ``correct``, else down), clamped to [0.1, 1.0]."""
        if correct:
            return min(1.0, previous_difficulty + 0.1)
        else:
            return max(0.1, previous_difficulty - 0.1)

    def setup_milp_model(self, available_questions):
        """Build the MILP that picks exactly one of ``available_questions``.

        The objective maximises the chosen question's ``discriminatory_index``
        minus a penalty of 100 per unit by which a category's running count
        falls below 15% or rises above 35% of the questions asked (counting
        the one being selected).

        Returns:
            ``(model, x)`` where ``x[i]`` is the binary selection variable of
            ``available_questions[i]``.
        """
        model = gp.Model("Question_Selection")

        # One binary selection variable per candidate question.
        x = model.addVars(len(available_questions), vtype=GRB.BINARY, name="x")

        # Objective: Maximize the discriminatory index
        obj = gp.quicksum(available_questions[i]['discriminatory_index'] * x[i] for i in range(len(available_questions)))

        # Dynamic penalties for category representation
        total_asked = len(self.asked_questions) + 1  # Include the next question to be asked
        min_proportion = 0.15 * total_asked
        max_proportion = 0.35 * total_asked

        for cat in self.categories:
            category_questions = [i for i in range(len(available_questions)) if available_questions[i]['Category'] == cat]
            min_needed = max(0, min_proportion - self.category_counts[cat])
            max_needed = max_proportion - self.category_counts[cat]
            deviation_below = model.addVar(vtype=GRB.CONTINUOUS, name=f"deviation_below_{cat}")
            deviation_above = model.addVar(vtype=GRB.CONTINUOUS, name=f"deviation_above_{cat}")

            model.addConstr(deviation_below >= min_needed - gp.quicksum(x[i] for i in category_questions))
            model.addConstr(deviation_above >= gp.quicksum(x[i] for i in category_questions) - max_needed)

            obj -= 100 * deviation_below  # Penalize deviation below
            obj -= 100 * deviation_above  # Penalize deviation above

        model.setObjective(obj, GRB.MAXIMIZE)

        # Constraints: Ensure only one question is selected
        model.addConstr(gp.quicksum(x[i] for i in range(len(available_questions))) == 1)

        return model, x

    def get_next_question(self):
        """Select, record and return the next question, or ``None`` if none can be chosen.

        When the solver does not report an optimal solution the target
        difficulty is lowered and selection is retried; the retry stops once
        the difficulty can no longer be lowered.
        """
        while True:
            available_questions = [q for q in self.questions if q['question_id'] not in self.asked_questions and
                                   abs(q['difficulty_index'] - self.current_difficulty) <= self.difficulty_tolerance]

            if not available_questions:
                print("No more questions available with the current difficulty settings.")
                return None

            # Select a question ensuring balanced category representation
            model, x = self.setup_milp_model(available_questions)
            model.optimize()

            if model.status == GRB.OPTIMAL:
                selected_question_index = next(i for i in range(len(available_questions)) if x[i].X > 0.5)
                selected_question = available_questions[selected_question_index]
                self.asked_questions.append(selected_question['question_id'])
                self.past_categories.append(selected_question['Category'])
                self.category_counts[selected_question['Category']] += 1  # Update the count for the selected category
                return selected_question

            print("No feasible solution found by the MILP model. Relaxing constraints.")
            lowered_difficulty = self.adjust_difficulty(self.current_difficulty, False)
            if lowered_difficulty >= self.current_difficulty:
                # Already at the minimum difficulty: retrying would repeat the
                # same failing solve forever, so give up instead.
                return None
            self.current_difficulty = lowered_difficulty

    def ask_question(self, question):
        """Display ``question`` and one button per option; a click calls ``check_answer``."""
        clear_output(wait=True)  # Clear output before printing question
        print(f"Question: {question['Question']}, Category: {question['Category']}")  # Print the question
        for idx, option in enumerate([question['Option A'], question['Option B'], question['Option C'], question['Option D']]):
            button = widgets.Button(description=f"{chr(65+idx)}: {option}")
            # Bind the question and the option letter; the clicked button is passed last.
            button.on_click(partial(self.check_answer, question, chr(65+idx)))
            button.layout.width = 'auto'
            display(button)  # Display the options

    def check_answer(self, question, answer, b):
        """Score ``answer`` ('A'..'D') for ``question`` and move on.

        ``b`` is the clicked button supplied by ipywidgets; it is not used.
        Repeated clicks for a question that was already scored are ignored.
        """
        question_id = question['question_id']
        if question_id in self.answered_questions:
            return
        self.answered_questions.add(question_id)

        # The correct letter is the last character of e.g. "Option B"; strip
        # first so trailing whitespace does not become the "letter".
        correct_option_letter = str(question['Correct Option']).strip()[-1:].upper()
        correct = (answer == correct_option_letter)
        if correct:
            self.correct_answers += 1
            self.correct_difficulty_indices.append(question['difficulty_index'])

        self.current_difficulty = self.adjust_difficulty(self.current_difficulty, correct)

        if len(self.asked_questions) < self.total_questions:
            next_question = self.get_next_question()
            if next_question:
                self.ask_question(next_question)
            else:
                self.finish_test()
        else:
            self.finish_test()

    def finish_test(self):
        """Print the number of correct answers, the score and the per-category share of asked questions.

        The score is the sum of ``difficulty_index`` over correctly answered questions.
        """
        clear_output(wait=True)
        final_score = sum(self.correct_difficulty_indices)
        # Report against the questions actually asked; the test can end early
        # when no suitable question remains.
        total_asked = len(self.asked_questions)
        print(f"Test completed! You answered {self.correct_answers} out of {total_asked} questions correctly.")
        print(f"Your final score is: {final_score:.2f}")

        # Print final category representation. Look questions up by id rather
        # than using the id as a list position, which only works when ids
        # happen to equal positions.
        questions_by_id = {q['question_id']: q for q in self.questions}
        category_counts = {cat: 0 for cat in self.categories}
        for q_id in self.asked_questions:
            category_counts[questions_by_id[q_id]['Category']] += 1

        for cat in self.categories:
            category_percent = (category_counts[cat] / total_asked) * 100 if total_asked else 0.0
            print(f"Category '{cat}' represented {category_percent:.2f}% ({category_counts[cat]} questions)")

    def start_test(self):
        """Reset all per-run state and show the first question, if one is available."""
        self.correct_answers = 0
        self.asked_questions = []
        self.answered_questions = set()
        self.correct_difficulty_indices = []
        self.current_difficulty = 0.5  # Reset starting difficulty
        self.past_categories = []  # Reset past categories
        self.category_counts = {cat: 0 for cat in self.categories}  # Reset category counts
        first_question = self.get_next_question()
        if first_question:
            self.ask_question(first_question)

# Demo run: build the step-difficulty adaptive test from the loaded question
# records and show the first question.
test = AdaptiveTest(questions=questions_list)
test.start_test()


Test completed! You answered 2 out of 5 questions correctly.
Your final score is: 1.09
Category 'Oncology' represented 20.00% (1 questions)
Category 'Biology' represented 20.00% (1 questions)
Category 'Orthopedic' represented 20.00% (1 questions)
Category 'Gynaecology' represented 20.00% (1 questions)
Category 'Psychology' represented 20.00% (1 questions)


In [None]:
import gurobipy as gp
from gurobipy import GRB
import ipywidgets as widgets
from IPython.display import display, clear_output
from functools import partial
import numpy as np

class AdaptiveTest2PL:
    """Interactive adaptive quiz driven by a two-parameter logistic (2PL) IRT model.

    Questions are dicts with the keys ``question_id``, ``Question``,
    ``Category``, ``Option A``..``Option D``, ``Correct Option``,
    ``difficulty_index`` (used as the 2PL ``b``) and ``discriminatory_index``
    (used as the 2PL ``a``). The next question is chosen by a small Gurobi
    MILP that maximises item information at the current ability estimate
    while penalising unbalanced category representation. Answers are
    collected through ipywidgets buttons.

    Args:
        questions: List of question dicts.
        total_questions: Maximum number of questions to ask.
    """

    def __init__(self, questions, total_questions=5):
        self.questions = questions
        self.total_questions = total_questions
        self.asked_questions = []  # question_id values, in the order asked
        self.answered_questions = set()  # question_id values already scored
        self.correct_answers = 0
        self.ability_estimate = 0.0  # Initial ability estimate
        self.categories = list(set(q['Category'] for q in questions))
        self.past_categories = []  # List to track past question categories
        self.category_counts = {cat: 0 for cat in self.categories}  # Track category counts

    def adjust_ability(self, previous_ability, correct, question):
        """Return the ability estimate after one response.

        The estimate moves by ``learning_rate * (correct - P(correct))``: up
        after a correct answer, down after an incorrect one, and by more when
        the outcome was less expected. This is a fixed-step heuristic, not a
        full Bayesian posterior update.
        """
        learning_rate = 0.1
        return previous_ability + learning_rate * (correct - self.probability_correct(previous_ability, question))

    def probability_correct(self, ability, question):
        """Return the 2PL probability of a correct answer, 1 / (1 + exp(-a * (ability - b)))."""
        a = question['discriminatory_index']
        b = question['difficulty_index']
        return 1 / (1 + np.exp(-a * (ability - b)))

    def setup_milp_model(self, available_questions):
        """Build the MILP that picks exactly one of ``available_questions``.

        The objective maximises the chosen question's expected information
        gain minus a penalty of 100 per unit by which a category's running
        count falls below 15% or rises above 35% of the questions asked
        (counting the one being selected).

        Returns:
            ``(model, x)`` where ``x[i]`` is the binary selection variable of
            ``available_questions[i]``.
        """
        model = gp.Model("Question_Selection")

        # One binary selection variable per candidate question.
        x = model.addVars(len(available_questions), vtype=GRB.BINARY, name="x")

        # Objective: Maximize the expected information gain
        obj = gp.quicksum(self.expected_information_gain(available_questions[i], self.ability_estimate) * x[i] for i in range(len(available_questions)))

        # Dynamic penalties for category representation
        total_asked = len(self.asked_questions) + 1  # Include the next question to be asked
        min_proportion = 0.15 * total_asked
        max_proportion = 0.35 * total_asked

        for cat in self.categories:
            category_questions = [i for i in range(len(available_questions)) if available_questions[i]['Category'] == cat]
            min_needed = max(0, min_proportion - self.category_counts[cat])
            max_needed = max_proportion - self.category_counts[cat]
            deviation_below = model.addVar(vtype=GRB.CONTINUOUS, name=f"deviation_below_{cat}")
            deviation_above = model.addVar(vtype=GRB.CONTINUOUS, name=f"deviation_above_{cat}")

            model.addConstr(deviation_below >= min_needed - gp.quicksum(x[i] for i in category_questions))
            model.addConstr(deviation_above >= gp.quicksum(x[i] for i in category_questions) - max_needed)

            obj -= 100 * deviation_below  # Penalize deviation below
            obj -= 100 * deviation_above  # Penalize deviation above

        model.setObjective(obj, GRB.MAXIMIZE)

        # Constraints: Ensure only one question is selected
        model.addConstr(gp.quicksum(x[i] for i in range(len(available_questions))) == 1)

        return model, x

    def expected_information_gain(self, question, ability):
        """Return the 2PL item information ``a**2 * p * (1 - p)`` at ``ability``."""
        p = self.probability_correct(ability, question)
        a = question['discriminatory_index']  # Use the existing 'discriminatory_index'
        return a**2 * p * (1 - p)

    def get_next_question(self):
        """Select, record and return the next unasked question, or ``None`` when all were asked.

        If the solver does not report an optimal solution, the unasked
        question with the highest expected information gain is used instead,
        ignoring the category-balance penalties.
        """
        available_questions = [q for q in self.questions if q['question_id'] not in self.asked_questions]

        if not available_questions:
            print("No more questions available.")
            return None

        # Select a question based on expected information gain
        model, x = self.setup_milp_model(available_questions)
        model.optimize()

        if model.status == GRB.OPTIMAL:
            selected_question_index = next(i for i in range(len(available_questions)) if x[i].X > 0.5)
            selected_question = available_questions[selected_question_index]
        else:
            # Fallback so a solver failure does not silently end the test.
            print("No feasible solution found by the MILP model. Falling back to the most informative question.")
            selected_question = max(
                available_questions,
                key=lambda q: self.expected_information_gain(q, self.ability_estimate),
            )

        self.asked_questions.append(selected_question['question_id'])
        self.past_categories.append(selected_question['Category'])
        self.category_counts[selected_question['Category']] += 1  # Update the count for the selected category
        return selected_question

    def ask_question(self, question):
        """Display ``question`` and one button per option; a click calls ``check_answer``."""
        clear_output(wait=True)  # Clear output before printing question
        print(f"Question: {question['Question']}, Category: {question['Category']}")  # Print the question
        for idx, option in enumerate([question['Option A'], question['Option B'], question['Option C'], question['Option D']]):
            button = widgets.Button(description=f"{chr(65+idx)}: {option}")
            # Bind the question and the option letter; the clicked button is passed last.
            button.on_click(partial(self.check_answer, question, chr(65+idx)))
            button.layout.width = 'auto'
            display(button)  # Display the options

    def check_answer(self, question, answer, b):
        """Score ``answer`` ('A'..'D') for ``question``, update the ability estimate and move on.

        ``b`` is the clicked button supplied by ipywidgets; it is not used.
        Repeated clicks for a question that was already scored are ignored.
        """
        question_id = question['question_id']
        if question_id in self.answered_questions:
            return
        self.answered_questions.add(question_id)

        # The correct letter is the last character of e.g. "Option B"; strip
        # first so trailing whitespace does not become the "letter".
        correct_option_letter = str(question['Correct Option']).strip()[-1:].upper()
        correct = (answer == correct_option_letter)
        if correct:
            self.correct_answers += 1

        self.ability_estimate = self.adjust_ability(self.ability_estimate, correct, question)

        if len(self.asked_questions) < self.total_questions:
            next_question = self.get_next_question()
            if next_question:
                self.ask_question(next_question)
            else:
                self.finish_test()
        else:
            self.finish_test()

    def finish_test(self):
        """Print the number of correct answers, the ability estimate and the per-category share of asked questions."""
        clear_output(wait=True)
        # Report against the questions actually asked; the test can end early
        # when no question remains.
        total_asked = len(self.asked_questions)
        print(f"Test completed! You answered {self.correct_answers} out of {total_asked} questions correctly.")
        print(f"Your estimated ability level is: {self.ability_estimate:.2f}")

        # Print final category representation. Look questions up by id rather
        # than using the id as a list position, which only works when ids
        # happen to equal positions.
        questions_by_id = {q['question_id']: q for q in self.questions}
        category_counts = {cat: 0 for cat in self.categories}
        for q_id in self.asked_questions:
            category_counts[questions_by_id[q_id]['Category']] += 1

        for cat in self.categories:
            category_percent = (category_counts[cat] / total_asked) * 100 if total_asked else 0.0
            print(f"Category '{cat}' represented {category_percent:.2f}% ({category_counts[cat]} questions)")

    def start_test(self):
        """Reset all per-run state and show the first question, if one is available."""
        self.correct_answers = 0
        self.asked_questions = []
        self.answered_questions = set()
        self.ability_estimate = 0.0  # Reset starting ability
        self.past_categories = []  # Reset past categories
        self.category_counts = {cat: 0 for cat in self.categories}  # Reset category counts
        first_question = self.get_next_question()
        if first_question:
            self.ask_question(first_question)

# Demo run: build the 2PL-based adaptive test from the loaded question records
# and show the first question.
test = AdaptiveTest2PL(questions=questions_list)
test.start_test()

Test completed! You answered 3 out of 5 questions correctly.
Your estimated ability level is: 0.10
Category 'Oncology' represented 20.00% (1 questions)
Category 'Biology' represented 20.00% (1 questions)
Category 'Orthopedic' represented 20.00% (1 questions)
Category 'Gynaecology' represented 20.00% (1 questions)
Category 'Psychology' represented 20.00% (1 questions)
