### This notebook can be used to generate questions for several topics at a time

In [None]:
import os
from dotenv import load_dotenv

# Load settings via python-dotenv before the os.getenv() lookups below read them.
load_dotenv()

# Database connection settings, read from the environment (None when unset).
DATABASE = os.environ.get("DB_NAME")
USER = os.environ.get("DB_USER")
PASSWORD = os.environ.get("DB_PASSWORD")
HOST = os.environ.get("DB_HOST")
PORT = os.environ.get("DB_PORT")

# API keys for Together and OpenAI, also taken from the environment.
TOGETHER_KEY = os.environ.get("TOGETHER_API_KEY")
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
from urllib.parse import quote_plus

from langchain_community.utilities import SQLDatabase

# Fail early with a readable message instead of building a URI full of "None".
_db_settings = {
    "DB_NAME": DATABASE,
    "DB_USER": USER,
    "DB_PASSWORD": PASSWORD,
    "DB_HOST": HOST,
    "DB_PORT": PORT,
}
_missing_settings = [name for name, value in _db_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment: " + ", ".join(_missing_settings)
    )

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# user name or password cannot be mistaken for URI delimiters.
postgres_uri = (
    f"postgresql://{quote_plus(USER)}:{quote_plus(PASSWORD)}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

# Shared database handle used by the query tools in the cells below.
db = SQLDatabase.from_uri(postgres_uri)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from chat_together import ChatTogether

from typing import List

@tool
def get_topic_tool(input: str) -> str:
    """
    Analyze user input and identify the physics topic mentioned in the input.
    """
    # The docstring above doubles as the tool description picked up by @tool,
    # so implementation notes live in comments instead.
    system_prompt = """Analyze user input and identify the physics topic mentioned in the input. Do not return the input text.
        For example:
        - "Create questions about velocity" -> "velocity"
        - "Explain displacement" -> "displacement"
        
        Return only the identified physics topic name as given in the input.
        
        Make sure not to add anything extra to the output."""

    # System instructions followed by whatever messages the caller supplies.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("placeholder", "{messages}"),
        ]
    )
    # temperature=0 keeps the extracted topic as repeatable as possible.
    llm = ChatTogether(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        temperature=0,
    )
    chain = prompt | llm

    reply = chain.invoke({"messages": [("user", input)]})
    return reply.content

# Free-text request from the user. Named `user_input` so the builtin `input()`
# is not shadowed for the rest of the notebook session.
user_input = "Give your input here to create questions"

# Tools are Runnables: go through `.invoke`, because calling the tool object
# directly relies on the deprecated `BaseTool.__call__`.
topic = get_topic_tool.invoke({"input": user_input})


In [None]:
from langchain_together import ChatTogether
from langchain.prompts import ChatPromptTemplate

def _format_topic_rows(raw_result):
    """Render the topics query result as one line per topic for the prompt.

    By default ``SQLDatabase.run_no_throw`` hands the rows back as a single
    string (the ``repr`` of a list of tuples). Joining that string directly
    would put every *character* on its own line, so the text is parsed back
    into rows first. If it cannot be parsed it is passed through unchanged.
    """
    import ast

    rows = raw_result
    if isinstance(raw_result, str):
        try:
            rows = ast.literal_eval(raw_result)
        except (ValueError, SyntaxError, TypeError):
            return raw_result
    if not isinstance(rows, (list, tuple)):
        return str(raw_result)

    lines = []
    for row in rows:
        if isinstance(row, (list, tuple)) and len(row) == 2:
            # Column order follows the SELECT: topic_name, topic_id.
            topic_name, topic_id = row
            lines.append(f"topic_id={topic_id} | topic_name={topic_name}")
        else:
            lines.append(str(row))
    return "\n".join(lines)


@tool
def find_matching_topic_id(topic_of_interest: str) -> str:
    """
    Find most matching topic using LLM reasoning.
    """
    # The docstring above doubles as the tool description picked up by @tool.
    #
    # Returns the LLM's answer with surrounding whitespace removed: a topic_id
    # from the `topics` table or 'NO_MATCH'. Returns "No topics found in
    # database" when the table is empty, and 'NO_MATCH' when the lookup fails.

    # Get all topics from database
    query = "SELECT topic_name, topic_id FROM topics;"
    result = db.run_no_throw(query)

    if not result:
        return "No topics found in database"

    # run_no_throw reports SQL failures as an "Error: ..." string, which is
    # truthy and must not be handed to the LLM as if it were the topic list.
    if isinstance(result, str) and result.startswith("Error:"):
        print(f"Could not load topics: {result}")
        return "NO_MATCH"

    topics_text = _format_topic_rows(result)

    # Initialize LLM
    llm = ChatTogether(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo", temperature=0
    )

    # Create prompt
    prompt = ChatPromptTemplate.from_template(
        """You are a hierarchical topic matching expert. Find the most relevant topic from the available list.

            Topic to match: {topic_of_interest}

            Available topics:
            {topics}

            Instructions:
            
            Match the topic by considering:
                - Exact matches
                - Parent concepts that encompass the given topic
                - Fundamental principles that explain or govern the topic
                - Related physical phenomena or laws that include this topic
                
                For example:
                    - "inertia" should match with "Newton's First Law of Motion"
                    - "weight" should match with "Newton's Second Law of Motion"
                    - "action-reaction" should match with "Newton's Third Law of Motion"
            
            Return ONLY the exact matching topic_ID as it is without adding additional space without any additional information.

            If you find more than one matching topic, return the one that comes first.

            If none match well, return 'NO_MATCH'.
        """
    )

    # Create chain and run
    chain = prompt | llm

    response = chain.invoke({
        "topic_of_interest": topic_of_interest,
        "topics": topics_text,
    }).content

    # The ID is later compared against the database, so drop stray whitespace.
    return response.strip()

# Resolve the free-text topic to a topic_id from the database. `.invoke` is
# used because calling the tool object directly relies on the deprecated
# `BaseTool.__call__`.
matched_topic_id = find_matching_topic_id.invoke({"topic_of_interest": topic})
print(matched_topic_id)

In [None]:
# The topic ID comes from an LLM, so treat it as untrusted text: trim stray
# whitespace and double any single quote so it cannot terminate the SQL
# string literal it is interpolated into.
safe_topic_id = str(matched_topic_id).strip().replace("'", "''")

# Fetch up to 10 subtopics (with their teaching metadata) for the matched topic.
query = f"""WITH topic_search AS
(SELECT topic_id FROM topics
WHERE topic_id ILIKE '{safe_topic_id}')
SELECT s.subtopic_name,
s.description,
s.mathematical_formulation,
s.prerequisites,
s.misconceptions,
s.engineering_applications,
s.cross_cutting_topics,
s.analogies
FROM subtopics s
JOIN topic_search ts
ON s.topic_id = ts.topic_id
LIMIT 10;"""

In [None]:
from langchain_core.tools import tool

# this tool can run a SQL query and get the output

@tool
def db_query_tool(query: str) -> str:
    """
    Execute a SQL query against the database and get back the result.
    If the query is not correct, an error message will be returned.
    If an error is returned, rewrite the query, check the query, and try again.
    """
    # The docstring above doubles as the tool description picked up by @tool.
    #
    # run_no_throw reports SQL failures as an "Error: ..." string instead of
    # raising, which is the behaviour the description promises; plain run()
    # would propagate the exception to the caller.
    result = db.run_no_throw(query)
    if not result:
        # An empty result (no rows) is reported the same way as a failure.
        return "Error: Query failed. Please rewrite your query and try again."
    return result

# Subtopic rows for the matched topic, used as grounding context for question
# generation. `.invoke` replaces the deprecated direct call on the tool object.
context = db_query_tool.invoke({"query": query})

In [None]:
class QuestionGenerator:
    """Build a Bloom's-taxonomy multiple-choice question by prompting an LLM.

    ``llm`` only needs an ``invoke(prompt)`` method whose return value exposes
    the reply text as ``.content``.
    """

    def __init__(self, llm):
        self.llm = llm
        # The template is rendered with str.format, so literal braces in the
        # example JSON are written as "{{" / "}}" and come out as single braces.
        self.base_prompt = """
        
        Create one multiple choice question for {skill} level of Bloom's taxonomy for a 9th grade Physics student in India on {topic}. 

        Use {context} for accuracy in creating the questions and distractors. 

        Specifically use information from the prerequisites, misconceptions, engineering_applications, cross_cutting_topics, analogies for the generation.
        
        Make sure you don't specify the topic in the question, like 'according to Newton's first law of motion' or 'according to work-energy theorem'.

        Requirements:

        - Student should only be able to answer if they've mastered the concept
        - Each distractor must address either: 
            - A specific misconception about {topic} 
            - A prerequisite knowledge gap
        - Language and complexity suitable for 9th grade
        - Unique question, don't generate questions that are in or similar to questions in question history - {question_history}
        - Accurate answer and not ambiguous
        - Physics context and application

        Make sure to include the question, answer, distractors, misconceptions, and explanation in the output in a JSON format.
        
        Output format:
        
        {{
            "question": "",
            "skill": "",
            "options": {{"a": "", "b": "", "c": "", "d": ""}},
            "correct": "",
            "explanation": {{
                "correct": "",
                "a": "misconception/prerequisite tested",
                "b": "", "c": "", "d": ""
            }}
        }}
        
        For {skill} level, ensure that the question meets the skill requirement: {skill_requirement}.

        Make sure there are no additional information being other than the output in the format that is asked for.
        
        """

    @staticmethod
    def _parse_json_reply(text):
        """Parse the LLM reply as JSON, tolerating code fences or extra prose.

        Raises ``json.JSONDecodeError`` when no JSON object can be recovered.
        """
        cleaned = text.strip()
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Models often wrap the object in ```json fences or add a lead-in
            # sentence; retry on the outermost {...} span.
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(cleaned[start:end + 1])

    def generate_question(self, skill, skill_requirement, topic, context, question_history):
        """Ask the LLM for one question and return it as a parsed JSON object.

        Args:
            skill: Bloom's taxonomy level the question should target.
            skill_requirement: Description of what that level demands.
            topic: Physics topic the question is about.
            context: Reference material interpolated into the prompt.
            question_history: Earlier questions the new one must differ from.

        Raises:
            json.JSONDecodeError: If the reply does not contain valid JSON.
        """
        prompt = self.base_prompt.format(
            skill=skill,
            skill_requirement=skill_requirement,
            topic=topic,
            context=context,
            question_history=question_history,
        )
        response = self.llm.invoke(prompt)
        return self._parse_json_reply(response.content)

class QuestionEvaluator:
    """Ask an LLM to judge a generated question for uniqueness and correctness.

    ``llm`` only needs an ``invoke(prompt)`` method whose return value exposes
    the reply text as ``.content``.
    """

    def __init__(self, llm):
        self.llm = llm
        # Rendered with str.format: "{{" / "}}" become single literal braces.
        # The example uses JSON booleans (true/false), not Python's True/False,
        # because the reply is parsed with json.loads.
        self.evaluation_prompt = """
        Evaluate the question meticulously:

        Question to evaluate:
        {question}

        Evaluate the question based on the following criteria:
        
        1. Uniqueness Check:
           - Compare with previous questions: {previous_questions}
           - If there are no previous questions, mark as unique
           - Check for similar concepts, context, or wording
           - Verify different application/scenario
           
        2. Answer Check:
            - Correct answer must be unique and accurate
            - Explanation must be clear and concise
            
        If the question give a unique question that is not present before and the right answer for the question among the options, return a JSON object with "valid": true. 
        
        Otherwise, return a JSON object with "valid": false in the format:
        
        {{
            "valid": true/false,
            "1": {{
                "uniqueness": true/false,
                "uniqueness_issues": "describe the issues when false, otherwise leave empty"
            }},
            "2": {{
                "answer": true/false,
                "answer_issues": "describe the issues when false, otherwise leave empty"
            }}
        }}
        
        Make sure there are no additional information being other than the output in the format that is asked for.
        
        """

    @staticmethod
    def _parse_json_reply(text):
        """Parse the LLM reply as JSON, tolerating code fences or extra prose."""
        cleaned = text.strip()
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Retry on the outermost {...} span (handles ```json fences etc.).
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(cleaned[start:end + 1])

    def evaluate_question(self, question, previous_questions):
        """Return the evaluation dict for ``question``.

        Args:
            question: The generated question (JSON-serialisable).
            previous_questions: Earlier questions to compare against.

        Returns:
            The parsed evaluation. If the LLM call fails or its reply is not a
            JSON object, an all-failed fallback with ``"valid": False`` is
            returned instead of raising.
        """
        prompt = self.evaluation_prompt.format(
            question=json.dumps(question),
            previous_questions=json.dumps(previous_questions),
        )
        try:
            response = self.llm.invoke(prompt).content
            evaluation = self._parse_json_reply(response)
            if not isinstance(evaluation, dict):
                raise ValueError("evaluation reply is not a JSON object")
        except Exception as exc:
            # Deliberately best-effort: any failure counts as "not valid" so
            # the caller can retry, but the reason is surfaced.
            print(f"Evaluation failed: {exc}")
            evaluation = {
                "valid": False,
                "1": {"uniqueness": False, "uniqueness_issues": "Failed to evaluate"},
                "2": {"answer": False, "answer_issues": "Failed to evaluate"},
                "3": {"distractor": False, "distractor_issues": "Failed to evaluate"},
                "4": {"grade_level": False, "grade_issues": "Failed to evaluate"},
                "5": {"skill_alignment": False, "skill_issues": "Failed to evaluate"},
            }
        return evaluation
   
class QuestionFixer:
    """Repair a question that failed evaluation by re-prompting the LLM.

    ``llm`` only needs an ``invoke(prompt)`` method whose return value exposes
    the reply text as ``.content``.
    """

    def __init__(self, llm):
        self.llm = llm
        self.fix_prompts = {
            "uniqueness": """
           Current question has uniqueness issue: {question}
           Previous questions: {previous_questions}
           
           Create new {skill} level question about {topic} that is distinctly different.
           Must use exact JSON format as current question.
           """,

            "answer": """
           Question with answer issue: {question}
           
           Modify only:
           1. Correct answer option
           2. Correct answer explanation
           
           Return in same JSON format with only these changes."""
        }

    @staticmethod
    def _parse_json_reply(text):
        """Parse the LLM reply as JSON, tolerating code fences or extra prose."""
        cleaned = text.strip()
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Retry on the outermost {...} span (handles ```json fences etc.).
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(cleaned[start:end + 1])

    def fix_question(self, question, evaluation, previous_questions=None, topic=None):
        """Return a repaired version of ``question`` based on ``evaluation``.

        A uniqueness failure produces a brand-new question; otherwise an answer
        failure rewrites only the correct option and its explanation. When
        neither check is flagged the question is returned unchanged.

        Args:
            question: The question dict that failed evaluation.
            evaluation: Evaluator output with the "1"/"2" sections.
            previous_questions: Earlier questions the new one must differ from.
            topic: Topic being assessed. Optional for backward compatibility;
                when omitted the prompt refers to the current question's topic.
        """
        # Sections missing from the evaluation count as passed rather than
        # raising KeyError.
        uniqueness_ok = evaluation.get("1", {}).get("uniqueness", True)
        answer_ok = evaluation.get("2", {}).get("answer", True)

        if not uniqueness_ok:
            return self._generate_new_question(question, previous_questions, topic)
        if not answer_ok:
            return self._fix_answer(question)
        return question

    def _generate_new_question(self, question, previous, topic=None):
        """Ask the LLM for a distinctly different question at the same skill level."""
        # The topic is passed in explicitly; a module-level `topic` may belong
        # to a different topic than the one being assessed.
        if topic is None:
            topic = "the same topic as the current question"
        prompt = self.fix_prompts["uniqueness"].format(
            question=json.dumps(question),
            previous_questions=json.dumps(previous or []),
            skill=question["skill"],
            topic=topic,
        )
        response = self.llm.invoke(prompt)
        return self._parse_json_reply(response.content)

    def _fix_answer(self, question):
        """Return a copy of ``question`` with the correct answer and its explanation replaced."""
        prompt = self.fix_prompts["answer"].format(
            question=json.dumps(question)
        )
        updated = self._parse_json_reply(self.llm.invoke(prompt).content)
        # Work on a copy: mutating the caller's dict would make a later
        # "did anything change?" comparison always report no change.
        fixed = dict(question)
        fixed["correct"] = updated["correct"]
        fixed["explanation"] = {
            **question.get("explanation", {}),
            "correct": updated["explanation"]["correct"],
        }
        return fixed


class AssessmentGenerator:
    """Generate one validated question per skill level for a topic."""

    def __init__(self, llm, skills, skill_requirements):
        self.generator = QuestionGenerator(llm)
        self.evaluator = QuestionEvaluator(llm)
        self.fixer = QuestionFixer(llm)
        self.skills = skills
        self.skill_requirements = skill_requirements

    def generate_assessment(self, topic, context):
        """Return ``{"topic": topic, "questions": [...]}`` with one question per skill.

        Skills for which no valid question could be produced are skipped.
        """
        assessment = {
            "topic": topic,
            "questions": []
        }

        for skill in self.skills:
            skill_requirement = self.skill_requirements[skill]
            print(f"\n=== Generating {skill} question ===")
            question = self._generate_valid_question(skill, skill_requirement, topic, context, assessment["questions"])
            if question:
                assessment["questions"].append(question)
                print(f"Successfully generated {skill} question")
            else:
                print(f"Failed to generate valid {skill} question after max attempts")

        return assessment

    def _generate_valid_question(self, skill, skill_requirement, topic, context, previous_questions, max_attempts=2, max_fix_rounds=3):
        """Generate a question, repairing it until it validates or budgets run out.

        Args:
            max_attempts: How many fresh questions to generate.
            max_fix_rounds: Upper bound on repair rounds per generated question,
                so a model that keeps returning invalid questions cannot loop
                forever.

        Returns:
            A question dict that passed evaluation, or ``None``.
        """
        question_history = [q['question'] for q in previous_questions] if previous_questions else []

        for attempt in range(max_attempts):
            print(f"\nAttempt {attempt + 1}/{max_attempts}")
            try:
                question = self.generator.generate_question(skill, skill_requirement, topic, context, question_history)
                evaluation = self.evaluator.evaluate_question(question, question_history)

                if evaluation.get("valid", False):
                    return question

                for _ in range(max_fix_rounds):
                    fixed_question = self.fixer.fix_question(
                        question, evaluation, question_history, topic=topic
                    )
                    fixed_evaluation = self.evaluator.evaluate_question(fixed_question, question_history)

                    if fixed_evaluation.get("valid", False):
                        return fixed_question

                    # The fixer had nothing to change; repeating would not help.
                    if fixed_question == question:
                        break

                    question = fixed_question
                    evaluation = fixed_evaluation

            except Exception as e:
                # Best-effort: a failed attempt is reported and the next one starts.
                print(f"Error: {str(e)}")
                continue
        return None

In [None]:
import pandas as pd
import json

# Bloom's taxonomy levels, in the order questions are generated.
skills = ["Remember", "Understand", "Apply", "Analyze", "Evaluate"]

# Chat model used for question generation, evaluation and repair.
llm = ChatTogether(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
    temperature=0.75,
)

# What a question must test at each Bloom's taxonomy level. These sentences are
# interpolated verbatim into the generation prompt.
skill_requirements = {
    "Remember": "Question tests ability to retrieve relevant knowledge from long-term memory.",
    # Fixed typo: "onstruct" -> "construct".
    "Understand": "Question tests ability to construct meaning from instructional messages, including oral, written, and graphic communication.",
    "Apply": "Question tests ability to carry out or use a procedure in a given situation.",
    "Analyze": "Question tests ability to break material into foundational parts and determine how parts relate to one another and the overall structure or purpose.",
    "Evaluate": "Question tests ability to make judgments based on criteria and standards.",
}

# Topic names to generate assessments for; fill this list in before running.
topics = []

def _fetch_topic_context(topic_name):
    """Return the subtopic rows used as generation context for ``topic_name``.

    Repeats, per topic, the lookup the single-topic cells perform: match the
    name to a topic_id, then query that topic's subtopics.
    """
    matched_id = find_matching_topic_id.invoke({"topic_of_interest": topic_name})
    # The ID comes from an LLM: trim it and double single quotes so it cannot
    # terminate the SQL string literal.
    safe_id = str(matched_id).strip().replace("'", "''")
    subtopic_query = f"""WITH topic_search AS
(SELECT topic_id FROM topics
WHERE topic_id ILIKE '{safe_id}')
SELECT s.subtopic_name,
s.description,
s.mathematical_formulation,
s.prerequisites,
s.misconceptions,
s.engineering_applications,
s.cross_cutting_topics,
s.analogies
FROM subtopics s
JOIN topic_search ts
ON s.topic_id = ts.topic_id
LIMIT 10;"""
    return db_query_tool.invoke({"query": subtopic_query})


def generate_assessment(llm):
    """Generate assessments for every entry in ``topics`` and save them to disk.

    For each topic a ``LLM_<topic>_<model>.json`` file is written; all questions
    are also flattened into ``LLM_all_topics_<model>.csv`` (skipped when no
    question was produced).

    Args:
        llm: Chat model; its ``model_name`` is used in the output file names.
    """
    all_responses = []
    model_name = llm.model_name.split('/')[-1]

    # The generator holds no per-topic state, so one instance serves all topics.
    generator = AssessmentGenerator(llm, skills, skill_requirements)

    for topic in topics:
        print(f"\n=== Generating assessment for {topic} ===")

        # AssessmentGenerator.generate_assessment requires the context as its
        # second argument; fetch it per topic so each topic gets its own rows.
        context = _fetch_topic_context(topic)
        assessment = generator.generate_assessment(topic, context)

        json_filename = f"LLM_{topic}_{model_name}.json"
        # ensure_ascii=False can emit non-ASCII text, so pin the encoding
        # instead of relying on the platform default.
        with open(json_filename, "w", encoding="utf-8") as f:
            json.dump(assessment, f, indent=2, ensure_ascii=False)
        all_responses.extend(
            {"topic": topic, **question}
            for question in assessment["questions"]
        )

    # Flatten every question into one CSV row.
    csv_data = []
    for response in all_responses:
        options = response.get("options", {})
        explanations = response.get("explanation", {})

        row = {
            "topic": response["topic"],
            "skill": response.get("skill", ""),
            "question": response.get("question", ""),
            "correct_option": response.get("correct", ""),
            "correct_explanation": explanations.get("correct", ""),
        }

        # Add option and explanation columns for each letter
        for letter in ("a", "b", "c", "d"):
            row[f"option_{letter}"] = options.get(letter, "")
            row[f"explanation_{letter}"] = explanations.get(letter, "")

        csv_data.append(row)

    # Save consolidated CSV
    csv_filename = f"LLM_all_topics_{model_name}.csv"
    if csv_data:
        df = pd.DataFrame(csv_data)
        df.to_csv(csv_filename, index=False)

In [None]:
# Generate and save the assessments for every entry in `topics` using `llm`.
generate_assessment(llm)