# Question revision chain

- Get a question.
- LLM A and LLM B verify whether the question is OK:
    - check whether more than one option is correct
    - suggest modifications to improve the question and its options
    - suggest a detailed explanation to give to the student
- Verify the two reviews:
    - if they diverge, call another LLM to cast the tie-breaking vote
    - if they agree, merge them using OpenAI

In [46]:
from dotenv import load_dotenv
import json_repair
import json
import os
import pandas as pd

from pydantic import BaseModel, Field
from typing import Literal

In [7]:
# Load the question bank from the CSV file, then print a summary of its columns.
QUESTIONS_CSV = "questions.csv"
df = pd.read_csv(QUESTIONS_CSV)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 953 entries, 0 to 952
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id                 953 non-null    int64 
 1   created_at         953 non-null    object
 2   subject_matter     953 non-null    object
 3   topic_description  953 non-null    object
 4   level              953 non-null    object
 5   question           953 non-null    object
 6   type               953 non-null    object
 7   answer_correct     953 non-null    object
 8   explanation        953 non-null    object
 9   answer_a           953 non-null    object
 10  answer_b           953 non-null    object
 11  answer_c           953 non-null    object
 12  answer_d           953 non-null    object
dtypes: int64(1), object(12)
memory usage: 96.9+ KB


In [60]:
# Use the first row of the question bank as the sample question for the
# rest of the notebook.
first_row = 0
question = df.iloc[first_row]
question

id                                                                  11
created_at                               2024-09-22 12:31:43.546285+00
subject_matter                                             Probability
topic_description                       Understanding Confusion Matrix
level                                                     1 - Remember
question             Which term represents correctly classified pos...
type                                                  multiple_options
answer_correct                                                       a
explanation          True Positives (TP) represent the number of ca...
answer_a                                                True Positives
answer_b                                               False Positives
answer_c                                               False Negatives
answer_d                                                True Negatives
Name: 0, dtype: object

In [61]:
# Show the full question text (the Series display above truncates it).
question["question"]

'Which term represents correctly classified positive cases?'

## Gemini

In [8]:
import google.generativeai as genai
from google.ai.generativelanguage_v1beta.types import content

# Load variables from a local .env file (if there is one) before reading the
# key: `load_dotenv` is imported at the top of the notebook but was never
# called, so on a fresh kernel the lookup below raised KeyError whenever the
# key was only defined in .env.
load_dotenv()
key_gemini = os.environ["GOOGLE_API_KEY"]

genai.configure(api_key=key_gemini)

In [33]:
# Response schema passed to Gemini. It uses the same field names as
# `ExplanationSchema`, which is defined further down for the Llama prompt.

# One free-text explanation per option; the four entries differ only by letter.
_option_explanations = {
    f"option_{letter}_explanation": content.Schema(
        type=content.Type.STRING,
        description=f"Detailed explanation of why the option '{letter}' is correct or not.",
    )
    for letter in "abcd"
}

schema = content.Schema(
    type=content.Type.OBJECT,
    properties={
        **_option_explanations,
        "just_one_option_correct": content.Schema(
            type=content.Type.BOOLEAN,
            description="If there is just one option correct.",
        ),
        "correct_answer": content.Schema(
            type=content.Type.STRING,
            # Constrain the value to the four option letters, matching the
            # Literal['a', 'b', 'c', 'd'] used by ExplanationSchema; before,
            # only the description hinted at the allowed values.
            enum=["a", "b", "c", "d"],
            description="The correct option. It could be 'a', 'b', 'c', 'd'.",
        ),
    },
    # Every field is mandatory, in the same order as the properties above.
    required=[
        *_option_explanations,
        "just_one_option_correct",
        "correct_answer",
    ],
)

In [34]:
# Generation settings for Gemini: sampling parameters plus the JSON response
# contract described by `schema`.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    response_schema=schema,
    response_mime_type="application/json",
)

# Model handle used for the Gemini review.
llm_gemini = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

In [63]:
# Review prompt template. The placeholders {question} and {option_a} ..
# {option_d} are filled in with `str.format` further down. The wording is kept
# exactly as-is because it is sent verbatim to the models.
prompt_revision = """\
# BACKGROUND INFORMATION OF YOUR ROLE:
You are an expert in machine learning.
You are verifying if a question from a bank of question is ok to be used in a job interview.
your task is to analyse the provided question to verify if there is some problem.
You shoud verify if there is just one option correct or if there is more than one.
The question could be perfect fine, so you could say it too.
Also, you should give a detailed explanation for your answer.
# QUESTION

{question}

# Option a
{option_a}

# Option b
{option_b}

# Option c
{option_c}

# Option d
{option_d}

"""

In [50]:
# Fill the review template with the sample question and its four options.
# Template placeholder `option_x` is fed from the `answer_x` column.
option_texts = {f"option_{letter}": question[f"answer_{letter}"] for letter in "abcd"}
prompt_revision_fromatted = prompt_revision.format(
    question=question["question"],
    **option_texts,
)

prompt_revision_fromatted

'# BACKGROUND INFORMATION OF YOUR ROLE:\nYou are an expert in machine learning.\nYou are verifying if a question from a bank of question is ok to be used in a job interview.\nyour task is to analyse the provided question to verify if there is some problem.\nYou shoud verify if there is just one option correct or if there is more than one.\nThe question could be perfect fine, so you could say it too.\nAlso, you should give a detailed explanation for your answer.\n# QUESTION\n\n0    Which term represents correctly classified pos...\nName: question, dtype: object\n\n# Option a0    True Positives\nName: answer_a, dtype: object\n\n# Option b0    False Positives\nName: answer_b, dtype: object\n\n# Option c0    False Negatives\nName: answer_c, dtype: object\n\n# Option d0    True Negatives\nName: answer_d, dtype: object\n\n'

In [36]:
# Send the formatted review prompt to Gemini and display the raw response object.
response = llm_gemini.generate_content(contents=prompt_revision_fromatted)
response

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "{\"correct_answer\": \"a\", \"just_one_option_correct\": true, \"option_a_explanation\": \"True Positives (TP) represent correctly classified positive instances. In the context of machine learning classification, a positive instance is a data point that belongs to the target class.  When the model correctly identifies a positive instance, it's considered a true positive.  This is an important metric used in evaluating the performance of classification models.\", \"option_b_explanation\": \"False Positives (FP), also known as Type I errors, occur when the model incorrectly classifies a negative instance as positive.  For example, if a spam filter classifies a legitimate email as spam, that would be a false positive. False Positives are also a key metric for e

In [37]:
# The reply arrives as JSON text; parse it into a Python dict with
# json_repair and display the result.
gemini_raw_json = response.text
gemeni_analysis = json_repair.loads(gemini_raw_json)
gemeni_analysis

{'correct_answer': 'a',
 'just_one_option_correct': True,
 'option_a_explanation': "True Positives (TP) represent correctly classified positive instances. In the context of machine learning classification, a positive instance is a data point that belongs to the target class.  When the model correctly identifies a positive instance, it's considered a true positive.  This is an important metric used in evaluating the performance of classification models.",
 'option_b_explanation': 'False Positives (FP), also known as Type I errors, occur when the model incorrectly classifies a negative instance as positive.  For example, if a spam filter classifies a legitimate email as spam, that would be a false positive. False Positives are also a key metric for evaluating model performance.',
 'option_c_explanation': 'False Negatives (FN), also known as Type II errors, occur when the model incorrectly classifies a positive instance as negative. For example, if a cancer screening test fails to detect 

## Llama 3.1

In [43]:
# Structured review expected from the LLM: one explanation per option, a flag
# saying whether exactly one option is correct, and the letter of the correct
# option.
# NOTE: documented with comments rather than a class docstring on purpose; the
# JSON schema of this model is pasted into the prompt, and a class docstring
# would show up in that schema as its description.
class ExplanationSchema(BaseModel):
    option_a_explanation: str = Field(
        description="Detailed explanation of why the option 'a' is correct or not.",
    )
    option_b_explanation: str = Field(
        description="Detailed explanation of why the option 'b' is correct or not.",
    )
    option_c_explanation: str = Field(
        description="Detailed explanation of why the option 'c' is correct or not.",
    )
    option_d_explanation: str = Field(
        description="Detailed explanation of why the option 'd' is correct or not.",
    )
    just_one_option_correct: bool = Field(
        description="If there is just one option correct.",
    )
    correct_answer: Literal["a", "b", "c", "d"] = Field(
        description="The correct option. It could be 'a', 'b', 'c', or 'd'.",
    )


In [44]:
from groq import Groq

# Load variables from a local .env file (if there is one) before reading the
# key: `load_dotenv` is imported at the top of the notebook but was never
# called. Calling it again is harmless if it has already run.
load_dotenv()
key_groq = os.environ["GROQ_API_KEY"]

# Groq client used for the Llama review.
client = Groq(api_key=key_groq)

In [68]:
# Extend the base review template with a trailing section whose
# {output_format} placeholder receives the expected JSON schema.
output_format_section = "# OUTPUT FORMAT: json\n\n" + "{output_format}"
prompt_revision_with_json = "".join((prompt_revision, output_format_section))

prompt_revision_with_json

'# BACKGROUND INFORMATION OF YOUR ROLE:\nYou are an expert in machine learning.\nYou are verifying if a question from a bank of question is ok to be used in a job interview.\nyour task is to analyse the provided question to verify if there is some problem.\nYou shoud verify if there is just one option correct or if there is more than one.\nThe question could be perfect fine, so you could say it too.\nAlso, you should give a detailed explanation for your answer.\n# QUESTION\n\n{question}\n\n# Option a\n{option_a}\n\n# Option b\n{option_b}\n\n# Option c\n{option_c}\n\n# Option d\n{option_d}\n\n# OUTPUT FORMAT: json\n\n{output_format}'

In [69]:
# Build the Llama prompt: the sample question and its options, followed by
# the JSON schema of ExplanationSchema as the requested output format.
schema_as_json = json.dumps(ExplanationSchema.model_json_schema(), indent=2)
template_values = {f"option_{letter}": question[f"answer_{letter}"] for letter in "abcd"}
template_values["question"] = question["question"]
template_values["output_format"] = schema_as_json

prompt_revision_fromatted = prompt_revision_with_json.format(**template_values)

# print (rather than a bare expression) so the newlines are rendered.
print(prompt_revision_fromatted)

# BACKGROUND INFORMATION OF YOUR ROLE:
You are an expert in machine learning.
You are verifying if a question from a bank of question is ok to be used in a job interview.
your task is to analyse the provided question to verify if there is some problem.
You shoud verify if there is just one option correct or if there is more than one.
The question could be perfect fine, so you could say it too.
Also, you should give a detailed explanation for your answer.
# QUESTION

Which term represents correctly classified positive cases?

# Option a
True Positives

# Option b
False Positives

# Option c
False Negatives

# Option d
True Negatives

# OUTPUT FORMAT: json

{
  "properties": {
    "option_a_explanation": {
      "description": "Detailed explanation of why the option 'a' is correct or not.",
      "title": "Option A Explanation",
      "type": "string"
    },
    "option_b_explanation": {
      "description": "Detailed explanation of why the option 'b' is correct or not.",
      "title": 

In [70]:
response = client.chat.completions.create(
    # model="llama3-8b-8192",
    model="llama-3.1-70b-versatile",
    messages=[{
        "role": "user",
        "content": prompt_revision_fromatted
    }],
    temperature=0.5,
    # max_tokens=2420,
    top_p=0.95,
    stream=False,
    response_format={"type": "json_object"},
    stop=None,
)

completion = response.choices[0].message
question = json_repair.loads(completion.content)

In [71]:
questions

{'option_a_explanation': "Option 'a' is correct because True Positives represent the number of positive cases that were correctly classified as positive.",
 'option_b_explanation': "Option 'b' is incorrect because False Positives represent the number of negative cases that were incorrectly classified as positive.",
 'option_c_explanation': "Option 'c' is incorrect because False Negatives represent the number of positive cases that were incorrectly classified as negative.",
 'option_d_explanation': "Option 'd' is incorrect because True Negatives represent the number of negative cases that were correctly classified as negative.",
 'just_one_option_correct': True,
 'correct_answer': 'a'}