In [1]:
from langchain.prompts import PromptTemplate

In [2]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# later cells can read them (e.g. SUPABASE_URL / SUPABASE_KEY via os.environ).
load_dotenv()  # take environment variables from .env.

True

In [15]:
from langchain_core.pydantic_v1 import BaseModel, Field

# Schema for one generated TRUE/FALSE question. It is used as the element type
# of QuestionList.items, and QuestionList is the schema handed to
# with_structured_output(). The class docstring and the Field descriptions are
# part of that schema, so treat them as prompt text rather than as ordinary
# comments when editing.
class Question(BaseModel):
    """ A question about a domain of study."""

    # Short (5-10 words) label of the sub-topic the question belongs to.
    topic_description: str = Field(
        description="A sentence describing the sub-topic to which the question belongs. That means this sentence should specify in a granular level what specific sub-topic the question belongs to. It should be abstract in a way that other questions could be put in this description too. Use between 5 and 10 words."
    )
    # Difficulty label. Typed as a plain str: the 'easy'/'medium'/'hard'
    # restriction exists only in the description text and is not validated.
    level: str = Field(
        description="The difficulty level of the question. It should be only one of the following options: 'easy', 'medium', 'hard'."
    )
    # The statement the student must judge as TRUE or FALSE.
    question: str = Field(
        description="The actual question text. It should be a question of type TRUE or FALSE. It means that the questions should be an assertion that could be answered with TRUE or FALSE."
    )
    # Expected answer. Also a plain str: the TRUE/FALSE restriction is only
    # stated in the description and is not validated.
    answer_correct: str = Field(
        description="The correct answer to the question. It should be only one of the following options: TRUE or FALSE"
    )
    # Free-text justification of the correct answer.
    explanation: str = Field(
        description="An explanation or solution to the question."
    )

# Top-level structured-output schema: a wrapper model whose single field holds
# the generated Question objects. This is the class passed to
# with_structured_output() when the LLM is configured.
class QuestionList(BaseModel):
    """A list of Question class."""

    # The generated questions. No length constraint is declared here; the
    # requested quantity is only expressed in the prompt.
    items: list[Question] = Field(
        description="list of Question"
    )

In [16]:
from langchain_groq import ChatGroq

# Base Groq chat model used for question generation.
# NOTE(review): the model id is hard-coded; confirm it is still served by Groq
# before re-running, since hosted model ids get retired over time.
llm_groq = ChatGroq(
    model="llama-3.1-70b-versatile",
)

# Same model, but constrained to answer using the QuestionList schema.
llm_QuestionList = llm_groq.with_structured_output(QuestionList)

In [17]:
# Prompt used to generate TRUE/FALSE study questions.
# Template variables that must be supplied when the prompt is invoked:
#   - additional_task_description: extra free-text instructions
#   - quantity: how many questions to request
#   - level: requested difficulty
#   - domain_knowledge: the topic list (filled in by generate_questions)
# The template body is runtime text sent to the model; edit it deliberately.
prompt_question_generator = PromptTemplate.from_template("""

    TASK CONTEXT:
    I am studying machine learning and I need to practice some questions on various topics.
    
    TASK DESCRIPTION:
    I will provide you with a list of topics, and I would like you to generate a list of TRUE or FALSE questions.
    These questions should be interesting, creative, challenging and thought-provoking. 
    Each question should be in the form of a statement that could be either TRUE or FALSE.
    Feel free to be imaginative and attempt to confuse the student by blending related concepts or similar words.
    I will provide the topics in the DOMAIN KNOWLEDGE section.
    The questions should pertain to these topics, and you can use this knowledge as a foundation to create questions that delve deeper into the subject matter.
    
    ADDITIONAL TASK DESCRIPTION:
    {additional_task_description}
    
    TASK REQUIREMENTS:
    Please refrain from creating questions that require mathematical calculations, but you may create questions with mathematical formulas.
    You SHOULD use LATEX to write mathematical formulas and code, but you should use the Katex flavor.
    Also you should put $$ in the beggining of the katex code and $$ at the end of the code. This is necessary because the interpreter needs it.
    
    TASK DETAILS:
    You should create {quantity} questions of level {level}.
    
    DOMAIN KNOWLEDGE:
    {domain_knowledge}
    
    FORMAT OUTPUT INSTRUCTIONS:
    It should be formatted as described in the output format.
    """
)

In [7]:
from supabase import create_client, Client

In [8]:
import os

# Supabase credentials come from the environment (populated from .env by
# load_dotenv() earlier in the notebook). os.environ.get() would return None
# for an unset variable, which contradicts the `str` annotation and would only
# surface later inside create_client(); default to "" and fail fast instead.
url: str = os.environ.get("SUPABASE_URL", "")
key: str = os.environ.get("SUPABASE_KEY", "")
if not (url and key):
    raise RuntimeError(
        "SUPABASE_URL and SUPABASE_KEY must be set in the environment "
        "(e.g. in the .env file) before creating the Supabase client."
    )

# Shared client; the only visible use is the (currently disabled) insert into
# the 'questions' table inside generate_questions.
supabase: Client = create_client(url, key)

In [9]:
# Topics used as the prompt's domain knowledge: one topic per line, with the
# same blank-line padding before and after the list as the original literal.
text = "\n\n" + "\n".join([
    "Language Models and Probability",
    "Entropy",
    "Cross-Entropy",
    "Cross-Entropy Loss",
    "Perplexity",
]) + "\n\n"

In [18]:
def generate_questions(text, llm, parameters, subject_matter_1, subject_matter_2):
    """Generate questions about ``text`` by piping the prompt into ``llm``.

    Args:
        text: Domain knowledge (the topic list) substituted into the prompt's
            ``domain_knowledge`` variable.
        llm: Runnable placed after ``prompt_question_generator`` in the chain.
        parameters: Mapping holding the remaining prompt variables. It is
            copied before ``domain_knowledge`` is added, so the caller's
            object is left untouched.
        subject_matter_1: Currently unused (see TODO below).
        subject_matter_2: Currently unused (see TODO below).

    Returns:
        Whatever ``chain.invoke`` returns, or ``None`` when generation failed
        (the error is printed, not re-raised).

    Raises:
        ValueError: If ``text`` is None. ``ValueError`` is a subclass of
            ``Exception``, so existing ``except Exception`` callers still work.
    """
    print("------------------- generate_questions FUNCTION -------------------")

    if text is None:
        raise ValueError("text is None")

    try:
        chain = prompt_question_generator | llm

        # Work on a copy: the caller may reuse its dict for further calls.
        prompt_variables = dict(parameters)
        prompt_variables["domain_knowledge"] = text

        questions = chain.invoke(prompt_variables)
        print(questions)

        # TODO: persistence is not implemented yet. The intent was to tag each
        # question with subject_matter_1 / subject_matter_2 and insert the
        # result into the Supabase 'questions' table.
        return questions

    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # continue instead of aborting the whole run.
        print("An error occurred:", e)
        return None

In [19]:
# Both subject-matter labels are the same for this run; they do not depend on
# the difficulty level, so they are set once before the loop.
subject_matter_1 = "Probability - LLMs"
subject_matter_2 = subject_matter_1

# Only "hard" is enabled; "beginner" and "intermediate" are switched off.
for level in ("hard",):
    print("level:", level)

    # Prompt variables for this level; generate_questions supplies the topics.
    parameters = dict(
        quantity=1,
        level=level,
        additional_task_description="Create questions only about the definitions of the concepts, like mixing the definition of one with another distribution, or mixing the use of one with the use of another. I need this to memorize this concepts.",
    )

    generate_questions(text, llm_QuestionList, parameters, subject_matter_1, subject_matter_2)

level: hard
------------------- generate_questions FUNCTION -------------------
items=[Question(topic_description='Language Models and Probability', level='hard', question='A language model that assigns a higher probability to a sentence is more likely to generate that sentence.', answer_correct='TRUE', explanation='A language model assigns probabilities to sentences based on their likelihood of occurrence. A higher probability indicates that the model is more confident in generating that sentence.'), Question(topic_description='Entropy', level='hard', question='Entropy is a measure of the amount of information in a probability distribution.', answer_correct='TRUE', explanation='Entropy is a measure of the uncertainty or randomness in a probability distribution. It quantifies the amount of information in the distribution.'), Question(topic_description='Cross-Entropy', level='hard', question='Cross-entropy is a measure of the difference between two probability distributions.', answer_co