In [11]:
# vector database
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# from langchain_community.vectorstores import Qdrant

# ingestion
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter

# from llama_index.core import SimpleDirectoryReader

# chat
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages.base import BaseMessage

from langchain.prompts import FewShotPromptTemplate, PromptTemplate

from langchain_core.pydantic_v1 import BaseModel, Field

from pydantic import BaseModel
from typing import List, Optional
from langchain.output_parsers import PydanticOutputParser
import json
import re

# system
import os
import logging
import sys

import nest_asyncio

nest_asyncio.apply()

logging.basicConfig(level=logging.DEBUG,  # Define o nível de log
                    format='%(asctime)s - %(levelname)s - %(message)s',  # Define o formato da mensagem de log
                    stream=sys.stdout)  # Define a saída do log para stdout
                    # filename='app.log',  # Define o arquivo onde os logs serão gravados
                    # filemode='a')  # Define o modo de escrita do arquivo de log (append)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env.

True

In [13]:
logging.info('Inicializando LLM e embedings')
llm_google = ChatGoogleGenerativeAI(model="gemini-1.5-flash", convert_system_message_to_human=True)
# embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key_google)

2024-07-29 11:42:58,936 - INFO - Inicializando LLM e embedings


In [14]:
# from langchain_openai import ChatOpenAI
# from langchain_openai import ChatOpenAI

# llm_openai = ChatOpenAI(openai_api_key="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")

In [15]:
object_schema = """
{
  "properties": {
    "topic_description": {
      "type": "string",
      "description": "A sentence describing the sub-topic to which the question belongs. That means this sentence should specify in a granular level what specific sub-topic the question belongs to. It should be abstract in a way that other questions could be put in this description too. Use between 5 and 10 words."
    },
    "level": {
      "type": "string",
      "description": "The difficulty level of the question. It should be only one of the following options: 'beginner', 'intermediate', 'advanced'."
    },
    "question": {
      "type": "string",
      "description": "The actual question text. It should be a question of type TRUE or FALSE. It means that the questions should be an assertion that could be answered with TRUE or FALSE."
    },
    "answer": {
      "type": "string",
      "description": "The correct answer to the question. It should be only one of the following options: TRUE or FALSE"
    },
    "explanation": {
      "type": "string",
      "description": "An explanation or solution to the question."
    }
  },
  "required": ["topic_description", "level", "question", "answer", "explanation"]
}
"""

In [16]:
prompt_question_generator = PromptTemplate(
    template="""
                TASK CONTEXT:
                I am studying machine learning and I need to practice some questions on various topics.
                
                TASK DESCRIPTION:
                I will provide you with a list of topics, and I would like you to generate a list of TRUE or FALSE questions.
                These questions should be interesting, creative, challenging and thought-provoking. 
                Each question should be in the form of a statement that could be either TRUE or FALSE.
                Feel free to be imaginative and attempt to confuse the student by blending related concepts or similar words.
                I will provide the topics in the DOMAIN KNOWLEDGE section.
                The questions should pertain to these topics, and you can use this knowledge as a foundation to create questions that delve deeper into the subject matter.
                
                ADDITIONAL TASK DESCRIPTION:
                {additional_task_description}
                
                TASK REQUIREMENTS:
                Please refrain from creating questions that require mathematical calculations, but you may create questions with mathematical formulas.
                You SHOULD use LATEX to write mathematical formulas and code, but you should use the Katex flavor.
                Also you should put $$ in the beggining of the katex code and $$ at the end of the code. This is necessary because the interpreter needs it.
                
                TASK DETAILS:
                You should create {quantity} questions of level {level}.
                
                DOMAIN KNOWLEDGE:
                {domain_knowledge}
                
                FORMAT OUTPUT INSTRUCTIONS:
                The output should be formatted as a JSON list of objects that conforms class object schema below.
                You should output just the Json list. 
                You should not output any other word like "json" in the beginning because it will ruin the parser.

                ```
                {object_schema}
                ```
            """,
    input_variables=["quantity", "level", "additional_task_description"],
    partial_variables={"object_schema": object_schema},
)

In [17]:
from supabase import create_client, Client

In [18]:
import os

url: str = os.environ.get("SUPABASE_URL")
key: str = os.environ.get("SUPABASE_KEY")
supabase: Client = create_client(url, key)

2024-07-29 11:42:59,172 - DEBUG - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-29 11:42:59,174 - DEBUG - load_verify_locations cafile='/workspaces/llm-quiz-creator-streamlitapp-trainer/.venv/lib/python3.10/site-packages/certifi/cacert.pem'


In [19]:
import  json_repair

def json_parser(message: AIMessage) -> List[dict]:
    return json_repair.loads(message.content)

In [20]:
def generate_questions(text, llm, parameters, subject_matter_1, subject_matter_2):
    print( "------------------- generate_questions FUNCTION -------------------" )
    
    if text is None:
        raise Exception("text is None")
        
    try:
        chain = prompt_question_generator | llm
        
        parameters["domain_knowledge"] = text
        
        response = chain.invoke(parameters)
        
        questions = json_parser(response)
        
        for q in questions:
            q["subject_matter_1"] = subject_matter_1
            q["subject_matter_2"] = subject_matter_2    
        
        data, count = supabase.table('questions').insert(questions).execute()
        
    except Exception as e:
        print("An error occurred:", e)

In [21]:
logging.disable(logging.DEBUG)

# Re-enable debug logs
# logging.disable(logging.NOTSET)

In [22]:
text = """
Machine Learning Metrics
1. Classification Metrics
Accuracy:
Definition and calculation
Pros and cons: sensitivity to class imbalance
Use cases: overall performance evaluation
Precision:
Definition and calculation
Meaning: ability to correctly identify positive cases
Use cases: evaluating models with high false positive costs
Recall:
Definition and calculation
Meaning: ability to correctly identify all positive cases
Use cases: evaluating models with high false negative costs
F1-Score:
Definition and calculation as harmonic mean of precision and recall
Use cases: balancing precision and recall
Specificity:
Definition and calculation
Meaning: ability to correctly identify negative cases
Use cases: evaluating models with high false negative costs
ROC Curve and AUC:
Definition and interpretation of ROC curve
Calculation of Area Under the Curve (AUC)
Use cases: comparing models with different thresholds
Confusion Matrix:
Definition and interpretation of matrix elements
Visualization of classification performance
Use cases: detailed analysis of model errors
2. Regression Metrics
Mean Absolute Error (MAE):
Definition and calculation
Meaning: average absolute difference between predicted and actual values
Use cases: sensitive to outliers
Mean Squared Error (MSE):
Definition and calculation
Meaning: average squared difference between predicted and actual values
Use cases: penalizes large errors more heavily
Root Mean Squared Error (RMSE):
Definition and calculation as square root of MSE
Meaning: provides error in the same units as the target variable
Use cases: easy to interpret and compare to other models
R-squared:
Definition and calculation as proportion of variance explained
Meaning: indicates how well the model fits the data
Use cases: evaluating model fit for continuous data
Adjusted R-squared:
Definition and calculation, accounting for number of predictors
Meaning: adjusts R-squared for model complexity
Use cases: comparing models with different numbers of features
3. Model Evaluation and Selection
Cross-validation:
K-fold cross-validation
Leave-one-out cross-validation
Use cases: evaluating model performance on unseen data
Hyperparameter Tuning:
Grid search
Random search
Use cases: optimizing model performance by adjusting hyperparameters
Model Comparison:
Statistical significance tests: t-test, F-test
Comparing models based on chosen metrics
Use cases: choosing the best model for a specific task
Bias-Variance Trade-off:
Understanding the trade-off between model bias and variance
Impact on model performance
Use cases: choosing a model that balances complexity and accuracy
4. Advanced Metrics
Log Loss:
Definition and calculation for probabilistic models
Meaning: measures model confidence in its predictions
Use cases: evaluating models with multiple classes
KL Divergence:
Definition and calculation for comparing probability distributions
Meaning: measures dissimilarity between two distributions
Use cases: evaluating model calibration
Jaccard Index:
Definition and calculation for comparing sets
Meaning: measures overlap between predicted and actual sets
Use cases: evaluating models for object detection and segmentation
Ranking Metrics:
Mean Reciprocal Rank (MRR)
Normalized Discounted Cumulative Gain (NDCG)
Use cases: evaluating models for ranking and recommendation tasks
5. Application-Specific Metrics
Sentiment Analysis:
Accuracy, precision, recall for sentiment classification
Sentiment polarity scores for fine-grained analysis
Natural Language Processing:
BLEU, ROUGE for machine translation and text summarization
Perplexity for language models
Computer Vision:
Intersection over Union (IoU) for object detection and segmentation
Mean Average Precision (mAP) for multi-object detection
Recommender Systems:
Precision, recall, F1-score for recommendation accuracy
Rank correlation metrics for evaluating ranking quality
Time Series Forecasting:
Root Mean Squared Error (RMSE), Mean Absolute Error (MAE)
Mean Absolute Percentage Error (MAPE) for relative error evaluation
Fraud Detection:
Area Under the ROC Curve (AUC) for imbalanced datasets
False Positive Rate (FPR) and False Negative Rate (FNR) for cost-sensitive evaluation
"""

In [23]:
for level in [
    "beginner", 
    "intermediate", 
    "hard"
    ]:
    print( "level:", level )
    
    parameters = {
        "quantity": 10,
        "level": level,
        "additional_task_description": "Create questions only about the definitions of the concepts, like mixing the definition of one with another distribution, or mixing the use of one with the use of another. I need this to memorize this concepts."
    }
    
    subject_matter_1 = "Machine Learning"
    subject_matter_2 = "Machine Learning  - Metrics"
    
    generate_questions(text, llm_google, parameters, subject_matter_1, subject_matter_2)

level: beginner
------------------- generate_questions FUNCTION -------------------


I0000 00:00:1722253379.278750    6767 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache
I0000 00:00:1722253379.338672    6767 check_gcp_environment.cc:61] BIOS data file does not exist or cannot be opened.


2024-07-29 11:43:10,157 - INFO - HTTP Request: POST https://xoxlgvakygiyfijfeixu.supabase.co/rest/v1/questions?columns=%22explanation%22%2C%22question%22%2C%22level%22%2C%22answer%22%2C%22topic_description%22%2C%22subject_matter_2%22%2C%22subject_matter_1%22 "HTTP/2 201 Created"
level: intermediate
------------------- generate_questions FUNCTION -------------------
2024-07-29 11:43:19,463 - INFO - HTTP Request: POST https://xoxlgvakygiyfijfeixu.supabase.co/rest/v1/questions?columns=%22explanation%22%2C%22question%22%2C%22level%22%2C%22answer%22%2C%22topic_description%22%2C%22subject_matter_2%22%2C%22subject_matter_1%22 "HTTP/2 201 Created"
level: hard
------------------- generate_questions FUNCTION -------------------
An error occurred: index: 0
finish_reason: RECITATION

