In [4]:
# vector database
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# from langchain_community.vectorstores import Qdrant

# ingestion
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter

# from llama_index.core import SimpleDirectoryReader

# chat
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages.base import BaseMessage

from langchain.prompts import FewShotPromptTemplate, PromptTemplate

from langchain_core.pydantic_v1 import BaseModel, Field

from pydantic import BaseModel
from typing import List, Optional
from langchain.output_parsers import PydanticOutputParser
import json
import re

# system
import os
import logging
import sys

import nest_asyncio

# Applied once, right after the imports.
# NOTE(review): presumably needed so async clients can run inside the notebook's
# already-running event loop — confirm.
nest_asyncio.apply()

# Root logger setup: records at DEBUG and above, written to stdout with a
# "timestamp - level - message" layout.
# To log to a file instead, swap `stream` for filename='app.log' and filemode='a' (append).
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

In [5]:
from dotenv import load_dotenv

# Populate the process environment from a local .env file before any cell reads
# configuration such as SUPABASE_URL / SUPABASE_KEY through os.environ.
load_dotenv()  # take environment variables from .env.

True

In [6]:
# Announce start-up, then build the Gemini chat model that the generation loop uses.
# NOTE(review): no API key is passed here, so the client presumably reads it from the
# environment loaded by load_dotenv() — confirm.
logging.info('Inicializando LLM e embedings')
llm_google = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    convert_system_message_to_human=True,
)
# Embeddings are currently disabled; kept for reference.
# embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key_google)

2024-08-02 11:13:45,539 - INFO - Inicializando LLM e embedings


In [7]:
# from langchain_openai import ChatOpenAI
# from langchain_openai import ChatOpenAI

# SECURITY: an API key was hardcoded on this line; it has been redacted and must be revoked/rotated.
# llm_openai = ChatOpenAI(openai_api_key=os.environ["OPENAI_API_KEY"])

In [8]:
# JSON-schema-style description of a single generated question.
# The string is injected verbatim into the question-generation prompt (as the
# `object_schema` partial variable), so the model is told which fields every
# object in its JSON list must contain:
#   topic_description, level, question, answer, explanation (all strings, all required).
# Keep the allowed `level` values here in sync with the levels requested by callers.
object_schema = """
{
  "properties": {
    "topic_description": {
      "type": "string",
      "description": "A sentence describing the sub-topic to which the question belongs. That means this sentence should specify in a granular level what specific sub-topic the question belongs to. It should be abstract in a way that other questions could be put in this description too. Use between 5 and 10 words."
    },
    "level": {
      "type": "string",
      "description": "The difficulty level of the question. It should be only one of the following options: 'beginner', 'intermediate', 'advanced'."
    },
    "question": {
      "type": "string",
      "description": "The actual question text. It should be a question of type TRUE or FALSE. It means that the questions should be an assertion that could be answered with TRUE or FALSE."
    },
    "answer": {
      "type": "string",
      "description": "The correct answer to the question. It should be only one of the following options: TRUE or FALSE"
    },
    "explanation": {
      "type": "string",
      "description": "An explanation or solution to the question."
    }
  },
  "required": ["topic_description", "level", "question", "answer", "explanation"]
}
"""

In [9]:
# Prompt used to ask the LLM for a JSON list of TRUE/FALSE questions.
#
# Runtime inputs (supplied on every invoke):
#   quantity                    - how many questions to create
#   level                       - difficulty level requested
#   additional_task_description - extra free-form instructions
#   domain_knowledge            - the topic outline the questions must be about
# Pre-filled input:
#   object_schema               - the per-question JSON schema, bound once via partial_variables
#
# `domain_knowledge` is referenced by the template, so it is declared in
# `input_variables` together with the other runtime inputs; the declared list now
# matches the placeholders actually used. The template text itself is unchanged.
prompt_question_generator = PromptTemplate(
    template="""
                TASK CONTEXT:
                I am studying machine learning and I need to practice some questions on various topics.
                
                TASK DESCRIPTION:
                I will provide you with a list of topics, and I would like you to generate a list of TRUE or FALSE questions.
                These questions should be interesting, creative, challenging and thought-provoking. 
                Each question should be in the form of a statement that could be either TRUE or FALSE.
                Feel free to be imaginative and attempt to confuse the student by blending related concepts or similar words.
                I will provide the topics in the DOMAIN KNOWLEDGE section.
                The questions should pertain to these topics, and you can use this knowledge as a foundation to create questions that delve deeper into the subject matter.
                
                ADDITIONAL TASK DESCRIPTION:
                {additional_task_description}
                
                TASK REQUIREMENTS:
                Please refrain from creating questions that require mathematical calculations, but you may create questions with mathematical formulas.
                You SHOULD use LATEX to write mathematical formulas and code, but you should use the Katex flavor.
                Also you should put $$ in the beggining of the katex code and $$ at the end of the code. This is necessary because the interpreter needs it.
                
                TASK DETAILS:
                You should create {quantity} questions of level {level}.
                
                DOMAIN KNOWLEDGE:
                {domain_knowledge}
                
                FORMAT OUTPUT INSTRUCTIONS:
                The output should be formatted as a JSON list of objects that conforms class object schema below.
                You should output just the Json list. 
                You should not output any other word like "json" in the beginning because it will ruin the parser.

                ```
                {object_schema}
                ```
            """,
    input_variables=["quantity", "level", "additional_task_description", "domain_knowledge"],
    partial_variables={"object_schema": object_schema},
)

In [10]:
from supabase import create_client, Client

In [11]:
import os

# Supabase connection settings come from the environment (SUPABASE_URL / SUPABASE_KEY);
# either value is None when the variable is not set.
url: str = os.getenv("SUPABASE_URL")
key: str = os.getenv("SUPABASE_KEY")

# Client used later to insert generated questions.
supabase: Client = create_client(url, key)

2024-08-02 11:13:50,356 - DEBUG - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-08-02 11:13:50,357 - DEBUG - load_verify_locations cafile='/workspaces/llm-quiz-creator-streamlitapp-trainer/.venv/lib/python3.10/site-packages/certifi/cacert.pem'


In [12]:
from langchain_core.messages import AIMessage, HumanMessage
from typing import List, Optional
import  json_repair

def json_parser(message: AIMessage) -> List[dict]:
    """Parse the JSON payload of an LLM reply into a list of dicts.

    The reply text is decoded with ``json_repair.loads``. Because the decoded
    value is whatever the model produced, it is normalised and checked so the
    declared ``List[dict]`` return type actually holds:

    * a single JSON object is wrapped into a one-element list;
    * anything that is not a list of objects is rejected.

    Args:
        message: The LLM response; its ``content`` attribute is parsed.

    Returns:
        A list of dictionaries, one per parsed JSON object.

    Raises:
        ValueError: If the reply does not decode to an object or a list of objects.
    """
    parsed = json_repair.loads(message.content)

    # The model may answer with one object instead of a list of objects.
    if isinstance(parsed, dict):
        parsed = [parsed]

    if not isinstance(parsed, list) or not all(isinstance(item, dict) for item in parsed):
        raise ValueError(
            "LLM response is not a JSON list of objects: got %s" % type(parsed).__name__
        )

    return parsed

In [13]:
def generate_questions(text, llm, parameters, subject_matter_1, subject_matter_2):
    """Generate questions about `text` with `llm` and store them in Supabase.

    The prompt `prompt_question_generator` is piped into `llm`, the reply is parsed
    with `json_parser`, every parsed question is tagged with the two subject labels,
    and the batch is inserted into the `questions` table.

    Args:
        text: Domain knowledge passed to the prompt as ``domain_knowledge``.
        llm: Runnable chat model the prompt is piped into.
        parameters: Mapping with the remaining prompt inputs. It is not modified.
        subject_matter_1: Value stored under ``subject_matter_1`` on every question.
        subject_matter_2: Value stored under ``subject_matter_2`` on every question.

    Returns:
        None. Failures during generation, parsing or insertion are printed, not raised
        (best-effort behaviour, so a batch loop keeps going).

    Raises:
        ValueError: If `text` is None.
    """
    print( "------------------- generate_questions FUNCTION -------------------" )

    if text is None:
        # ValueError is still an Exception, so existing `except Exception` callers keep working.
        raise ValueError("text is None")

    try:
        chain = prompt_question_generator | llm

        # Build a fresh mapping instead of writing into the caller's dict.
        prompt_inputs = {**parameters, "domain_knowledge": text}

        response = chain.invoke(prompt_inputs)

        questions = json_parser(response)

        for q in questions:
            q["subject_matter_1"] = subject_matter_1
            q["subject_matter_2"] = subject_matter_2

        if not questions:
            # Nothing was parsed; skip the insert rather than send an empty payload.
            print("No questions were generated; nothing to insert.")
            return

        # The response object is not needed; the old `data, count = ...` unpacking
        # relied on iterating the response and its results were never used.
        supabase.table('questions').insert(questions).execute()

    except Exception as e:
        print("An error occurred:", e)

In [14]:
# logging.disable(logging.DEBUG)

# # Re-enable debug logs
# # logging.disable(logging.NOTSET)

In [19]:
# Topic outline (numbered, hierarchical) for "Eigenvalues and Eigenvectors".
# It is passed to generate_questions() as the domain knowledge the questions are based on.
text = """
1.1. Eigenvalues and Eigenvectors
1.1.1. Introduction
1.1.1.1. Definition of Eigenvalues
1.1.1.2. Definition of Eigenvectors
1.1.1.3. Importance and Applications in Machine Learning and Data Science
1.1.2. Mathematical Foundation
1.1.2.1. Linear Transformations
1.1.2.1.1. Definition of Linear Transformations
1.1.2.1.2. Properties of Linear Transformations
1.1.2.2. Matrix Representation
1.1.2.2.1. Matrices as Linear Transformations
1.1.2.2.2. Diagonalization
1.1.2.3. Characteristic Polynomial
1.1.2.3.1. Definition and Calculation
1.1.2.3.2. Properties of Characteristic Polynomials
1.1.2.3.3. Example of Characteristic Polynomial Calculation
1.1.3. Eigenvalues
1.1.3.1. Definition and Notation
1.1.3.2. Calculation of Eigenvalues
1.1.3.2.1. Finding the Determinant
1.1.3.2.2. Solving the Characteristic Equation
1.1.3.3. Properties of Eigenvalues
1.1.3.3.1. Sum and Product of Eigenvalues
1.1.3.3.2. Eigenvalues of Diagonal Matrices
1.1.3.3.3. Eigenvalues of Special Matrices (e.g., symmetric, orthogonal)
1.1.4. Eigenvectors
1.1.4.1. Definition and Notation
1.1.4.2. Calculation of Eigenvectors
1.1.4.2.2. Normalization of Eigenvectors
1.1.4.3. Properties of Eigenvectors
1.1.4.3.1. Linearly Independent Eigenvectors
1.1.4.3.2. Basis and Span of Eigenvectors
1.1.5. Diagonalization
1.1.5.1. Definition and Conditions
1.1.5.1.1. Diagonalizable Matrices
1.1.5.1.2. Conditions for Diagonalization
1.1.5.2. Process of Diagonalization
1.1.5.2.1. Finding Eigenvalues and Eigenvectors
1.1.5.3. Applications of Diagonalization
1.1.6. Spectral Theorem
1.1.6.1. Introduction to the Spectral Theorem
1.1.6.2. Spectral Theorem for Symmetric Matrices
1.1.6.2.1. Statement of the Theorem
1.1.6.2.2. Proof Outline
1.1.6.3. Applications of the Spectral Theorem
1.1.6.3.1. Principal Component Analysis (PCA)
1.1.6.3.2. Quadratic Forms
1.1.7. Eigenvalue Decomposition
1.1.7.1. Definition and Purpose
1.1.7.2. Process of Eigenvalue Decomposition
1.1.7.2.1. Decomposing a Matrix
1.1.7.2.2. Reconstructing the Original Matrix
1.1.7.3. Applications in Machine Learning
1.1.7.3.1. Data Compression
1.1.7.3.2. Feature Extraction
1.1.8. Singular Value Decomposition (SVD)
1.1.8.1. Introduction to SVD
1.1.8.2. Mathematical Foundation
1.1.8.2.1. Definition and Calculation
1.1.8.2.2. Relation to Eigenvalues and Eigenvectors
1.1.8.3. Applications of SVD
1.1.8.3.1. Image Compression
1.1.8.3.2. Latent Semantic Analysis in Natural Language Processing
1.1.9. Numerical Methods for Eigenvalues and Eigenvectors
1.1.9.1. Power Iteration
1.1.9.1.1. Algorithm and Convergence
1.1.9.1.2. Applications
1.1.9.2. QR Algorithm
1.1.9.2.1. Overview and Steps
1.1.9.2.2. Convergence and Efficiency
1.1.10. Practical Considerations
1.1.10.1. Computational Complexity
1.1.10.2. Stability and Sensitivity
1.1.10.3. Software and Libraries for Computation
1.1.10.3.1. MATLAB
1.1.10.3.2. NumPy and SciPy in Python

"""

In [20]:
# Both subject columns carry the same label for this batch; the values do not
# depend on the level, so they are set once before the loop.
subject_matter_1 = "Linear Algebra - Basics"
subject_matter_2 = subject_matter_1

# Generate one batch of questions per difficulty level.
# The level names must be the ones the prompt's object schema allows
# ('beginner', 'intermediate', 'advanced'); the former "hard" was not among them,
# so the requested level and the level the model is told to emit disagreed.
for level in [
    "beginner",
    "intermediate",
    "advanced",
    ]:
    print( "level:", level )

    # NOTE(review): the description mentions "another distribution" although the outline
    # is about eigenvalues/eigenvectors — looks like a leftover from another topic; confirm.
    parameters = {
        "quantity": 5,
        "level": level,
        "additional_task_description": "Create questions only about the definitions of the concepts, like mixing the definition of one with another distribution, or mixing the use of one with the use of another. I need this to memorize this concepts."
    }

    generate_questions(text, llm_google, parameters, subject_matter_1, subject_matter_2)

level: beginner
------------------- generate_questions FUNCTION -------------------
2024-08-02 11:35:04,389 - DEBUG - close.started
2024-08-02 11:35:04,390 - DEBUG - close.complete
2024-08-02 11:35:04,391 - DEBUG - connect_tcp.started host='xoxlgvakygiyfijfeixu.supabase.co' port=443 local_address=None timeout=120 socket_options=None
2024-08-02 11:35:04,418 - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7f87bbea6290>
2024-08-02 11:35:04,419 - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x7f87bc02a340> server_hostname='xoxlgvakygiyfijfeixu.supabase.co' timeout=120
2024-08-02 11:35:04,433 - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x7f87bbe86980>
2024-08-02 11:35:04,434 - DEBUG - send_connection_init.started request=<Request [b'POST']>
2024-08-02 11:35:04,435 - DEBUG - send_connection_init.complete
2024-08-02 11:35:04,436 - DEBUG - send_request_headers.started request=<Request [b'