In [23]:
import requests
from dotenv import load_dotenv
import pymongo
import sys
import os
import time
from math import ceil

# Resolve the process working directory (where the notebook is run from)
# and the directory one level above it.
notebook_dir = os.getcwd()
parent_dir = os.path.split(notebook_dir)[0]

# Put the parent directory on the module search path exactly once, so that
# re-running this cell does not keep appending duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Must stay below the sys.path tweak: `const` is presumably located in the
# parent directory and would not be importable otherwise -- TODO confirm.
import const

In [15]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Both settings are mandatory: a missing variable raises KeyError right here
# instead of failing later in the middle of a generation run.
GENERATIVE_URI = os.environ['GENERATIVE_URI']
db_client = pymongo.MongoClient(os.environ['DB_URI'])

# Database and collection handles shared by the cells below.
db = db_client.get_database('db_certificates')
metadata_collection = db.get_collection('tb_cert_metadata')  # metadata of certificates

In [None]:
# Instructions shared by every question-generation request. The
# certificate-specific prefix and context are prepended by generate_questions.
COMMON_QUESTION_PROMPT = 'Each question is more than 60 words in length and should focus more on complex real-world scenarios rather than definitions. Please provide a response in a structured JSON format with the key name "questions", including all explanations for each answer as a JSON object. Each explanation has more than 50 words. The response should avoid error while parsing JSON format "Error decoding JSON: Expecting property name enclosed in double quotes."'

# Response format for single-answer questions.
# Fixes relative to the earlier text:
#   * a space now separates the common instructions from "Sample response"
#     (previously the two sentences were fused as `quotes."Sample`);
#   * the sample object quotes every property name and the answer letter, so
#     the example no longer demonstrates the very JSON error the prompt warns
#     about (`type:`, `domain:` and `"answer": B` were unquoted).
# `x` is kept as the free placeholder for the domain value.
MULTI_CHOICE_PROMPT = (
    COMMON_QUESTION_PROMPT
    + ' Sample response structure should be like this: '
    + '{"question": "a text", "options": { "A": "a text", "B": "a text", "C": "a text", "D": "a text"}, '
    + '"answer": "B", '
    + '"explanation": { "A": "a text", "B": "a text", "C": "a text", "D": "a text"}, '
    + '"type": "multiple-choice", "domain": x}.'
)

# Response format for questions with several correct answers. Only the
# missing separator space is fixed; the sample text itself is unchanged.
MULTI_SELECTION_PROMPT = (
    COMMON_QUESTION_PROMPT
    + ' Sample response structure should be like this: '
    + '{ "question" : "a text", "options" : { "A" : "a text", "B" : "a text", "C" : "a text", "D" : "a text", "E": "a_text"},'
    + '"answer" : [],'
    + '"explanation" : { "A" : "a text", "B" : "a text", "C" : "a text", "D" : "a text", "E" : "a text"},'
    + '"type" : "multiple-selection","domain":x}.'
    + ' Do not generate a question that has only 1 correct answer. The array "answer" should have 2 or 3 correct elements.'
)

In [9]:
def store_questions_2_db(collection, raw_questions):
    """Extract questions from a raw generative-AI response and save them.

    Args:
        collection: Destination handed unchanged to ``const.insert_questions``.
        raw_questions: Raw response passed to
            ``const.extract_questions_from_candidates`` for parsing.

    Returns:
        None. Progress is reported on stdout only: a success line after all
        questions were inserted, or an error line when nothing was extracted.
    """
    parsed_questions = const.extract_questions_from_candidates(raw_questions)
    if not parsed_questions:
        print("Error: No questions found in the parsed content")
        return

    for question in parsed_questions:
        # Stamp each record before saving it; `exported = 0` presumably
        # marks the question as not yet exported -- TODO confirm.
        question['exported'] = 0
        question['uuid'] = const.generate_random_uuid()
        const.insert_questions(collection, question)

    print('Stored questions to db successfully')

In [None]:
def _request_and_store_batches(question_collection, final_prompt, total_questions,
                               questions_per_request, request_delay_seconds):
    """Send the same prompt repeatedly and store every response.

    The number of requests is ``ceil(total_questions / questions_per_request)``.
    The pause runs after every request, including the last one, so that
    whatever request follows this call is delayed as well.
    """
    for _ in range(ceil(total_questions / questions_per_request)):
        raw_generated_text = const.post_request_generative_ai(GENERATIVE_URI, final_prompt)
        store_questions_2_db(question_collection, raw_generated_text)
        time.sleep(request_delay_seconds)


def generate_questions(cert_symbol, cert_metadata, questions_per_request=10, request_delay_seconds=5):
    """Generate and store one full set of questions for a certificate.

    For each question kind whose ``*_prompt_prefix`` key is present in
    ``cert_metadata`` (multiple-choice first, then multi-selection), the
    prompt is built as ``prompt_context + prefix + format prompt`` and sent
    as many times as needed to cover the configured number of questions.

    Args:
        cert_symbol: Certificate symbol. Not used by this function; kept so
            existing callers keep working.
        cert_metadata: Mapping with ``prompt_context`` and ``collection_name``
            plus, per question kind, a ``*_prompt_prefix`` and the matching
            ``*_questions`` count.
        questions_per_request: Divisor used to turn a question count into a
            number of requests. Defaults to the previously hard-coded 10;
            presumably the prompt prefixes ask for that many questions per
            response -- TODO confirm against the stored metadata.
        request_delay_seconds: Pause after each request. Defaults to the
            previously hard-coded 5 seconds.

    Raises:
        ValueError: If ``questions_per_request`` is not positive.
        KeyError: If a required metadata key is missing, including a
            ``*_questions`` count whose prefix key is present.
    """
    if questions_per_request <= 0:
        raise ValueError('questions_per_request must be a positive number')

    context = cert_metadata['prompt_context']
    question_collection = db[cert_metadata['collection_name']]

    # (prefix key, question-count key, response-format prompt), in run order.
    # Built inside the function so the module-level prompts are read at call time.
    question_kinds = (
        ('multi_choice_prompt_prefix', 'multi_choice_questions', MULTI_CHOICE_PROMPT),
        ('multi_selection_prompt_prefix', 'multi_selection_questions', MULTI_SELECTION_PROMPT),
    )
    for prefix_key, count_key, format_prompt in question_kinds:
        # A kind is optional: it is skipped when its prefix is absent.
        if prefix_key not in cert_metadata:
            continue
        final_prompt = context + cert_metadata[prefix_key] + format_prompt
        _request_and_store_batches(
            question_collection,
            final_prompt,
            cert_metadata[count_key],
            questions_per_request,
            request_delay_seconds,
        )
    

In [None]:
def begin_generate_questions(cert_symbol, no_of_tests):
    """Look up a certificate by symbol and generate question sets for it.

    Args:
        cert_symbol: Value matched against the ``symbol`` field of the
            metadata collection.
        no_of_tests: How many times ``generate_questions`` is invoked for
            the certificate; one invocation produces one set.

    Returns:
        None. Prints 'Certificate not found' and does nothing else when no
        metadata document matches the symbol.
    """
    metadata = metadata_collection.find_one({'symbol': cert_symbol})

    if metadata is not None:
        banner = 'Begin generating questions for: ' + metadata['name']
        print(banner)
        for set_index in range(no_of_tests):
            generate_questions(cert_symbol, metadata)
            progress = cert_symbol + ' ========== Finish generating set: ' + str(set_index)
            print(progress)
    else:
        print('Certificate not found')

In [24]:
# Symbol of the certificate to generate for; it has to match the `symbol`
# field of a document in the metadata collection (predefined).
cert_symbol = 'AWS_DVA_C02'

# Generate a single set for now; ideally 6 full tests.
begin_generate_questions(cert_symbol, no_of_tests=1)

Begin generating questions of: AWS Certified Developer - Associate (DVA-C02)
Stored questions to db successfully
