In [1]:
import requests
from dotenv import load_dotenv
import pymongo
import sys
import os
import importlib
import csv

# Make modules stored one directory above this notebook importable.
# Both locations are derived from the kernel's current working directory.
notebook_dir = os.getcwd()
parent_dir = os.path.split(notebook_dir)[0]

# Register the parent directory only once, so re-running this cell
# does not keep growing sys.path.
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

import const
#importlib.reload(const)    #if we update the file const.py

In [2]:
# Load variables from a .env file into the process environment
# (python-dotenv's documented default behaviour; the file location is not shown here).
load_dotenv() 
# URI handed to const.post_request_generative_ai when generating questions.
# Indexing os.environ directly raises KeyError if the variable is not set.
GENERATIVE_URI = os.environ['GENERATIVE_URI']

In [3]:
# Connect to MongoDB with the connection string stored in the DB_URI
# environment variable (KeyError if the variable is not set).
db_client = pymongo.MongoClient(os.environ['DB_URI'])
db = db_client['db_certificates']
# Collection that stores the generated questions; the cells below insert into,
# sample from and update this collection.
collection = db['tb_aws_cloud_practitioner']

# Folder passed as `path` to export_csv, i.e. where the CSV files are written.
FOLDER = './aws_cloud_practitioner/'

The AWS Certified Cloud Practitioner (CLF-C02) exam has the following content domains and weightings:
• Domain 1: Cloud Concepts (24% of scored content)
• Domain 2: Security and Compliance (30% of scored content)
• Domain 3: Cloud Technology and Services (34% of scored content)
• Domain 4: Billing, Pricing, and Support (12% of scored content) 

In [None]:
#65 questions per practice test (60 multiple-choice, 5 multiple-selection)
# Exam outline that is prepended to every prompt so the model knows the four domains.
_EXAM_CONTEXT = 'The AWS Cloud Practitioner Exam has the following content domains: Domain 1 is Cloud Concepts, that includes: Define the benefits of the AWS Cloud; Identify design principles of the AWS Cloud; Understand the benefits of and strategies for migration to the AWS Cloud; Understand concepts of cloud economics. Domain 2 is Security and Compliance, that includes: Understand the AWS shared responsibility model; Understand AWS Cloud security, governance, and compliance concepts; Identify AWS access management capabilities; Identify components and resources for security. Domain 3 is Cloud Technology and Services, that includes: Define methods of deploying and operating in the AWS Cloud; Define the AWS global infrastructure; Identify AWS compute services; Identify AWS database services; Identify AWS network services; Identify AWS storage services; Identify AWS artificial intelligence and machine learning (AI/ML) services and analytics services; Identify services from other in-scope AWS service categories. Domain 4 is Billing, Pricing, and Support, that includes: Compare AWS pricing models; Understand resources for billing, budget, and cost management; Identify AWS technical resources and AWS Support options.'

# One instruction prompt per supported question type. Each asks for 10 questions
# split 2/3/3/2 across the four domains and describes the expected JSON shape.
_QUESTION_PROMPTS = {
    'multiple-choice': 'Generate 10 high-quality multiple-choice questions (2 questions in domain 1, 3 questions in domain 2, 3 questions in domain 3, 2 questions in domain 4) with answers and explanations for the AWS Certified Cloud Practitioner examination. Each question has more than 60 words in length and should focus more on complex real-world scenarios rather than definitions. Please provide a response in a structured JSON format with the key name "questions", including all explanations for each answer as a JSON object. Each explanation has more than 50 words. Sample response structure should like this: { "question" : "xxx", "options" : { "A" : "a text", "B" : "a text", "C" : "a text", "D" : "a text"},"answer" : "B","explanation" : { "A" : "a text", "B" : "a text", "C" : "a text", "D" : "a text"},"type" : "multiple-choice","domain":x}. Response should avoid error while parsing JSON format "Error decoding JSON: Expecting property name enclosed in double quotes".',
    'multiple-selection': 'Generate 10 high-quality multiple-selection questions (2 questions in domain 1, 3 questions in domain 2, 3 questions in domain 3, 2 questions in domain 4) with answers and explanations for the AWS Certified Cloud Practitioner examination. Each question has more than 60 words in length and should focus more on complex real-world scenarios rather than definitions. Please provide a response in a structured JSON format with the key name "questions", including all explanations for each answer as a JSON object. Each explanation has more than 50 words. Sample response structure should like this: { "question" : "xxx", "options" : { "A" : "a text", "B" : "a text", "C" : "a text", "D" : "a text", "E": "a_text"},"answer" : [],"explanation" : { "A" : "a text", "B" : "a text", "C" : "a text", "D" : "a text", "E" : "a text"},"type" : "multiple-selection","domain":x}. Do not generate question that has only 1 answer. "answer" should have 2 to 4 correct elements. Response should avoid error while parsing JSON format "Error decoding JSON: Expecting property name enclosed in double quotes".',
}


def generate_questions(question_type='multiple-selection'):
    """Ask the generative service for one batch of questions and store them.

    The exam outline plus the prompt for `question_type` is sent through
    const.post_request_generative_ai; whatever
    const.extract_questions_from_candidates returns is then inserted, one
    question at a time, with const.insert_questions.

    Args:
        question_type: 'multiple-choice' or 'multiple-selection'. Defaults to
            'multiple-selection', the prompt that was active before this
            parameter existed.

    Raises:
        ValueError: if `question_type` has no prompt defined.

    Returns:
        None. When nothing could be extracted, the raw response is printed
        so the failure can be inspected.
    """
    if question_type not in _QUESTION_PROMPTS:
        supported = ', '.join(sorted(_QUESTION_PROMPTS))
        raise ValueError(f"Unsupported question_type {question_type!r}; expected one of: {supported}")

    # The separating space keeps the last context sentence from running into the prompt.
    full_prompt = _EXAM_CONTEXT + ' ' + _QUESTION_PROMPTS[question_type]
    raw_generated_text = const.post_request_generative_ai(GENERATIVE_URI, full_prompt)

    questions = const.extract_questions_from_candidates(raw_generated_text)
    if not questions:
        print(raw_generated_text)
        print("No questions found in the parsed content")
        return

    for q in questions:
        # export_csv selects documents by 'type'; fill it in if the model left it out.
        q.setdefault('type', question_type)
        q['exported'] = 0  # not yet written to any CSV
        q['uuid'] = const.generate_random_uuid()
        const.insert_questions(collection, q)
# Manual driver: each pass is meant to produce one batch of questions.
# The call stays commented out so that re-running the cell does not contact
# the generative service (author's note: 10 sentences 30 secs).
for i in range(1):
    #generate_questions()
    print(f'===== Finish loop: {i}')

#https://www.udemy.com/course/aws-certified-cloud-practitioner-practice-tests-clf-c02/?couponCode=CP130525

===== Finish loop: 0
===== Finish loop: 1
===== Finish loop: 2
===== Finish loop: 3
===== Finish loop: 4
===== Finish loop: 5
===== Finish loop: 6
===== Finish loop: 7
===== Finish loop: 8
===== Finish loop: 9
===== Finish loop: 10
===== Finish loop: 11
===== Finish loop: 12
===== Finish loop: 13
===== Finish loop: 14


In [7]:
#Question,Question Type,Answer Option 1,Explanation 1,Answer Option 2,Explanation 2,Answer Option 3,Explanation 3,Answer Option 4,Explanation 4,Answer Option 5,Explanation 5,Answer Option 6,Explanation 6,Correct Answers,Overall Explanation,Domain
def _collapse_whitespace(text):
    """Return `text` with every run of whitespace (newlines included) reduced to one space."""
    return ' '.join(text.split())


def _sample_unexported(question_type, size):
    """Randomly pick up to `size` not-yet-exported documents of `question_type`."""
    if size <= 0:
        return []
    pipeline = [
        {"$match": {'exported': 0, 'type': question_type}},
        {"$sample": {"size": size}},
    ]
    return list(collection.aggregate(pipeline))


def export_csv(path, filename, n_multiple_choice=60, n_multiple_selection=5):
    """Export a random batch of unexported questions to a CSV file.

    Multiple-choice questions are written as CSV rows. Multiple-selection
    questions are not written to the file: their uuids are printed so they
    can be added by hand. Both groups are flagged as exported afterwards.

    Args:
        path: Directory to write into; created if it does not exist.
        filename: Name of the CSV file. It is also stored on every exported
            document in its 'filename' field.
        n_multiple_choice: Maximum number of multiple-choice questions to sample.
        n_multiple_selection: Maximum number of multiple-selection questions to sample.

    Returns:
        None. Failures are reported with print() instead of being raised.

    Note:
        Sampling is uniform over unexported questions; it is NOT stratified by
        exam domain. TODO: sample per domain if the batch has to follow the
        domain weightings.
    """
    #header line (shared by multi-choice and multi-selection)
    file_data = [['Question','Question Type','Answer Option 1','Explanation 1','Answer Option 2','Explanation 2','Answer Option 3','Explanation 3','Answer Option 4','Explanation 4','Answer Option 5','Explanation 5','Answer Option 6','Explanation 6','Correct Answers','Overall Explanation','Domain']]
    exported_uuid = []

    #1. multiple-choice questions become CSV rows
    for doc in _sample_unexported('multiple-choice', n_multiple_choice):
        row = [_collapse_whitespace(doc['question']), 'multiple-choice']
        for letter in 'ABCD':
            row.append(doc['options'][letter])
            row.append(_collapse_whitespace(doc['explanation'][letter]))
        row.extend(['', '', '', ''])  # answer options 5 and 6 are unused
        # map_index presumably converts the answer letter to the option number
        # expected in the 'Correct Answers' column -- confirm in const.py
        row.append(const.map_index(doc['answer']))
        row.extend(['', ''])  # overall explanation and domain are left blank
        file_data.append(row)
        exported_uuid.append(doc['uuid'])

    #2. multi selection: bulk upload does not support this type of question,
    #   so only the uuids are listed and the questions are added manually
    manual_uuid = [doc['uuid'] for doc in _sample_unexported('multiple-selection', n_multiple_selection)]
    exported_uuid.extend(manual_uuid)
    print('","'.join(manual_uuid))

    #save all questions to csv
    target = os.path.join(path, filename)  # works with or without a trailing slash on `path`
    try:
        if path:
            os.makedirs(path, exist_ok=True)
        # Explicit UTF-8 so the output does not depend on the platform's default encoding.
        with open(target, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(file_data)
    except Exception as e:
        print(f"An error occurred while saving the array: {e}")
        return  # nothing was written, so nothing is marked as exported
    print(f"Data successfully saved to '{filename}'")

    if not exported_uuid:
        return
    try:
        # One request flags the whole batch instead of one update per question.
        collection.update_many({'uuid': {'$in': exported_uuid}},
                               {'$set': {'exported': 1, 'filename': filename}})
    except Exception as e:
        print(f"The CSV was written, but marking the questions as exported failed: {e}")
    
#test: export one batch of not-yet-exported questions to a CSV file inside FOLDER
export_csv(FOLDER, 'aws_cloud_practitioner_6_20250516.csv')

1dd94ea6-4e7f-45ed-a786-edc8e41eb5aa","5d3d1153-cfbc-407c-8ffb-7d724a4d0aa3","33799af0-b95e-4219-98d7-15010f3d79e0","10162b92-db02-469a-8ffa-f3c34da9eb6d","aadb4ee5-8683-4867-be9b-60f35f1c8d14
Data successfully saved to 'aws_cloud_practitioner_6_20250516.csv'


In [18]:
def unify_fields(filename="aws_cloud_practitioner_2_20250516.csv"):
    """Undo an export: make every question of one CSV batch exportable again.

    All documents whose 'filename' equals `filename` get 'exported' set back
    to 0 and 'filename' cleared, so export_csv can pick them up again.

    Args:
        filename: Export file whose questions should be released. Defaults to
            the batch that was previously hard-coded in this function.

    Returns:
        None.
    """
    # A single server-side update replaces the former find + update_one loop,
    # which rewrote the very field its cursor was filtering on and raised
    # KeyError for any document that had no 'uuid'.
    collection.update_many({'filename': filename},
                           {'$set': {'exported': 0, 'filename': ''}})

#test: running this cell resets 'exported' to 0 and clears 'filename' for the batch selected by unify_fields
unify_fields()