In [1]:
import requests
from dotenv import load_dotenv
import pymongo
import time
import uuid
import sys
import os
import importlib
import re
import json
import csv

# Current working directory of the kernel (the notebook's folder when the
# kernel is started from there).
notebook_dir = os.getcwd()
# Directory one level above the working directory.
parent_dir = os.path.dirname(notebook_dir)

# Make modules in the parent directory importable; the membership check keeps
# re-running this cell from adding duplicate sys.path entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Project-local helper module (presumably located in the parent directory added
# above -- verify); it provides const.generate_random_uuid() used further down.
import const
#importlib.reload(const)    # uncomment to pick up edits to const.py without restarting the kernel
#importlib.reload(const)    #if we update the file const.py

In [2]:
# Load variables from a .env file into the process environment.
load_dotenv()
# URL that prompts are POSTed to; raises KeyError when GENERATIVE_URI is not set.
GENERATIVE_URI = os.environ['GENERATIVE_URI']

In [3]:
# MongoDB client built from the DB_URI environment variable (KeyError if unset).
db_client = pymongo.MongoClient(os.environ['DB_URI'])
# Database holding the certificate question banks.
db = db_client['db_certificates']
# Collection that stores the generated PMP questions; used by all cells below.
collection = db['tb_pmp']

In [4]:
# Number of questions in one exported exam file (equals the sum of the "num"
# values in DOMAINS: 75 + 90 + 15).
TOTAL_QUESTIONS = 180
# Exam blueprint, one entry per domain:
#   name    - domain label stored on each question and used to filter on export
#   percent - share of the exam covered by the domain
#   num     - number of questions for the domain
#   type    - how many questions of each question type to sample on export
#             (the counts in "type" add up to "num")
DOMAINS = [
    {
        "name": "People",
        "percent": 42,
        "num": 75,   # note: questions should be generated about 20 at a time
        "type": {
            "multiple-choice": 69, "multiple-selection": 4, "fill-the-blank": 2
        }  # number of questions per question type
    },
    {
        "name": "Process",
        "percent": 50,
        "num": 90,
        "type": {
            "multiple-choice": 84, "multiple-selection": 4, "fill-the-blank": 2
        }  # number of questions per question type
    },
    {
        "name": "Business",
        "percent": 8,
        "num": 15,
        "type": {
            "multiple-choice": 13, "multiple-selection": 1, "fill-the-blank": 1
        }  # number of questions per question type
    }
]

In [5]:
# DOMAINS = [
#     {
#         "name": "People",
#         "num": 10
#     },
#     {
#         "name": "People",
#         "num": 10
#     },
#     {
#         "name": "People",
#         "num": 10
#     },
#     {
#         "name": "Process",
#         "num": 10
#     },
#     {
#         "name": "Process",
#         "num": 10
#     },
#     {
#         "name": "Process",
#         "num": 10
#     },
#     {
#         "name": "Business",
#         "num": 10
#     }
# ]

In [6]:
#Send a POST request
def post_request_generative_ai(text_prompt, timeout=(10, 600)):
    """POST a prompt to the generative-AI endpoint and return the decoded JSON reply.

    A fixed instruction is appended to ``text_prompt`` asking the model for a
    JSON object with a ``questions`` key and per-option explanations.

    Args:
        text_prompt (str): The prompt text to send.
        timeout (float | tuple): Passed to ``requests.post``; either one value
            or a ``(connect, read)`` pair in seconds. Without a timeout a
            stalled connection would block the notebook indefinitely. The read
            timeout is generous because large batches take a long time to
            generate.

    Returns:
        dict: The decoded JSON body of the response, or ``{'error': exception}``
        when the request fails, times out, or the body is not valid JSON.
    """
    HEADER = {'Content-Type': 'application/json'}
    json_data = {
        "contents": [
            { "parts": [
                {"text": text_prompt + " Please provide a response in a structured JSON format with the key name 'questions', including all explanations for each answer as a JSON object. Answer and explanation should have the JSON format with keys 'A', 'B', 'C', 'D' or 'E'. Each explanation has more than 50 words."}]
                #{"text": text_prompt + " Please provide a plain string response."}]
            }
        ]
    }

    try:
        r = requests.post(GENERATIVE_URI, headers=HEADER, json=json_data, timeout=timeout)
        # The HTTP status is deliberately not checked: error responses carry a
        # JSON body that callers print for diagnosis.
        return r.json()
    except Exception as e:
        # Best effort: report the problem and hand back a marker dict so a
        # failed request does not abort the whole generation loop.
        print(e)
        return {'error': e}

In [7]:
#insert questions first
def insert_questions(json_question):
    """Insert a question document unless one with the same question text exists.

    Args:
        json_question (dict): Question document; ignored when it has no
            'question' key.

    Returns:
        bool: True when a new document was inserted, False when the document
        had no 'question' key or a document with the same text already exists.
    """
    if 'question' not in json_question:
        return False

    existed_doc = collection.find_one({'question': json_question['question']})
    if existed_doc is not None:
        # Same question text is already stored; do not create a duplicate.
        return False

    #insert new document
    collection.insert_one(json_question)
    return True

In [8]:
#map answer letters A - E to option positions 1 - 5
def map_index(a_char):
    """Translate an answer letter into its 1-based option position.

    'A' -> 1, 'B' -> 2, 'C' -> 3, 'D' -> 4, 'E' -> 5. Any other value
    (lower-case letters, longer strings, non-strings) yields None.
    """
    for position, letter in enumerate(('A', 'B', 'C', 'D', 'E'), start=1):
        if a_char == letter:
            return position
    return None

In [None]:
def parse_candidate_content(data):
    """
    Parses the content from a dictionary in the 'candidates' list.

    The first non-empty 'text' part decides the result:
    * a fenced ```json block (LF or CRLF line endings) is decoded and returned;
    * otherwise, if the whole text is itself a JSON object or array, it is
      decoded and returned (models do not always wrap JSON in a fence);
    * otherwise the stripped raw text is returned.

    Args:
        data (dict): A dictionary representing an item in the 'candidates' list.

    Returns:
        dict or list or str or None: The decoded JSON when JSON content is
                             found and successfully parsed. The raw text when
                             no JSON is found or a fenced block fails to
                             decode. None if the expected structure
                             (data['content']['parts'] as a list with a
                             non-empty string 'text') is not found.
    """
    if not isinstance(data, dict):
        return None
    content = data.get('content')
    if not isinstance(content, dict) or not isinstance(content.get('parts'), list):
        return None

    for part in content['parts']:
        # Skip parts without usable text instead of failing on .strip().
        if not isinstance(part, dict) or not isinstance(part.get('text'), str):
            continue
        text_content = part['text'].strip()
        if not text_content:
            continue

        # Use regex to find a fenced JSON block within the text
        json_match = re.search(r'```json\r?\n(.*?)\r?\n```', text_content, re.DOTALL)
        if json_match:
            try:
                return json.loads(json_match.group(1))
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e}")
                return text_content  # Return the raw text in case of an error

        # No fence: the text may still be bare JSON.
        try:
            parsed = json.loads(text_content)
        except json.JSONDecodeError:
            return text_content  # Return the raw text if it is not JSON
        # Only containers count as "JSON content"; a bare number or quoted
        # string is treated as plain text.
        if isinstance(parsed, (dict, list)):
            return parsed
        return text_content

    return None

def extract_questions_from_candidates(response_data):
    """
    Return the first 'questions' list found among the response candidates.

    Each entry of response_data['candidates'] is run through
    parse_candidate_content; the first one that parses to a dictionary
    holding a list under 'questions' wins.

    Args:
        response_data (dict): The dictionary containing the response data.

    Returns:
        list or None: The list stored under 'questions', or None when the
                     response has no 'candidates' list or no candidate
                     yields such a list.
    """
    if not isinstance(response_data, dict):
        return None

    candidates = response_data.get('candidates')
    if not isinstance(candidates, list):
        return None

    for candidate in candidates:
        parsed = parse_candidate_content(candidate)
        if not isinstance(parsed, dict):
            continue
        found = parsed.get('questions')
        if isinstance(found, list):
            return found

    return None


In [10]:
def generate_list_of_questions():
    """Generate questions for every domain in DOMAINS and store them.

    For each domain one prompt asking for domain['num'] multiple-choice
    questions is sent via post_request_generative_ai. Every returned question
    is tagged with 'domain', 'exported' = 0 and a fresh 'uuid', then handed to
    insert_questions. When no questions can be extracted, the raw response and
    a notice are printed and the loop moves on to the next domain.

    Returns:
        int: Number of questions handed to insert_questions during this call
        (questions skipped there as duplicates are still counted).
    """
    no_of_questions = 0
    for domain in DOMAINS:
        #domain['num'] = 15 #for testing
        # An earlier variant of the prompt asked for "questions" instead of
        # "multiple-choice questions" and added the sentence: "There are 3 question
        # types: multiple-choice (80% of total questions), multiple-selection (15% of
        # the total questions), and fill-the-blank (5% of the total questions)."
        text_prompt = "The PMP exam format contains questions that are designed to assess the candidate's knowledge and application of project management principles and practices in the real world. The question types include: 'Knowledge-based questions: These questions test the candidate\'s understanding of project management concepts, models, artifacts, and methods. Scenario-based questions: These questions present real-world project management situations and require the candidate to apply their knowledge to determine the appropriate course of action. The challenge in this type of question is that the options provided may seem to be confusing, and they may give you the feeling of more than one correct option. Math-based questions: These questions involve calculations related to project management processes, such as earned value management, estimation techniques, and risk analysis. Professional responsibility questions: Questions that should be answered based on the code of conduct since you are a certified professional. Methodology covers either agile or waterfall or hybrid.'. PMP exam has 10 knowledge areas: Integration Management, Scope Management, Schedule Management, Cost Management, Quality Management, Resource Management, Communication Management, Risk Management, Procurement Management, Stakeholder Management. Generate "+str(domain['num'])+" multiple-choice questions with answers and explanations for the domain "+domain['name']+". Each question has more than 60 words in length and should focus more on complex real-world scenarios rather than definitions."
        raw_generated_text = post_request_generative_ai(text_prompt)
        #print('raw_generated_text')
        questions = extract_questions_from_candidates(raw_generated_text)
        if questions:
            #parse questions and answers
            for q in questions:
                if not isinstance(q, dict):
                    # Model output is not guaranteed to be well-formed; one bad
                    # item must not abort the rest of the batch.
                    print("Skipping malformed question entry: " + repr(q))
                    continue
                q['domain'] = domain['name']
                q['exported'] = 0
                q['uuid'] = const.generate_random_uuid()
                #print(q)
                insert_questions(q)
                no_of_questions += 1
        else:
            print(raw_generated_text)
            print("No questions found in the parsed content: " + domain['name'])
        #print(domain['name'] + ': ' + str(no_of_questions))
    return no_of_questions
#test
#for i in range(5):
    #generate_list_of_questions()    #20 questions ~ 30 secs
    #print('===== Finish loop: ' + str(i))

In [18]:
#unify fields in all documents
def unify_fields():
    """One-off maintenance helper for normalising field names of stored questions.

    As written, every update statement below is commented out, so a call only
    iterates the not-yet-exported multiple-choice documents whose 'explanation'
    is null or missing and writes nothing to the database. The commented code
    records the migrations this helper was used for; re-enable the relevant
    part before running it.
    """
    # Intended renames:
    #questionType, question_type -> type
    # correctAnswer, correctAnswers -> answer
    # answers -> options
    # {'explanation': None} matches documents where the field is null or absent.
    all_docs = collection.find({'exported':0, 'type':'multiple-choice', 'explanation':None})
    for doc in all_docs:
        # Both locals are only consumed by the commented-out code below.
        hasUpdated = False
        update_doc = {}
        #print(doc['uuid'])
        # if 'type' not in doc:
        #     if 'questionType' in doc:
        #         hasUpdated = True
        #         update_doc['type'] = doc['questionType']
        #     elif 'question_type' in doc:
        #         hasUpdated = True
        #         update_doc['type'] = doc['question_type']
        #     else:
        #         hasUpdated = True
        #         update_doc['type'] = 'multiple-choice'
        # if 'answer' not in doc:
        #     if 'correctAnswer' in doc:
        #         hasUpdated = True
        #         update_doc['answer'] = doc['correctAnswer']
        #     elif 'correctAnswers' in doc:
        #         hasUpdated = True
        #         update_doc['answer'] = doc['correctAnswers']
        # if 'options' not in doc:
        #     if 'answers' in doc:
        #         hasUpdated = True
        #         update_doc['options'] = doc['answers']

        #if hasUpdated:
        #print('a')
        # Copies the plural 'explanations' field into 'explanation':
        #collection.update_one({'uuid': doc['uuid']}, {'$set': {'explanation': doc['explanations']}})
    #update 2: reset the 'exported' flag of every question written to a given file
    # all_docs = collection.find({'filename': 'aws_pmp_test_6_20250506.csv'})
    # for doc in all_docs:
    #     collection.update_one({'uuid': doc['uuid']}, {'$set': {'exported': 0, 'filename': ''}})

#test
#unify_fields()

In [None]:
#export to CSV file, each file has 180 questions (80% (144) multi choice, 15% (27) multi select, 5% (9) fill the blank)
#People (75), Process (90), Business (15)
#Question,Question Type,Answer Option 1,Explanation 1,Answer Option 2,Explanation 2,Answer Option 3,Explanation 3,Answer Option 4,Explanation 4,Answer Option 5,Explanation 5,Answer Option 6,Explanation 6,Correct Answers,Overall Explanation,Domain
def _clean_csv_text(text):
    """Collapse double spaces into one and remove newlines so a cell stays on one line."""
    return text.replace('  ', ' ').replace('\n', '')


def _build_multiple_choice_row(doc):
    """Build the CSV row for one multiple-choice question document.

    Raises KeyError/TypeError/AttributeError when a required field is missing
    or has the wrong shape, and ValueError when the answer is not one of A-E.
    """
    options = doc['options']
    explanations = doc['explanation']

    row = [_clean_csv_text(doc['question']), 'multiple-choice']
    for letter in ('A', 'B', 'C', 'D'):
        row.extend([options[letter], _clean_csv_text(explanations[letter])])

    # Option 5 is optional; option 6 is never populated.
    if 'E' in options:
        row.extend([options['E'], _clean_csv_text(explanations['E'])])
    else:
        row.extend(['', ''])
    row.extend(['', ''])

    correct_index = map_index(doc['answer'])
    if correct_index is None:
        raise ValueError("unsupported answer value " + repr(doc['answer']))

    row.extend([correct_index, '', doc['domain']])   # correct answer, overall explanation, domain
    return row


def export_csv(filename):
    """Export a random, not-yet-exported set of questions to a CSV file.

    For every domain in DOMAINS and every question type configured for it, the
    configured number of documents with exported == 0 is sampled at random.
    Multiple-choice questions become CSV rows. Multiple-selection and
    fill-the-blank questions are not written (bulk upload does not support
    them); their uuids are printed so they can be added manually. After the
    file has been written, all selected questions are marked exported = 1 and
    tagged with the file name.

    Multiple-choice documents that cannot be turned into a row (missing fields
    or an answer outside A-E) are reported, left out of the file and not
    marked as exported.

    Args:
        filename (str): Path of the CSV file to write (overwritten if present).
    """
    #get questions that not exported yet. Note that: each part must follow by domain percents
    file_data = []
    #append header line (both multi-choice and multi-selection)
    file_data.append(['Question','Question Type','Answer Option 1','Explanation 1','Answer Option 2','Explanation 2','Answer Option 3','Explanation 3','Answer Option 4','Explanation 4','Answer Option 5','Explanation 5','Answer Option 6','Explanation 6','Correct Answers','Overall Explanation','Domain'])
    exported_uuid = []
    manual_uuid = []
    for domain in DOMAINS:
        #get random questions for each domain
        for question_type, sample_size in domain['type'].items():
            pipeline = [
                {"$match": {'exported': 0, 'domain': domain['name'], 'type': question_type}},
                {"$sample": {"size": sample_size}}
            ]
            for doc in collection.aggregate(pipeline):
                if question_type == 'multiple-choice':
                    try:
                        row = _build_multiple_choice_row(doc)
                    except (KeyError, TypeError, AttributeError, ValueError) as e:
                        # Generated documents do not always share one schema; skip
                        # the bad one instead of aborting the whole export.
                        print(f"Skipping malformed question {doc.get('uuid')}: {e!r}")
                        continue
                    file_data.append(row)
                elif question_type == 'multiple-selection' or question_type == 'fill-the-blank':
                    manual_uuid.append(doc['uuid']) #bulk upload does not support these types, they must be added manually
                exported_uuid.append(doc['uuid'])

    #uuids of the questions that have to be added by hand
    print('","'.join(manual_uuid))

    #save all questions to csv
    try:
        # Explicit UTF-8 so the result does not depend on the platform default
        # encoding (generated text regularly contains non-ASCII characters).
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(file_data)
    except Exception as e:
        print(f"An error occurred while saving the array: {e}")
        return
    print(f"Data successfully saved to '{filename}'")

    #mark the selected questions as exported, only once the file is safely written
    if exported_uuid:
        try:
            collection.update_many(
                {'uuid': {'$in': exported_uuid}},
                {'$set': {'exported': 1, 'filename': filename}}
            )
        except Exception as e:
            print(f"CSV was saved, but marking questions as exported failed: {e}")
    
#test: run one export; note that this also marks the sampled questions as exported in the database
export_csv('aws_pmp_test_6_20250506.csv')

5e91dc97-f762-413f-b8cc-b2344bcc9046","9315abe1-0a4d-49a9-8299-25f821e40485","92fb5d6e-257d-4e9c-8926-0b3091d25764","688665b4-6c58-476c-a810-897132f95704","8b928b71-6119-47fa-afd6-7456419c46de","732fddf6-cfb4-4bb2-81ac-86290f67fb0e","fc42a43e-cc2c-43c8-86f9-2dd137078eff","4fe90ed4-a211-47d3-b05a-117f3389260b","3ed9aa78-a77e-4374-9ebe-77ddec1f70e9","6a56b8cb-043e-425a-835d-61921a35be29","42b28c97-8cbd-4015-a245-7d6d803296c7","221521cf-1ebb-45e3-bd24-4c91edd4e636","a7ad30a8-0ad4-41ce-9cac-e19cbe5a942c","a1d70693-46fe-44a3-9aad-367f58f4a009
Data successfully saved to 'aws_pmp_test_6_20250506.csv'
