In [2]:
import pymongo
import os
from dotenv import load_dotenv
import exam_settings
import importlib
from xhtml2pdf import pisa
import sys
#import cv2
# Directory the notebook kernel is currently running in
notebook_dir = os.getcwd()
# Directory one level above the notebook's working directory
parent_dir = os.path.dirname(notebook_dir)

# Make modules stored in the parent directory importable.
# The membership check keeps sys.path free of duplicates when this cell is re-run.
# NOTE(review): presumably const and linkedin_utils (imported below) live there - confirm.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

#importlib.reload(exam_settings)

import const
#importlib.reload(const)

import linkedin_utils
#importlib.reload(linkedin_utils)

In [3]:
# Load variables from a .env file into the process environment (python-dotenv)
load_dotenv() 

# Connect to MongoDB with the URI taken from the DB_URI environment variable;
# os.environ[...] raises KeyError if the variable is not set.
db_client = pymongo.MongoClient(os.environ['DB_URI'])
# Database handle used as a module-level global by the functions in later cells
db = db_client['db_certificates']


In [4]:
#only export PDF file
def generate_pdf(cert_name, landing_url, page_num, question_per_page, questions, filename):
    """Render questions into a PDF file: question pages first, then answer pages.

    Args:
        cert_name: Certificate name shown in the footer of every question page.
        landing_url: URL used as both link target and link text in that footer.
        page_num: Number of question pages; the same number of answer pages follows.
        question_per_page: Number of questions placed on each page.
        questions: Indexable sequence of question documents. Each one is read through
            the keys 'question', 'options', 'explanation' and 'answer'. It must hold at
            least page_num * question_per_page entries, otherwise indexing fails
            (IndexError for a list).
        filename: Path of the PDF file to write (opened in binary write mode).

    Returns:
        {} when pisa reports no error, otherwise {'error': <pisa error value>}.
    """
    html_question_list = []
    # Footer shown at the bottom of every question page. The href value has to be
    # wrapped in a matching pair of quotes, otherwise the anchor markup is malformed.
    checkout_html = f'<div class="checkout">{cert_name}<br/>Check out more questions <a href="{landing_url}">{landing_url}</a></div>'

    #1. Question pages
    question_index = 1
    for _ in range(page_num):
        #begin page
        html_question_list.append('<div class="container">')
        for _ in range(question_per_page):
            current_doc = questions[question_index-1]
            html_question = '<div class="question">' + str(question_index) + ') ' + current_doc['question'] + '</div>'
            html_answers = '<div class="answers">'
            for key, option_text in current_doc['options'].items():
                html_answers += f'''
                    <div class="answer">
                        <label>{key}. {option_text}</label>
                    </div>'''
            html_question_list.append(html_question + html_answers + '</div>')    #1 question & options
            question_index += 1
        #end 1 page
        html_question_list.append(checkout_html)
        html_question_list.append('</div><pdf:nextpage />') #close tag of 1 page, then force a page break

    #2. Answers: same questions again, each option replaced by its explanation
    question_index = 1
    for _ in range(page_num):
        #begin page
        html_question_list.append('<div class="container">')
        for _ in range(question_per_page):
            current_doc = questions[question_index-1]
            html_question = '<div class="question">' + str(question_index) + ') ' + current_doc['question'] + '</div>'
            html_answers = '<div class="answers">'
            for key in current_doc['options'].keys():
                # Only the option whose key equals the stored answer gets the "correct" class
                correct_class = 'correct' if key == current_doc['answer'] else ''
                html_answers += f'''
                    <div class="explanation show">
                        <label class="{correct_class}">{key}. {current_doc['explanation'][key]}</label>
                    </div>'''
            html_question_list.append(html_question + html_answers + '</div>')    #1 question & explanations
            question_index += 1
        #end 1 page (answer containers are closed without a checkout footer or page-break tag)
        html_question_list.append('</div>')

    #combine into a string
    html_question_list_str = ''.join(html_question_list)
    #export to pdf
    with open(filename, "w+b") as result_file:
        # convert HTML to PDF
        pisa_status = pisa.CreatePDF(
                exam_settings.html_pdf_head_str + html_question_list_str + exam_settings.html_pdf_tail_str,       # page data
                dest=result_file  # destination file
            )
        # Check for errors
        if pisa_status.err:
            print(f"An error occurred! {pisa_status.err}")
            return {'error': pisa_status.err}
        return {}


In [5]:
def upload_pdf_2_channel(channel_type, cert_metadata, filepath, filename):
    """Share a generated PDF on the requested channel.

    Only the 'LinkedIn' channel is handled: the file is handed to
    linkedin_utils.share_pdf_2_LI and whatever that call returns is printed.
    Any other channel_type results in no action.
    """
    # Guard clause: nothing to do for channels other than LinkedIn
    if channel_type != 'LinkedIn':
        return
    print(linkedin_utils.share_pdf_2_LI(filepath, filename, cert_metadata))

In [6]:
def generate_images(today_yyyymmdd, cert_metadata, documents):
    """Render two PNG images for every question document.

    For each document, exam_settings.generate_image is called twice with HTML built
    from the numbered question text:
      * '<uuid>.png'         - each option followed by its key-prefixed explanation
                               inside an "explanation" block;
      * '<uuid>_explain.png' - each option (the one matching doc['answer'] gets the
                               " correct" label class) followed by its explanation
                               inside an "explanation show" block.
    Both images go to the folder cert_metadata['img_folder_path'] + "/" + today_yyyymmdd.

    Args:
        today_yyyymmdd: Date string used as the sub-folder name.
        cert_metadata: Mapping read through the key 'img_folder_path'.
        documents: Iterable of question documents read through the keys
            'question', 'options', 'explanation', 'answer' and 'uuid'.
    """
    for position, doc in enumerate(documents, start=1):
        # Numbered question header shared by both images
        question_html = '<div class="question">' + (str(position) + ') ') + doc['question'] + '</div>'
        answers_close = '</div>'

        # Image 1: options without the correct-answer highlight
        plain_blocks = []
        for key in doc['options'].keys():
            plain_blocks.append(f'''
                <div class="answer">
                    <label>{key}. {doc['options'][key]}</label>
                </div>
                <div class="explanation">
                    <label>{key}. {doc['explanation'][key]}</label>
                </div>''')
        plain_html = '<div class="answers">' + ''.join(plain_blocks)
        exam_settings.generate_image(
            exam_settings.html_head_str + question_html + plain_html + answers_close + exam_settings.html_tail_str,
            cert_metadata['img_folder_path'] + "/" + today_yyyymmdd,
            doc['uuid'] + '.png',
        )

        # Image 2: options with the correct answer marked and explanations shown
        marked_blocks = []
        for key in doc['options'].keys():
            label_class = ' correct' if doc['answer'] == key else ''
            marked_blocks.append(f'''
                <div class="answer">
                    <label class="{label_class}">{key}. {doc['options'][key]}</label>
                </div>
                <div class="explanation show">
                    <label>{doc['explanation'][key]}</label>
                </div>''')
        marked_html = '<div class="answers">' + ''.join(marked_blocks)
        exam_settings.generate_image(
            exam_settings.html_head_str + question_html + marked_html + answers_close + exam_settings.html_tail_str,
            cert_metadata['img_folder_path'] + "/" + today_yyyymmdd,
            doc['uuid'] + '_explain.png',
        )

In [7]:
#importlib.reload(exam_settings)

#call this API every day to generate PDF files and export images
def auto_generate_pdf_images(channel_type, cert_symbol):
    """Sample questions for a certificate and export them as images.

    Looks up the certificate metadata by symbol in 'tb_cert_metadata', draws 20 random
    questions matching the $match filter from the certificate's collection, and passes
    them to generate_images. PDF generation, channel upload and the per-question 'pdf'
    update are currently commented out, so channel_type is not used at the moment.

    Args:
        channel_type: Target channel name (only referenced by the disabled upload step).
        cert_symbol: Value of the 'symbol' field identifying the certificate metadata.

    Returns:
        None. Prints a message and returns early when the metadata document is missing
        or when fewer than 20 questions match.
    """
    meta_collection = db['tb_cert_metadata']
    question_total = 20 #number of questions at once
    #get metadata of the certificate
    cert_metadata = meta_collection.find_one({'symbol': cert_symbol})
    if cert_metadata is None:
        # find_one returns None when nothing matches; stop here instead of failing
        # later with a TypeError when the metadata is subscripted.
        print(f"No certificate metadata found for symbol '{cert_symbol}'")
        return
    #query random questions
    pipeline = [
                {"$match": {'pdf':'20250521'}},  #AWS SA has no 'type': 'multiple-choice', 'pdf': None
                {"$sample": {"size": question_total}} #randomly
            ]
    collection = db[cert_metadata['collection_name']]
    random_documents = list(collection.aggregate(pipeline))
    if len(random_documents) < question_total:
        print('Not enough questions to export')
        return
    # The values below feed the PDF/upload steps that are commented out further down;
    # they are kept so those steps can be re-enabled without other changes.
    cert_name = cert_metadata['name']
    today_yyyymmdd = const.get_current_date_yyyymmdd()
    filename = cert_metadata['filename_prefix']+today_yyyymmdd+'.pdf'
    filepath = cert_metadata['pdf_folder_path']+filename
    landing_url = cert_metadata['udemy_link']
    #result_pdf = generate_pdf(cert_name, landing_url, 4, 5, random_documents, filename)
    result_pdf = {}
    if 'error' not in result_pdf:
        #no error
        print('No error when generating PDF file')
        #upload PDF file to the channel
        #upload_pdf_2_channel(channel_type, cert_metadata, filepath, filename)
        #update db
        #for doc in random_documents:
            #collection.update_one({'uuid': doc['uuid']}, {'$set':{'pdf': today_yyyymmdd}})  #the question is exported to pdf file
        #
        generate_images(today_yyyymmdd, cert_metadata, random_documents)

#test
#auto_generate_pdf_images('LinkedIn', 'AWS_SAA')

In [8]:
def generate_images_1_page(cert_metadata, documents):
    """Render one portrait image holding a 5-row x 2-column table of questions.

    The first ten entries of `documents` are laid out row by row; each cell shows the
    numbered question and its options (no explanations). A header built from
    cert_metadata['name'] and two footer lines (a fixed Udemy profile URL and
    cert_metadata['udemy_link']) surround the table. The image is written through
    exam_settings.generate_image_portrait as 'one_page_<date>.png' in the folder
    cert_metadata['img_folder_path'] + "/" + <date>.

    Args:
        cert_metadata: Mapping read through 'udemy_link', 'name' and 'img_folder_path'.
        documents: Indexable sequence with at least ten question documents, each read
            through the keys 'question' and 'options'.
    """
    today_yyyymmdd = const.get_current_date_yyyymmdd()
    row_count, column_count = 5, 2

    table_parts = ['<table>']
    for row_no in range(row_count):
        table_parts.append('<tr>')
        for col_no in range(column_count):
            slot = row_no * column_count + col_no   # zero-based position in documents
            doc = documents[slot]
            question_html = '<div class="question">' + (str(slot + 1) + ') ') + doc['question'] + '</div>'
            # Options only, no explanations
            option_blocks = []
            for key in doc['options'].keys():
                option_blocks.append(f'''
                    <div class="answer">
                        <label>{key}. {doc['options'][key]}</label>
                    </div>''')
            answers_html = '<div class="answers">' + ''.join(option_blocks) + '</div>'
            table_parts.append('<td width="50%">' + question_html + answers_html + '</td>')
        table_parts.append('</tr>')
    table_parts.append('</table>')
    table = ''.join(table_parts)

    footer = (
        '<div class="footer">https://www.udemy.com/user/martindo/</div>'
        + '<div class="footer">' + cert_metadata['udemy_link'] + '</div>'
    )
    page_header = '<div class="header">' + cert_metadata['name'] + ' Practice Questions</div>'

    # Whole page rendered as a single image
    exam_settings.generate_image_portrait(
        exam_settings.html_head_1_img_6_q_str + page_header + table + footer + exam_settings.html_tail_1_img_6_q_str,
        cert_metadata['img_folder_path'] + "/" + today_yyyymmdd,
        'one_page_' + today_yyyymmdd + '.png',
    )
    print('Done generating 1 page image')
        

In [9]:
# Reload exam_settings so that edits made to the module since it was first imported
# are picked up without restarting the kernel.
importlib.reload(exam_settings)

#sample 10 random questions for a one-page image and print them
#(the image rendering call itself is currently commented out)
def generate_1_img_multi_questions(cert_symbol):
    """Sample 10 questions whose 'pdf' field matches None and print them, numbered.

    Looks up the certificate metadata by symbol in 'tb_cert_metadata', then draws a
    random sample from the certificate's question collection. The call that would
    render them into a single image (generate_images_1_page) is commented out, so the
    only visible effect is the printed question list.

    Args:
        cert_symbol: Value of the 'symbol' field identifying the certificate metadata.

    Returns:
        None. Prints a message and returns early when the metadata document is missing
        or when fewer than 10 questions match.
    """
    meta_collection = db['tb_cert_metadata']
    question_total = 10 #number of questions at once
    #get metadata of the certificate
    cert_metadata = meta_collection.find_one({'symbol': cert_symbol})
    if cert_metadata is None:
        # find_one returns None when nothing matches; stop here instead of failing
        # with a TypeError when the metadata is subscripted below.
        print(f"No certificate metadata found for symbol '{cert_symbol}'")
        return
    #query random questions
    pipeline = [
                {"$match": {'pdf': None}},  #AWS SA has no 'type': 'multiple-choice', 'pdf': None
                {"$sample": {"size": question_total}} #randomly
            ]
    collection = db[cert_metadata['collection_name']]
    random_documents = list(collection.aggregate(pipeline))
    if len(random_documents) < question_total:
        print('Not enough questions to export')
        return
    #generate_images_1_page(cert_metadata, random_documents)
    #list the sampled questions, numbered from 1
    for index, doc in enumerate(random_documents, start=1):
        print(str(index) + ') ' + doc['question'])
#test
# Runs the sampler for the 'AWS_SAA' certificate each time this cell executes;
# it queries the database through the module-level db handle.
generate_1_img_multi_questions('AWS_SAA')

1) A high-traffic e-commerce website experiences intermittent slowdowns during peak hours.  Which strategy would BEST improve resilience and scalability?
2) You are designing an architecture for a highly sensitive application. Which service should you use to manage and rotate encryption keys securely?
3) A company is using Amazon EC2 instances for their web application. They experience high traffic during peak hours and low traffic during off-peak hours. Which approach would minimize costs most effectively?
4) A startup is experiencing rapid growth, leading to unpredictable compute needs.  Their current EC2 instance setup is consistently underutilized during off-peak hours and overutilized during peak hours. Which approach offers the most cost-effective solution for managing this fluctuating demand?
5) A web application needs to handle a sudden surge in traffic. Which combination of services offers the BEST scalability and resilience?
6) A web application uses Amazon EC2 instances.  Wh