In [None]:
from urllib.parse import urljoin
import requests
import json
from bs4 import BeautifulSoup
from collections import namedtuple

class QuizIterator:
    """Single-pass iterator over the URLs of numbered quiz listing pages.

    Page numbers run from 1 through ``pages`` inclusive. Each URL is produced
    by ``urljoin(address, str(page_number))``.

    Note: ``urljoin`` resolves the page number relative to ``address``, so
    ``address`` should end with "/"; without the trailing slash the last path
    segment is replaced by the page number instead of being extended.
    """

    def __init__(self, address, pages):
        self._address = address
        self._pages = pages
        self._index = 1  # number of the next page to hand out (1-based)

    def __iter__(self):
        # The object is its own iterator, so it is exhausted after one pass.
        return self

    def __next__(self):
        # Guard clause: stop once every page number up to ``pages`` was served.
        if not self._index <= self._pages:
            raise StopIteration

        page_url = urljoin(self._address, str(self._index))
        self._index += 1
        return page_url
        
# Lightweight record for one scraped question: the number shown next to it,
# the href of its detail page, and the question text.
Question = namedtuple("Question", ["number", "link", "text"])

# Quiz type slugs; AMAZON_QUIZ_TYPE is interpolated into PAGE below.
# PYTHON_QUIZ_TYPE is not referenced in the visible cells.
AMAZON_QUIZ_TYPE = "amazon-web-services"
PYTHON_QUIZ_TYPE = "python"

# Number of listing pages to walk (QuizIterator numbers them from 1).
PAGES = 4

# Base URL of the paginated question listing; the page number is joined onto it.
PAGE = f"https://quizack.com/{AMAZON_QUIZ_TYPE}/questions-and-answers/"


In [None]:
def add_questions(soup_object, questions: list):
    """Extract every question card from a parsed listing page.

    Each element of ``soup_object`` whose class attribute matches the card
    selector below is turned into a ``Question`` and appended to ``questions``
    in place. Nothing is returned.

    The fields are read by fixed positions in the card's ``contents`` lists,
    so a card with a different layout raises (e.g. ``IndexError``) rather
    than being skipped.
    """
    card_selector = {"class": "col-lg-6 col-md-6 col-sm-12 mt-2"}

    for card in soup_object.find_all(attrs=card_selector):
        # Descend three levels (second child each time) to the node whose
        # children hold the question number and the question link.
        holder = card.contents[1].contents[1].contents[1]
        nodes = holder.contents

        number = nodes[1].text
        link_node = nodes[3]
        href = link_node.attrs["href"]
        title = link_node.contents[0].text

        questions.append(Question(number, href, title))

In [None]:
# Seconds to wait for a listing page before giving up. Without a timeout,
# requests.get can block indefinitely on a stalled connection.
LISTING_REQUEST_TIMEOUT = 10

# Start from an empty list so re-running this cell does not duplicate entries.
questions = []

for quiz in QuizIterator(PAGE, PAGES):
    page_get_response = requests.get(url=quiz, timeout=LISTING_REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx responses instead of silently parsing an error
    # page into zero questions.
    page_get_response.raise_for_status()
    soup = BeautifulSoup(page_get_response.text, 'html.parser')
    add_questions(soup, questions)

# Quick sanity check: how many questions were collected across all pages.
print(len(questions))


In [None]:
def parse_answers(test_question: Question, timeout: float = 10):
    """Download a question's detail page and collect its answer options.

    Args:
        test_question: Question whose ``link`` is fetched with ``requests.get``.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        dict mapping each answer's text to the value of the ``check``
        attribute found inside its option block. An option block that has
        fewer children than expected is recorded under the key
        ``"undefined"`` with the value ``"undefined"`` (several such blocks
        collapse into that single entry).

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status (via ``raise_for_status``).
        ValueError: if the page contains no ``"col-12 mcq-area"`` element.
    """
    qpage_get_response = requests.get(url=test_question.link, timeout=timeout)
    # Surface 4xx/5xx responses here instead of parsing an error page.
    qpage_get_response.raise_for_status()
    q_soup = BeautifulSoup(qpage_get_response.text, 'html.parser')

    mcq_area = q_soup.find(attrs={"class": "col-12 mcq-area"})
    if mcq_area is None:
        # find() returns None when nothing matches; report which page failed
        # rather than letting the next call die with an opaque AttributeError.
        raise ValueError(f"No answer area found on {test_question.link}")

    question_areas = mcq_area.find_all(attrs={"class": "d-flex rounded align-items-center border position-relative mt-3 ans-options"})
    answers = {}
    for area in question_areas:
        try:
            # Fixed positions inside the option block: the element carrying
            # the "check" attribute, then the node holding the answer text.
            check = area.contents[3].contents[1].attrs["check"]
            answer = area.contents[5].text
        except IndexError:
            # Layout differs from the expected one: show the raw block for
            # inspection and record a placeholder entry.
            print(area.contents)
            check = "undefined"
            answer = "undefined"
        answers[answer] = check

    return answers

In [None]:
# Map each question's text to its parsed answers. Filled in an explicit loop
# so that entries gathered before a failure remain available in the kernel.
quiz_output = dict()

for test_quest in questions:
    parsed_answers = parse_answers(test_quest)
    quiz_output[test_quest.text] = parsed_answers

In [None]:
# Persist the scraped quiz as indented JSON in a file named after the quiz type.
output_filename = f"{AMAZON_QUIZ_TYPE}.json"
with open(output_filename, mode="w", encoding="utf-8") as json_file:
    json.dump(quiz_output, json_file, indent=2)