In [5]:
import re
import csv
from pydantic import BaseModel
from typing import List, Optional, Tuple

#### Sample Data

In [6]:
sample_md = """
    | <p>1</p> <img alt="A blue circle and a blue pentagon." data-bbox="142 157 386 277" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710985/rxxlbygd8zjswpbtsbdr.jpg"/>                                                                     | <img alt="Five shapes labeled A through E: A is a blue trapezoid, B is a black trapezoid, C is a black heptagon, D is a blue square, and E is a gray heptagon." data-bbox="513 149 931 254" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710968/cziyt0bthm1lmdtjsent.jpg"/> <p>A B C D E</p>                                                                                                         |
| <p>2</p> <img alt="A blue diamond with a black circle in the center and a blue hexagon with a black circle in the center." data-bbox="142 381 386 576" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710932/puzftnbntbryjgufbh1x.jpg"/> | <img alt="Five shapes labeled A through E: A is a blue ribbon with a black circle, B is a blue downward arrow, C is a blue parallelogram with a black circle, D is a blue plus sign, and E is a blue pentagon with a black circle." data-bbox="497 381 920 576" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711018/ek0qxphj1s7o0elbe9do.jpg"/> <p>A B C D E</p>                                     |
| <p>3</p> <img alt="Two octagons with three horizontal lines inside." data-bbox="148 666 396 801" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710893/rvqlszgttgf3hzcspjsc.jpg"/>                                                       | <img alt="Five shapes labeled A through E: A is a parallelogram with a horizontal line, B is a trapezoid, C is a right-angled corner, D is a circle with a horizontal line, and E is a rectangle with three horizontal lines." data-bbox="476 651 920 801" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711004/g3j8821vqgjob0w795fb.jpg"/> <p>A B C D E</p>                                          |
| <p>4</p> <img alt="A blue pentagon, a blue triangle, a black octagon, and a black diamond." data-bbox="163 876 359 1041" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710957/litob1fxpxney6xguzc4.jpg"/>                               | <img alt="Five shapes labeled A through E: A is a blue diamond above a black diamond, B is a blue triangle above a black hexagon, C is a blue pentagon above a black octagon, D is a blue triangle above a black square, and E is a blue star above a black pentagon." data-bbox="513 868 904 1026" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711020/g8brshe67742hpubv71d.jpg"/> <p>A B C D E</p> |
| <p>5</p> <img alt="Two blue chevrons pointing right and two blue chevrons pointing left." data-bbox="111 1116 396 1273" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710975/ogjfp2logpvpt3ejngc5.jpg"/>                                | <img alt="Five shapes labeled A through E: A is two vertical blue rectangles, B is two blue chevrons pointing right, C is two blue chevrons pointing left with a double-headed arrow, D is two blue chevrons pointing left, and E is two vertical blue rectangles." data-bbox="470 1130 925 1273" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710984/hotgz5zizleie9wkgrhe.jpg"/> <p>A B C D E</p>   |

6

![Two blue shield-like shapes. The left one has a black downward-pointing arrow inside. The right one has a white downward-pointing arrow inside.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711004/rxhnidxempfazwzrym56.jpg)

Two blue shield-like shapes. The left one has a black downward-pointing arrow inside. The right one has a white downward-pointing arrow inside.

![Five options: A (blue shield with white downward arrow), B (blue hexagon), C (blue shield with white downward arrow), D (blue diamond with white diamond), E (blue shield with white star).](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710924/bynu3i0ujezyhxjvuagh.jpg)

Five options: A (blue shield with white downward arrow), B (blue hexagon), C (blue shield with white downward arrow), D (blue diamond with white diamond), E (blue shield with white star).

A B C D E

7

![Two shapes: a black cross with a white star in the center, and a blue hexagon with a white star in the center.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711011/lpay44hhtdnfyymkd12u.jpg)

Two shapes: a black cross with a white star in the center, and a blue hexagon with a white star in the center.

![Five options: A (blue six-pointed star with white star), B (blue scalloped circle with white star), C (black six-pointed star with white star), D (blue arrow pointing right with white star), E (blue six-pointed star with white star).](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710922/ardgzf2tr5c7dgowkvgj.jpg)

Five options: A (blue six-pointed star with white star), B (blue scalloped circle with white star), C (black six-pointed star with white star), D (blue arrow pointing right with white star), E (blue six-pointed star with white star).

A B C D E

8

![Two shapes: a square with a dotted circle inside, and a diamond with a dotted circle inside.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710989/dp8n9bpqnyv5rxbhbyqn.jpg)

Two shapes: a square with a dotted circle inside, and a diamond with a dotted circle inside.

![Five options: A (square with solid circle), B (diamond with solid circle), C (square with dotted circle), D (diamond with dotted circle), E (square with dotted circle).](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710917/p4terwityhshv6oksfp7.jpg)

Five options: A (square with solid circle), B (diamond with solid circle), C (square with dotted circle), D (diamond with dotted circle), E (square with dotted circle).

A B C D E

9

![Two shapes: a solid black 'X' and a dotted 'X'.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710913/ghos227iyk83ved2a9gm.jpg)

Two shapes: a solid black 'X' and a dotted 'X'.

![Five options: A (dotted 'X'), B (solid black 'X'), C (dotted 'X'), D (solid black 'X'), E (dotted 'X').](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710980/bn8ftfng9jow1ks4xwgg.jpg)

Five options: A (dotted 'X'), B (solid black 'X'), C (dotted 'X'), D (solid black 'X'), E (dotted 'X').

A B C D E

10

![Two 3D cubes. The left one has a blue heart on the front and a blue plus sign on the right. The right one has a blue plus sign on the front and a blue heart on the right.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710976/taiyazfmmbehnvyhbq70.jpg)

Two 3D cubes. The left one has a blue heart on the front and a blue plus sign on the right. The right one has a blue plus sign on the front and a blue heart on the right.

![Five options: A (blue heart on front, blue plus on right), B (blue plus on front, blue star on right), C (blue plus on front, blue heart on right), D (blue heart on front, blue heart on right), E (blue plus on front, blue plus on right).](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711015/sxj1uvey7d64ghk8vkmf.jpg)

Five options: A (blue heart on front, blue plus on right), B (blue plus on front, blue star on right), C (blue plus on front, blue heart on right), D (blue heart on front, blue heart on right), E (blue plus on front, blue plus on right).

A B C D E

11

![Two blue star-like shapes with a white center. The left shape has 8 points and is inside a light gray circle with a dotted pattern. The right shape has 6 points and is inside a light gray circle with a dotted pattern.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711008/ridztod4ojfq8bptaj87.jpg)

Two blue star-like shapes with a white center. The left shape has 8 points and is inside a light gray circle with a dotted pattern. The right shape has 6 points and is inside a light gray circle with a dotted pattern.

![Five options A-E. A: Blue star with 8 points inside a light gray dotted circle. B: Blue star with 8 points inside a light gray dotted circle. C: Blue hexagon with a white star inside a light gray dotted circle. D: Blue circle with a white star inside a light gray dotted circle. E: Blue star with 8 points inside a light gray solid circle.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710889/qy6nsacoxtfi69cncvvr.jpg)

Five options A-E. A: Blue star with 8 points inside a light gray dotted circle. B: Blue star with 8 points inside a light gray dotted circle. C: Blue hexagon with a white star inside a light gray dotted circle. D: Blue circle with a white star inside a light gray dotted circle. E: Blue star with 8 points inside a light gray solid circle.

12

![Two blue shapes. The left shape is a rectangle with a horizontal double-headed arrow inside, with a blue triangle above and a gray circle below. The right shape is a gray circle with a horizontal double-headed arrow inside, with a blue triangle below.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710943/zvdyxtpztoxf1i4shqux.jpg)

Two blue shapes. The left shape is a rectangle with a horizontal double-headed arrow inside, with a blue triangle above and a gray circle below. The right shape is a gray circle with a horizontal double-headed arrow inside, with a blue triangle below.

![Five options A-E. A: Blue diamond with a horizontal double-headed arrow inside, with a blue triangle below. B: Blue square with a horizontal double-headed arrow inside, with a gray circle below. C: Blue circle with a horizontal double-headed arrow inside, with a blue triangle below. D: Blue square with a horizontal double-headed arrow inside, with a blue star below. E: Blue diamond with a horizontal double-headed arrow inside, with a blue triangle below.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711021/xu8jbgupyal3o0nvxjkx.jpg)

Five options A-E. A: Blue diamond with a horizontal double-headed arrow inside, with a blue triangle below. B: Blue square with a horizontal double-headed arrow inside, with a gray circle below. C: Blue circle with a horizontal double-headed arrow inside, with a blue triangle below. D: Blue square with a horizontal double-headed arrow inside, with a blue star below. E: Blue diamond with a horizontal double-headed arrow inside, with a blue triangle below.

13

![Two cyan shapes. The left shape is a vertical stack of three interlocking loops. The right shape is a vertical stack of three interlocking loops, but the top loop is oriented differently.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711016/ns5sysfppefc7xihq3zj.jpg)

Two cyan shapes. The left shape is a vertical stack of three interlocking loops. The right shape is a vertical stack of three interlocking loops, but the top loop is oriented differently.

![Five options A-E. A: Cyan shape with three interlocking loops, top loop horizontal. B: Cyan shape with three interlocking loops, top loop horizontal. C: Cyan shape with three interlocking loops, top loop horizontal. D: Cyan shape with three interlocking loops, top loop horizontal. E: Cyan shape with three interlocking loops, top loop horizontal.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710908/qxw5ynf3cpbjcfjw3pcx.jpg)

Five options A-E. A: Cyan shape with three interlocking loops, top loop horizontal. B: Cyan shape with three interlocking loops, top loop horizontal. C: Cyan shape with three interlocking loops, top loop horizontal. D: Cyan shape with three interlocking loops, top loop horizontal. E: Cyan shape with three interlocking loops, top loop horizontal.

14

![Two gray shapes. The left shape is a cluster of four squares with a wavy pattern, arranged in a 2x2 grid. The right shape is a cluster of four squares with a wavy pattern, arranged in a 2x2 grid but rotated 90 degrees clockwise.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710994/n85i4iluk57ichc5vvof.jpg)

Two gray shapes. The left shape is a cluster of four squares with a wavy pattern, arranged in a 2x2 grid. The right shape is a cluster of four squares with a wavy pattern, arranged in a 2x2 grid but rotated 90 degrees clockwise.

![Five options A-E. A: Gray shape with four squares in a 2x2 grid, wavy pattern. B: Gray shape with four squares in a 2x2 grid, wavy pattern. C: Gray shape with four squares in a 2x2 grid, wavy pattern. D: Gray shape with four squares in a 2x2 grid, wavy pattern. E: Gray shape with four squares in a 2x2 grid, wavy pattern.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710965/k2crntp8pvjxjgzypwp7.jpg)

Five options A-E. A: Gray shape with four squares in a 2x2 grid, wavy pattern. B: Gray shape with four squares in a 2x2 grid, wavy pattern. C: Gray shape with four squares in a 2x2 grid, wavy pattern. D: Gray shape with four squares in a 2x2 grid, wavy pattern. E: Gray shape with four squares in a 2x2 grid, wavy pattern.

15

![Two blue shapes. The left shape is a vertical stack of three downward-pointing triangles, with a horizontal dashed line through the middle. The right shape is a vertical stack of three diamonds, with a horizontal dashed line through the middle.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710905/l1tveyzytg5kgdhkwso5.jpg)

Two blue shapes. The left shape is a vertical stack of three downward-pointing triangles, with a horizontal dashed line through the middle. The right shape is a vertical stack of three diamonds, with a horizontal dashed line through the middle.

![Five options A-E. A: Blue shape with three downward-pointing triangles, horizontal dashed line. B: Blue shape with three diamonds, horizontal dashed line. C: Blue shape with three circles, horizontal dashed line. D: Blue shape with three stars, horizontal dashed line. E: Blue shape with three diamonds, horizontal dashed line.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710987/flrqek52ctkymz2jej0j.jpg)

Five options A-E. A: Blue shape with three downward-pointing triangles, horizontal dashed line. B: Blue shape with three diamonds, horizontal dashed line. C: Blue shape with three circles, horizontal dashed line. D: Blue shape with three stars, horizontal dashed line. E: Blue shape with three diamonds, horizontal dashed line.
"""

#### Parsing "predict the next image" questions (each question has either two images — one for the question and one for the options — or a single image containing both the question and the options)

In [7]:
# Markdown export that the run cell opens and parses.
input_file = "nvr2.md"
# Output CSV path. NOTE(review): confirm the export call actually receives this
# name; save_to_csv falls back to its own default filename when none is passed.
output_file = "cleaned_nvr.csv"

In [8]:
class Question(BaseModel):
    """One parsed question, in the shape exported by ``save_to_csv``.

    Flag-like fields (``multiple_correct``, ``side_by_side``) hold the strings
    ``"True"``/``"False"`` rather than booleans.
    """
    # Question number; main() fills it from the number captured by
    # extract_questions. It is not among the columns written by save_to_csv.
    id: int
    # Question body: plain text or an HTML <img> snippet built by main().
    question: str
    # Optional HTML <img> snippet for the second image of a question.
    lead_in: str = ""
    # Display text of each option; defaults to the option letter itself.
    option_A: str = "A"
    option_B: str = "B"
    option_C: str = "C"
    option_D: str = "D"
    option_E: str = "E"
    # Image URL of each option; empty when the option has no image.
    option_A_url: str = ""
    option_B_url: str = ""
    option_C_url: str = ""
    option_D_url: str = ""
    option_E_url: str = ""
    # Exported as "Section External ID".
    section_id: str="nvr"
    # Exported as "Multiple Correct".
    multiple_correct: str = "False"
    # Exported as "Correct Options". NOTE(review): main() never sets this, so
    # every row exports the default "A" - presumably a placeholder; confirm.
    correct_option: str = "A"
    # Exported as "Side-by-Side Layout?".
    side_by_side: str = "True"

In [9]:
# Parsed questions; starts empty and is rebound to main()'s result in the run cell.
questions_data: List[Question] = []

In [10]:
# Matches a standalone question number: digits preceded by <p>, <b>, "**", the
# start of the text or a newline, and followed (lookahead only, not consumed) by
# </p>, </b>, "**", a newline or the end of the text. The digits are the only
# capture group, so re.split() returns [prefix, number, block, number, block, ...].
QUESTION_SPLIT = re.compile(r'(?:<[pb]>|\*\*|^|\n)\s*(\d+)\s*(?=</[pb]>|\*\*|\n|$)')
# Matches a markdown image ![alt](url) (URL in group 1) or an HTML
# <img ... src="url" ...> tag (URL in group 2); case-insensitive.
IMG_PATTERN = re.compile(r'!\[.*?\]\((.*?)\)|<img[^>]*src="(.*?)"[^>]*>', re.IGNORECASE)
# Matches a single uppercase letter A-Z standing alone between word boundaries.
# NOTE(review): no use of this pattern is visible in this part of the notebook.
OPTIONS_PATTERN = re.compile(r'\b([A-Z])\b')

In [11]:
def extract_questions(text):
    """Split ``text`` into ``(question_number, block)`` pairs.

    ``QUESTION_SPLIT`` has a single capture group (the number), so splitting
    yields ``[prefix, num1, block1, num2, block2, ...]``. The prefix before the
    first number is discarded; numbers and blocks are returned stripped, in
    document order.
    """
    pieces = QUESTION_SPLIT.split(text)

    # Odd positions hold the numbers, the following even positions their blocks.
    numbers = pieces[1::2]
    blocks = pieces[2::2]

    # A number with nothing after it still gets an (empty) block.
    blocks.extend([""] * (len(numbers) - len(blocks)))

    return [(number.strip(), body.strip()) for number, body in zip(numbers, blocks)]

In [12]:
def extract_images(block):
    """Collect every image URL in ``block``, in order of appearance.

    Each ``IMG_PATTERN`` match is a ``(markdown_url, html_src)`` pair in which
    only the alternative that matched is non-empty; that one is kept.
    """
    return [markdown_url or html_src for markdown_url, html_src in IMG_PATTERN.findall(block)]

In [13]:
def normalize(block):
    """Reduce an HTML/markdown fragment to plain text.

    ``<br>`` variants (any case) become newlines, every other ``<...>`` tag is
    dropped, non-breaking spaces become ordinary spaces, and the result is
    stripped of surrounding whitespace.
    """
    with_line_breaks = re.compile(r'<br\s*/?>', re.I).sub('\n', block)
    without_tags = re.compile(r'<.*?>').sub('', with_line_breaks)
    return without_tags.replace('\xa0', ' ').strip()

def tokenize_option_lines(block):
    """Turn a question block into rows of option tokens.

    The block is first reduced to plain text with ``normalize``; each non-empty
    line is then handled as follows:

    * Markdown separators and table delimiter rows (lines made only of ``-``,
      ``|``, ``:`` and whitespace, containing at least one ``-``) are skipped.
      This covers ``---``, ``|---|---|`` and also spaced or aligned forms such
      as ``| --- | :---: |``.
    * Lines containing ``|`` are table rows: they are split on ``|`` and the
      non-empty, stripped cells form one row (a row of blank cells yields ``[]``).
    * Any other line is kept only when every whitespace-separated token is
      uppercase and alphabetic (e.g. ``AT BK DS`` or ``A B C D E``).

    Returns:
        A list of rows, each a list of strings, in document order.
    """
    rows = []

    for line in normalize(block).splitlines():
        line = line.strip()
        if not line:
            continue

        # Delimiter rows carry no data. Checking the whole character set (rather
        # than only the text left after removing pipes) also catches delimiter
        # rows whose cells are separated by spaces or carry alignment colons.
        if '-' in line and all(ch in '|-:' or ch.isspace() for ch in line):
            continue

        # Table rows
        if '|' in line:
            rows.append([cell.strip() for cell in line.split('|') if cell.strip()])
            continue

        # Inline rows (any spacing): keep only all-uppercase alphabetic tokens.
        parts = line.split()
        if all(p.isupper() and p.isalpha() for p in parts):
            rows.append(parts)

    return rows

def ensure_anchor(rows):
    """Make sure ``rows`` contains the ``A B C D E`` label row when one is implied.

    If the label row is already present, ``rows`` is returned untouched.
    Otherwise, when the last row has exactly five values, the label row is
    appended to ``rows`` in place. The same list object is always returned.
    """
    anchor = ['A', 'B', 'C', 'D', 'E']
    already_anchored = anchor in rows

    # Implicit A-E: a trailing five-value row is taken to be the option values.
    if not already_anchored and rows and len(rows[-1]) == 5:
        rows.append(anchor)

    return rows

def deep_normalize_rows(rows):
    """Unpack rows in which one cell carries a whole five-token option line.

    Cells are scanned left to right. The first cell that splits on whitespace
    into exactly five uppercase alphabetic tokens replaces the row with those
    tokens. A row that already consists of five such cells is kept (as a new
    list). Every other row is passed through unchanged.
    """
    def is_five_codes(tokens):
        return len(tokens) == 5 and all(t.isupper() and t.isalpha() for t in tokens)

    result = []
    for row in rows:
        collected = []
        for cell in row:
            tokens = cell.split()
            if is_five_codes(tokens):
                # This cell alone is the option line; drop everything gathered so far.
                collected = tokens
                break
            collected.append(cell)

        result.append(collected if is_five_codes(collected) else row)

    return result

def extract_option_mapping(rows):
    """Map tokenized option rows onto the letters ``A``-``E``.

    Two layouts are recognised:

    * Vertical pairs - every row has one value and there are at least ten
      rows, read as ``value, label, value, label, ...``; values whose label
      is not ``A``-``E`` are ignored.
    * Anchored table - the last ``A B C D E`` row is the anchor; every
      five-value row above it contributes one value per letter, by column.

    Returns:
        A dict with keys ``A``-``E``, each holding a (possibly empty) list of
        values in row order.
    """
    labels = "ABCDE"
    mapping = {label: [] for label in labels}

    if not rows:
        return mapping

    # Vertical pairs: DS A DK B ...
    if all(len(row) == 1 for row in rows):
        singles = [row[0] for row in rows]
        if len(singles) >= 10:
            for value, label in zip(singles[0::2], singles[1::2]):
                if label in mapping:
                    mapping[label].append(value)
            return mapping

    # Anchor-based mapping: locate the last label row.
    anchor = list(labels)
    anchor_at = next(
        (idx for idx in reversed(range(len(rows))) if rows[idx] == anchor),
        None,
    )
    if anchor_at is None:
        return mapping

    # Only full-width (five-value) rows above the anchor carry option values.
    for row in rows[:anchor_at]:
        if len(row) == 5:
            for label, value in zip(labels, row):
                mapping[label].append(value)

    return mapping



In [14]:
def extract_options(block):
    """Run the option pipeline on one question block.

    Tokenizes the block into rows, unpacks packed five-token cells, adds the
    implicit ``A B C D E`` anchor when needed, and returns the resulting
    letter-to-values mapping.
    """
    tokenized = tokenize_option_lines(block)
    anchored = ensure_anchor(deep_normalize_rows(tokenized))
    return extract_option_mapping(anchored)

In [15]:
def main(content: str) -> Tuple[List[int], List[Question]]:
    """Parse every numbered question in ``content`` into ``Question`` records.

    Each question block is classified by the number of images it contains:

    * exactly 5 images - an "odd one out" question; each image becomes the
      image URL of options A-E, in order.
    * 0, 1 or 2 images - the first image (if any) is the question, the second
      (if any) is the lead-in; textual options are taken from
      ``extract_options`` and multiple values per option are joined with
      ``<br><br>``.
    * any other count - the question is not converted.

    Args:
        content: Markdown/HTML text containing the numbered questions.

    Returns:
        ``(undone, data)``: the numbers (as ints) of the questions that were
        not converted, and the converted questions in document order.
    """
    data: List[Question] = []
    undone: List[int] = []

    for q_num, block in extract_questions(content):
        # extract_questions yields the number as a digit string; convert once so
        # both the model id and the `undone` entries match their int annotations.
        q_id = int(q_num)
        images = extract_images(block)

        # Exactly five images: every image is one answer option (find the odd one out).
        if len(images) == 5:
            option_urls = {
                f"option_{letter}_url": url for letter, url in zip("ABCDE", images)
            }
            data.append(Question(id=q_id, question="Find odd one out.", **option_urls))

        # At most two images: question image plus an optional lead-in image.
        elif len(images) <= 2:
            options = extract_options(block)

            has_lead_in = len(images) >= 2
            # The question image shrinks to 30% when it shares the row with a lead-in.
            question_width = '30%' if has_lead_in else '100%'
            question_image = (
                f'<img src="{images[0]}" width = "{question_width}">' if images else ""
            )
            lead_in_image = f'<img src="{images[1]}" width = "30%">' if has_lead_in else ""
            question = Question(id=q_id, question=question_image, lead_in=lead_in_image)

            # Override the default letter labels only when option text was found.
            if options['A']:
                for letter in "ABCDE":
                    if letter in options:
                        setattr(question, f"option_{letter}", "<br><br>".join(options[letter]))

            data.append(question)

        # Three, four or more than five images: layout not supported.
        else:
            undone.append(q_id)

    return (undone, data)

In [16]:
def normalize_multi_sections(text: str) -> str:
    """Strip section boilerplate so only headings and questions remain.

    Two passes, both case-insensitive and spanning newlines:

    1. Everything between a ``## SECTION n`` / ``### SECTION n`` heading and the
       first following question marker (digits introduced by ``<p>``, ``<b>``,
       ``**`` or a newline) is replaced by a single newline, keeping the
       heading and the marker.
    2. Everything before the first ``SECTION 1`` heading is removed.

    The result is returned stripped of surrounding whitespace.
    """
    flags = re.DOTALL | re.IGNORECASE

    def keep_heading_and_marker(match):
        # group 1 is the section heading, group 2 the first question marker.
        return f"{match.group(1)}\n{match.group(2)}"

    without_section_junk = re.sub(
        r'(###? SECTION \d+).*?(\s*(?:<[pb]>|\*\*|\n)\s*\d+)',
        keep_heading_and_marker,
        text,
        flags=flags,
    )

    # Drop the instructions block that precedes the first section heading.
    without_intro = re.sub(
        r'^.*?(?=###? SECTION 1)',
        '',
        without_section_junk,
        flags=flags,
    )

    return without_intro.strip()

In [17]:
def save_to_csv(questions: List[Question], filename: str = "questions.csv"):
    """Write ``questions`` to ``filename`` as a UTF-8 CSV file.

    Only the model fields listed in the column map are exported, under the
    given header names and in that order; a field missing from a dumped model
    is written as an empty string. All rows are built before the file is
    opened, and the file is overwritten if it exists.
    """
    # Model field name -> CSV header, in output column order.
    column_map = {
        "question": "Question",
        "lead_in": "Lead In",
        "option_A": "Option A",
        "option_B": "Option B",
        "option_C": "Option C",
        "option_D": "Option D",
        "option_E": "Option E",
        "option_A_url": "Option A Image URL",
        "option_B_url": "Option B Image URL",
        "option_C_url": "Option C Image URL",
        "option_D_url": "Option D Image URL",
        "option_E_url": "Option E Image URL",
        "section_id": "Section External ID",
        "multiple_correct": "Multiple Correct",
        "correct_option": "Correct Options",
        "side_by_side": "Side-by-Side Layout?",
    }
    headers = list(column_map.values())

    records = []
    for item in questions:
        dumped = item.model_dump()
        records.append(
            {header: dumped.get(field, "") for field, header in column_map.items()}
        )

    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=headers)
        writer.writeheader()
        writer.writerows(records)

In [18]:

# Load the raw markdown export.
with open(input_file, "r", encoding="utf-8") as f:
    content = f.read()

# Strip section boilerplate, then parse the questions.
clean_content = normalize_multi_sections(content)
undone, questions_data = main(clean_content)

print(*questions_data, sep='\n')

# Write to the configured destination; without the explicit filename
# save_to_csv would fall back to its default "questions.csv".
save_to_csv(questions_data, output_file)

id=1 question='Find odd one out.' lead_in='' option_A='A' option_B='B' option_C='C' option_D='D' option_E='E' option_A_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638001/nslfyb8ah3z2hzgoon3d.jpg' option_B_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638022/rhdzfue7ekzkvhcw3ans.jpg' option_C_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638030/cn9yb5cuswxyuu6eoxaq.jpg' option_D_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637922/diovlyfs0bi4yyyqiehc.jpg' option_E_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637952/hxflo5ipoqltyzhglymz.jpg' section_id='nvr' multiple_correct='False' correct_option='A' side_by_side='True'
id=2 question='Find odd one out.' lead_in='' option_A='A' option_B='B' option_C='C' option_D='D' option_E='E' option_A_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638034/jobxfpjsvn1lf7kpfubm.jpg' option_B_url='https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637930/qfnhkll9gya

In [81]:
# Question numbers that main() reported as not converted.
print(undone)

[]


#### Sample markdown for option detection

In [265]:
options_MD = """
**26**![Four geometric shapes: a triangle with a circle inside, a left-pointing arrow with a circle inside, a triangle with a circle inside and two small circles on its sides, and a trophy with a circle inside.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637932/xy1sobxzcvsj7fs2vide.jpg)

AT  
BS  
AK  
DK

Four geometric shapes: a triangle with a circle inside, a left-pointing arrow with a circle inside, a triangle with a circle inside and two small circles on its sides, and a trophy with a circle inside.

![A trophy icon with a circle inside.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637920/phjfmzirfmlajjpprp4y.jpg)

A trophy icon with a circle inside.

AT      BK      DS      DT      DK  
A        B        C        D        E


**31**![Four arrows pointing left: AT (hatched), BS (solid), AK (solid), DK (solid).](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638007/qccjprwjcnzrfkzrhyu8.jpg)

Four arrows pointing left: AT (hatched), BS (solid), AK (solid), DK (solid).

![A solid arrow pointing left.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637943/dml8wvhiqp9yocoalrnm.jpg)

A solid arrow pointing left.

 DS  
A

 DK  
B

 DT  
C

 AT  
D

 BS  
E


**36**![A sequence of four 2x2 grids. Grid 1: Top-left is a crescent moon, top-right is 'B', bottom-left is 'T', bottom-right is empty. Grid 2: Top-left is a staircase shape, top-right is 'L', bottom-left is empty, bottom-right is 'S'. Grid 3: Top-left is a crescent moon, top-right is 'B', bottom-left is 'S', bottom-right is empty. Grid 4: Top-left is a staircase shape, top-right is empty, bottom-left is 'L', bottom-right is empty.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637958/dnnymhpbtnpsk7il40nd.jpg)

A sequence of four 2x2 grids. Grid 1: Top-left is a crescent moon, top-right is 'B', bottom-left is 'T', bottom-right is empty. Grid 2: Top-left is a staircase shape, top-right is 'L', bottom-left is empty, bottom-right is 'S'. Grid 3: Top-left is a crescent moon, top-right is 'B', bottom-left is 'S', bottom-right is empty. Grid 4: Top-left is a staircase shape, top-right is empty, bottom-left is 'L', bottom-right is empty.

|   |   |   |   |   |
|---|---|---|---|---|
| L | B | L | L | B |
| S | S | T | V | T |
| A | B | C | D | E |


**43**![Five 2x2 grids showing transformations of bird-like shapes. Grid 1: Two bird shapes in the bottom-left cell, 'L' in top-right, 'I' in bottom-left. Grid 2: Two bird shapes in the bottom-left cell, 'G' in top-right, 'J' in bottom-left. Grid 3: Three bird shapes in the bottom-left cell, 'R' in top-right, 'J' in bottom-left. Grid 4: Three bird shapes in the bottom-left cell, 'R' in top-right, 'E' in bottom-left. Grid 5: Two bird shapes in the bottom-left cell, empty cells.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637899/w2xcridpstvewarvgprn.jpg)

Five 2x2 grids showing transformations of bird-like shapes. Grid 1: Two bird shapes in the bottom-left cell, 'L' in top-right, 'I' in bottom-left. Grid 2: Two bird shapes in the bottom-left cell, 'G' in top-right, 'J' in bottom-left. Grid 3: Three bird shapes in the bottom-left cell, 'R' in top-right, 'J' in bottom-left. Grid 4: Three bird shapes in the bottom-left cell, 'R' in top-right, 'E' in bottom-left. Grid 5: Two bird shapes in the bottom-left cell, empty cells.

|   |   |   |   |   |
|---|---|---|---|---|
| L | G | R | G | G |
|---|---|---|---|---|

|   |   |   |   |   |
|---|---|---|---|---|
| I | I | I | E | J |
|---|---|---|---|---|

|   |   |   |   |   |
|---|---|---|---|---|
| A | B | C | D | E |
|---|---|---|---|---|

| <p><b>30</b></p> <img alt="Four square icons with black and white shapes: a black triangle with a dot, a white triangle with a dot, a black triangle with a dot, and a white triangle with a dot." data-bbox="188 1119 241 1318" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710974/chkl4i6qtw1h2o3nkk9x.jpg"/> <p>XMO<br/>XNP<br/>YMP<br/>XNO</p> | <img alt="Square icon with a black triangle and a dot." data-bbox="410 1213 463 1265" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711008/z1bmalxz95bclsgpwsif.jpg"/> <p>XNP   XNP   XMP   XNO   XMO<br/>A      B      C      D      E</p>        |


| <b>25</b> <ul style="list-style-type: none"> <li><img alt="Triangle with dots" data-bbox="188 1120 241 1165" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710916/dbs35ioociwwtzu8l7vs.jpg"/> AYP</li> <li><img alt="Triangle with dots" data-bbox="188 1174 241 1219" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710899/fq8hxqncxeucod7rghnr.jpg"/> BZM</li> <li><img alt="Triangle with dots" data-bbox="188 1228 241 1273" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710895/zdq2fqcspitaudokqqni.jpg"/> BYP</li> <li><img alt="Triangle with dots" data-bbox="188 1282 241 1327" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710946/uqk5h2lx3rhzpl1hijn3.jpg"/> AZN</li> </ul>                                                                                                                                                                                                                                                                                                                                                                                                                 | <img alt="Triangle with dots" data-bbox="421 1214 473 1259" src="https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710893/ustvigolrquxxx86cmik.jpg"/> AZP AZM BZM BYP BZP<br>A B C D E 

1

![A circle containing a pentagon, which contains a black dot.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638001/nslfyb8ah3z2hzgoon3d.jpg)

A circle containing a pentagon, which contains a black dot.

A

![A dashed circle containing a pentagon, which contains a black dot.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638022/rhdzfue7ekzkvhcw3ans.jpg)

A dashed circle containing a pentagon, which contains a black dot.

B

![A circle containing a pentagon, which contains a black dot.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770638030/cn9yb5cuswxyuu6eoxaq.jpg)

A circle containing a pentagon, which contains a black dot.

C

![A circle containing a pentagon, which contains a black square.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637922/diovlyfs0bi4yyyqiehc.jpg)

A circle containing a pentagon, which contains a black square.

D

![A dashed circle containing a pentagon, which contains a black dot.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637952/hxflo5ipoqltyzhglymz.jpg)

A dashed circle containing a pentagon, which contains a black dot.

E

15

![Two blue shapes. The left shape is a vertical stack of three downward-pointing triangles, with a horizontal dashed line through the middle. The right shape is a vertical stack of three diamonds, with a horizontal dashed line through the middle.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710905/l1tveyzytg5kgdhkwso5.jpg)

Two blue shapes. The left shape is a vertical stack of three downward-pointing triangles, with a horizontal dashed line through the middle. The right shape is a vertical stack of three diamonds, with a horizontal dashed line through the middle.

![Five options A-E. A: Blue shape with three downward-pointing triangles, horizontal dashed line. B: Blue shape with three diamonds, horizontal dashed line. C: Blue shape with three circles, horizontal dashed line. D: Blue shape with three stars, horizontal dashed line. E: Blue shape with three diamonds, horizontal dashed line.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710987/flrqek52ctkymz2jej0j.jpg)

Five options A-E. A: Blue shape with three downward-pointing triangles, horizontal dashed line. B: Blue shape with three diamonds, horizontal dashed line. C: Blue shape with three circles, horizontal dashed line. D: Blue shape with three stars, horizontal dashed line. E: Blue shape with three diamonds, horizontal dashed line.

7

![Two shapes: a black cross with a white star in the center, and a blue hexagon with a white star in the center.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769711011/lpay44hhtdnfyymkd12u.jpg)

Two shapes: a black cross with a white star in the center, and a blue hexagon with a white star in the center.

![Five options: A (blue six-pointed star with white star), B (blue scalloped circle with white star), C (black six-pointed star with white star), D (blue arrow pointing right with white star), E (blue six-pointed star with white star).](https://res.cloudinary.com/dgpvwrolc/image/upload/v1769710922/ardgzf2tr5c7dgowkvgj.jpg)

Five options: A (blue six-pointed star with white star), B (blue scalloped circle with white star), C (black six-pointed star with white star), D (blue arrow pointing right with white star), E (blue six-pointed star with white star).

A B C D E

"""

#### Detecting options

In [270]:
questions = extract_questions(options_MD)

# Each block has already had its question number removed by extract_questions,
# so it is fed to the option parser directly; main() expects full numbered
# content and would find no questions in a bare block.
for q_no, block in questions:
    print(extract_options(block))

[['AT'], ['BS'], ['AK'], ['DK'], ['AT', 'BK', 'DS', 'DT', 'DK'], ['A', 'B', 'C', 'D', 'E']]
{'A': ['AT'], 'B': ['BK'], 'C': ['DS'], 'D': ['DT'], 'E': ['DK']}
[['DS'], ['A'], ['DK'], ['B'], ['DT'], ['C'], ['AT'], ['D'], ['BS'], ['E']]
{'A': ['DS'], 'B': ['DK'], 'C': ['DT'], 'D': ['AT'], 'E': ['BS']}
[[], ['L', 'B', 'L', 'L', 'B'], ['S', 'S', 'T', 'V', 'T'], ['A', 'B', 'C', 'D', 'E']]
{'A': ['L', 'S'], 'B': ['B', 'S'], 'C': ['L', 'T'], 'D': ['L', 'V'], 'E': ['B', 'T']}
[[], ['L', 'G', 'R', 'G', 'G'], [], ['I', 'I', 'I', 'E', 'J'], [], ['A', 'B', 'C', 'D', 'E'], []]
{'A': ['L', 'I'], 'B': ['G', 'I'], 'C': ['R', 'I'], 'D': ['G', 'E'], 'E': ['G', 'J']}
[['XMO'], ['XNP'], ['YMP'], ['XNP', 'XNP', 'XMP', 'XNO', 'XMO'], ['A', 'B', 'C', 'D', 'E'], []]
{'A': ['XNP'], 'B': ['XNP'], 'C': ['XMP'], 'D': ['XNO'], 'E': ['XMO']}
[['AZP', 'AZM', 'BZM', 'BYP', 'BZP'], ['A', 'B', 'C', 'D', 'E']]
{'A': ['AZP'], 'B': ['AZM'], 'C': ['BZM'], 'D': ['BYP'], 'E': ['BZP']}
[['A'], ['B'], ['C'], ['D'], ['E']]
{'A':

In [271]:
# Display the (question number, block) pairs returned by extract_questions.
questions

[('26',
  '**![Four geometric shapes: a triangle with a circle inside, a left-pointing arrow with a circle inside, a triangle with a circle inside and two small circles on its sides, and a trophy with a circle inside.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637932/xy1sobxzcvsj7fs2vide.jpg)\n\nAT  \nBS  \nAK  \nDK\n\nFour geometric shapes: a triangle with a circle inside, a left-pointing arrow with a circle inside, a triangle with a circle inside and two small circles on its sides, and a trophy with a circle inside.\n\n![A trophy icon with a circle inside.](https://res.cloudinary.com/dgpvwrolc/image/upload/v1770637920/phjfmzirfmlajjpprp4y.jpg)\n\nA trophy icon with a circle inside.\n\nAT \xa0\xa0\xa0\xa0 BK \xa0\xa0\xa0\xa0 DS \xa0\xa0\xa0\xa0 DT \xa0\xa0\xa0\xa0 DK  \nA \xa0\xa0\xa0\xa0\xa0\xa0 B \xa0\xa0\xa0\xa0\xa0\xa0 C \xa0\xa0\xa0\xa0\xa0\xa0 D \xa0\xa0\xa0\xa0\xa0\xa0 E'),
 ('31',
  '**![Four arrows pointing left: AT (hatched), BS (solid), AK (solid), DK (solid).]