In [1]:
import spacy
from collections import Counter
import random

nlp = spacy.load("en_core_web_lg")

def generate_mcqs_advanced(text, num_questions, difficulty_level):
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    ranked_sentences = sorted(sentences, key=lambda x: len(x), reverse=True)
    random.shuffle(ranked_sentences)
    mcqs = []
    for sentence in ranked_sentences:
        if len(mcqs) >= num_questions:
            break
        sent_doc = nlp(sentence)
        nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]
        verbs = [token.text for token in sent_doc if token.pos_ == "VERB"]
        adjectives = [token.text for token in sent_doc if token.pos_ == "ADJ"]
        key_terms = nouns + verbs + adjectives
        if len(key_terms) < 2:
            continue
        if difficulty_level == "easy":
            subject = random.choice(nouns) if nouns else random.choice(key_terms)
            question_stem = sentence.replace(subject, "_______")
            answer_choices = [subject]
            distractors = random.sample(set(key_terms) - {subject}, min(3, len(set(key_terms)) - 1))
            answer_choices.extend(distractors)
            random.shuffle(answer_choices)
            correct_answer = chr(64 + answer_choices.index(subject) + 1)
            mcqs.append(("Fill in the blank: " + question_stem, answer_choices, correct_answer))
        elif difficulty_level == "medium":
            subject = random.choice(key_terms)
            question_text = f"What does the term \"{subject}\" refer to in the sentence: \"{sentence}\"?"
            distractors = random.sample(set(key_terms) - {subject}, min(3, len(set(key_terms)) - 1))
            answer_choices = [subject] + distractors
            random.shuffle(answer_choices)
            correct_answer = chr(64 + answer_choices.index(subject) + 1)
            mcqs.append((question_text, answer_choices, correct_answer))
        elif difficulty_level == "difficult":
            subject = random.choice(key_terms)
            question_text = f"How does \"{subject}\" contribute to the context of: \"{sentence}\"?"
            distractors = random.sample(set(key_terms) - {subject}, min(3, len(set(key_terms)) - 1))
            answer_choices = [subject] + distractors
            random.shuffle(answer_choices)
            correct_answer = chr(64 + answer_choices.index(subject) + 1)
            mcqs.append((question_text, answer_choices, correct_answer))
    return mcqs[:num_questions]

text = """
The Greek historian knew what he was talking about. The Nile River fed Egyptian civilization for hundreds of years. 
The Longest River the Nile is 4,160 miles long—the world’s longest river. It begins near the equator in Africa and 
flows north to the Mediterranean Sea. In the south the Nile churns with cataracts. A cataract is a waterfall. Near the 
sea the Nile branches into a delta. A delta is an area near a river’s mouth where the water deposits fine soil called silt. 
In the delta, the Nile divides into many streams. The river is called the upper Nile in the south and the lower Nile in the
north. For centuries, heavy rains in Ethiopia caused the Nile to flood every summer. The floods deposited rich soil along the 
Nile’s shores. This soil was fertile, which means it was good for growing crops. Unlike the Tigris and Euphrates,
the Nile River flooded at the same time every year, so farmers could predict when to plant their crops.
"""

num_questions = int(input("How many questions do you want to generate? "))
difficulty_level = input("Enter difficulty level (easy, medium, difficult): ").lower()

while difficulty_level not in ["easy", "medium", "difficult"]:
    print("Invalid difficulty level! Please choose from easy, medium, or difficult.")
    difficulty_level = input("Enter difficulty level (easy, medium, difficult): ").lower()

mcqs = generate_mcqs_advanced(text, num_questions, difficulty_level)

for idx, mcq in enumerate(mcqs, start=1):
    question, options, correct = mcq
    print(f"Q{idx}: {question}")
    for i, option in enumerate(options, start=1):
        print(f"{chr(64+i)}) {option}")
    print(f"Correct Answer: {correct}")
    print()

How many questions do you want to generate?  3
Enter difficulty level (easy, medium, difficult):  difficult


Q1: How does "waterfall" contribute to the context of: "A cataract is a waterfall."?
A) waterfall
B) cataract
Correct Answer: A

Q2: How does "good" contribute to the context of: "This soil was fertile, which means it was good for growing crops."?
A) crops
B) soil
C) growing
D) good
Correct Answer: D

Q3: How does "world" contribute to the context of: "The Longest River the Nile is 4,160 miles long—the world’s longest river."?
A) miles
B) river
C) longest
D) world
Correct Answer: D



since Python 3.9 and will be removed in a subsequent version.
  distractors = random.sample(set(key_terms) - {subject}, min(3, len(set(key_terms)) - 1))
