In [3]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import wordnet as wn
from nltk.chunk import RegexpParser
import random

# Download required NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('maxent_ne_chunker')
nltk.download('words')

# Sample input text
input_text = """
Google was founded on September 4, 1998, by American computer scientists Larry Page and Sergey Brin while they were PhD students at Stanford University in California. Together, they own about 14% of its publicly listed shares and control 56% of its stockholder voting power through super-voting stock. The company went public via an initial public offering (IPO) in 2004. In 2015, Google was reorganized as a wholly owned subsidiary of Alphabet Inc. Google is Alphabet's largest subsidiary and is a holding company for Alphabet's internet properties and interests. Sundar Pichai was appointed CEO of Google on October 24, 2015, replacing Larry Page, who became the CEO of Alphabet. On December 3, 2019, Pichai also became the CEO of Alphabet.
"""

def extract_keywords(text):
    # Tokenize sentences
    sentences = sent_tokenize(text)
    keywords = set()
    
    # Define a chunk grammar to identify noun phrases
    grammar = r"""
        NP: {<DT>?<JJ>*<NN.*>+}  # Noun phrase
    """
    chunk_parser = RegexpParser(grammar)
    
    for sentence in sentences:
        tokens = word_tokenize(sentence)
        tagged = pos_tag(tokens)
        tree = chunk_parser.parse(tagged)
        
        for subtree in tree.subtrees():
            if subtree.label() == 'NP':
                # Join the leaves to form the noun phrase
                noun_phrase = ' '.join(word for word, tag in subtree.leaves())
                # Filter out short words and common stopwords
                if len(noun_phrase) > 3:
                    keywords.add(noun_phrase.lower())
    
    return list(keywords)

def generate_distractors(keyword):
    distractors = set()
    keyword_synsets = wn.synsets(keyword, pos=wn.NOUN)
    
    if not keyword_synsets:
        return ['technology', 'software', 'innovation']  # Default distractors if no synsets are found
    
    # Use the first synset
    keyword_synset = keyword_synsets[0]
    
    # Get hypernyms, hyponyms, and similar words
    hypernyms = keyword_synset.hypernyms()
    hyponyms = keyword_synset.hyponyms()
    similar_words = keyword_synset.similar_tos()
    
    # Add hypernyms, hyponyms, and similar words to distractors
    for hypernym in hypernyms:
        for lemma in hypernym.lemmas():
            distractors.add(lemma.name().replace('_', ' '))
    for hyponym in hyponyms:
        for lemma in hyponym.lemmas():
            distractors.add(lemma.name().replace('_', ' '))
    for similar in similar_words:
        for lemma in similar.lemmas():
            distractors.add(lemma.name().replace('_', ' '))
    
    # Remove the keyword itself from distractors
    distractors.discard(keyword)
    
    # If not enough distractors, add some default tricky options
    if len(distractors) < 3:
        default_distractors = ['technology', 'system', 'data']
        distractors.update(default_distractors)
    
    return random.sample(distractors, min(4, len(distractors)))  # Select up to 4 distractors

def generate_mcqs(text):
    # Tokenize sentences
    sentences = sent_tokenize(text)
    
    # Extract keywords
    keywords = extract_keywords(text)
    
    # Generate MCQs
    mcqs = []
    for keyword in keywords:
        for sentence in sentences:
            if keyword in sentence.lower():
                distractors = generate_distractors(keyword)
                question = sentence.lower().replace(keyword, '_____')
                options = [keyword] + distractors
                random.shuffle(options)
                
                mcqs.append({
                    "question": question.capitalize(),
                    "options": options,
                    "correct_answer": keyword
                })
                break  # Move to the next keyword after creating one question
    
    return mcqs

# Generate MCQs
mcqs = generate_mcqs(input_text)

# Print the MCQs in a formatted way
for i, mcq in enumerate(mcqs, 1):
    print(f"\nQuestion {i}:")
    print(mcq['question'])
    print("\nOptions:")
    for j, option in enumerate(['A', 'B', 'C', 'D'][:len(mcq['options'])]):
        print(f"{option}) {mcq['options'][j]}")
    print(f"\nCorrect Answer: {mcq['correct_answer']}")
    print("-" * 80)



Question 1:

google was founded on september 4, 1998, by american computer scientists _____ and sergey brin while they were phd students at stanford university in california.

Options:
A) larry page
B) technology
C) innovation
D) software

Correct Answer: larry page
--------------------------------------------------------------------------------

Question 2:
In 2015, google was reorganized as a wholly owned subsidiary of _____ is alphabet's largest subsidiary and is a holding company for alphabet's internet properties and interests.

Options:
A) alphabet inc. google
B) innovation
C) software
D) technology

Correct Answer: alphabet inc. google
--------------------------------------------------------------------------------

Question 3:
Sundar _____ was appointed ceo of google on october 24, 2015, replacing larry page, who became the ceo of alphabet.

Options:
A) software
B) innovation
C) technology
D) pichai

Correct Answer: pichai
------------------------------------------------------

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dsoni\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\dsoni\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\dsoni\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\dsoni\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\dsoni\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
since Python 3.9 and will be removed in a subsequent version.
  return random.sample(distractors, min(4, len(distractors)))  # Select up to 4 distracto