<h1>Installing Dependencies</h1>

In [3]:
! pip install spacy



In [4]:
! python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB 1.3 MB/s eta 0:00:10
     --------------------------------------- 0.0/12.8 MB 393.8 kB/s eta 0:00:33
     --------------------------------------- 0.1/12.8 MB 819.2 kB/s eta 0:00:16
     --------------------------------------- 0.1/12.8 MB 774.0 kB/s eta 0:00:17
      --------------------------------------- 0.2/12.8 MB 1.1 MB/s eta 0:00:12
      --------------------------------------- 0.3/12.8 MB 1.0 MB/s eta 0:00:13
     - -------------------------------------- 0.4/12.8 MB 1.1 MB/s eta 0:00:11
     - -------------------------------------- 0.4/12.8 MB 1.1 MB/s eta 0:00:12
     - -------------------------------------- 0.6/12.8 MB 1.5 MB/s eta 0:00:09
     -- ------------------------------

<h1>Importing Dependencies</h1>

In [5]:
import spacy
import random
from collections import Counter


In [6]:
# input text
text = """
The Greek historian knew what he was talking about. The Nile River fed Egyptian civilization for hundreds of years. 
The Longest River the Nile is 4,160 miles long—the world’s longest river. It begins near the equator in Africa and 
flows north to the Mediterranean Sea. In the south the Nile churns with cataracts. A cataract is a waterfall. Near the 
sea the Nile branches into a delta. A delta is an area near a river’s mouth where the water deposits fine soil called silt. 
In the delta, the Nile divides into many streams. The river is called the upper Nile in the south and the lower Nile in the
north. For centuries, heavy rains in Ethiopia caused the Nile to flood every summer. The floods deposited rich soil along the 
Nile’s shores. This soil was fertile, which means it was good for growing crops. Unlike the Tigris and Euphrates,
the Nile River flooded at the same time every year, so farmers could predict when to plant their crops.
"""

num_questions = 5

In [7]:
# Load the spacy model
nlp = spacy.load('en_core_web_sm')

<h1>Concept Made</h1>

In [8]:
#text
nlp(text)


The Greek historian knew what he was talking about. The Nile River fed Egyptian civilization for hundreds of years. 
The Longest River the Nile is 4,160 miles long—the world’s longest river. It begins near the equator in Africa and 
flows north to the Mediterranean Sea. In the south the Nile churns with cataracts. A cataract is a waterfall. Near the 
sea the Nile branches into a delta. A delta is an area near a river’s mouth where the water deposits fine soil called silt. 
In the delta, the Nile divides into many streams. The river is called the upper Nile in the south and the lower Nile in the
north. For centuries, heavy rains in Ethiopia caused the Nile to flood every summer. The floods deposited rich soil along the 
Nile’s shores. This soil was fertile, which means it was good for growing crops. Unlike the Tigris and Euphrates,
the Nile River flooded at the same time every year, so farmers could predict when to plant their crops.

In [10]:
# Process the text with spaCy
doc =  nlp(text)

#each sentences one by one each line ma dekhaucha i.e. Extract sentences from the text
sentences = [sent.text for sent in doc.sents] 
sentences

# len([sent for sent in doc.sents])


['\nThe Greek historian knew what he was talking about.',
 'The Nile River fed Egyptian civilization for hundreds of years. \n',
 'The Longest River the Nile is 4,160 miles long—the world’s longest river.',
 'It begins near the equator in Africa and \nflows north to the Mediterranean Sea.',
 'In the south the Nile churns with cataracts.',
 'A cataract is a waterfall.',
 'Near the \nsea the Nile branches into a delta.',
 'A delta is an area near a river’s mouth where the water deposits fine soil called silt. \n',
 'In the delta, the Nile divides into many streams.',
 'The river is called the upper Nile in the south and the lower Nile in the\nnorth.',
 'For centuries, heavy rains in Ethiopia caused the Nile to flood every summer.',
 'The floods deposited rich soil along the \nNile’s shores.',
 'This soil was fertile, which means it was good for growing crops.',
 'Unlike the Tigris and Euphrates,\nthe Nile River flooded at the same time every year, so farmers could predict when to plant t

In [20]:
# Randomly select sentences to form questions
selected_sentences=random.sample(sentences,min(num_questions,len(sentences)))
selected_sentences
# min(num_questions,len(sentences))

['Unlike the Tigris and Euphrates,\nthe Nile River flooded at the same time every year, so farmers could predict when to plant their crops.\n',
 'In the delta, the Nile divides into many streams.',
 'A cataract is a waterfall.',
 'This soil was fertile, which means it was good for growing crops.',
 'In the south the Nile churns with cataracts.']

In [46]:
mcqs = []
# Generate MCQs for each selected sentence
for sentence in selected_sentences:
  sentence = sentence.lower() #yo garena bhane capital wala noun dekhaudaina
  # process with spacy (sentence)
  sent_doc = nlp(sentence)
  # Extract entities(nouns) from sentence
  nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"] #pos = parts of speech
  
  # to generate proper noun
  if len(nouns) < 2:
    continue

  # Count the occurrence of each noun
  noun_counts = Counter(nouns)
  # print(noun_counts)

 
  # extract subject from the noun list
  if noun_counts:
    subject = noun_counts.most_common(1)[0][0]
    # print(subject)

    answer_choices = [subject]
   
    # Generate question in a question way
    question_stem = sentence.replace(subject,"_________")
    # print(question_stem)

    for _ in range(3):
      distractor = random.choice(list(set(nouns) -  set([subject]))) #for unique we used set
      answer_choices.append(distractor)
    # print(answer_choices)

    random.shuffle(answer_choices) #so that it wont be obvious that 1st one is the answer
   
    correct_answer = chr(64 + answer_choices.index(subject) + 1) # to conver index to letter, we can use this formula

    mcqs.append((question_stem,answer_choices,correct_answer))





  

In [47]:
mcqs

[('unlike the _________ and euphrates,\nthe nile river flooded at the same time every year, so farmers could predict when to plant their crops.\n',
  ['river', 'tigris', 'river', 'crops'],
  'B'),
 ('in the _________, the nile divides into many streams.',
  ['streams', 'streams', 'streams', 'delta'],
  'D'),
 ('a _________ is a waterfall.',
  ['cataract', 'waterfall', 'waterfall', 'waterfall'],
  'A'),
 ('this _________ was fertile, which means it was good for growing crops.',
  ['soil', 'crops', 'crops', 'crops'],
  'A'),
 ('in the _________ the nile churns with cataracts.',
  ['cataracts', 'south', 'cataracts', 'cataracts'],
  'B')]

<h1>Overall Concept of MCQ put in a FUNCTION</h1>

In [48]:

def generate_mcqs(text, num_questions=5):
    # Process the text with spaCy
    doc = nlp(text)

    # Extract sentences from the text
    sentences = [sent.text for sent in doc.sents]

    # Randomly select sentences to form questions
    selected_sentences = random.sample(sentences, min(num_questions, len(sentences)))

    # Initialize list to store generated MCQs
    mcqs = []

    # Generate MCQs for each selected sentence
    for sentence in selected_sentences:
        # Process the sentence with spaCy
        sent_doc = nlp(sentence)

        # Extract entities (nouns) from the sentence
        nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]

        # Ensure there are enough nouns to generate MCQs
        if len(nouns) < 2:
            continue

        # Count the occurrence of each noun
        noun_counts = Counter(nouns)

        # Select the most common noun as the subject of the question
        if noun_counts:
            subject = noun_counts.most_common(1)[0][0]

            # Generate the question stem
            question_stem = sentence.replace(subject, "_______")

            # Generate answer choices
            answer_choices = [subject]

            # Add some random words from the text as distractors
            for _ in range(3):
                distractor = random.choice(list(set(nouns) - set([subject])))
                answer_choices.append(distractor)

            # Shuffle the answer choices
            random.shuffle(answer_choices)

            # Append the generated MCQ to the list
            correct_answer = chr(64 + answer_choices.index(subject) + 1)  # Convert index to letter
            mcqs.append((question_stem, answer_choices, correct_answer))

    return mcqs
    
    
    