In [29]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

In [30]:
def preprocess_text(text):
    """Normalise raw text into a list of stemmed, stop-word-free tokens.

    Steps, in order: lowercase the text, delete every character that is
    neither a word character nor whitespace, tokenise with NLTK's
    ``word_tokenize``, discard NLTK's English stop words, and reduce the
    remaining tokens with the Porter stemmer.

    Args:
        text: The raw input string.

    Returns:
        A list of stemmed token strings.
    """
    # Lowercase and strip punctuation in one expression.
    normalized = re.sub(r'[^\w\s]', '', text.lower())
    words = word_tokenize(normalized)

    english_stop_words = set(stopwords.words('english'))
    porter = PorterStemmer()

    # Filter out stop words and stem the survivors in a single pass.
    return [
        porter.stem(word)
        for word in words
        if word not in english_stop_words
    ]

In [49]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import spacy

def understand_text(text, nlp=None):
    """Extract entities, noun-chunk concepts and simple relationships from text.

    Args:
        text: The raw text to analyse.
        nlp: Optional callable that turns ``text`` into a parsed document
            (normally an already-loaded spaCy pipeline). When omitted, the
            ``en_core_web_sm`` model is loaded on this call; pass a
            pre-loaded pipeline to avoid reloading the model every time the
            function is called.

    Returns:
        A dict with three keys:
            'entities': list of ``(text, label)`` tuples, one per named entity.
            'concepts': list of noun-chunk texts.
            'relationships': list of ``(head text, token text)`` tuples for
                every token whose dependency label is 'amod' or 'prep'.
    """
    if nlp is None:
        # Fall back to the original behaviour: load the small English model.
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    entities = [(ent.text, ent.label_) for ent in doc.ents]
    concepts = [chunk.text for chunk in doc.noun_chunks]
    # Pair each adjectival modifier / preposition with the word it attaches to.
    relationships = [
        (token.head.text, token.text)
        for token in doc
        if token.dep_ in ('amod', 'prep')
    ]

    return {
        'entities': entities,
        'concepts': concepts,
        'relationships': relationships
    }

def generate_questions(text_understanding):
    """Build one "What is ...?" question for each extracted concept.

    Args:
        text_understanding: Mapping with a 'concepts' entry holding an
            iterable of concept strings.

    Returns:
        A new list of question strings, in the same order as the concepts
        (repeated concepts yield repeated questions).
    """
    return [
        f"What is {noun_phrase}?"
        for noun_phrase in text_understanding['concepts']
    ]

with open('egypt.txt', 'r', encoding='UTF-8') as file:
    text = file.read()

preprocessed_text = preprocess_text(text)
understand_text = understand_text(text)
generated_questions = generate_questions(understand_text)

In [50]:
generated_questions = generate_questions(understand_text)
print(generated_questions)


['What is The Greek historian?', 'What is what?', 'What is he?', 'What is The Nile River?', 'What is Egyptian civilization?', 'What is hundreds?', 'What is years?', 'What is The Longest River?', 'What is the Nile?', 'What is the world’s longest river?', 'What is It?', 'What is the equator?', 'What is Africa?', 'What is the Mediterranean Sea?', 'What is the south?', 'What is the Nile?', 'What is cataracts?', 'What is A cataract?', 'What is a waterfall?', 'What is the sea?', 'What is a delta?', 'What is A delta?', 'What is an area?', 'What is a river’s mouth?', 'What is the water?', 'What is fine soil?', 'What is silt?', 'What is the delta?', 'What is the Nile?', 'What is many streams?', 'What is The river?', 'What is the upper Nile?', 'What is the south?', 'What is the lower Nile?', 'What is the north?', 'What is centuries?', 'What is heavy rains?', 'What is Ethiopia?', 'What is the Nile?', 'What is The floods?', 'What is rich soil?', 'What is the Nile’s shores?', 'What is This soil?', 