In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag

# Download resources (run once)
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger_eng')   # new tagger name

# User input
text = input("Enter a sentence: ")

# 1. Tokenization
tokens = word_tokenize(text)

# 2. Remove stopwords
stop_words = set(stopwords.words('english'))
filtered_tokens = [w for w in tokens if w.lower() not in stop_words]

# 3. POS tagging
tagged = pos_tag(filtered_tokens)

# Map POS to WordNet
from nltk.corpus.reader.wordnet import NOUN, VERB

def get_wordnet_pos(tag):
    if tag.startswith('V'):
        return VERB
    elif tag.startswith('N'):
        return NOUN
    return None

# 4. Lemmatization + keep nouns/verbs
lemmatizer = WordNetLemmatizer()
final_tokens = []

for word, tag in tagged:
    wn_tag = get_wordnet_pos(tag)
    if wn_tag:
        lemma = lemmatizer.lemmatize(word, wn_tag)
        final_tokens.append(lemma)

print("Processed tokens:", final_tokens)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


Enter a sentence: John enjoys playing football while Mary loves reading books in the library.
Processed tokens: ['John', 'enjoy', 'play', 'football', 'Mary', 'love', 'read', 'book']


In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")

# Take input from the user
text = input("Enter text: ")

# Process the text with spaCy
doc = nlp(text)

# 1. Named Entity Recognition (NER)
print("Named Entities:")
for ent in doc.ents:
    print(f"{ent.text}  -->  {ent.label_}")

# 2. Pronoun ambiguity detection
pronouns = {"he", "she", "they"}
words = {token.text.lower() for token in doc}

if pronouns.intersection(words):
    print("Warning: Possible pronoun ambiguity detected!")


Enter text: Chris met Alex at Apple headquarters in California. He told him about the new iPhone launch.
Named Entities:
Chris  -->  PERSON
Alex  -->  PERSON
Apple  -->  ORG
California  -->  GPE
iPhone  -->  ORG
