In [3]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Load spaCy's English pipeline; calling it on a string yields an annotated Doc.
nlp = spacy.load("en_core_web_sm")

text = "John enjoys playing football while Mary loves reading books in the library."

# Run the pipeline once; each token in `doc` then exposes the attributes
# used below (is_stop, is_punct, pos_, lemma_).
doc = nlp(text)

# Single pass over the tokens:
#   - drop stop words and punctuation,
#   - keep only tokens whose coarse POS tag is NOUN or VERB,
#   - store the lemma (base form) rather than the surface text.
filtered_tokens = [
    token.lemma_
    for token in doc
    if not (token.is_stop or token.is_punct)
    and token.pos_ in ("NOUN", "VERB")
]

print("Final Output Tokens:", filtered_tokens)


Final Output Tokens: ['enjoy', 'play', 'football', 'read', 'book', 'library']


In [4]:
import spacy

# Load spaCy's English pipeline; the entities it recognises are read from
# `doc.ents` below.
nlp = spacy.load("en_core_web_sm")

text = "Chris met Alex at Apple headquarters in California. He told him about the new iPhone launch."

# Annotate the text
doc = nlp(text)

# 1. Named Entity Recognition: one line per entity span, "<text> --> <label>"
print("Named Entities:")
for entity in doc.ents:
    print(f"{entity.text} --> {entity.label_}")

# 2. Pronoun ambiguity detection
# NOTE: this is a presence check only. It warns as soon as any token matches
# one of the third-person pronouns listed here (case-insensitive); it does not
# try to resolve which entity a pronoun refers to.
pronouns = {"he", "she", "they", "him", "her", "them"}

# isdisjoint() is False as soon as one lower-cased token text is in the set.
if not pronouns.isdisjoint(token.text.lower() for token in doc):
    print("\nWarning: Possible pronoun ambiguity detected!")


Named Entities:
Chris --> PERSON
Alex --> PERSON
Apple --> ORG
California --> GPE
iPhone --> ORG

