In [3]:
import spacy
from spacy import displacy

# Load the English pipeline once; both sample texts below are run through it.
nlp = spacy.load("en_core_web_sm")

text = "Kamran loves apples, particularly on Wednesdays."
multi_sent_text = "Kamran loves apples, particularly on Wednesdays... Matcha (his cat) fights with Kitsu (his dog) >< ! wow."

# `doc` drives the per-token demos; `multi_sent_doc` is only used for the
# sentence-splitting demo at the end.
doc = nlp(text)
multi_sent_doc = nlp(multi_sent_text)

# --- Tokenization: the text of every token in `doc` ---
tokens = [tok.text for tok in doc]
print("Tokenization:", tokens)

# --- Lemmatization: the lemma string of every token in `doc` ---
lemmas = [tok.lemma_ for tok in doc]
print("Lemmatization:", lemmas)

# --- Part-of-speech tagging: (token text, POS label) pairs ---
# `tokens` already holds the token texts in order, so pair it with the labels.
pos_tags = list(zip(tokens, (tok.pos_ for tok in doc)))
print("POS tagging:", pos_tags)

# --- Dependency parsing: drawn inline with displaCy ---
# Display settings handed to the "dep" visualizer.
options = dict(
    compact=True,
    color="blue",
    bg="white",
    offset_x=150,
    distance=100,
)
displacy.render(doc, style="dep", options=options, jupyter=True)

# --- Named entity recognition: drawn inline with displaCy ---
displacy.render(doc, style="ent", jupyter=True)

# --- Sentence boundary detection: text of each sentence span ---
sentences = [span.text for span in multi_sent_doc.sents]
print("Sentence boundary detection:", sentences)

Tokenization: ['Kamran', 'loves', 'apples', ',', 'particularly', 'on', 'Wednesdays', '.']
Lemmatization: ['Kamran', 'love', 'apple', ',', 'particularly', 'on', 'Wednesdays', '.']
POS tagging: [('Kamran', 'PROPN'), ('loves', 'VERB'), ('apples', 'NOUN'), (',', 'PUNCT'), ('particularly', 'ADV'), ('on', 'ADP'), ('Wednesdays', 'PROPN'), ('.', 'PUNCT')]


Sentence boundary detection: ['Kamran loves apples, particularly on Wednesdays...', 'Matcha (his cat) fights with Kitsu (his dog) >< !', 'wow.']
