In [None]:
#1. Create a Doc object from the file peterrabbit.txt

In [8]:
import spacy

# Load the English model in spaCy
nlp = spacy.load("en_core_web_sm")

# Read the file content
with open("peterrabbit.txt", "r") as file:
    text = file.read()

# Create a Doc object
doc = nlp(text)

# Optional: print first 100 characters
print(doc[:100])


The Tale of Peter Rabbit, by Beatrix Potter (1902).

Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.

They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.

'Now my dears,' said old Mrs. Rabbit one morning, 'you may go into
the fields or down the lane, but don't go into


In [None]:
#2.For every token in the third sentence, print the token text, the POS tag, the fine-grained TAG tag, and the description of the fine-grained tag.

In [19]:
# Get all sentences
sentences = list(doc.sents)

# Get the third sentence
third_sentence = sentences[2]

# Print token details
print("Token Text | POS | Fine-grained TAG")
print("-" * 40)
for token in third_sentence:
    print(f"{token.text:12} | {token.pos_:6} | {token.tag_}")

Token Text | POS | Fine-grained TAG
----------------------------------------
They         | PRON   | PRP
lived        | VERB   | VBD
with         | ADP    | IN
their        | PRON   | PRP$
Mother       | PROPN  | NNP
in           | ADP    | IN
a            | DET    | DT
sand         | NOUN   | NN
-            | PUNCT  | HYPH
bank         | NOUN   | NN
,            | PUNCT  | ,
underneath   | ADP    | IN
the          | DET    | DT
root         | NOUN   | NN
of           | ADP    | IN
a            | DET    | DT

            | SPACE  | _SP
very         | ADV    | RB
big          | ADJ    | JJ
fir          | NOUN   | NN
-            | PUNCT  | HYPH
tree         | NOUN   | NN
.            | PUNCT  | .


           | SPACE  | _SP


In [20]:
#3. Provide a frequency list of POS tags from the entire document
from collections import Counter

pos_counts = Counter([token.pos_ for token in doc])

for pos, freq in pos_counts.items():
    print(f"{pos}: {freq}")

DET: 90
PROPN: 75
ADP: 124
PUNCT: 172
NUM: 8
SPACE: 99
ADV: 65
SCONJ: 20
NOUN: 173
PRON: 108
VERB: 131
ADJ: 54
CCONJ: 61
AUX: 50
PART: 28


In [21]:
#4. CHALLENGE: What percentage of tokens are nouns?
total_tokens = len([token for token in doc if token.is_alpha])
noun_tokens = len([token for token in doc if token.pos_ == "NOUN"])

percentage_nouns = (noun_tokens / total_tokens) * 100
print(f"Percentage of nouns: {percentage_nouns:.2f}%")


Percentage of nouns: 17.98%


In [25]:
#5.Display the Dependency Parse for the third sentence.
displacy.render(third_sentence, style="dep", jupyter=True)

In [26]:
#6. Show the first two named entities from Beatrix Potter's The Tale of Peter Rabbit
for ent in doc.ents[:2]:
    print(f"Entity: {ent.text}, Label: {ent.label_}, Description: {spacy.explain(ent.label_)}")

Entity: The Tale of Peter Rabbit, Label: WORK_OF_ART, Description: Titles of books, songs, etc.
Entity: Beatrix Potter, Label: PERSON, Description: People, including fictional


In [27]:
#7. How many sentences are contained in The Tale of Peter Rabbit? 
print(f"Number of sentences: {len(sentences)}")

Number of sentences: 57


In [28]:
#8.CHALLENGE: How many sentences contain named entities?
sentences_with_ents = [sent for sent in sentences if any(tok.ent_type_ for tok in sent)]
print(f"Number of sentences with named entities: {len(sentences_with_ents)}")


Number of sentences with named entities: 38


In [None]:
#9. Display the named entity visualization for list_of_sents[0] from the previous problem
displacy.render(sentences[0], style="ent", jupyter=True)  # If you're in Jupyter Notebook

# Or serve in browser:
displacy.serve(sentences[0], style="ent")




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

