# Que:1. Create a Doc object from the file peterrabbit.txt

In [46]:
import spacy
# Load the 'en_core_web_sm' pipeline; `nlp` is the callable used below to turn raw text into a Doc.
nlp = spacy.load('en_core_web_sm')

In [47]:
# Read the whole story into memory.
# The encoding is passed explicitly because open() otherwise falls back to the
# platform's locale encoding, which makes the decoded text machine-dependent.
# Assumes the file is UTF-8 (plain ASCII decodes identically) -- TODO confirm.
with open("peterrabbit.txt", "r", encoding="utf-8") as file:
    text = file.read()

In [48]:
# Run the loaded pipeline over the raw text; `doc` is the object every later cell analyses.
doc = nlp(text)

In [49]:
# Total number of tokens in the Doc. The POS frequency table further down sums
# to this same figure, so it includes punctuation and whitespace tokens.
# NOTE(review): `token` is reused as a loop variable in later cells, which
# overwrites this count.
token = len(doc)
token

1258

# Que:2. For every token in the third sentence, print the token text, the POS tag, the fine-grained TAG tag, and the description of the fine-grained tag.

In [50]:
# Count the sentences by consuming doc.sents directly instead of building a list first.
sent = sum(1 for _ in doc.sents)
print(sent)

57


In [51]:
# Materialise the sentence spans so they can be indexed. This rebinds `sent`
# from the integer count assigned in the previous cell to a list of spans.
sent = list(doc.sents)

In [52]:
# Third sentence (index 2): one row per token showing its text, coarse POS,
# fine-grained tag and spaCy's explanation of that tag.
# print() joins its arguments with a single space, which yields the same
# column layout as one long f-string.
for token in sent[2]:
    print(f'{token.text:15}', f'{token.pos_:10}', f'{token.tag_:8}', spacy.explain(token.tag_))

They            PRON       PRP      pronoun, personal
lived           VERB       VBD      verb, past tense
with            ADP        IN       conjunction, subordinating or preposition
their           PRON       PRP$     pronoun, possessive
Mother          PROPN      NNP      noun, proper singular
in              ADP        IN       conjunction, subordinating or preposition
a               DET        DT       determiner
sand            NOUN       NN       noun, singular or mass
-               PUNCT      HYPH     punctuation mark, hyphen
bank            NOUN       NN       noun, singular or mass
,               PUNCT      ,        punctuation mark, comma
underneath      ADP        IN       conjunction, subordinating or preposition
the             DET        DT       determiner
root            NOUN       NN       noun, singular or mass
of              ADP        IN       conjunction, subordinating or preposition
a               DET        DT       determiner

               SPACE      _

# Que:3. Provide a frequency list of POS tags from the entire document

In [53]:
import spacy 
from collections import Counter

In [54]:
# Tally the coarse-grained POS label of every token. Counter consumes the
# generator directly, so no intermediate list is built.
x = Counter(tok.pos_ for tok in doc)

In [55]:
# Header followed by one "POS : count" row per tag, in first-seen order.
# Note the backslash: '/n' would be printed literally instead of producing a
# blank line after the header.
print('Frequency of pos tag:\n')
for pos, freq in x.items():
    print(f'{pos: <10}: {freq}')

Frequency of pos tag:/n
DET       : 90
PROPN     : 75
ADP       : 124
PUNCT     : 172
NUM       : 8
SPACE     : 99
ADV       : 65
SCONJ     : 20
NOUN      : 173
PRON      : 108
VERB      : 131
ADJ       : 54
CCONJ     : 61
AUX       : 50
PART      : 28


# Que:4. CHALLENGE: What percentage of tokens are nouns?

In [56]:
# Denominator for the noun percentage: every token that is neither punctuation nor whitespace.
tokens = [tok for tok in doc if not (tok.is_punct or tok.is_space)]

In [57]:
# "Noun" here covers both common nouns (NOUN) and proper nouns (PROPN).
noun = [tok for tok in tokens if tok.pos_ == "NOUN" or tok.pos_ == "PROPN"]

In [58]:
# Guard against an empty token list so the division can never hit zero.
if tokens:
    percentage = len(noun) / len(tokens) * 100
else:
    percentage = 0

In [59]:
# Share of NOUN/PROPN tokens among the non-punctuation, non-whitespace tokens,
# shown with two decimals.
print(f"Nouns percentage: {percentage:.2f}%")

Nouns percentage: 25.08%


# Que:5. Display the Dependency Parse for the third sentence.

In [60]:
# Sentence spans of the document as an indexable list.
s=list(doc.sents)

In [61]:
# Third sentence (zero-based index 2).
ts=s[2]

In [62]:
# Show the sentence text before printing its parse.
print('3rd Sentence:',ts)

3rd Sentence: They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.




In [63]:
# One row per token of the third sentence: text, dependency label, coarse POS
# and the text of the token's syntactic head.
# print() separates its arguments with a single space, reproducing the columns.
for token in ts:
    print(f'{token.text:12}', f'{token.dep_:10}', f'{token.pos_:8}', f'{token.head.text:7}')

They         nsubj      PRON     lived  
lived        ROOT       VERB     lived  
with         prep       ADP      lived  
their        poss       PRON     Mother 
Mother       pobj       PROPN    with   
in           prep       ADP      lived  
a            det        DET      bank   
sand         compound   NOUN     bank   
-            punct      PUNCT    bank   
bank         pobj       NOUN     in     
,            punct      PUNCT    bank   
underneath   prep       ADP      bank   
the          det        DET      root   
root         pobj       NOUN     underneath
of           prep       ADP      root   
a            det        DET      tree   

            dep        SPACE    a      
very         advmod     ADV      big    
big          amod       ADJ      tree   
fir          compound   NOUN     tree   
-            punct      PUNCT    tree   
tree         pobj       NOUN     of     
.            punct      PUNCT    lived  


           dep        SPACE    .      


In [64]:
from spacy import displacy
# Render the dependency parse of the third sentence; jupyter=True asks displaCy
# to display the result in the notebook.
displacy.render(ts,style='dep',jupyter=True)

# Que:6. Show the first two named entities from Beatrix Potter's The Tale of Peter Rabbit

In [65]:
# Collect (text, label) for every entity in the document, then show the first two.
e = [(span.text, span.label_) for span in doc.ents]
print('First two named entities')
for ent_text, ent_label in e[:2]:
    print(f'{ent_text} |{ent_label}')

First two named entities
The Tale of Peter Rabbit |WORK_OF_ART
Beatrix Potter |PERSON


# Que:7. How many sentences are contained in The Tale of Peter Rabbit?

In [66]:
# Sentence spans of the whole document, materialised so they can be counted and sliced.
sentences=list(doc.sents)

In [67]:
# Number of sentence spans found in the document.
print("No. of sentences:",len(sentences))

No. of sentences: 57


In [68]:
# Preview the first three sentences, numbered from 1.
# The loop variable is `sentence` rather than `sent` so that the global `sent`
# list built earlier in the notebook is not overwritten by a single Span, which
# would break re-running the cell that indexes `sent[2]`.
for i, sentence in enumerate(sentences[:3], start=1):
    print(f"Sentence {i}= {sentence.text.strip()}")

Sentence 1= The Tale of Peter Rabbit, by Beatrix Potter (1902).
Sentence 2= Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.
Sentence 3= They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.


# Que:8. CHALLENGE: How many sentences contain named entities?

In [69]:
sentences = list(doc.sents)
# Span.ents gives the named entities that lie completely inside a sentence, so
# a non-empty result means the sentence contains at least one entity. This
# replaces a hand-written start/end comparison against every entity in doc.ents.
# NOTE: despite its name, `entities` holds sentence spans, not entity spans.
entities = [sent for sent in sentences if sent.ents]

In [70]:
# Report how many sentences there are and how many of them contain an entity
# (`entities` is the list of such sentences).
print(f"Total sentences: {len(sentences)}")
print(f"Sentences containing named entities: {len(entities)}")

Total sentences: 57
Sentences containing named entities: 38


In [71]:
# For each sentence that contains entities, print the sentence and the texts of
# the entities inside it.
# Span.ents returns the entities that fall completely within the sentence, so
# no manual start/end comparison against doc.ents is needed.
# The loop variable is `sentence` rather than `sent` to avoid overwriting the
# global `sent` list built earlier in the notebook.
for sentence in entities:
    ents = [ent.text for ent in sentence.ents]
    print(f"\nSentence: {sentence.text.strip()}")
    print(f"Named Entities: {ents}")


Sentence: The Tale of Peter Rabbit, by Beatrix Potter (1902).
Named Entities: ['The Tale of Peter Rabbit', 'Beatrix Potter', '1902']

Sentence: Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.
Named Entities: ['four', 'Mopsy', 'Cotton-tail', 'Peter']

Sentence: 'Now my dears,' said old Mrs. Rabbit one morning, 'you may go into
the fields or down the lane, but don't go into Mr. McGregor's garden:
your Father had an accident there; he was put in a pie by Mrs.
McGregor.'

'Now run along, and don't get into mischief.
Named Entities: ['Rabbit', 'one morning', 'McGregor', 'McGregor']

Sentence: Then old Mrs. Rabbit took a basket and her umbrella, and went through
the wood to the baker's.
Named Entities: ['Rabbit']

Sentence: She bought a loaf of brown bread and five
currant buns.
Named Entities: ['five']

Sentence: Flopsy, Mopsy, and Cottontail, who were good little bunnies, went
down the lane to gather black

# Que:9. Display the named entity visualization for list_of_sents[0] from the previous problem

In [72]:
# All sentence spans of the document (not only those containing entities).
l=list(doc.sents)

In [73]:
# First sentence of the document.
# NOTE(review): the question asks for the first sentence *containing named
# entities*; for this text that is the same sentence (compare the first entry
# printed by the entity-listing cell above) -- confirm this is intended.
fs=l[0]
print("Sentence to visualise",fs.text)

Sentence to visualise The Tale of Peter Rabbit, by Beatrix Potter (1902).




In [74]:
from spacy import displacy
# Render the named-entity highlighting for the selected sentence; jupyter=True
# asks displaCy to display the result in the notebook.
displacy.render(fs,style='ent',jupyter=True)

In [75]:
# NOTE(review): `file` was opened in a `with` block, which already closed it on
# exit, so this extra close() has no effect and can be dropped.
file.close()