In [None]:
import spacy

# Load the en_core_web_sm English pipeline.
nlp = spacy.load('en_core_web_sm')

text = 'It took me more than two hours to translate a few pages of English.'

# Print one line per token in the form "word => POS => TAG".
# pos_ holds the universal (coarse) part-of-speech tag, while tag_ holds the
# more detailed part-of-speech tag for the same word.
for token in nlp(text):
    print(token.text, token.pos_, token.tag_, sep=' => ')

It => PRON => PRP
took => VERB => VBD
me => PRON => PRP
more => ADJ => JJR
than => SCONJ => IN
two => NUM => CD
hours => NOUN => NNS
to => PART => TO
translate => VERB => VB
a => DET => DT
few => ADJ => JJ
pages => NOUN => NNS
of => ADP => IN
English => PROPN => NNP
. => PUNCT => .


In [None]:
import spacy

# Load the en_core_web_sm English pipeline.
nlp = spacy.load('en_core_web_sm')

text = 'It took me more than two hours to translate a few pages of English.'

# Dependency parsing: print one line per token as "word => relation => head".
# dep_ is the dependency label of the word and head.text is the word it
# attaches to.
# The word tagged ROOT ('took' in this sentence) heads other words but is not
# the child of any other word, so it is printed with itself as its head.
# Generally the ROOT is the main verb of the sentence.
for token in nlp(text):
    print(token.text, token.dep_, token.head.text, sep=' => ')

It => nsubj => took
took => ROOT => took
me => dative => took
more => amod => two
than => quantmod => two
two => nummod => hours
hours => dobj => took
to => aux => translate
translate => xcomp => took
a => det => pages
few => amod => pages
pages => dobj => translate
of => prep => pages
English => pobj => of
. => punct => took


In [None]:
from spacy import displacy

# Parsing a sentence means generating a parse tree for it; the tree produced by
# dependency parsing is known as a dependency tree. Draw that tree inline in
# the notebook ('dep' is displaCy's default visualisation style).
displacy.render(nlp(text), style='dep', jupyter=True)

In [None]:
#Constituency Parsing is the process of analyzing the sentences by breaking down it into sub-phrases also known as constituents. 
#These sub-phrases belong to a specific category of grammar like NP (noun phrase) and VP(verb phrase).
!pip install benepar

Collecting benepar
  Downloading benepar-0.2.0.tar.gz (33 kB)
Collecting torch-struct>=0.5
  Downloading torch_struct-0.5-py3-none-any.whl (34 kB)
Collecting tokenizers>=0.9.4
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 50.2 MB/s 
[?25hCollecting transformers[tokenizers,torch]>=4.2.2
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 46.7 MB/s 
Collecting sentencepiece>=0.1.91
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 53.3 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 53.0 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████

In [None]:
import benepar

# Download the benepar_en3 English constituency-parsing model (a no-op if it is
# already present). No TensorFlow version switch is needed here: the benepar
# release installed above pulls in PyTorch-based dependencies, and the
# Colab-only %tensorflow_version magic would fail in other Jupyter setups.
benepar.download('benepar_en3')

[nltk_data] Downloading package benepar_en3 to /root/nltk_data...
[nltk_data]   Unzipping models/benepar_en3.zip.


True

In [None]:
import benepar
import spacy

# Load spaCy's English pipeline by its full package name (the 'en' shortcut is
# not available in spaCy 3 and the rest of this notebook uses en_core_web_sm).
nlp = spacy.load('en_core_web_sm')

# Add the benepar constituency parser to the pipeline. spaCy 2 expects a
# component object, whereas spaCy 3+ expects the registered factory name plus a
# config; importing benepar above is what registers the 'benepar' factory.
if spacy.__version__.startswith('2'):
    nlp.add_pipe(benepar.BeneparComponent('benepar_en3'))
else:
    nlp.add_pipe('benepar', config={'model': 'benepar_en3'})

text = 'It took me more than two hours to translate a few pages of English.'

# Generate the parse tree for the first (and only) sentence of the text.
# _.parse_string returns the constituency tree as a bracketed string.
list(nlp(text).sents)[0]._.parse_string

  'with `validate_args=False` to turn off validation.')


'(S (NP (NP (PRP It))) (VP (VBD took) (NP (PRP me)) (NP (QP (JJR more) (IN than) (CD two)) (NNS hours)) (S (VP (TO to) (VP (VB translate) (NP (NP (DT a) (JJ few) (NNS pages)) (PP (IN of) (NP (NNP English)))))))) (. .))'