In [1]:
import nltk
from nltk.tokenize import word_tokenize

# Sample sentence that the later cells tokenize, tag and render.
text = "This is one simple example for Natural language programming using POS Tagging."

In [2]:
# Fetch the tag-mapping resource used when pos_tag is called with tagset="universal".
# NOTE(review): nltk.pos_tag also needs a tagger model to be installed; it is not
# downloaded here -- confirm it is available on a fresh environment.
nltk.download("universal_tagset")

[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


True

In [3]:
import spacy
from spacy import displacy

# Load the `en_core_web_sm` pipeline and run it over the sample sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Render the parsed sentence with displaCy's dependency ("dep") style.
displacy.render(doc, style="dep")

In [4]:
# Render the same `doc` again, this time with displaCy's entity ("ent") style.
displacy.render(doc, style="ent")

In [5]:
from nltk.tokenize import TreebankWordTokenizer as twt

# Split the sentence with the Treebank tokenizer, then tag every token
# using the "universal" tagset; `tags` is a list of (token, tag) pairs.
tokens = twt().tokenize(text)
tags = nltk.pos_tag(tokens, tagset="universal")
tags

[('This', 'DET'),
 ('is', 'VERB'),
 ('one', 'NUM'),
 ('simple', 'ADJ'),
 ('example', 'NOUN'),
 ('for', 'ADP'),
 ('Natural', 'NOUN'),
 ('language', 'NOUN'),
 ('programming', 'VERB'),
 ('using', 'VERB'),
 ('POS', 'NOUN'),
 ('Tagging', 'NOUN'),
 ('.', '.')]

In [6]:
# (start, end) character offsets of each token within `text`.
# span_tokenize is consumed once here and materialised into a list.
span_generator = twt().span_tokenize(text)
spans = list(span_generator)
spans

[(0, 4),
 (5, 7),
 (8, 11),
 (12, 18),
 (19, 26),
 (27, 30),
 (31, 38),
 (39, 47),
 (48, 59),
 (60, 65),
 (66, 69),
 (70, 77),
 (77, 78)]

In [7]:
# Tag labels to highlight; tokens carrying any other tag (for example the
# "." punctuation tag) are left out of `ents`.
pos_tags = [
    "PRON", "VERB", "NOUN", "ADJ", "ADP",
    "ADV", "CONJ", "DET", "NUM", "PRT",
]

# Pair each (token, tag) with its (start, end) span and build one
# start/end/label dict per token whose tag is in `pos_tags`.
ents = [
    {"start": start, "end": end, "label": label}
    for (_, label), (start, end) in zip(tags, spans)
    if label in pos_tags
]
ents

[{'start': 0, 'end': 4, 'label': 'DET'},
 {'start': 5, 'end': 7, 'label': 'VERB'},
 {'start': 8, 'end': 11, 'label': 'NUM'},
 {'start': 12, 'end': 18, 'label': 'ADJ'},
 {'start': 19, 'end': 26, 'label': 'NOUN'},
 {'start': 27, 'end': 30, 'label': 'ADP'},
 {'start': 31, 'end': 38, 'label': 'NOUN'},
 {'start': 39, 'end': 47, 'label': 'NOUN'},
 {'start': 48, 'end': 59, 'label': 'VERB'},
 {'start': 60, 'end': 65, 'label': 'VERB'},
 {'start': 66, 'end': 69, 'label': 'NOUN'},
 {'start': 70, 'end': 77, 'label': 'NOUN'}]

In [8]:
# Hand-built input for displaCy: the raw text plus the labelled spans.
# NOTE(review): this rebinds `doc`, which an earlier cell set to the spaCy Doc;
# re-running the earlier displacy.render(doc, ...) cells after this one would
# receive this dict instead. Consider a distinct name.
doc = {"text": text, "ents": ents}

# Highlight colour for each tag label.
colors = {
    "PRON": "blueviolet",
    "VERB": "lightpink",
    "NOUN": "turquoise",
    "ADJ": "lime",
    "ADP": "khaki",
    "ADV": "orange",
    "CONJ": "cornflowerblue",
    "DET": "forestgreen",
    "NUM": "salmon",
    "PRT": "yellow",
}

# Renderer options: which labels to show and the colour for each.
options = {"ents": pos_tags, "colors": colors}

In [9]:
# manual=True because `doc` here is the hand-built dict of text and spans,
# not a spaCy Doc.
displacy.render(doc, style="ent", options=options, manual=True)