In [1]:
from adaptnlp import EasyTokenTagger
from pprint import pprint

# Examples of using EasyTokenTagger

In [2]:
# Sample passage that the tagging calls in this notebook operate on. It is
# written as two adjacent literals so the space before the line break stays
# visible in the source; that space and the newline are part of the text.
example_text = (
    "Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions. \n"
    "The Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert."
)

# Create the tagger object that the tagging calls are made on.
tagger = EasyTokenTagger()

# With Transformers

In [3]:
# Tag the example text using a model referenced by its
# "<owner>/<model-name>" identifier.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english",
)

2020-08-31 04:23:50,022 loading file sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english


Predicting text: 100%|██████████| 1/1 [00:00<00:00, 164.10it/s]


In [4]:
# Show the results: pretty-print each element of the tagger output in turn.
print("List string outputs of tags:\n")
for tagged_item in sentences:
    pprint(tagged_item)

List string outputs of tags:

[{'entity_group': 'I-LOC',
  'offsets': (-1, 2),
  'score': 0.11716681718826294,
  'word': '[CLS] Novetta'},
 {'entity_group': 'B-ORG',
  'offsets': (2, 3),
  'score': 0.11758644878864288,
  'word': 'Solutions'},
 {'entity_group': 'I-LOC',
  'offsets': (3, 5),
  'score': 0.11716681718826294,
  'word': 'is the'},
 {'entity_group': 'B-ORG',
  'offsets': (5, 6),
  'score': 0.11758644878864288,
  'word': 'best'},
 {'entity_group': 'I-LOC',
  'offsets': (6, 13),
  'score': 0.11716681718826294,
  'word': '. Albert Einstein used to be employed'},
 {'entity_group': 'B-ORG',
  'offsets': (13, 15),
  'score': 0.11758644878864288,
  'word': 'at Nov'},
 {'entity_group': 'I-LOC',
  'offsets': (15, 24),
  'score': 0.11716681718826294,
  'word': '##etta Solutions . The Wright brothers loved to visit'},
 {'entity_group': 'B-ORG',
  'offsets': (24, 25),
  'score': 0.11758644878864288,
  'word': 'the'},
 {'entity_group': 'I-LOC',
  'offsets': (25, 27),
  'score': 0.11716681

# With Flair 

## Named Entity Recognition

In [5]:
# Tag the example text with the "ner-ontonotes" model.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="ner-ontonotes",
)

2020-08-31 04:23:55,368 loading file /home/andrew/.flair/models/en-ner-ontonotes-v0.4.pt


In [6]:
# Show the results: print the tagged-string form of each sentence.
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <B-ORG> Solutions <E-ORG> is the best . Albert <B-PERSON> Einstein <E-PERSON> used to be employed at Novetta <B-ORG> Solutions <E-ORG> . The Wright <S-PERSON> brothers loved to visit the JBF <S-ORG> headquarters , and they would have a chat with Albert <S-PERSON> .


In [7]:
# Print every span returned for the "ner" label type, sentence by sentence.
print("List entities tagged:\n")
for sentence in sentences:
    ner_spans = sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

Span [1,2]: "Novetta Solutions"   [− Labels: ORG (0.9644)]
Span [7,8]: "Albert Einstein"   [− Labels: PERSON (0.9969)]
Span [14,15]: "Novetta Solutions"   [− Labels: ORG (0.9796)]
Span [18]: "Wright"   [− Labels: PERSON (0.9995)]
Span [24]: "JBF"   [− Labels: ORG (0.9898)]
Span [34]: "Albert"   [− Labels: PERSON (0.9999)]


In [8]:
# Pretty-print the dictionary form of each sentence's "ner" annotations.
print("Get json of tagged information:\n")
for sentence in sentences:
    ner_info = sentence.to_dict(tag_type="ner")
    pprint(ner_info)

Get json of tagged information:

{'entities': [{'end_pos': 17,
               'labels': [ORG (0.9644)],
               'start_pos': 0,
               'text': 'Novetta Solutions'},
              {'end_pos': 46,
               'labels': [PERSON (0.9969)],
               'start_pos': 31,
               'text': 'Albert Einstein'},
              {'end_pos': 87,
               'labels': [ORG (0.9796)],
               'start_pos': 70,
               'text': 'Novetta Solutions'},
              {'end_pos': 100,
               'labels': [PERSON (0.9995)],
               'start_pos': 94,
               'text': 'Wright'},
              {'end_pos': 132,
               'labels': [ORG (0.9898)],
               'start_pos': 129,
               'text': 'JBF'},
              {'end_pos': 185,
               'labels': [PERSON (0.9999)],
               'start_pos': 179,
               'text': 'Albert'}],
 'labels': [],
 'text': 'Novetta Solutions is the best. Albert Einstein used to be employed '
         

## Parts of Speech

In [9]:
# Tag the example text with the "pos" model.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="pos",
)

2020-08-31 04:23:58,151 loading file /home/andrew/.flair/models/en-pos-ontonotes-v0.5.pt


In [10]:
# Show the results: print the tagged-string form of each sentence.
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <NNP> Solutions <NNPS> is <VBZ> the <DT> best <JJS> . <.> Albert <NNP> Einstein <NNP> used <VBD> to <TO> be <VB> employed <VBN> at <IN> Novetta <NNP> Solutions <NNPS> . <.> The <DT> Wright <NNP> brothers <NNS> loved <VBD> to <TO> visit <VB> the <DT> JBF <NNP> headquarters <NN> , <,> and <CC> they <PRP> would <MD> have <VB> a <DT> chat <NN> with <IN> Albert <NNP> . <.>


In [11]:
# Print every span returned for the "pos" label type, sentence by sentence.
print("List text/entities tagged:\n")
for sentence in sentences:
    pos_spans = sentence.get_spans("pos")
    for span in pos_spans:
        print(span)

List text/entities tagged:

Span [1]: "Novetta"   [− Labels: NNP (0.9998)]
Span [2]: "Solutions"   [− Labels: NNPS (0.8235)]
Span [3]: "is"   [− Labels: VBZ (1.0)]
Span [4]: "the"   [− Labels: DT (1.0)]
Span [5]: "best"   [− Labels: JJS (0.9996)]
Span [6]: "."   [− Labels: . (0.9995)]
Span [7]: "Albert"   [− Labels: NNP (1.0)]
Span [8]: "Einstein"   [− Labels: NNP (1.0)]
Span [9]: "used"   [− Labels: VBD (0.9981)]
Span [10]: "to"   [− Labels: TO (0.9999)]
Span [11]: "be"   [− Labels: VB (1.0)]
Span [12]: "employed"   [− Labels: VBN (0.9971)]
Span [13]: "at"   [− Labels: IN (1.0)]
Span [14]: "Novetta"   [− Labels: NNP (1.0)]
Span [15]: "Solutions"   [− Labels: NNPS (0.6877)]
Span [16]: "."   [− Labels: . (0.5807)]
Span [17]: "The"   [− Labels: DT (1.0)]
Span [18]: "Wright"   [− Labels: NNP (0.9999)]
Span [19]: "brothers"   [− Labels: NNS (1.0)]
Span [20]: "loved"   [− Labels: VBD (1.0)]
Span [21]: "to"   [− Labels: TO (0.9994)]
Span [22]: "visit"   [− Labels: VB (1.0)]
Span [23]: "the" 

In [12]:
# Pretty-print the dictionary form of each sentence's "pos" annotations.
print("Get json of tagged information:\n")
for sentence in sentences:
    pos_info = sentence.to_dict(tag_type="pos")
    pprint(pos_info)

Get json of tagged information:

{'entities': [{'end_pos': 7,
               'labels': [NNP (0.9998)],
               'start_pos': 0,
               'text': 'Novetta'},
              {'end_pos': 17,
               'labels': [NNPS (0.8235)],
               'start_pos': 8,
               'text': 'Solutions'},
              {'end_pos': 20,
               'labels': [VBZ (1.0)],
               'start_pos': 18,
               'text': 'is'},
              {'end_pos': 24,
               'labels': [DT (1.0)],
               'start_pos': 21,
               'text': 'the'},
              {'end_pos': 29,
               'labels': [JJS (0.9996)],
               'start_pos': 25,
               'text': 'best'},
              {'end_pos': 30,
               'labels': [. (0.9995)],
               'start_pos': 29,
               'text': '.'},
              {'end_pos': 37,
               'labels': [NNP (1.0)],
               'start_pos': 31,
               'text': 'Albert'},
              {'end_pos': 46,
  

## Chunk

In [None]:
# Tag the example text with the "chunk" model.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="chunk",
)

2020-08-31 04:24:03,605 https://nlp.informatik.hu-berlin.de/resources/models/chunk/en-chunk-conll2000-v0.4.pt not found in cache, downloading to /tmp/tmp5b7eqcx0


  6%|▋         | 16029696/249034168 [00:04<00:46, 5063404.31B/s]

In [12]:
# Show the results: print the tagged-string form of each sentence.
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <B-NP> Solutions <E-NP> is <S-VP> the <B-NP> best <E-NP> . Albert <B-NP> Einstein <E-NP> used <B-VP> to <I-VP> be <I-VP> employed <E-VP> at <S-PP> Novetta <B-NP> Solutions <E-NP> . The <B-NP> Wright <I-NP> brothers <E-NP> loved <B-VP> to <I-VP> visit <E-VP> the <B-NP> JBF <I-NP> headquarters <E-NP> , and they <S-NP> would <B-VP> have <E-VP> a <B-NP> chat <E-NP> with <S-PP> Albert <S-NP> .


In [13]:
# Print every span returned for the "np" label type, sentence by sentence.
print("List text/entities tagged:\n")
for sentence in sentences:
    chunk_spans = sentence.get_spans("np")
    for span in chunk_spans:
        print(span)

List text/entities tagged:

NP-span [1,2]: "Novetta Solutions"
VP-span [3]: "is"
NP-span [4,5]: "the best"
NP-span [7,8]: "Albert Einstein"
VP-span [9,10,11,12]: "used to be employed"
PP-span [13]: "at"
NP-span [14,15]: "Novetta Solutions"
NP-span [17,18,19]: "The Wright brothers"
VP-span [20,21,22]: "loved to visit"
NP-span [23,24,25]: "the JBF headquarters"
NP-span [28]: "they"
VP-span [29,30]: "would have"
NP-span [31,32]: "a chat"
PP-span [33]: "with"
NP-span [34]: "Albert"


## Frame

In [14]:
# Tag the example text with the "frame" model.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="frame",
)

2020-02-14 07:05:45,281 loading file /home/ubuntu/.flair/models/en-frame-ontonotes-v0.4.pt


In [15]:
# Show the results: print the tagged-string form of each sentence.
print("List string outputs of tags:\n")
for sentence in sentences:
    tagged_string = sentence.to_tagged_string()
    print(tagged_string)

List string outputs of tags:

Novetta <_> Solutions <_> is <be.01> the <_> best <_> . <_> Albert <_> Einstein <_> used <use.03> to <_> be <be.03> employed <employ.01> at <_> Novetta <_> Solutions <_> . <_> The <_> Wright <_> brothers <_> loved <love.02> to <_> visit <visit.01> the <_> JBF <_> headquarters <_> , <_> and <_> they <_> would <_> have <have.03> a <_> chat <chat.01> with <_> Albert <_> . <_>


## Fast Sequence Labeling
#### Make sure to check out Flair's "fast", CPU-friendly models, which produce results similar to the state-of-the-art (SOTA) models.

### NER

In [16]:
# Tag the example text with the "ner-ontonotes-fast" model.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="ner-ontonotes-fast",
)

2020-02-14 07:05:46,098 loading file /home/ubuntu/.flair/models/en-ner-ontonotes-fast-v0.4.pt


In [17]:
# Print every span returned for the "ner" label type, sentence by sentence.
print("List entities tagged:\n")
for sentence in sentences:
    ner_spans = sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [7,8]: "Albert Einstein"
ORG-span [14,15]: "Novetta Solutions"
PERSON-span [18]: "Wright"
ORG-span [24]: "JBF"
PERSON-span [34]: "Albert"


## Tag Tokens with All Loaded Models At Once

#### After tagging the text in the previous cells, the tagger now has multiple models loaded.
#### This means that a single call to `tag_all(text)` tags the text with all of the loaded models at once.

In [18]:
# Tag the example text with a single tag_all call on the tagger and keep the
# result for the inspection cells.
sentences = tagger.tag_all(
    example_text,
)

In [19]:
# Print every span returned for the "ner" label type, sentence by sentence.
print("List entities tagged:\n")
for sentence in sentences:
    ner_spans = sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [7,8]: "Albert Einstein"
ORG-span [14,15]: "Novetta Solutions"
PERSON-span [18]: "Wright"
ORG-span [24]: "JBF"
PERSON-span [34]: "Albert"


In [20]:
# Print every span returned for the "pos" label type, sentence by sentence.
print("List entities tagged:\n")
for sentence in sentences:
    pos_spans = sentence.get_spans("pos")
    for span in pos_spans:
        print(span)

List entities tagged:

PROPN-span [1]: "Novetta"
PROPN-span [2]: "Solutions"
VERB-span [3]: "is"
DET-span [4]: "the"
ADJ-span [5]: "best"
PUNCT-span [6]: "."
PROPN-span [7]: "Albert"
PROPN-span [8]: "Einstein"
VERB-span [9]: "used"
PART-span [10]: "to"
VERB-span [11]: "be"
VERB-span [12]: "employed"
ADP-span [13]: "at"
PROPN-span [14]: "Novetta"
PROPN-span [15]: "Solutions"
PUNCT-span [16]: "."
DET-span [17]: "The"
PROPN-span [18]: "Wright"
NOUN-span [19]: "brothers"
VERB-span [20]: "loved"
PART-span [21]: "to"
VERB-span [22]: "visit"
DET-span [23]: "the"
PROPN-span [24]: "JBF"
NOUN-span [25]: "headquarters"
PUNCT-span [26]: ","
CCONJ-span [27]: "and"
PRON-span [28]: "they"
AUX-span [29]: "would"
VERB-span [30]: "have"
DET-span [31]: "a"
NOUN-span [32]: "chat"
ADP-span [33]: "with"
PROPN-span [34]: "Albert"
PUNCT-span [35]: "."


In [21]:
# Print every span returned for the "np" label type, sentence by sentence.
print("List entities tagged:\n")
for sentence in sentences:
    chunk_spans = sentence.get_spans("np")
    for span in chunk_spans:
        print(span)

List entities tagged:

NP-span [1,2]: "Novetta Solutions"
VP-span [3]: "is"
NP-span [4,5]: "the best"
NP-span [7,8]: "Albert Einstein"
VP-span [9,10,11,12]: "used to be employed"
PP-span [13]: "at"
NP-span [14,15]: "Novetta Solutions"
NP-span [17,18,19]: "The Wright brothers"
VP-span [20,21,22]: "loved to visit"
NP-span [23,24,25]: "the JBF headquarters"
NP-span [28]: "they"
VP-span [29,30]: "would have"
NP-span [31,32]: "a chat"
PP-span [33]: "with"
NP-span [34]: "Albert"
