In [1]:
from adaptnlp import EasyTokenTagger

# Examples of using EasyTokenTagger

In [2]:
# Sample passage shared by every tagging example below, plus one tagger instance.
# The first line of the passage ends with a space before the newline; it is spelled
# out explicitly so the value survives editors that strip trailing whitespace.
example_text = (
    "Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions. \n"
    "The Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert."
)
tagger = EasyTokenTagger()

## Named Entity Recognition

In [3]:
# Run the "ner-ontonotes" model over the passage; required models are downloaded
# and loaded dynamically when a `tag_***` method is called.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="ner-ontonotes",
)

2019-12-17 04:23:20,930 loading file /home/andrew/.flair/models/en-ner-ontonotes-v0.4.pt


In [4]:
# Show each result with its tags written inline next to the tokens
print("List string outputs of tags:\n")
for tagged_sentence in sentences:
    tagged_line = tagged_sentence.to_tagged_string()
    print(tagged_line)

List string outputs of tags:

Novetta <B-ORG> Solutions <E-ORG> is the best. Albert <B-PERSON> Einstein <E-PERSON> used to be employed at Novetta <B-ORG> Solutions. <E-ORG> 
The Wright <S-PERSON> brothers loved to visit the JBF <S-ORG> headquarters, and they would have a chat with Albert. <S-PERSON>


In [5]:
# Print every span stored under the "ner" tag type, one per line
print("List entities tagged:\n")
for tagged_sentence in sentences:
    ner_spans = tagged_sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [6,7]: "Albert Einstein"
ORG-span [13,14]: "Novetta Solutions."
PERSON-span [16]: "Wright"
ORG-span [22]: "JBF"
PERSON-span [31]: "Albert."


In [6]:
# Dump each result as a dictionary restricted to the "ner" tag type
print("Get json of tagged information:\n")
for tagged_sentence in sentences:
    ner_info = tagged_sentence.to_dict(tag_type="ner")
    print(ner_info, "\n")

Get json of tagged information:

{'text': 'Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions. \nThe Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert.', 'labels': [], 'entities': [{'text': 'Novetta Solutions', 'start_pos': 0, 'end_pos': 17, 'type': 'ORG', 'confidence': 0.9724629521369934}, {'text': 'Albert Einstein', 'start_pos': 31, 'end_pos': 46, 'type': 'PERSON', 'confidence': 0.9977981746196747}, {'text': 'Novetta Solutions.', 'start_pos': 70, 'end_pos': 88, 'type': 'ORG', 'confidence': 0.9176564514636993}, {'text': 'Wright', 'start_pos': 94, 'end_pos': 100, 'type': 'PERSON', 'confidence': 0.9994158744812012}, {'text': 'JBF', 'start_pos': 129, 'end_pos': 132, 'type': 'ORG', 'confidence': 0.9093995690345764}, {'text': 'Albert.', 'start_pos': 179, 'end_pos': 186, 'type': 'PERSON', 'confidence': 0.6420339345932007}]} 



## Parts of Speech

In [7]:
# Re-tag the same passage, this time with the "pos" model
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="pos",
)

2019-12-17 04:23:25,198 loading file /home/andrew/.flair/models/en-pos-ontonotes-v0.4.pt


In [8]:
# Show the passage with each token's "pos" tag written inline
print("List string outputs of tags:\n")
for tagged_sentence in sentences:
    tagged_line = tagged_sentence.to_tagged_string()
    print(tagged_line)

List string outputs of tags:

Novetta <PROPN> Solutions <PROPN> is <VERB> the <DET> best. <ADJ> Albert <PROPN> Einstein <PROPN> used <VERB> to <PART> be <VERB> employed <VERB> at <ADP> Novetta <PROPN> Solutions. <PROPN> 
The <DET> Wright <PROPN> brothers <NOUN> loved <VERB> to <PART> visit <VERB> the <DET> JBF <PROPN> headquarters, <NOUN> and <CCONJ> they <PRON> would <AUX> have <VERB> a <DET> chat <NOUN> with <ADP> Albert. <PROPN>


In [9]:
# Print every span stored under the "pos" tag type, one per line
print("List text/entities tagged:\n")
for tagged_sentence in sentences:
    pos_spans = tagged_sentence.get_spans("pos")
    for span in pos_spans:
        print(span)

List text/entities tagged:

PROPN-span [1]: "Novetta"
PROPN-span [2]: "Solutions"
VERB-span [3]: "is"
DET-span [4]: "the"
ADJ-span [5]: "best."
PROPN-span [6]: "Albert"
PROPN-span [7]: "Einstein"
VERB-span [8]: "used"
PART-span [9]: "to"
VERB-span [10]: "be"
VERB-span [11]: "employed"
ADP-span [12]: "at"
PROPN-span [13]: "Novetta"
PROPN-span [14]: "Solutions."
DET-span [15]: "
The"
PROPN-span [16]: "Wright"
NOUN-span [17]: "brothers"
VERB-span [18]: "loved"
PART-span [19]: "to"
VERB-span [20]: "visit"
DET-span [21]: "the"
PROPN-span [22]: "JBF"
NOUN-span [23]: "headquarters,"
CCONJ-span [24]: "and"
PRON-span [25]: "they"
AUX-span [26]: "would"
VERB-span [27]: "have"
DET-span [28]: "a"
NOUN-span [29]: "chat"
ADP-span [30]: "with"
PROPN-span [31]: "Albert."


In [10]:
# Dump each result as a dictionary restricted to the "pos" tag type
print("Get json of tagged information:\n")
for tagged_sentence in sentences:
    pos_info = tagged_sentence.to_dict(tag_type="pos")
    print(pos_info, "\n")

Get json of tagged information:

{'text': 'Novetta Solutions is the best. Albert Einstein used to be employed at Novetta Solutions. \nThe Wright brothers loved to visit the JBF headquarters, and they would have a chat with Albert.', 'labels': [], 'entities': [{'text': 'Novetta', 'start_pos': 0, 'end_pos': 7, 'type': 'PROPN', 'confidence': 0.9914647936820984}, {'text': 'Solutions', 'start_pos': 8, 'end_pos': 17, 'type': 'PROPN', 'confidence': 0.9967130422592163}, {'text': 'is', 'start_pos': 18, 'end_pos': 20, 'type': 'VERB', 'confidence': 0.9999997615814209}, {'text': 'the', 'start_pos': 21, 'end_pos': 24, 'type': 'DET', 'confidence': 0.9999990463256836}, {'text': 'best.', 'start_pos': 25, 'end_pos': 30, 'type': 'ADJ', 'confidence': 0.6421029567718506}, {'text': 'Albert', 'start_pos': 31, 'end_pos': 37, 'type': 'PROPN', 'confidence': 0.9979984164237976}, {'text': 'Einstein', 'start_pos': 38, 'end_pos': 46, 'type': 'PROPN', 'confidence': 0.9998689889907837}, {'text': 'used', 'start_pos':

## Chunk

In [11]:
# Re-tag the same passage with the "chunk" model
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="chunk",
)

2019-12-17 04:23:26,424 loading file /home/andrew/.flair/models/en-chunk-conll2000-v0.4.pt


In [12]:
# Show the passage with the chunk tags written inline next to the tokens
print("List string outputs of tags:\n")
for tagged_sentence in sentences:
    tagged_line = tagged_sentence.to_tagged_string()
    print(tagged_line)

List string outputs of tags:

Novetta <B-NP> Solutions <E-NP> is <S-VP> the <B-NP> best. <I-NP> Albert <I-NP> Einstein <E-NP> used <B-VP> to <I-VP> be <I-VP> employed <E-VP> at <S-PP> Novetta <B-NP> Solutions. <E-NP> 
The <B-NP> Wright <I-NP> brothers <E-NP> loved <B-VP> to <I-VP> visit <E-VP> the <B-NP> JBF <I-NP> headquarters, <E-NP> and they <S-NP> would <B-VP> have <E-VP> a <B-NP> chat <E-NP> with <S-PP> Albert. <S-NP>


In [13]:
# Print every span stored under the "np" tag type, one per line
print("List text/entities tagged:\n")
for tagged_sentence in sentences:
    chunk_spans = tagged_sentence.get_spans("np")
    for span in chunk_spans:
        print(span)

List text/entities tagged:

NP-span [1,2]: "Novetta Solutions"
VP-span [3]: "is"
NP-span [4,5,6,7]: "the best. Albert Einstein"
VP-span [8,9,10,11]: "used to be employed"
PP-span [12]: "at"
NP-span [13,14]: "Novetta Solutions."
NP-span [15,16,17]: "
The Wright brothers"
VP-span [18,19,20]: "loved to visit"
NP-span [21,22,23]: "the JBF headquarters,"
NP-span [25]: "they"
VP-span [26,27]: "would have"
NP-span [28,29]: "a chat"
PP-span [30]: "with"
NP-span [31]: "Albert."


## Frame

In [14]:
# Re-tag the same passage with the "frame" model
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="frame",
)

2019-12-17 04:23:26,847 loading file /home/andrew/.flair/models/en-frame-ontonotes-v0.4.pt


In [15]:
# Show the passage with the frame tags written inline next to the tokens
print("List string outputs of tags:\n")
for tagged_sentence in sentences:
    tagged_line = tagged_sentence.to_tagged_string()
    print(tagged_line)

List string outputs of tags:

Novetta <_> Solutions <_> is <be.01> the <_> best. <_> Albert <_> Einstein <_> used <use.03> to <_> be <be.03> employed <employ.01> at <_> Novetta <_> Solutions. <_> 
The <_> Wright <_> brothers <_> loved <love.02> to <_> visit <visit.01> the <_> JBF <_> headquarters, <_> and <_> they <_> would <_> have <have.03> a <_> chat <chat.01> with <_> Albert. <_>


## Fast Sequence Labeling
#### This is for those who want to run faster sequence labeling with similar accuracy. Everything is done the same as above; just pass a `-fast` model name (e.g. `ner-ontonotes-fast`) to `tag_text` on the same `EasyTokenTagger` instance instead of the standard model name.

### NER

In [16]:
# Same call as the earlier NER example, but selecting the "-fast" variant of the
# model by name; required models are downloaded and loaded dynamically on use.
sentences = tagger.tag_text(
    text=example_text,
    model_name_or_path="ner-ontonotes-fast",
)

2019-12-17 04:23:27,367 loading file /home/andrew/.flair/models/en-ner-ontonotes-fast-v0.4.pt


In [17]:
# Print every span stored under the "ner" tag type after the fast-model run
print("List entities tagged:\n")
for tagged_sentence in sentences:
    ner_spans = tagged_sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [6,7]: "Albert Einstein"
ORG-span [13,14]: "Novetta Solutions."
PERSON-span [16]: "Wright"
ORG-span [22]: "JBF"


## Tag Tokens with All Loaded Models At Once

In [18]:
# Tag the passage with all models loaded so far (per this section's heading); the
# "ner", "pos" and "np" tag types are read back with get_spans(...) in the next cells.
sentences = tagger.tag_all(example_text)

In [19]:
# From the tag_all result, print the spans stored under the "ner" tag type
print("List entities tagged:\n")
for tagged_sentence in sentences:
    ner_spans = tagged_sentence.get_spans("ner")
    for span in ner_spans:
        print(span)

List entities tagged:

ORG-span [1,2]: "Novetta Solutions"
PERSON-span [6,7]: "Albert Einstein"
ORG-span [13,14]: "Novetta Solutions."
PERSON-span [16]: "Wright"
ORG-span [22]: "JBF"


In [20]:
# From the tag_all result, print the spans stored under the "pos" tag type
print("List entities tagged:\n")
for tagged_sentence in sentences:
    pos_spans = tagged_sentence.get_spans("pos")
    for span in pos_spans:
        print(span)

List entities tagged:

PROPN-span [1]: "Novetta"
PROPN-span [2]: "Solutions"
VERB-span [3]: "is"
DET-span [4]: "the"
ADJ-span [5]: "best."
PROPN-span [6]: "Albert"
PROPN-span [7]: "Einstein"
VERB-span [8]: "used"
PART-span [9]: "to"
VERB-span [10]: "be"
VERB-span [11]: "employed"
ADP-span [12]: "at"
PROPN-span [13]: "Novetta"
PROPN-span [14]: "Solutions."
DET-span [15]: "
The"
PROPN-span [16]: "Wright"
NOUN-span [17]: "brothers"
VERB-span [18]: "loved"
PART-span [19]: "to"
VERB-span [20]: "visit"
DET-span [21]: "the"
PROPN-span [22]: "JBF"
NOUN-span [23]: "headquarters,"
CCONJ-span [24]: "and"
PRON-span [25]: "they"
AUX-span [26]: "would"
VERB-span [27]: "have"
DET-span [28]: "a"
NOUN-span [29]: "chat"
ADP-span [30]: "with"
PROPN-span [31]: "Albert."


In [21]:
# From the tag_all result, print the spans stored under the "np" tag type
print("List entities tagged:\n")
for tagged_sentence in sentences:
    chunk_spans = tagged_sentence.get_spans("np")
    for span in chunk_spans:
        print(span)

List entities tagged:

NP-span [1,2]: "Novetta Solutions"
VP-span [3]: "is"
NP-span [4,5,6,7]: "the best. Albert Einstein"
VP-span [8,9,10,11]: "used to be employed"
PP-span [12]: "at"
NP-span [13,14]: "Novetta Solutions."
NP-span [15,16,17]: "
The Wright brothers"
VP-span [18,19,20]: "loved to visit"
NP-span [21,22,23]: "the JBF headquarters,"
NP-span [25]: "they"
VP-span [26,27]: "would have"
NP-span [28,29]: "a chat"
PP-span [30]: "with"
NP-span [31]: "Albert."
