# Load transformer model

In [2]:
import spacy
nlp_trf = spacy.load("en_core_web_trf")

nlp_bg = spacy.load("en_core_web_lg")


  from .autonotebook import tqdm as notebook_tqdm


# Pass text through model

In [3]:
text = 'I love paris. There are 10245565 parisians each of whom weigh 61.08348 kgs on average. The average radius is 143.8957349182391 cm.'
 
doc_trf = nlp_trf(text)
doc_bg = nlp_bg(text)

# Print Tokens

In [4]:
list_trf = []
list_bg = []

#print tokens
for token in doc_trf:
    print(token.text)
    list_trf.append(token.text)

print('#############')

#print tokens
for token in doc_bg:
    print(token.text)
    list_bg.append(token.text)
    
print(len(list_trf), len(list_bg))

#check if the two lists are same
print(list_trf == list_bg)


I
love
paris
.
There
are
10245565
parisians
each
of
whom
weigh
61.08348
kgs
on
average
.
The
average
radius
is
143.8957349182391
cm
.
#############
I
love
paris
.
There
are
10245565
parisians
each
of
whom
weigh
61.08348
kgs
on
average
.
The
average
radius
is
143.8957349182391
cm
.
24 24
True


# Print entities detected

In [5]:
for ent in doc_trf.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

paris 7 12 NORP
10245565 24 32 CARDINAL
parisians 33 42 NORP
61.08348 kgs 62 74 QUANTITY
143.8957349182391 cm 109 129 QUANTITY


# Load libraries

In [6]:
import transformers
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Load model and tokenizer

In [7]:
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-large-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-large-NER")

# Pass example input

In [8]:
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
example = "My name is Wolfgang Schaffer and I live in Berlin"

ner_results = nlp(example)

# Analyze outputs

In [9]:
#print tokens
for token in ner_results:
    print(token)
    print(token['word'], token['entity'])

{'entity': 'B-PER', 'score': 0.99859005, 'index': 4, 'word': 'Wolfgang', 'start': 11, 'end': 19}
Wolfgang B-PER
{'entity': 'I-PER', 'score': 0.99945515, 'index': 5, 'word': 'Sc', 'start': 20, 'end': 22}
Sc I-PER
{'entity': 'I-PER', 'score': 0.9952127, 'index': 6, 'word': '##ha', 'start': 22, 'end': 24}
##ha I-PER
{'entity': 'I-PER', 'score': 0.9401913, 'index': 7, 'word': '##ffer', 'start': 24, 'end': 28}
##ffer I-PER
{'entity': 'B-LOC', 'score': 0.99870574, 'index': 12, 'word': 'Berlin', 'start': 43, 'end': 49}
Berlin B-LOC


In [10]:
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-large-NER")

In [11]:
print(type(model))

<class 'transformers.models.bert.modeling_bert.BertForTokenClassification'>


In [12]:
print(type(model.classifier))

<class 'torch.nn.modules.linear.Linear'>


In [13]:
print(model.classifier)

Linear(in_features=1024, out_features=9, bias=True)


In [16]:
import torch.nn as nn

model.classifier = nn.Linear(model.classifier.in_features, 25)

In [17]:
print(model.classifier)

Linear(in_features=1024, out_features=25, bias=True)


In [19]:
print(model.config.num_labels)

9
