In [None]:
# default_exp data.token_classification

In [None]:
#hide
# development-only setup: auto-reload edited modules before running code, and render plots inline
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# data.token_classification

> This module contains the bits required to use the fastai DataBlock API and/or mid-level data processing pipelines to organize your data for token classification tasks (e.g., NER or named entity recognition, etc...).

In [None]:
#export
import ast
from functools import reduce

import torch
from transformers import *
from fastai.text.all import *

from blurr.utils import *
from blurr.data.core import *

In [None]:
#hide
import pdb

from nbdev.showdoc import *
from fastcore.test import *

In [None]:
#cuda
# Prefer GPU #1 (the device this notebook was authored on) but fall back to GPU #0
# when fewer than two GPUs are visible, instead of failing on an invalid device index.
gpu_idx = 1 if torch.cuda.device_count() > 1 else 0
torch.cuda.set_device(gpu_idx)
print(f'Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}')

Using GPU #1: GeForce GTX 1080 Ti


## Token classification tokenization, batch transform, and DataBlock methods

Token classification tasks attempt to predict a class for each token.  The idea is similar to that in image segmentation models where the objective is to predict a class for each pixel.  Such models are common in building named entity recognition (NER) systems.

In [None]:
# these columns hold lists serialized as strings in the csv; parse them back into Python lists on load
list_cols = ('tokens', 'labels', 'nested-labels')
df_converters = {col: ast.literal_eval for col in list_cols}

path = Path('./')
germ_eval_df = pd.read_csv(path/'germeval2014_sample.csv', converters=df_converters)
len(germ_eval_df)

1000

In [None]:
#hide
# for idx, el in germ_eval_df.iterrows():
#     print (el['tokens'])
#     print (el['labels'])
#     print('---------------')

In [None]:
# the label vocabulary: every distinct label found across all examples, in sorted order
unique_labels = {lbl for example_labels in germ_eval_df.labels.tolist() for lbl in example_labels}
labels = sorted(unique_labels)
print(labels)

['B-LOC', 'B-LOCderiv', 'B-LOCpart', 'B-ORG', 'B-ORGpart', 'B-OTH', 'B-OTHderiv', 'B-OTHpart', 'B-PER', 'B-PERderiv', 'B-PERpart', 'I-LOC', 'I-LOCderiv', 'I-ORG', 'I-ORGpart', 'I-OTH', 'I-PER', 'O']


In [None]:
task = HF_TASKS_AUTO.TokenClassification
pretrained_model_name = "bert-base-multilingual-cased"

# the model config needs one output class per label in the vocabulary
n_labels = len(labels)

hf_objects = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name, task=task, config_kwargs={'num_labels': n_labels})
hf_arch, hf_config, hf_tokenizer, hf_model = hf_objects

hf_arch, type(hf_config), type(hf_tokenizer), type(hf_model)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

('bert',
 transformers.configuration_bert.BertConfig,
 transformers.tokenization_bert.BertTokenizer,
 transformers.modeling_bert.BertForTokenClassification)

Below, we define a new class and transform for token classification targets/predictions.

In [None]:
#export
class HF_TokenTensorCategory(TensorBase):
    "Tensor type for per-token category ids, as returned by `HF_TokenCategorize.encodes`"

In [None]:
#export
class HF_TokenCategorize(Transform):
    "Reversible transform of a list of per-token category strings to `vocab` ids"
    
    def __init__(self, vocab=None, ignore_token=None, ignore_token_id=None):  
        # `vocab`: the category strings; when None it is built from the data in `setups`
        # `ignore_token`: placeholder string for masked positions (defaults to '[xIGNx]')
        # `ignore_token_id`: id written for masked positions (defaults to the loss function's `ignore_index`)
        self.vocab = None if vocab is None else CategoryMap(vocab)
        self.ignore_token = '[xIGNx]' if ignore_token is None else ignore_token
        self.ignore_token_id = CrossEntropyLossFlat().ignore_index if ignore_token_id is None else ignore_token_id
        
        # the loss must skip the same id that `encodes` writes for masked sub-tokens
        self.loss_func, self.order = CrossEntropyLossFlat(ignore_index=self.ignore_token_id), 1

    def setups(self, dsets):
        "Build `vocab` from `dsets` when none was supplied and record the number of classes in `c`"
        if self.vocab is None and dsets is not None: self.vocab = CategoryMap(dsets)
        self.c = len(self.vocab)

    def encodes(self, labels):
        "Turn `(label, n_subtoks)` pairs into one id per sub-token; only the first sub-token keeps the label id"
        ids = []
        for lbl, n_subtoks in labels:
            # an entity that produced no sub-tokens occupies no input position, so emitting an id
            # for it would shift every following label out of alignment with the inputs
            if n_subtoks < 1: continue
            ids.append(self.vocab.o2i[lbl])
            ids.extend([self.ignore_token_id] * (n_subtoks - 1))
        # built by direct accumulation, so an empty `labels` yields an empty tensor instead of raising
        return HF_TokenTensorCategory(ids)
    
    def decodes(self, encoded_labels): 
        "Map ids back to their category strings, dropping every position that holds `ignore_token_id`"
        return Category([(self.vocab[lbl_id]) for lbl_id in encoded_labels if lbl_id != self.ignore_token_id ])

`HF_TokenCategorize` modifies the fastai `Categorize` transform in a couple of ways.  First, it allows your targets to consist of a `Category` ***per*** token, and second, it uses the idea of an `ignore_token` to mask subtokens that don't need a prediction.  For example, the targets of special tokens (e.g., pad, cls, sep) are set to the `ignore_token` id, as are the targets of every sub-token after the first when a token is split into more than one sub-token.

In [None]:
#export
def HF_TokenCategoryBlock(vocab=None, ignore_token=None, ignore_token_id=None):
    "`TransformBlock` for per-token categorical targets, built on `HF_TokenCategorize`"
    token_categorize = HF_TokenCategorize(vocab=vocab, ignore_token=ignore_token, ignore_token_id=ignore_token_id)
    return TransformBlock(type_tfms=token_categorize)

In [None]:
# render the documentation entry (signature and docstring) for `HF_TokenCategoryBlock`
show_doc(HF_TokenCategoryBlock)

<h4 id="HF_TokenCategoryBlock" class="doc_header"><code>HF_TokenCategoryBlock</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>HF_TokenCategoryBlock</code>(**`vocab`**=*`None`*, **`ignore_token`**=*`None`*, **`ignore_token_id`**=*`None`*)

`TransformBlock` for single-label categorical targets

Again, we define a custom class, `HF_TokenClassInput`, for the @typedispatched methods to use so that we can override how token classification inputs/targets are assembled, as well as, how the data is shown via methods like `show_batch` and `show_results`.

In [None]:
#export
class HF_TokenClassInput(list):
    "Marker `list` subclass that lets `@typedispatch`ed methods (e.g. `show_batch`) target token classification inputs"

In [None]:
#export
class HF_TokenClassBatchTransform(HF_BatchTransform):
    "Batch transform that re-aligns per-token targets with the tokenized inputs, masking special-token positions"
    
    def __init__(self, hf_arch, hf_tokenizer, ignore_token_id=None, **kwargs):
        super().__init__(hf_arch, hf_tokenizer, HF_TokenClassInput, **kwargs)
        # id written to every target position that must not contribute to the loss
        self.ignore_token_id = CrossEntropyLossFlat().ignore_index if ignore_token_id is None else ignore_token_id
        
    def encodes(self, samples):  
        "Return `samples` with each target rebuilt to line up, position by position, with `special_tokens_mask`"
        samples = super().encodes(samples)
        # samples without a target (inputs only) need no alignment
        if (len(samples[0]) == 1): return samples
        
        target_cls = type(samples[0][1])
        updated_samples = []
        
        # we assume that first target = the categories we want to predict for each token
        for s in samples:
            targ_len = len(s[1])
            special_mask = s[0]['special_tokens_mask'].tolist()
            # position of the first real (non-special) token; when there is none, every position is masked
            idx_first_input_id = special_mask.index(0) if 0 in special_mask else len(special_mask)
            
            targ_ids = []
            for idx, is_special in enumerate(special_mask):
                # offset into the target, measured from the first real token so that left-padded
                # inputs (padding *before* the text) line up just like right-padded ones
                targ_idx = idx - idx_first_input_id
                if is_special == 1 or targ_idx < 0 or targ_idx >= targ_len:
                    targ_ids.append(self.ignore_token_id)
                else:
                    targ_ids.append(s[1][targ_idx].item())

            updated_samples.append((s[0], target_cls(targ_ids)))
        
        return updated_samples

`HF_TokenClassBatchTransform` is used to set any targets we don't want to include in the loss calculation (e.g. padding, cls, sep, etc...) to `ignore_token_id`.

In [None]:
hf_batch_tfm = HF_TokenClassBatchTransform(hf_arch, hf_tokenizer)

# inputs: pre-tokenized text; the special tokens mask is requested because the batch transform reads it
text_block = HF_TextBlock(hf_arch, hf_tokenizer, is_pretokenized=True,
                          hf_batch_tfm=hf_batch_tfm,
                          tok_kwargs={ 'return_special_tokens_mask': True })

# targets: one category per token, drawn from `labels`
token_category_block = HF_TokenCategoryBlock(vocab=labels)

blocks = (text_block, token_category_block)

def get_y(inp):
    "Pair every label in `inp.labels` with the number of sub-tokens `hf_tokenizer` produces for its entity"
    targets = []
    for entity, label in zip(inp.tokens, inp.labels):
        n_subtoks = len(hf_tokenizer.tokenize(str(entity)))
        targets.append((label, n_subtoks))
    return targets

# inputs come from the `tokens` column; targets are the (label, n_subtoks) pairs built by `get_y`
dblock = DataBlock(blocks=blocks, 
                   get_x=ColReader('tokens'),
                   get_y=get_y,
                   splitter=RandomSplitter())

Note in the example above we had to define a `get_y` that returns, for each entity, both the label we want to predict and the number of subtokens the `hf_tokenizer` uses to represent that entity.  This is necessary for the input/target alignment discussed above.

In [None]:
# dblock.summary(test_df)

In [None]:
# build the DataLoaders from the sample dataframe with a batch size of 4
dls = dblock.dataloaders(germ_eval_df, bs=4)

In [None]:
# grab a single batch so its structure can be inspected
b = dls.one_batch()

In [None]:
# number of batch elements, plus the shapes of the input ids and of the targets
len(b), b[0]['input_ids'].shape, b[1].shape

(2, torch.Size([4, 27]), torch.Size([4, 27]))

In [None]:
#export
@typedispatch
def show_batch(x:HF_TokenClassInput, y, samples, dataloaders=None, ctxs=None, max_n=6, **kwargs):  
    "Display up to `max_n` rows of (token, target label) pairs for a token classification batch"
    tokenizer = dataloaders.valid.hf_tokenizer
    
    rows = L()
    for input_ids, _, sample in zip(x[0], y, samples):
        # rebuild the text from the ids (special tokens dropped) and split on whitespace to recover the tokens
        subtoks = tokenizer.convert_ids_to_tokens(input_ids, skip_special_tokens=True)
        words = tokenizer.convert_tokens_to_string(subtoks).split()

        # the sample's target is parsed with `ast.literal_eval` to recover the label list before pairing
        sample_labels = ast.literal_eval(sample[1])
        token_label_pairs = list(zip(words, sample_labels))
        rows.append([f'{token_label_pairs}'])
        
    batch_df = pd.DataFrame(rows, columns=['token / target label'])
    display_df(batch_df[:max_n])
    return ctxs

In [None]:
# `dataloaders` is passed explicitly because `show_batch` reads the tokenizer from `dataloaders.valid.hf_tokenizer`
dls.show_batch(dataloaders=dls, max_n=2)

Unnamed: 0,token / target label
0,"[('Josef', 'B-PER'), ('Hlávka', 'I-PER'), ('war', 'O'), ('eine', 'O'), ('Zentralgestalt', 'O'), ('des', 'O'), ('kulturellen', 'O'), ('Lebens', 'O'), ('der', 'O'), ('Tschechen', 'B-LOCderiv'), ('im', 'O'), ('ausgehenden', 'O'), ('19', 'O'), ('.', 'O'), ('Jahrhundert', 'O')]"
1,"[('Ich', 'O'), ('finde', 'O'), (',', 'O'), ('durch', 'O'), ('externe', 'O'), ('Aktionen', 'O'), (',', 'O'), ('hier', 'O'), ('eher', 'O'), ('Worte', 'O'), ('wie', 'O'), ('""', 'O'), ('üble', 'O'), ('Nachrede', 'O'), ('oder', 'O'), ('Geschäftsschädigung', 'O'), ('""', 'O'), ('angebracht', 'O'), ('!', 'O')]"


## Tests

The tests below ensure the core DataBlock code above works for **all** pretrained token classification models available in huggingface.  These tests are excluded from the CI workflow because of how long they would take to run and the amount of data that would be required to download.

**Note**: Feel free to modify the code below to test whatever pretrained classification models you are working with ... and if any of your pretrained token classification models fail, please submit a github issue *(or a PR if you'd like to fix it yourself)*

In [None]:
# list the model classes `BLURR_MODEL_HELPER` reports for the token classification task
BLURR_MODEL_HELPER.get_models(task='TokenClassification')

[transformers.modeling_albert.AlbertForTokenClassification,
 transformers.modeling_auto.AutoModelForTokenClassification,
 transformers.modeling_bert.BertForTokenClassification,
 transformers.modeling_camembert.CamembertForTokenClassification,
 transformers.modeling_distilbert.DistilBertForTokenClassification,
 transformers.modeling_electra.ElectraForTokenClassification,
 transformers.modeling_longformer.LongformerForTokenClassification,
 transformers.modeling_mobilebert.MobileBertForTokenClassification,
 transformers.modeling_roberta.RobertaForTokenClassification,
 transformers.modeling_xlm.XLMForTokenClassification,
 transformers.modeling_xlm_roberta.XLMRobertaForTokenClassification,
 transformers.modeling_xlnet.XLNetForTokenClassification]

In [None]:
# pretrained checkpoints exercised by the test loop below
pretrained_model_names = [
    'albert-base-v1',
    'bert-base-multilingual-cased',
    'camembert-base',
    'distilbert-base-uncased',
    'monologg/electra-small-finetuned-imdb',
    'allenai/longformer-base-4096',
    'google/mobilebert-uncased',
    'roberta-base',
    'xlm-mlm-en-2048',
    'xlm-roberta-base',
    'xlnet-base-cased'
]

In [None]:
#slow
#hide_output
task = HF_TASKS_AUTO.TokenClassification
test_results = []

def get_y(inp, tokenizer):
    "Pair every label in `inp.labels` with the number of sub-tokens `tokenizer` produces for its entity"
    return [ (label, len(tokenizer.tokenize(str(entity)))) for entity, label in zip(inp.tokens, inp.labels) ]

for model_name in pretrained_model_names:
    print(f'=== {model_name} ===\n')
    
    hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(model_name, task=task)
    
    print(f'architecture:\t{hf_arch}\ntokenizer:\t{type(hf_tokenizer).__name__}\n')
    
    hf_batch_tfm = HF_TokenClassBatchTransform(hf_arch, hf_tokenizer)

    blocks = (
        HF_TextBlock(hf_arch, hf_tokenizer, is_pretokenized=True, padding='max_length', max_length=128,
                     hf_batch_tfm=hf_batch_tfm,
                     tok_kwargs={ 'return_special_tokens_mask': True }), 
        HF_TokenCategoryBlock(vocab=labels)
    )

    # bind this iteration's tokenizer as a default argument so the getter reuses the `get_y` helper
    # and does not depend on the loop variable being looked up later
    dblock = DataBlock(blocks=blocks, 
                       get_x=ColReader('tokens'),
                       get_y=lambda inp, tokenizer=hf_tokenizer: get_y(inp, tokenizer),
                       splitter=RandomSplitter())
    
    try:
        print('*** TESTING DataLoaders ***\n')
        
        # built inside the `try` so a model whose data pipeline fails is recorded as FAILED
        # instead of aborting the whole sweep
        dls = dblock.dataloaders(germ_eval_df, bs=4)
        b = dls.one_batch()
        
        test_eq(len(b), 2)
        test_eq(len(b[0]['input_ids']), 4)
        test_eq(b[0]['input_ids'].shape, torch.Size([4, 128]))
        test_eq(len(b[1]), 4)

        if (hasattr(hf_tokenizer, 'add_prefix_space')):
            test_eq(dls.tfms[0].kwargs['add_prefix_space'], True)

        dls.show_batch(dataloaders=dls, max_n=2)
        
        # record PASSED only once every check (including `show_batch`) has succeeded, so a model
        # can never end up with both a PASSED and a FAILED row
        test_results.append((hf_arch, type(hf_tokenizer).__name__, model_name, 'PASSED', ''))
        
    except Exception as err:
        test_results.append((hf_arch, type(hf_tokenizer).__name__, model_name, 'FAILED', err))

=== albert-base-v1 ===



Some weights of the model checkpoint at albert-base-v1 were not used when initializing AlbertForTokenClassification: ['predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.decoder.bias']
- This IS expected if you are initializing AlbertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing AlbertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at albert-base-v1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably

architecture:	albert
tokenizer:	AlbertTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('beschreibung', 'O'), ('rostow', 'B-PER'), ('beschreibt', 'O'), ('in', 'O'), ('seinem', 'O'), ('buch', 'O'), ('eine', 'O'), ('systematische', 'O'), ('abfolge', 'O'), ('von', 'O'), ('funf', 'O'), ('stufen', 'O'), ('in', 'O'), ('der', 'O'), ('wirtschaftlichen', 'O'), ('entwicklung', 'O'), ('.', 'O')]"
1,"[('spater', 'O'), ('widmete', 'O'), ('er', 'O'), ('sich', 'O'), ('mehr', 'O'), ('und', 'O'), ('mehr', 'O'), ('der', 'O'), ('landwirtschaft', 'O'), ('.', 'O')]"


=== bert-base-multilingual-cased ===



Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

architecture:	bert
tokenizer:	BertTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('Um', 'O'), ('444', 'O'), ('Sitze', 'O'), ('bewerben', 'O'), ('sich', 'O'), ('landesweit', 'O'), ('knapp', 'O'), ('5300', 'O'), ('Kandidaten', 'O'), ('.', 'O')]"
1,"[('Arbeitsgruppen', 'O'), ('für', 'O'), ('die', 'O'), ('verschiedenen', 'O'), ('Tätigkeitsbereiche', 'O'), ('der', 'O'), ('Parlamentsverwaltungen', 'O'), ('werden', 'O'), ('eingerichtet', 'O'), ('.', 'O')]"


=== camembert-base ===



Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForTokenClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing CamembertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing CamembertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of CamembertForTokenClassification were not initialized from the model checkpoint at camembert-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream tas

architecture:	camembert
tokenizer:	CamembertTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('Der', 'O'), ('notenbeste', 'O'), ('Zweitligaspieler', 'O'), ('(', 'O'), ('2,91', 'O'), (')', 'O'), (',', 'O'), ('der', 'O'), ('seine', 'O'), ('persnliche', 'O'), ('Bilanz', 'O'), ('auf', 'O'), ('sieben', 'O'), ('Tore', 'O'), ('und', 'O'), ('13', 'O'), ('Assists', 'O'), ('aufstockte', 'O'), ('und', 'O'), ('schon', 'O'), ('vor', 'O'), ('Wochen', 'O'), ('seinen', 'O'), ('Wechsel', 'O'), ('zu', 'O'), ('Dortmund', 'B-ORG'), ('bekannt', 'O'), ('gegeben', 'O'), ('hatte', 'O'), (',', 'O'), ('war', 'O'), ('zuletzt', 'O'), ('in', 'O'), ('Mainz', 'B-LOC'), ('in', 'O'), ('die', 'O'), ('Kritik', 'O'), ('geraten', 'O'), ('.', 'O')]"
1,"[('Ab', 'O'), ('dem', 'O'), ('zweiten', 'O'), ('Jahr', 'O'), ('sind', 'O'), ('die', 'O'), ('Entwicklungen', 'O'), ('der', 'O'), ('Korbwerte', 'O'), ('fr', 'O'), ('die', 'O'), ('Berechnung', 'O'), ('der', 'O'), ('Kuponhhe', 'O'), ('mageblich', 'O'), ('.', 'O')]"


=== distilbert-base-uncased ===



Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN t

architecture:	distilbert
tokenizer:	DistilBertTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('das', 'O'), ('sei', 'O'), ('ihr', 'O'), ('ausdrucklicher', 'O'), ('wunsch', 'O'), ('gewesen', 'O'), ('.', 'O')]"
1,"[('das', 'O'), ('wage', 'O'), ('ich', 'O'), ('mal', 'O'), ('ganz', 'O'), ('stark', 'O'), ('zu', 'O'), ('bezweifeln', 'O'), ('.', 'O')]"


=== monologg/electra-small-finetuned-imdb ===

architecture:	electra
tokenizer:	ElectraTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('stand', 'O'), (':', 'O'), ('fruhjahr', 'O'), ('2008', 'O'), ('seit', 'O'), ('dem', 'O'), ('19', 'O'), ('.', 'O'), ('september', 'O'), ('2006', 'O'), ('ist', 'O'), ('der', 'O'), ('fußballklub', 'B-ORG'), ('sorkhpoushan', 'I-ORG'), ('delvar', 'I-ORG'), ('afzar', 'O'), ('das', 'O'), ('reserveteam', 'B-ORG'), ('persepolis', 'I-ORG'), ('teherans', 'O'), ('-', 'B-ORG'), ('sorkhpoushan', 'O'), ('spielt', 'O'), ('in', 'O'), ('der', 'O'), ('azadegan', 'O'), ('league', 'O'), ('(', 'O')]"
1,"[('barauszahlungen', 'O'), ('sind', 'O'), ('grundsatzlich', 'O'), ('nicht', 'O'), ('moglich', 'O'), ('.', 'O')]"


=== allenai/longformer-base-4096 ===



Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForTokenClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing LongformerForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing LongformerForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of LongformerForTokenClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN

architecture:	longformer
tokenizer:	LongformerTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('Neben', 'O'), ('dem', 'O'), ('Anstieg', 'O'), ('der', 'O'), ('Gewalt', 'O'), ('-', 'O'), ('614', 'O'), ('rechte', 'O'), ('Taten', 'O'), (',', 'O'), ('zehn', 'O'), ('Prozent', 'O'), ('mehr', 'O'), ('als', 'O'), ('2004', 'O'), (',', 'O'), ('wurden', 'O'), ('den', 'O'), ('neun', 'O'), ('Beratungsstellen', 'O'), ('gemeldet', 'O'), ('-', 'O'), ('registrieren', 'O'), ('die', 'O'), ('Berater', 'O'), ('zwei', 'O'), ('Trends', 'O'), ('.', 'O')]"
1,"[('Gogol', 'B-ORG'), ('Bordello', 'I-ORG'), ('Karten', 'O'), ('zu', 'O'), ('verkaufen', 'O'), ('!', 'O')]"


=== google/mobilebert-uncased ===



Some weights of the model checkpoint at google/mobilebert-uncased were not used when initializing MobileBertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing MobileBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing MobileBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MobileBertForTokenClassificati

architecture:	mobilebert
tokenizer:	MobileBertTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('ebenfalls', 'O'), ('musste', 'O'), ('eine', 'O'), ('klotzbremse', 'O'), ('eingebaut', 'O'), ('worden', 'O'), ('sein', 'O'), (',', 'O'), ('deren', 'O'), ('ausfuhrung', 'O'), ('ist', 'O'), ('nicht', 'O'), ('bekannt', 'O'), ('.', 'O')]"
1,"[('schwarzer', 'O'), ('humor', 'O'), ('britischer', 'B-LOCderiv'), ('pragung', 'O'), ('spielte', 'O'), ('eine', 'O'), ('große', 'O'), ('rolle', 'O'), ('.', 'O')]"


=== roberta-base ===



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able

architecture:	roberta
tokenizer:	RobertaTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('Das', 'O'), ('ESP', 'O'), ('klopft', 'O'), ('dem', 'O'), ('Fahrer', 'O'), ('recht', 'O'), ('früh', 'O'), ('auf', 'O'), ('die', 'O')]"
1,"[('Die', 'O'), ('auf', 'O'), ('etwa', 'O'), ('zehn', 'O'), ('Hektar', 'O'), ('verbliebene', 'O'), ('Campingstadt', 'O'), ('ist', 'O'), ('immer', 'O'), ('noch', 'O'), ('eine', 'O'), ('infrastrukturell', 'O'), ('autarke', 'O'), ('Enklave', 'O'), ('abseits', 'O'), ('des', 'O'), ('historischen', 'O'), ('Ortes', 'O'), ('.', 'O')]"


=== xlm-mlm-en-2048 ===



Some weights of the model checkpoint at xlm-mlm-en-2048 were not used when initializing XLMForTokenClassification: ['pred_layer.proj.weight', 'pred_layer.proj.bias']
- This IS expected if you are initializing XLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing XLMForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMForTokenClassification were not initialized from the model checkpoint at xlm-mlm-en-2048 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


architecture:	xlm
tokenizer:	XLMTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('interrail', 'B-ORG'), ('global', 'O'), ('pass', 'O'), ('der', 'O'), ('interrail', 'B-ORG'), ('global', 'O'), ('pass', 'O'), ('gilt', 'O'), ('fur', 'O'), ('mehrere', 'O'), ('europaische', 'B-LOCderiv'), ('bahnunternehmen', 'O'), ('in', 'O'), ('32', 'O'), ('landern', 'O'), ('in', 'O'), ('europa', 'B-LOC'), (':', 'O'), ('der', 'O'), ('pass', 'O'), ('ist', 'O'), ('jedoch', 'O'), ('nicht', 'O'), ('im', 'O'), ('eigenen', 'O'), ('wohnsitzland', 'O'), ('gultig', 'O'), ('.', 'O')]"
1,"[('zum', 'O'), ('fahnenmast', 'O'), ('hin', 'O'), ('sind', 'O'), ('sie', 'O'), ('gleichseitig', 'O'), (',', 'O'), ('und', 'O'), ('zur', 'O'), ('anderen', 'O'), ('seite', 'O'), ('hin', 'O'), ('doppelt', 'O'), ('so', 'O'), ('lang', 'O'), ('.', 'O')]"


=== xlm-roberta-base ===



Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-st

architecture:	xlm_roberta
tokenizer:	XLMRobertaTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,"[('Anni', 'B-PER'), ('Krahnstöver', 'I-PER'), ('gehörte', 'O'), ('1946/47', 'O'), ('dem', 'O'), ('zweiten', 'O'), ('ernannten', 'O'), ('Landtag', 'O'), ('von', 'O'), ('Schleswig-Holstein', 'B-LOC'), ('an', 'O'), ('und', 'O'), ('anschließend', 'O'), ('bis', 'O'), ('zum', 'O'), ('4.', 'O'), ('Januar', 'O'), ('1948', 'O'), ('dem', 'O'), ('ersten', 'O'), ('gewählten', 'O'), ('Landtag', 'O'), ('.', 'O')]"
1,"[('In', 'O'), ('den', 'O'), ('Jahren', 'O'), ('1991', 'O'), ('bis', 'O'), ('1996', 'O'), ('bekleidete', 'O'), ('er', 'O'), ('die', 'O'), ('Funktion', 'O'), ('des', 'O'), ('Direktors', 'O'), ('des', 'O'), ('International', 'B-ORG'), ('Film', 'I-ORG'), ('Festival', 'I-ORG'), ('Rotterdam', 'I-ORG'), ('und', 'O'), ('des', 'O'), ('Hubert', 'B-ORG'), ('Bals', 'I-ORG'), ('Fonds', 'I-ORG'), (',', 'O'), ('mit', 'O'), ('dem', 'O'), ('Filmproduktionen', 'O'), ('in', 'O'), ('Entwicklungsländern', 'O'), ('gefördert', 'O'), ('werden', 'O'), ('.', 'O')]"


=== xlnet-base-cased ===



Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForTokenClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing XLNetForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForTokenClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


architecture:	xlnet
tokenizer:	XLNetTokenizer

*** TESTING DataLoaders ***



Unnamed: 0,token / target label
0,[]
1,"[('Der', 'O'), ('niedersachsische', 'B-LOCderiv')]"


In [None]:
#slow
#hide_input
# one row per tested model, in the order the result tuples were recorded
result_cols = ['arch', 'tokenizer', 'model_name', 'result', 'error']
test_results_df = pd.DataFrame(test_results, columns=result_cols)
display_df(test_results_df)

Unnamed: 0,arch,tokenizer,model_name,result,error
0,albert,AlbertTokenizer,albert-base-v1,PASSED,
1,bert,BertTokenizer,bert-base-multilingual-cased,PASSED,
2,camembert,CamembertTokenizer,camembert-base,PASSED,
3,distilbert,DistilBertTokenizer,distilbert-base-uncased,PASSED,
4,electra,ElectraTokenizer,monologg/electra-small-finetuned-imdb,PASSED,
5,longformer,LongformerTokenizer,allenai/longformer-base-4096,PASSED,
6,mobilebert,MobileBertTokenizer,google/mobilebert-uncased,PASSED,
7,roberta,RobertaTokenizer,roberta-base,PASSED,
8,xlm,XLMTokenizer,xlm-mlm-en-2048,PASSED,
9,xlm_roberta,XLMRobertaTokenizer,xlm-roberta-base,PASSED,


## Cleanup

In [None]:
#hide
# export the notebooks to library modules (the "Converted ..." lines below are its output)
from nbdev.export import notebook2script
notebook2script()

Converted 00_utils.ipynb.
Converted 01_data-core.ipynb.
Converted 01a_data-token-classification.ipynb.
Converted 01b_data-question-answering.ipynb.
Converted 01e_data-summarization.ipynb.
Converted 01z_data-language-modeling.ipynb.
Converted 02_modeling-core.ipynb.
Converted 02a_modeling-token-classification.ipynb.
Converted 02b_modeling-question-answering.ipynb.
Converted 02e_modeling-summarization.ipynb.
Converted 02z_modeling-language-modeling.ipynb.
Converted index.ipynb.
