# Install Transformers libraries 

In [1]:
# Transformers installation
! pip install transformers datasets
# To install from source instead of the last release, comment the command above and uncomment the following one.
# ! pip install git+https://github.com/huggingface/transformers.git

Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.12.0 dill-0.3.6 frozenlist-1.3.3 huggingface-hub-0.14.1 multidict-6.0.4 multiprocess-0.70.14 responses-0.18.0 tokenizers-0.13.3 transformers-4.28.1 xxhash-3.2.0 yarl-1.9.2


## Load PhoNER-COVID_19 dataset

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install datasets

In [None]:
from datasets import Dataset

In [32]:
import json
import pandas as pd

In [None]:
#Read training set 
trainWord = [json.loads(line) for line in open('gdrive/MyDrive/Ct550/word/train_syllable.json', 'r', encoding='utf-8')]
trainWordData = pd.DataFrame(trainWord)
# trainWordData = trainWordData.rename(columns={'words':'source_text', 'tags':'target_text'})

In [None]:
trainWordData

In [None]:
dataset[0]

convert labels to ids with dictionary 'label2id'

In [None]:
label2id = {
    "O":0,
    "B-PATIENT_ID":1,
    "I-PATIENT_ID": 2,
    "B-NAME":3,
    "I-NAME":4,
    "B-AGE":5,
    "I-AGE": 6,
    "B-GENDER":7,
    "I-GENDER":8,
    "B-JOB":9,
    "I-JOB":10,
    "B-LOCATION":11,
    "I-LOCATION":12,
    "B-ORGANIZATION":13,
    "I-ORGANIZATION":14,
    "B-SYMPTOM_AND_DISEASE":15,
    "I-SYMPTOM_AND_DISEASE":16,
    "B-TRANSPORTATION":17,
    "I-TRANSPORTATION":18,
    "B-DATE":19,
    "I-DATE":20
}

In [None]:
labels = []
for each in trainWordData['tags']:
  label = [label2id[entity] for entity in each]
  labels.append(label)

In [None]:
labels[0]

create an object and make it a dataset

In [None]:
dict_obj = {'inputs':trainWordData['words'], 'labels':labels}
dataset = Dataset.from_dict(dict_obj)

In [None]:
dataset[0]

{'inputs': ['ƒê·ªìng',
  'th·ªùi',
  ',',
  'b·ªánh',
  'vi·ªán',
  'ti·∫øp',
  't·ª•c',
  'th·ª±c',
  'hi·ªán',
  'c√°c',
  'bi·ªán',
  'ph√°p',
  'ph√≤ng',
  'ch·ªëng',
  'd·ªãch',
  'b·ªánh',
  'COVID',
  '-',
  '19',
  'theo',
  'h∆∞·ªõng',
  'd·∫´n',
  'c·ªßa',
  'B·ªô',
  'Y',
  't·∫ø',
  '.'],
 'labels': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  13,
  14,
  14,
  0]}

# load pretrained tokenizer

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
example = wnut["train"][0]
tokenized_input = tokenizer(example["tokens"], is_split_into_words=True)
tokens = tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"])
tokens

In [None]:
example = dataset[0]
input_ids = tokenizer(example['inputs'], is_split_into_words=True)
tokens = tokenizer.convert_ids_to_tokens(input_ids['input_ids']) 
tokens

In [None]:
dataset

Dataset({
    features: ['inputs', 'labels'],
    num_rows: 5027
})

# handle mismatch betwwen words and labels

In [None]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["inputs"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples['labels']):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# tokenize dataset with map method 

In [None]:
vt5_dataset = dataset.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/5027 [00:00<?, ? examples/s]

In [None]:
vt5_dataset

Dataset({
    features: ['inputs', 'labels', 'input_ids', 'attention_mask'],
    num_rows: 5027
})

Now create a batch of examples using [DataCollatorWithPadding](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DataCollatorWithPadding). It's more efficient to *dynamically pad* the sentences to the longest length in a batch during collation, instead of padding the whole dataset to the maximum length.

In [None]:
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

## Evaluate

Including a metric during training is often helpful for evaluating your model's performance. You can quickly load a evaluation method with the ü§ó [Evaluate](https://huggingface.co/docs/evaluate/index) library. For this task, load the [seqeval](https://huggingface.co/spaces/evaluate-metric/seqeval) framework (see the ü§ó Evaluate [quick tour](https://huggingface.co/docs/evaluate/a_quick_tour) to learn more about how to load and compute a metric). Seqeval actually produces several scores: precision, recall, F1, and accuracy.

In [None]:
!pip install evaluate

Installing collected packages: evaluate
Successfully installed evaluate-0.4.0


In [None]:
!pip install seqeval

  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16180 sha256=23adb2f7fb8731018cfc61aae453d8f4c87183c677f6d8b388c04f354dc90bf1
  Stored in directory: /root/.cache/pip/wheels/e2/a5/92/2c80d1928733611c2747a9820e1324a6835524d9411510c142
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [None]:
import evaluate

seqeval = evaluate.load("seqeval")

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

Get the NER labels first, and then create a function that passes your true predictions and true labels to [compute](https://huggingface.co/docs/evaluate/main/en/package_reference/main_classes#evaluate.EvaluationModule.compute) to calculate the scores:

In [None]:
# labels = [label_list[i] for i in example[f"ner_tags"]]
# labels

TypeError: ignored

In [None]:
# import numpy as np

# labels = [label_list[i] for i in example[f"ner_tags"]]


# def compute_metrics(p):
#     predictions, labels = p
#     predictions = np.argmax(predictions, axis=2)

#     true_predictions = [
#         [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
#         for prediction, label in zip(predictions, labels)
#     ]
#     true_labels = [
#         [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
#         for prediction, label in zip(predictions, labels)
#     ]

#     results = seqeval.compute(predictions=true_predictions, references=true_labels)
#     return {
#         "precision": results["overall_precision"],
#         "recall": results["overall_recall"],
#         "f1": results["overall_f1"],
#         "accuracy": results["overall_accuracy"],
#     }

Your `compute_metrics` function is ready to go now, and you'll return to it when you setup your training.

## Train

Before you start training your model, create a map of the expected ids to their labels with `id2label` and `label2id`:

In [None]:
id2label = {
    0: "O",
    1: "B-PATIENT_ID",
    2: "I-PATIENT_ID",
    3: "B-NAME",
    4: "I-NAME",
    5: "B-AGE",
    6: "I-AGE",
    7: "B-GENDER",
    8: "I-GENDER",
    9: "B-JOB",
    10: "I-JOB",
    11: "B-LOCATION",
    12: "I-LOCATION",
    13: "B-ORGANIZATION",
    14: "I-ORGANIZATION",
    15: "B-SYMPTOM_AND_DISEASE",
    16: "I-SYMPTOM_AND_DISEASE",
    17: "B-TRANSPORTATION",
    18: "I-TRANSPORTATION",
    19: "B-DATE",
    20: "I-DATE"
}
label2id = {
    "O":0,
    "B-PATIENT_ID":1,
    "I-PATIENT_ID": 2,
    "B-NAME":3,
    "I-NAME":4,
    "B-AGE":5,
    "I-AGE": 6,
    "B-GENDER":7,
    "I-GENDER":8,
    "B-JOB":9,
    "I-JOB":10,
    "B-LOCATION":11,
    "I-LOCATION":12,
    "B-ORGANIZATION":13,
    "I-ORGANIZATION":14,
    "B-SYMPTOM_AND_DISEASE":15,
    "I-SYMPTOM_AND_DISEASE":16,
    "B-TRANSPORTATION":17,
    "I-TRANSPORTATION":18,
    "B-DATE":19,
    "I-DATE":20
}

<Tip>

If you aren't familiar with finetuning a model with the [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer), take a look at the basic tutorial [here](https://huggingface.co/docs/transformers/main/en/tasks/../training#train-with-pytorch-trainer)!

</Tip>

You're ready to start training your model now! Load DistilBERT with [AutoModelForTokenClassification](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForTokenClassification) along with the number of expected labels, and the label mappings:

# load BERT model

In [None]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

model = AutoModelForTokenClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=21, id2label=id2label, label2id=label2id
)

In [None]:
model.to('cuda')

At this point, only three steps remain:

1. Define your training hyperparameters in [TrainingArguments](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments). The only required parameter is `output_dir` which specifies where to save your model. You'll push this model to the Hub by setting `push_to_hub=True` (you need to be signed in to Hugging Face to upload your model). At the end of each epoch, the [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) will evaluate the seqeval scores and save the training checkpoint.
2. Pass the training arguments to [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) along with the model, dataset, tokenizer, data collator, and `compute_metrics` function.
3. Call [train()](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.train) to finetune your model.

# train model with 30 epochs,....

In [None]:
training_args = TrainingArguments(
    output_dir="gdrive/MyDrive/Ct550/checkpoints",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=30,
    weight_decay=0.01,
    # evaluation_strategy="epoch",
    save_strategy="epoch",
    # load_best_model_at_end=True,
    # push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=vt5_dataset,
    # eval_dataset=tokenized_wnut["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    # compute_metrics=compute_metrics,
)

trainer.train()

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.4067
1000,0.1219
1500,0.0686
2000,0.045
2500,0.0309
3000,0.0204
3500,0.0158
4000,0.0102
4500,0.0089
5000,0.0077


TrainOutput(global_step=9450, training_loss=0.04042253855044249, metrics={'train_runtime': 1592.687, 'train_samples_per_second': 94.689, 'train_steps_per_second': 5.933, 'total_flos': 4347914296353786.0, 'train_loss': 0.04042253855044249, 'epoch': 30.0})

## Inference

Great, now that you've finetuned a model, you can use it for inference!

Grab some text you'd like to run inference on:

In [4]:
text = "Ng√†y 15 / 3 , Vi·ªán Pasteur Th√†nh ph·ªë H·ªì Ch√≠ Minh v·ª´a c√¥ng b·ªë tr∆∞·ªùng h·ª£p b·ªánh nh√¢n 13 d∆∞∆°ng t√≠nh v·ªõi Covid - 19 , b·ªánh nh√¢n hi·ªán c√¥ng t√°c t·∫°i C·∫ßn Th∆° ."

The simplest way to try out your finetuned model for inference is to use it in a [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline). Instantiate a `pipeline` for NER with your model, and pass your text to it:

In [3]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForTokenClassification.from_pretrained("gdrive/MyDrive/Ct550/checkpoints/checkpoint-9450")

In [None]:
model.to('cpu')

In [None]:
from transformers import pipeline

# classifier = pipeline("ner", model="stevhliu/my_awesome_wnut_model")
classifier = pipeline("ner", model=model, tokenizer=tokenizer)
classifier(text)

You can also manually replicate the results of the `pipeline` if you'd like:

Tokenize the text and return PyTorch tensors:

# inference of a single example

In [6]:
import torch 

In [None]:
def true_word_labels(tokens, labels): 
  word_labels = []
  token_list = tokens[1:len(tokens)]
  label_list = labels[1:len(labels)]

  j = 0
  for i in range(len(token_list)-1): 
    ch = token_list[i][0:1]
    if ch == '#':
      j += 1
    elif ch == '/':
      if token_list[i+1].isdigit():
        j += 1
    elif ch.isdigit():
      if token_list[i-1] == '/' or token_list[i-1] == '.':
        j += 1
      else:
        word_labels.append(label_list[j])
        j += 1
    elif ch == '.' and token_list[i+1] != '[SEP]':
      j += 1
    else:
      word_labels.append(label_list[j])
      j += 1
  return word_labels

In [None]:
from transformers import AutoTokenizer
from transformers import AutoModelForTokenClassification
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForTokenClassification.from_pretrained("gdrive/MyDrive/Ct550/checkpoints/checkpoint-9450")
# inputs = tokenizer(text, return_tensors="pt")

In [None]:
model.to('cuda')

In [12]:
text = input("Sentence: ")
text = text.split()
inputs = tokenizer(text, return_tensors="pt", is_split_into_words=True)
with torch.no_grad():
    logits = model(**inputs).logits

predictions = torch.argmax(logits, dim=2)
predicted_token_class = [model.config.id2label[t.item()] for t in predictions[0]]

Sentence: Th√†nh ph·ªë C·∫ßn Th∆°


# create a method which transforms tokens to words with their true labels

In [13]:
ids = inputs['input_ids'][0]
tokens = tokenizer.convert_ids_to_tokens(ids)
for i in range(len(ids)): 
  print (tokens[i], "-", predicted_token_class[i])

[CLS] - O
than - B-LOCATION
##h - I-LOCATION
ph - I-LOCATION
##o - I-LOCATION
can - I-LOCATION
tho - I-LOCATION
[SEP] - I-NAME


In [48]:
textt = ['Th√†nh', 'ph·ªë', 'C·∫ßn', 'Th∆°', 'l√†', 'th√†nh', 'ph·ªë', 'A']
labell = ['B-LOCATION', 'I-LOCATION', 'I-LOCATION', 'I-LOCATION', 'O', 'O', 'O', 'B-LOCATION']

def toTokenLevel(text, label):
  n_labell = []
  for i in range(len(text)):
    tokenized_each = tokenizer(text[i]).input_ids
    real_tokens = tokenized_each[1:len(tokenized_each)-1]
    # print (real_tokens)
    tag = label[i][2:] #LOCATION
    
    for j in range(len(real_tokens)):
      if label[i][0:1] == 'B':
        if j == 0:
          n_labell.append('B-'+tag)
        else:
          n_labell.append('I-'+tag)
      else:
        n_labell.append(label[i])
  return n_labell

In [49]:
print (toTokenLevel(textt, labell))

['B-LOCATION', 'I-LOCATION', 'I-LOCATION', 'I-LOCATION', 'I-LOCATION', 'I-LOCATION', 'O', 'O', 'O', 'O', 'O', 'B-LOCATION']


In [None]:
# print result with words and their labels
label = true_word_labels(tokens, predicted_token_class)
sentence = text
for i in range(len(sentence)):
  print (sentence[i], "_", label[i])
# printdata = {}
# printdata['sentence'] = sentence 
# printdata['label'] = label 
# pd.set_option('display.max_columns', 0)
# pd.DataFrame(printdata).T

ƒê√¢y _ O
l√† _ O
c∆° _ O
s·ªü _ O
y _ O
t·∫ø _ O
th·ª© _ O
8 _ O
·ªü _ O
H√† _ B-LOCATION
N·ªôi _ I-LOCATION
ph·∫£i _ O
c√°ch _ O
ly _ O
s·ªë _ O
l∆∞·ª£ng _ O
l·ªõn _ O
nh√¢n _ O
vi√™n _ O
ho·∫∑c _ O
phong _ O
to·∫£ _ O
khu _ O
v·ª±c _ O
/ _ O
to√†n _ O
b·ªô _ O
b·ªánh _ O
vi·ªán _ O
sau _ O
khi _ O
ph√°t _ O
hi·ªán _ O
c√≥ _ O
b·ªánh _ O
nh√¢n _ O
ho·∫∑c _ O
c√≥ _ O
ti·∫øp _ O
x√∫c _ O
v·ªõi _ O
b·ªánh _ O
nh√¢n _ O


IndexError: ignored

# evaluation 

# first load devaluation dataset



In [33]:
eval_dataset = [json.loads(line) for line in open('gdrive/MyDrive/Ct550/word/dev_syllable.json', 'r', encoding='utf-8')]
eval_dataset = pd.DataFrame(eval_dataset)

In [56]:
word, tag

(['B√°c',
  'sƒ©',
  'Nguy·ªÖn',
  'Trung',
  'Nguy√™n',
  ',',
  'Gi√°m',
  'ƒë·ªëc',
  'Trung',
  't√¢m',
  'Ch·ªëng',
  'ƒë·ªôc',
  ',',
  'B·ªánh',
  'vi·ªán',
  'B·∫°ch',
  'Mai',
  ',',
  'cho',
  'bi·∫øt',
  'b·ªánh',
  'nh√¢n',
  'ƒë∆∞·ª£c',
  'chuy·ªÉn',
  'ƒë·∫øn',
  'b·ªánh',
  'vi·ªán',
  'ng√†y',
  '7/3',
  ',',
  'ch·∫©n',
  'ƒëo√°n',
  'ng·ªô',
  'ƒë·ªôc',
  'thu·ªëc',
  'ƒëi·ªÅu',
  'tr·ªã',
  's·ªët',
  'r√©t',
  'chloroquine',
  '.'],
 ['O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'I-ORGANIZATION',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-DATE',
  'O',
  'O',
  'O',
  'B-SYMPTOM_AND_DISEASE',
  'I-SYMPTOM_AND_DISEASE',
  'I-SYMPTOM_AND_DISEASE',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O'])

In [53]:
word = eval_dataset['words'][0]
tag = eval_dataset['tags'][0]
new_word = tokenizer.convert_ids_to_tokens(tokenizer(word, is_split_into_words=True).input_ids)
new_tag = toTokenLevel(word, tag)
len(new_tag), len(new_word[1:-1])

(73, 73)

In [34]:
input_texts = [' '.join(sentence) for sentence in eval_dataset['words']]

In [35]:
input_texts[1024]

'Tr∆∞·ªõc ƒë√≥ s√°ng 10 - 4 , √¥ng ƒêo√†n VƒÉn Tr·ªçng - ch·ªß t·ªãch UBND huy·ªán M√™ Linh , cho bi·∫øt sau khi ghi nh·∫≠n 5 ca COVID - 19 , huy·ªán M√™ Linh ƒë√£ r√† so√°t , ti·∫øn h√†nh c√°ch ly v·ªõi 411 tr∆∞·ªùng h·ª£p thu·ªôc di·ªán F1 , trong ƒë√≥ c√≥ 361 ng∆∞·ªùi ·ªü x√£ M√™ Linh , c√≤n l·∫°i ·ªü x√£ kh√°c ; ho√†n t·∫•t r√† so√°t v·ªõi 653 ng∆∞·ªùi ti·∫øp x√∫c thu·ªôc di·ªán F2 , ƒë√£ √°p d·ª•ng bi·ªán ph√°p c√°ch ly , theo d√µi s·ª©c kho·∫ª .'

# load 2000 sentence to produce predictions

In [None]:
predicted_true_labels = []
for i in range(len(input_texts)):
  # print (i)
  texts = input_texts[i]
  inputs = tokenizer(texts, return_tensors='pt')

  with torch.no_grad():
    logits = model(**inputs).logits

  predictions = torch.argmax(logits, dim=2)
  predicted_token_class = [model.config.id2label[t.item()] for t in predictions[0]] 

  ids = inputs['input_ids'][0]
  tokens = tokenizer.convert_ids_to_tokens(ids) 
  predicted_true_labels.append(true_word_labels(tokens, predicted_token_class)) 

In [None]:
# pd.DataFrame(predicted_true_labels)
len(predicted_true_labels)

2000

In [None]:
references = [i for i in eval_dataset['tags']]

In [None]:
references

In [None]:
count = 0
rlabels = []
ractual = []
error = []
for i in range(2000):
  if len(predicted_true_labels[i]) == len(references[i]):
    rlabels.append(predicted_true_labels[i]) 
    ractual.append(references[i])
  # elif len(predicted_true_labels[i]) > len(references[i]):
  #   rlabels.append(predicted_true_labels[i][0:len(references[i])-1])
  #   ractual.append(references[i])
  #   print (len(predicted_true_labels[i]), len(references[i]))
  # else:
  #   error.append(input_texts[i])
  else:
    error.append(input_texts[i])

In [None]:
len(rlabels)

1841

In [None]:
len(error)

159

In [None]:
error

In [None]:
result = seqeval.compute(predictions=rlabels, references=ractual)
result

{'AGE': {'precision': 0.9898305084745763,
  'recall': 0.9668874172185431,
  'f1': 0.9782244556113904,
  'number': 302},
 'DATE': {'precision': 0.9619771863117871,
  'recall': 0.9902152641878669,
  'f1': 0.9758919961427194,
  'number': 1022},
 'GENDER': {'precision': 0.9672131147540983,
  'recall': 0.9711934156378601,
  'f1': 0.9691991786447639,
  'number': 243},
 'JOB': {'precision': 0.7619047619047619,
  'recall': 0.6597938144329897,
  'f1': 0.707182320441989,
  'number': 97},
 'LOCATION': {'precision': 0.8692185007974481,
  'recall': 0.896013152486642,
  'f1': 0.8824124671119207,
  'number': 2433},
 'NAME': {'precision': 0.9032258064516129,
  'recall': 0.6363636363636364,
  'f1': 0.7466666666666666,
  'number': 88},
 'ORGANIZATION': {'precision': 0.8020408163265306,
  'recall': 0.8103092783505155,
  'f1': 0.8061538461538462,
  'number': 485},
 'PATIENT_ID': {'precision': 0.9728171334431631,
  'recall': 0.9907718120805369,
  'f1': 0.9817123857024107,
  'number': 1192},
 'SYMPTOM_AND_D

In [None]:
# 1821 with the same length
result = seqeval.compute(predictions=rlabels, references=ractual)
result

{'AGE': {'precision': 0.9898305084745763,
  'recall': 0.9668874172185431,
  'f1': 0.9782244556113904,
  'number': 302},
 'DATE': {'precision': 0.9619771863117871,
  'recall': 0.9902152641878669,
  'f1': 0.9758919961427194,
  'number': 1022},
 'GENDER': {'precision': 0.9672131147540983,
  'recall': 0.9711934156378601,
  'f1': 0.9691991786447639,
  'number': 243},
 'JOB': {'precision': 0.7619047619047619,
  'recall': 0.6597938144329897,
  'f1': 0.707182320441989,
  'number': 97},
 'LOCATION': {'precision': 0.8692185007974481,
  'recall': 0.896013152486642,
  'f1': 0.8824124671119207,
  'number': 2433},
 'NAME': {'precision': 0.9032258064516129,
  'recall': 0.6363636363636364,
  'f1': 0.7466666666666666,
  'number': 88},
 'ORGANIZATION': {'precision': 0.8020408163265306,
  'recall': 0.8103092783505155,
  'f1': 0.8061538461538462,
  'number': 485},
 'PATIENT_ID': {'precision': 0.9728171334431631,
  'recall': 0.9907718120805369,
  'f1': 0.9817123857024107,
  'number': 1192},
 'SYMPTOM_AND_D

# tokenize evaldataset to token level and their labels for token level prediction 

In [None]:
eval_dataset['words'][0]

In [None]:
labelsss = []
j = 0
for i in range(len(eval_dataset['words'][0])):
  word = eval_dataset['words'][0][i]
  ids = tokenizer(word).input_ids
  token = tokenizer.convert_ids_to_tokens(ids)
  token = token[1:len(token)-1]
  print (token)

  first_word = 0
  for i in range(len(token)):
    if first_word == 0:
      labelsss.append(eval_dataset['tags'][0][i])
      first_word += 1
    else:
      abc = eval_dataset['tags'][0][i]
      abc = abc[2:]
      labelsss.append("I-".join(abc))
  j += 1