# Setup

### Installs

In [1]:
!pip install datasets transformers seqeval
!pip install spacy 





### Imports

In [2]:
from datasets import Dataset, ClassLabel, Sequence, load_dataset, load_metric
import numpy as np
import pandas as pd
from spacy import displacy
import tensorflow as tf
from tensorflow import keras
import transformers
from transformers import (AutoModelForTokenClassification,
                          TFAutoModelForTokenClassification,
                          AutoTokenizer, 
                          DataCollatorForTokenClassification,
                          pipeline,
                          TrainingArguments, 
                          Trainer)

2023-04-04 00:13:21.861396: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-04 00:13:21.928481: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-04 00:13:21.930751: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-04 00:13:21.930756: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart d

# Data Preparation

In [3]:
ds = load_dataset("ade_corpus_v2", "Ade_corpus_v2_drug_ade_relation")
ds

Found cached dataset ade_corpus_v2 (/home/ddds/.cache/huggingface/datasets/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/1.0.0/940d61334dbfac6b01ac5d00286a2122608b8dc79706ee7e9206a1edb172c559)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'drug', 'effect', 'indexes'],
        num_rows: 6821
    })
})

In [4]:
ds["train"][0]

{'text': 'Intravenous azithromycin-induced ototoxicity.',
 'drug': 'azithromycin',
 'effect': 'ototoxicity',
 'indexes': {'drug': {'start_char': [12], 'end_char': [24]},
  'effect': {'start_char': [33], 'end_char': [44]}}}

In [5]:
# There are duplicate texts with different drug(s) and/or effect(s)
# Consolidate to prevent model giving different labels for the same sentence.

consolidated_dataset = {}

for row in ds["train"]:
    if row["text"] in consolidated_dataset:
        consolidated_dataset[row["text"]]["drug_indices_start"].update(row["indexes"]["drug"]["start_char"])
        consolidated_dataset[row["text"]]["drug_indices_end"].update(row["indexes"]["drug"]["end_char"])
        consolidated_dataset[row["text"]]["effect_indices_start"].update(row["indexes"]["effect"]["start_char"])
        consolidated_dataset[row["text"]]["effect_indices_end"].update(row["indexes"]["effect"]["end_char"])
        consolidated_dataset[row["text"]]["drug"].append(row["drug"])
        consolidated_dataset[row["text"]]["effect"].append(row["effect"])
        
    else:
        consolidated_dataset[row["text"]] = {
            "text": row["text"],
            "drug": [row["drug"]],
            "effect": [row["effect"]],
            # use sets because the indices can repeat for various reasons
            "drug_indices_start": set(row["indexes"]["drug"]["start_char"]),
            "drug_indices_end": set(row["indexes"]["drug"]["end_char"]),
            "effect_indices_start": set(row["indexes"]["effect"]["start_char"]),
            "effect_indices_end": set(row["indexes"]["effect"]["end_char"])
        }

In [6]:
df = pd.DataFrame(list(consolidated_dataset.values()))
df.head()

Unnamed: 0,text,drug,effect,drug_indices_start,drug_indices_end,effect_indices_start,effect_indices_end
0,Intravenous azithromycin-induced ototoxicity.,[azithromycin],[ototoxicity],{12},{24},{33},{44}
1,"Immobilization, while Paget's bone disease was...",[dihydrotachysterol],[increased calcium-release],{91},{109},{143},{168}
2,Unaccountable severe hypercalcemia in a patien...,[dihydrotachysterol],[hypercalcemia],{84},{102},{21},{34}
3,METHODS: We report two cases of pseudoporphyri...,"[naproxen, oxaprozin]","[pseudoporphyria, pseudoporphyria]","{58, 71}","{80, 66}",{32},{47}
4,"Naproxen, the most common offender, has been a...",[Naproxen],[erythropoietic protoporphyria],{0},{8},{134},{163}


In [7]:
# since no spans overlap, we can sort to get 1:1 matched index spans
# note that sets don't preserve insertion order

df["drug_indices_start"] = df["drug_indices_start"].apply(list).apply(sorted)
df["drug_indices_end"] = df["drug_indices_end"].apply(list).apply(sorted)
df["effect_indices_start"] = df["effect_indices_start"].apply(list).apply(sorted)
df["effect_indices_end"] = df["effect_indices_end"].apply(list).apply(sorted)

df["text"] = df["text"].apply(str.lower)
df["drug"] = [[w.lower() for w in line] for line in df["drug"]]
df["effect"] = [[w.lower() for w in line] for line in df["effect"]]

In [8]:
# check string are lowercased
df.head()

Unnamed: 0,text,drug,effect,drug_indices_start,drug_indices_end,effect_indices_start,effect_indices_end
0,intravenous azithromycin-induced ototoxicity.,[azithromycin],[ototoxicity],[12],[24],[33],[44]
1,"immobilization, while paget's bone disease was...",[dihydrotachysterol],[increased calcium-release],[91],[109],[143],[168]
2,unaccountable severe hypercalcemia in a patien...,[dihydrotachysterol],[hypercalcemia],[84],[102],[21],[34]
3,methods: we report two cases of pseudoporphyri...,"[naproxen, oxaprozin]","[pseudoporphyria, pseudoporphyria]","[58, 71]","[66, 80]",[32],[47]
4,"naproxen, the most common offender, has been a...",[naproxen],[erythropoietic protoporphyria],[0],[8],[134],[163]


In [9]:
# save to JSON to then import into Dataset object
df.to_json("dataset.jsonl", orient="records", lines=True)
cons_dataset = load_dataset("json", data_files="dataset.jsonl")

Downloading and preparing dataset json/default to /home/ddds/.cache/huggingface/datasets/json/default-3c669057dd99a6d1/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/ddds/.cache/huggingface/datasets/json/default-3c669057dd99a6d1/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
model_checkpoint = "bert-base-uncased"
batch_size = 16
epochs = 5
MAX_SEQUENCE_LENGTH = 119

In [11]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [12]:
# BIO Tagging

label_list = ['O', 'B-DRUG', 'I-DRUG', 'B-EFFECT', 'I-EFFECT']

def generate_row_labels(row, verbose=False):
    """ Given a row from the consolidated `Ade_corpus_v2_drug_ade_relation` dataset, 
    generates BIO tags for drug and effect entities. 
    
    """

    text = row["text"]

    labels = []
    label = "O"
    prefix = ""
    
    # while iterating through tokens, increment to traverse all drug and effect spans
    drug_index = 0
    effect_index = 0
    
    tokens = tokenizer(text, return_offsets_mapping=True, truncation=True, padding='max_length', max_length=MAX_SEQUENCE_LENGTH)

    for n in range(len(tokens["input_ids"])):
        offset_start, offset_end = tokens["offset_mapping"][n]

        # should only happen for [CLS] and [SEP]
        if offset_end - offset_start == 0:
            labels.append(0)
            continue
        
        if drug_index < len(row["drug_indices_start"]) and offset_start == row["drug_indices_start"][drug_index]:
            label = "DRUG"
            prefix = "B-"

        elif effect_index < len(row["effect_indices_start"]) and offset_start == row["effect_indices_start"][effect_index]:
            label = "EFFECT"
            prefix = "B-"
        
        labels.append(label_list.index(f"{prefix}{label}"))
            
        if drug_index < len(row["drug_indices_end"]) and offset_end == row["drug_indices_end"][drug_index]:
            label = "O"
            prefix = ""
            drug_index += 1
            
        elif effect_index < len(row["effect_indices_end"]) and offset_end == row["effect_indices_end"][effect_index]:
            label = "O"
            prefix = ""
            effect_index += 1

        # need to transition "inside" if we just entered an entity
        if prefix == "B-":
            prefix = "I-"
    
    if verbose:
        print(f"{row}\n")
        orig = tokenizer.convert_ids_to_tokens(tokens["input_ids"])
        for n in range(len(labels)):
            print(orig[n], labels[n])
    tokens["labels"] = labels
    
    return tokens

In [13]:
# Take a look at the ouput

generate_row_labels(cons_dataset["train"][2], verbose=True)

{'text': 'unaccountable severe hypercalcemia in a patient treated for hypoparathyroidism with dihydrotachysterol.', 'drug': ['dihydrotachysterol'], 'effect': ['hypercalcemia'], 'drug_indices_start': [84], 'drug_indices_end': [102], 'effect_indices_start': [21], 'effect_indices_end': [34]}

[CLS] 0
una 0
##cco 0
##unt 0
##able 0
severe 0
hyper 3
##cal 4
##ce 4
##mia 4
in 0
a 0
patient 0
treated 0
for 0
h 0
##yp 0
##opa 0
##rath 0
##yr 0
##oid 0
##ism 0
with 0
di 1
##hy 2
##dro 2
##ta 2
##chy 2
##ster 2
##ol 2
. 0
[SEP] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[PAD] 0
[P

{'input_ids': [101, 14477, 21408, 16671, 3085, 5729, 23760, 9289, 3401, 10092, 1999, 1037, 5776, 5845, 2005, 1044, 22571, 29477, 27362, 12541, 9314, 2964, 2007, 4487, 10536, 22196, 2696, 11714, 6238, 4747, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [14]:
labeled_dataset = cons_dataset.map(generate_row_labels)
labeled_dataset

Map:   0%|          | 0/4271 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'drug', 'effect', 'drug_indices_start', 'drug_indices_end', 'effect_indices_start', 'effect_indices_end', 'input_ids', 'token_type_ids', 'attention_mask', 'offset_mapping', 'labels'],
        num_rows: 4271
    })
})

In [15]:
# What columns does the dataframe have?

df = labeled_dataset['train'].to_pandas()
df.head()

Unnamed: 0,text,drug,effect,drug_indices_start,drug_indices_end,effect_indices_start,effect_indices_end,input_ids,token_type_ids,attention_mask,offset_mapping,labels
0,intravenous azithromycin-induced ototoxicity.,[azithromycin],[ototoxicity],[12],[24],[33],[44],"[101, 26721, 8159, 3560, 17207, 8939, 21716, 2...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[[0, 0], [0, 5], [5, 8], [8, 11], [12, 14], [1...","[0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 3, 4, 4, 4, ..."
1,"immobilization, while paget's bone disease was...",[dihydrotachysterol],[increased calcium-release],[91],[109],[143],[168],"[101, 10047, 5302, 14454, 3989, 1010, 2096, 39...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[[0, 0], [0, 2], [2, 4], [4, 7], [7, 14], [14,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,unaccountable severe hypercalcemia in a patien...,[dihydrotachysterol],[hypercalcemia],[84],[102],[21],[34],"[101, 14477, 21408, 16671, 3085, 5729, 23760, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[[0, 0], [0, 3], [3, 6], [6, 9], [9, 13], [14,...","[0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 0, 0, 0, 0, 0, ..."
3,methods: we report two cases of pseudoporphyri...,"[naproxen, oxaprozin]","[pseudoporphyria, pseudoporphyria]","[58, 71]","[66, 80]",[32],[47],"[101, 4725, 1024, 2057, 3189, 2048, 3572, 1997...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[[0, 0], [0, 7], [7, 8], [9, 11], [12, 18], [1...","[0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 0, 0, 1, ..."
4,"naproxen, the most common offender, has been a...",[naproxen],[erythropoietic protoporphyria],[0],[8],[134],[163],"[101, 18996, 3217, 2595, 2368, 1010, 1996, 208...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[[0, 0], [0, 3], [3, 5], [5, 6], [6, 8], [8, 9...","[0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [16]:
# Train Test Split

df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42),
                            [int(.8 * len(df)), int(.9 * len(df))])

# Classification with BERT Model

In [17]:
model = TFAutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_list))

2023-04-04 00:13:27.205815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
All model checkpoint layers were used when initializing TFBertForTokenClassification.

Some layers of TFBertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
# Input and Output Data

# Train Data
train_txt =  df_train['text'].values.tolist()
train_encodings = tokenizer(train_txt,
                            padding='max_length', 
                            max_length = MAX_SEQUENCE_LENGTH, 
                            truncation=True, 
                            return_tensors="tf") 
train_labels = list(df_train['labels'])


# Validation Data
val_txt =  df_val['text'].values.tolist()
val_encodings = tokenizer(val_txt,
                          padding='max_length', 
                          max_length = MAX_SEQUENCE_LENGTH, 
                          truncation=True, 
                          return_tensors="tf")
val_labels = list(df_val['labels'])

# Test Data
test_txt =  df_test['text'].values.tolist()
test_encodings = tokenizer(test_txt,
                           padding='max_length', 
                           max_length = MAX_SEQUENCE_LENGTH, 
                           truncation=True, 
                           return_tensors="tf")
test_labels = list(df_test['labels'])

In [19]:
def create_bert_cls_model(max_sequence_length=119,
                          hidden_size = 200, 
                          num_classes = 5,
                          dropout=0.05,
                          learning_rate=0.00001):

    model.trainable=True    

    input_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}

    bert_out = model(bert_inputs)

    cls_token = bert_out[0]

    last_hidden_output = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(cls_token)
    last_hidden_output = tf.keras.layers.Dropout(dropout)(last_hidden_output)  

    classification = tf.keras.layers.Dense(num_classes, activation='softmax',name='classification_layer')(last_hidden_output)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=[classification])

    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                               metrics='sparse_categorical_accuracy')


    return classification_model

In [20]:
bert_model = create_bert_cls_model(num_classes=len(label_list))

In [21]:
bert_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 attention_mask_layer (InputLay  [(None, 119)]       0           []                               
 er)                                                                                              
                                                                                                  
 input_ids_layer (InputLayer)   [(None, 119)]        0           []                               
                                                                                                  
 token_type_ids_layer (InputLay  [(None, 119)]       0           []                               
 er)                                                                                              
                                                                                              

### Fit

In [22]:
bert_model_history = bert_model.fit([train_encodings.input_ids,
                                     train_encodings.token_type_ids, 
                                     train_encodings.attention_mask], 
                                    np.array(train_labels),   
                                    validation_data=(
                                        [val_encodings.input_ids,
                                         val_encodings.token_type_ids, 
                                         val_encodings.attention_mask], 
                                    np.array(val_labels)),    
                                    batch_size=16, 
                                    epochs=5
                                   )

bert_model_history

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f75d06769b0>

### Evaluate

In [23]:
bert_score = bert_model.evaluate([test_encodings.input_ids, test_encodings.token_type_ids, test_encodings.attention_mask], 
                                 np.array(test_labels))

print('Test loss', bert_score[0])
print('Test accuracy', bert_score[1])

Test loss 0.07577356696128845
Test accuracy 0.9829380512237549


### Predict

In [24]:
bert_predictions = bert_model.predict([test_encodings.input_ids[:], test_encodings.token_type_ids[:], test_encodings.attention_mask[:]])
bert_predictions = tf.argmax(bert_predictions, axis=-1)



In [25]:
bert_predictions[0:4]

<tf.Tensor: shape=(4, 119), dtype=int64, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 3, 4, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 4, 4, 4, 0, 1, 2, 2, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2,
        0, 0, 3, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0,
        0, 0, 3, 4, 4, 4, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0

# Metric (Precision, Recall, and F1)

In [26]:
metric = load_metric("seqeval")
bert_predictions = bert_model.predict([test_encodings.input_ids[:], test_encodings.token_type_ids[:], test_encodings.attention_mask[:]])
predictions = np.argmax(bert_predictions, axis=2)
labels = np.array(test_labels)
true_predictions = [
    [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
    for prediction, label in zip(predictions, labels)
]
true_labels = [
    [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
    for prediction, label in zip(predictions, labels)
]

results = metric.compute(predictions=true_predictions, references=true_labels)
results

  metric = load_metric("seqeval")




{'DRUG': {'precision': 0.8880994671403197,
  'recall': 0.9433962264150944,
  'f1': 0.9149130832570905,
  'number': 530},
 'EFFECT': {'precision': 0.6524216524216524,
  'recall': 0.8403669724770643,
  'f1': 0.7345629510825982,
  'number': 545},
 'overall_precision': 0.7573122529644268,
 'overall_recall': 0.8911627906976745,
 'overall_f1': 0.8188034188034187,
 'overall_accuracy': 0.982938035027095}

In [27]:
# Print out some predictions and compare to labels

print(true_predictions[0])
print(true_predictions[1])
print(true_predictions[2])
print(true_predictions[3])
print(true_predictions[4])

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-EFFECT', 'I-EFFECT', 'O', 'O', 'O', 'O', 'B-DRUG', 'I-DRUG', 'I-DRUG', 'I-DRUG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'O', 'B-DRUG', 'I-DRUG', 'I-DRUG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-EFFECT', 'I-EFFECT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-DRUG', 'I-DRUG', 'I-DRUG', 'O', 'O', 'B-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT'

In [28]:
# Print out some labels and compare to predictions

print(true_labels[0])
print(true_labels[1])
print(true_labels[2])
print(true_labels[3])
print(true_labels[4])

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-EFFECT', 'I-EFFECT', 'O', 'O', 'O', 'O', 'B-DRUG', 'I-DRUG', 'I-DRUG', 'I-DRUG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'B-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'I-EFFECT', 'O', 'B-DRUG', 'I-DRUG', 'I-DRUG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-DRUG', 'I-DRUG', 'I-DRUG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '