In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertConfig, BertForTokenClassification

# Build an English pipeline
#stanza.download('en', package='mimic', processors={'ner': 'i2b2'}) # download English model
#nlp = stanza.Pipeline('en', package='mimic', processors={'ner': 'i2b2'}) # initialize English neural pipeline

In [2]:
from torch import cuda
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = 'cpu'
if cuda.is_available():
    device = 'cuda'
print(device)

cuda


In [3]:
# Load the sentence-level corpus into a DataFrame.
# NOTE(review): absolute, user-specific path — this cell only runs where that file exists.
token_df = pd.read_csv('/home/chudeo/coding-evidence-extraction-main/work_sentence.csv')

In [4]:
# Preview the first rows to check which columns were read.
token_df.head()

Unnamed: 0,sentence,word_labels,pos_tags,sentence_id
0,She continued to slowly progress and her menta...,"O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O","PRON,VERB,PART,ADV,VERB,CCONJ,PRON,ADJ,NOUN,AD...",1
1,Discharge Disposition : Expired,"O,O,O,O","PROPN,NOUN,PUNCT,PROPN",2
2,Amt : 270 ********************************* CP...,"O,O,O,O,O,O,O,O","NOUN,PUNCT,NUM,PUNCT,PUNCT,PUNCT,PUNCT,PUNCT,P...",3
3,"COMPARISON : Preoperative studies , most recen...","O,O,O,O,O,O,O,O,O,O,O,O,O,O","NOUN,PUNCT,ADJ,NOUN,PUNCT,ADV,ADV,VERB,PUNCT,P...",4
4,There is no free fluid or free air in the abdo...,"O,O,O,O,O,O,O,O,O,O,O,O","PRON,VERB,DET,ADJ,NOUN,CCONJ,ADJ,NOUN,ADP,DET,...",5


In [5]:
# Number of non-null entries in each column.
token_df.count()

sentence       4378
word_labels    4378
pos_tags       4378
sentence_id    4378
dtype: int64

In [6]:
# Count the missing values in each column.
token_df.isnull().sum()

sentence       0
word_labels    0
pos_tags       0
sentence_id    0
dtype: int64

In [7]:
# Working frame without the sentence_id column (drop returns a new frame; token_df is left as loaded).
data = token_df.drop(columns=['sentence_id'])

In [8]:
from transformers import BertTokenizer, BertForTokenClassification, AdamW
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

2024-05-04 13:02:23.480740: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
# Break the three text columns into word-level pieces.
# NOTE(review): "sentence" becomes ONE flat list of words for the whole corpus,
# while "word_labels" and "pos_tags" stay grouped per sentence, so the three
# entries are not index-aligned — confirm this is intended.
all_words = []
for sentence_words in data["sentence"].str.split():
    all_words.extend(sentence_words)

split_data = {
    "sentence": all_words,
    "word_labels": list(map(lambda joined: joined.split(','), data["word_labels"])),
    "pos_tags": list(map(lambda joined: joined.split(','), data["pos_tags"])),
}


In [12]:
# Pull the three word-level views back out of split_data under short names.
sentences, labels, pos = (
    split_data[column] for column in ("sentence", "word_labels", "pos_tags")
)

In [13]:
# Tokenization: load the BioBERT word-piece tokenizer and encode `sentences` as one padded batch.
# NOTE(review): `sentences` is built as a flat list of single words, so every word is
# encoded as its own sequence here.
# NOTE(review): truncation=True is passed without max_length; the warning printed below
# says this falls back to no truncation.
# NOTE(review): `tokenized_sentences` is not referenced again in the visible notebook.
tokenizer = BertTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1")
tokenized_sentences = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [14]:
# Tag vocabulary (O / B / I) and the sequence-length and batch-size settings.
tag2idx = dict(O=0, B=1, I=2)
idx2tag = dict((idx, tag) for tag, idx in tag2idx.items())
num_classes = len(idx2tag)
MAX_LEN = 128
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 8

In [15]:
def tokenize_and_preserve_labels(sentence, text_labels, tokenizer):
    """Split a sentence into sub-word pieces and repeat each word's tag per piece.

    Args:
        sentence: Whitespace-separated words.
        text_labels: Comma-separated tags, one per word of ``sentence``.
        tokenizer: Object whose ``tokenize(word)`` returns a list of pieces.

    Returns:
        ``(pieces, piece_labels)`` — two lists of equal length. Words and tags
        are paired with ``zip``, so any surplus on either side is ignored.
    """
    pieces = []
    piece_labels = []

    words = sentence.strip().split()
    tags = text_labels.split(",")

    for word, tag in zip(words, tags):
        word_pieces = tokenizer.tokenize(word)
        pieces += word_pieces
        # every piece of a word inherits that word's tag
        piece_labels += [tag] * len(word_pieces)

    return pieces, piece_labels

In [22]:
import pandas as pd

# Extract all unique labels from the DataFrame
unique_labels = set(','.join(data['word_labels'].values).split(','))

# Define the desired label order
desired_labels = ['O', 'B', 'I']

# Fail early if the corpus contains a tag the mapping does not cover; otherwise the
# label2id lookup raises KeyError only once training reaches the offending sentence.
unknown_labels = unique_labels - set(desired_labels)
if unknown_labels:
    raise ValueError(f"Labels in data missing from desired_labels: {sorted(unknown_labels)}")

# Create the label-to-id mapping
label2id = {label: idx for idx, label in enumerate(desired_labels)}

# Create the id-to-label mapping
id2label = {idx: label for label, idx in label2id.items()}

# Print the label-to-id mapping
print(label2id)

{'O': 0, 'B': 1, 'I': 2}


In [23]:
class NERDataset(Dataset):
    """Map-style dataset that turns one sentence into fixed-length BERT inputs.

    Each item is a dict of ``torch.long`` tensors of length ``max_len``:
    ``input_ids``, ``attention_mask`` (1 for real tokens, 0 for padding) and
    ``labels`` (tag ids from the module-level ``label2id``). The special
    tokens and the padding positions are labelled ``"O"``.

    Args:
        dataframe: Frame with ``sentence`` (space-separated words) and
            ``word_labels`` (comma-separated tags) columns, indexed 0..n-1.
        tokenizer: Tokenizer providing ``tokenize`` and ``convert_tokens_to_ids``.
        max_len: Output length including [CLS] and [SEP]; expected to be >= 2.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, index):
        sentence = self.data.sentence[index]
        word_labels = self.data.word_labels[index]
        tokenized_sentence, labels = tokenize_and_preserve_labels(sentence, word_labels, self.tokenizer)

        # Truncate BEFORE adding the special tokens so that [SEP] always survives.
        budget = max(self.max_len - 2, 0)
        tokenized_sentence = ["[CLS]"] + tokenized_sentence[:budget] + ["[SEP]"]
        # The "O" for [SEP] has to go at the very end; list.insert(-1, ...) would
        # place it before the last word's label and shift that label onto [SEP].
        labels = ["O"] + labels[:budget] + ["O"]

        n_real = len(tokenized_sentence)
        n_pad = max(self.max_len - n_real, 0)
        tokenized_sentence = tokenized_sentence + ["[PAD]"] * n_pad
        labels = labels + ["O"] * n_pad

        # Mask by position rather than by token text, so a literal "[PAD]" in the
        # input sentence is still attended to.
        attn_mask = [1] * n_real + [0] * n_pad

        ids = self.tokenizer.convert_tokens_to_ids(tokenized_sentence)
        label_ids = [label2id[label] for label in labels]

        return {
            'input_ids': torch.tensor(ids, dtype=torch.long),
            'attention_mask': torch.tensor(attn_mask, dtype=torch.long),
            'labels': torch.tensor(label_ids, dtype=torch.long)
        }

    def __len__(self):
        return self.len


In [24]:
# Reproducible 80/20 split: sample the training rows, keep the remainder for testing,
# and renumber both frames from 0 so that NERDataset can index them by position.
train_size = 0.8
sampled_rows = data.sample(frac=train_size, random_state=200)
test_data = data.drop(index=sampled_rows.index).reset_index(drop=True)
train_data = sampled_rows.reset_index(drop=True)

print("FULL Dataset: {}".format(data.shape))
print("TRAIN Dataset: {}".format(train_data.shape))
print("TEST Dataset: {}".format(test_data.shape))

train_dataset, test_dataset = (
    NERDataset(frame, tokenizer, MAX_LEN) for frame in (train_data, test_data)
)


FULL Dataset: (4378, 3)
TRAIN Dataset: (3502, 3)
TEST Dataset: (876, 3)


In [25]:
# Inspect the first encoded training example (dict of fixed-length tensors).
train_dataset[0]

{'input_ids': tensor([  101, 36408,  8745,  1665,  3575,  3773,  1105,   172,  1477, 13408,
          3773,   119,   102,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,   

In [26]:
# Token ids of the first example; the trailing zeros are the padding positions.
train_dataset[0]['input_ids']

tensor([  101, 36408,  8745,  1665,  3575,  3773,  1105,   172,  1477, 13408,
         3773,   119,   102,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0])

In [27]:
# Show the first 30 word pieces of example 0 side by side with their tags.
first_tokens = tokenizer.convert_ids_to_tokens(train_dataset[0]["input_ids"][:30])
first_label_ids = train_dataset[0]["labels"][:30]
for token, label in zip(first_tokens, first_label_ids):
  print('{0:10}  {1}'.format(token, id2label[label.item()]))

[CLS]       O
ano         O
##xi        O
##c         O
brain       O
injury      O
and         O
c           B
##2         B
cord        I
injury      I
.           O
[SEP]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O


In [28]:
import os
from typing import List, Tuple

import torch
from torch import nn
from transformers import AutoModel

# Large constant (10e5 == 1_000_000.0).
# NOTE(review): not referenced by any code visible in this notebook.
LOG_INF = 10e5


class BertCrf(nn.Module):
    """BERT encoder with a linear tag projection and an optional linear-chain CRF.

    With ``use_crf=True`` the training loss is the mean negative CRF
    log-likelihood and decoding uses Viterbi; otherwise the model uses
    token-level cross-entropy and arg-max decoding.

    The private CRF helpers work on time-major tensors: ``features`` is
    ``(seq_len, batch, num_labels)`` and ``labels`` / ``mask`` are
    ``(seq_len, batch)``. ``transitions[i, j]`` scores moving from tag ``i``
    to tag ``j``. The helpers read position 0 unconditionally and use
    ``mask.sum(dim=0) - 1`` as the index of the last real token, so they
    assume each mask is a run of ones starting at position 0.

    Args:
        num_labels: Number of distinct tags.
        bert_name: Name or path passed to ``AutoModel.from_pretrained``.
        dropout: Dropout probability applied to the encoder output.
        use_crf: Whether to train and decode with the CRF layer.
    """

    def __init__(
        self,
        num_labels: int,
        bert_name: str,
        dropout: float = 0.2,
        use_crf: bool = True,
    ):
        super().__init__()
        self.num_labels = num_labels
        self.use_crf = use_crf
        self.cross_entropy = nn.CrossEntropyLoss()

        self.bert = AutoModel.from_pretrained(bert_name)

        self.dropout = nn.Dropout(dropout)
        self.hidden2label = nn.Linear(self.bert.config.hidden_size, num_labels)

        # CRF parameters: scores for starting in a tag, ending in a tag, and tag-to-tag moves.
        self.start_transitions = nn.Parameter(torch.empty(num_labels))
        self.end_transitions = nn.Parameter(torch.empty(num_labels))
        self.transitions = nn.Parameter(torch.empty(num_labels, num_labels))

        nn.init.uniform_(self.start_transitions, -0.1, 0.1)
        nn.init.uniform_(self.end_transitions, -0.1, 0.1)
        nn.init.uniform_(self.transitions, -0.1, 0.1)

    def _compute_log_denominator(self, features: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """Return the log-partition function (log-sum over all tag paths), shape ``(batch,)``."""
        seq_len = features.shape[0]

        # (batch, num_labels): log-score of every path prefix ending in each tag
        log_score_over_all_seq = self.start_transitions + features[0]

        for i in range(1, seq_len):
            # broadcast to (batch, prev_tag, next_tag), then sum out prev_tag in log space
            next_log_score_over_all_seq = torch.logsumexp(
                log_score_over_all_seq.unsqueeze(2) + self.transitions + features[i].unsqueeze(1),
                dim=1,
            )
            # padded steps keep the previous score untouched
            log_score_over_all_seq = torch.where(
                mask[i].unsqueeze(1),
                next_log_score_over_all_seq,
                log_score_over_all_seq,
            )
        log_score_over_all_seq += self.end_transitions
        return torch.logsumexp(log_score_over_all_seq, dim=1)

    def _compute_log_numerator(self, features: torch.Tensor, labels: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """Return the unnormalised log-score of the gold tag path, shape ``(batch,)``."""
        seq_len, bs, _ = features.shape
        # build the batch index on the same device as the tensors it indexes
        batch_idx = torch.arange(bs, device=features.device)

        score_over_seq = self.start_transitions[labels[0]] + features[0, batch_idx, labels[0]]

        for i in range(1, seq_len):
            # transition + emission for step i, zeroed on padded steps
            score_over_seq += (
                self.transitions[labels[i - 1], labels[i]] + features[i, batch_idx, labels[i]]
            ) * mask[i]
        seq_lens = mask.sum(dim=0) - 1  # index of the last real token per sequence
        last_tags = labels[seq_lens.long(), batch_idx]
        score_over_seq += self.end_transitions[last_tags]
        return score_over_seq

    def get_bert_features(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Encode a batch; return ``(tag_scores, hidden)`` where ``hidden`` is the encoder output after dropout."""
        hidden = self.bert(input_ids, attention_mask=attention_mask)["last_hidden_state"]
        hidden = self.dropout(hidden)
        return self.hidden2label(hidden), hidden

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        labels: torch.Tensor,
    ) -> torch.Tensor:
        """Return the scalar training loss for a batch.

        CRF mode: mean over the batch of ``log Z - score(gold path)``.
        Otherwise: cross-entropy with masked positions set to the ignore index -100.
        """
        features, _ = self.get_bert_features(input_ids=input_ids, attention_mask=attention_mask)
        attention_mask = attention_mask.bool()

        if self.use_crf:
            # the CRF helpers expect time-major tensors
            features = torch.swapaxes(features, 0, 1)
            attention_mask = torch.swapaxes(attention_mask, 0, 1)
            labels = torch.swapaxes(labels, 0, 1)

            log_numerator = self._compute_log_numerator(features=features, labels=labels, mask=attention_mask)
            log_denominator = self._compute_log_denominator(features=features, mask=attention_mask)

            return torch.mean(log_denominator - log_numerator)
        else:
            return self.cross_entropy(
                features.flatten(end_dim=1),
                torch.where(attention_mask.bool(), labels, -100).flatten(end_dim=1),
            )

    def _viterbi_decode(self, features: torch.Tensor, mask: torch.Tensor) -> List[List[int]]:
        """Return the best tag path per sequence; each path covers only the unmasked steps."""
        seq_len, bs, _ = features.shape

        log_score_over_all_seq = self.start_transitions + features[0]

        # backpointers[i, b, t]: best previous tag when sequence b is in tag t at step i
        backpointers = torch.empty_like(features)

        for i in range(1, seq_len):
            next_log_score_over_all_seq = (
                log_score_over_all_seq.unsqueeze(2) + self.transitions + features[i].unsqueeze(1)
            )

            next_log_score_over_all_seq, indices = next_log_score_over_all_seq.max(dim=1)

            log_score_over_all_seq = torch.where(
                mask[i].unsqueeze(1),
                next_log_score_over_all_seq,
                log_score_over_all_seq,
            )
            backpointers[i] = indices

        # step 0 has no predecessor, so its slot is dropped
        backpointers = backpointers[1:].int()

        log_score_over_all_seq += self.end_transitions
        seq_lens = mask.sum(dim=0) - 1

        best_paths = []
        for seq_ind in range(bs):
            best_label_id = torch.argmax(log_score_over_all_seq[seq_ind]).item()
            best_path = [best_label_id]

            # walk back from the last real step; pointers of padded steps are never read
            for backpointer in reversed(backpointers[: seq_lens[seq_ind]]):
                best_path.append(backpointer[seq_ind][best_path[-1]].item())

            best_path.reverse()
            best_paths.append(best_path)

        return best_paths

    def decode(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> List[List[int]]:
        """Predict tag ids for a batch; one list per sequence, restricted to unmasked positions."""
        features, _ = self.get_bert_features(input_ids=input_ids, attention_mask=attention_mask)
        attention_mask = attention_mask.bool()

        if self.use_crf:
            features = torch.swapaxes(features, 0, 1)
            mask = torch.swapaxes(attention_mask, 0, 1)
            return self._viterbi_decode(features=features, mask=mask)
        else:
            labels = torch.argmax(features, dim=2)
            predictions = []
            for i in range(len(labels)):
                predictions.append(labels[i][attention_mask[i]].tolist())
            return predictions

    def save_to(self, path: str):
        """Write the state dict to ``path``, creating the parent directory if there is one."""
        directory = os.path.dirname(path)
        # A bare file name has an empty dirname, and os.makedirs("") raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        torch.save(self.state_dict(), path)

    def load_from(self, path: str):
        """Load a state dict written by ``save_to`` into this model."""
        # Deserialise onto the CPU so a checkpoint saved on a GPU also loads on a
        # CPU-only machine; load_state_dict copies the values into the existing parameters.
        self.load_state_dict(torch.load(path, map_location="cpu"))


In [30]:
from sklearn.metrics import f1_score
from torch.optim import Adam
from tqdm import tqdm
from typing import Dict, List, Optional


In [None]:

def train_ner(
    num_labels: int,
    bert_name: str,
    train_tokenized_texts_path: str,
    test_tokenized_texts_path: str,
    dropout: float,
    batch_size: int,
    epochs: int,
    log_every: int,
    lr_bert: float,
    lr_new_layers: float,
    use_crf: bool = True,
    save_to: Optional[str] = None,
    device="cuda" if torch.cuda.is_available() else "cpu",
):
    """Fine-tune a ``BertCrf`` tagger and score it on the test set every ``log_every`` steps.

    Args:
        num_labels: Number of tags predicted by the model.
        bert_name: Encoder name or path forwarded to ``BertCrf``.
        train_tokenized_texts_path: Argument forwarded to ``NERDataset`` for the training split.
        test_tokenized_texts_path: Argument forwarded to ``NERDataset`` for the test split.
        dropout: Dropout probability forwarded to ``BertCrf``.
        batch_size: Batch size of both data loaders.
        epochs: Number of passes over the training loader.
        log_every: Evaluate on the test loader every this many optimisation steps.
        lr_bert: Learning rate of the encoder parameters.
        lr_new_layers: Learning rate of the projection and CRF parameters.
        use_crf: Forwarded to ``BertCrf``.
        save_to: If given, path the final weights are written to.
        device: Device the model and batches are moved to (default fixed at definition time).

    NOTE(review): this function calls ``NERDataset(path)`` and
    ``dataset.collate_function``, but the ``NERDataset`` defined in this notebook
    takes ``(dataframe, tokenizer, max_len)`` and has no ``collate_function``;
    ``dict_to_device`` and ``draw_plots`` are not defined in the visible notebook
    either. It appears to target a different dataset class — confirm before use.
    """
    model = BertCrf(num_labels, bert_name, dropout=dropout, use_crf=use_crf)
    model = model.to(device)
    model.train()

    train_dataset = NERDataset(train_tokenized_texts_path)
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=train_dataset.collate_function,
    )

    test_dataset = NERDataset(test_tokenized_texts_path)
    test_data_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=test_dataset.collate_function,
    )

    # The freshly initialised layers use lr_new_layers; the pretrained encoder gets lr_bert.
    optimizer = Adam(
        [
            {"params": model.start_transitions},
            {"params": model.end_transitions},
            {"params": model.hidden2label.parameters()},
            {"params": model.transitions},
            {"params": model.bert.parameters(), "lr": lr_bert},
        ],
        lr=lr_new_layers,
    )

    loss_history = []
    f1 = []

    step = 0
    for epoch in range(1, epochs + 1):
        for batch in tqdm(train_data_loader):
            step += 1

            optimizer.zero_grad()

            batch = dict_to_device(batch, device)

            loss = model(**batch)

            loss.backward()
            optimizer.step()

            loss_history.append(loss.item())

            if step % log_every == 0:
                model.eval()
                predictions = []
                ground_truth = []
                with torch.no_grad():
                    for eval_batch in test_data_loader:
                        # decode() takes no labels, so they are split off first
                        labels = eval_batch.pop("labels")
                        # pass the device explicitly, exactly as the training step does
                        eval_batch = dict_to_device(eval_batch, device)

                        prediction = model.decode(**eval_batch)

                        flatten_prediction = [item for sublist in prediction for item in sublist]
                        # labels stayed where the loader produced them; bring the mask to
                        # the same device before selecting the unpadded positions
                        active = eval_batch["attention_mask"].bool().to(labels.device)
                        flatten_labels = torch.masked_select(labels, active).tolist()

                        predictions.extend(flatten_prediction)
                        ground_truth.extend(flatten_labels)
                f1_micro = f1_score(ground_truth, predictions, average="micro")
                f1.append(f1_micro)
                model.train()

            # NOTE(review): plotting and the epoch message run on every step — confirm intended.
            draw_plots(loss_history, f1)
            print(f"Epoch {epoch}/{epochs}")
    if save_to is not None:
        model.save_to(save_to)



##############################################################

In [None]:
# DataLoader settings: only the training split is shuffled; num_workers=0 loads in the main process.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
test_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)

# Wrap both datasets in batching loaders
train_loader = DataLoader(train_dataset, **train_params)
test_loader = DataLoader(test_dataset, **test_params)

In [None]:

# Define model: BioBERT encoder with a token-classification head sized to the tag set
model = BertForTokenClassification.from_pretrained("dmis-lab/biobert-large-cased-v1.1", num_labels=num_classes)

# Define optimizer
# torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and weight_decay are
# pinned to the defaults of the transformers class (1e-6 and 0.0) so the update rule is unchanged.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-05, eps=1e-06, weight_decay=0.0)

In [None]:
import torch
# Move the model to the GPU when one is available (no training happens in this cell).
# Note: this rebinds `device` from the earlier plain string to a torch.device object.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
# Sanity check before training: one forward pass on the first training example,
# with a leading batch dimension of 1 added to every tensor.
first_example = train_dataset[0]
ids, mask, targets = (
    first_example[field].unsqueeze(0).to(device)
    for field in ("input_ids", "attention_mask", "labels")
)
outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
initial_loss = outputs[0]
initial_loss

In [None]:
# Second element of the model output (the first was used as the loss above); display its shape.
tr_logits = outputs[1]
tr_logits.shape

In [None]:
# Upper bound on the gradient norm, passed to clip_grad_norm_ as max_norm in train().
MAX_GRAD_NORM = 10

In [None]:
# Defining the training function on the 80% of the dataset for tuning the bert model
def train(epoch):
    """Run one training epoch over ``train_loader`` and print loss and accuracy.

    Uses the notebook globals ``model``, ``optimizer``, ``train_loader``,
    ``device`` and ``MAX_GRAD_NORM``.

    Args:
        epoch: Index of the current epoch (not used inside the function).
    """
    tr_loss, tr_accuracy = 0, 0
    nb_tr_steps = 0
    # put model in training mode
    model.train()

    for idx, batch in enumerate(train_loader):

        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        targets = batch['labels'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss += loss.item()

        nb_tr_steps += 1

        if idx % 100==0:
            loss_step = tr_loss/nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")

        # compute training accuracy
        flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        # only unpadded positions count (this still includes the [CLS] and [SEP] tokens)
        active_accuracy = mask.view(-1) == 1 # shape (batch_size * seq_len,)
        active_targets = torch.masked_select(flattened_targets, active_accuracy)
        active_predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tmp_tr_accuracy = accuracy_score(active_targets.cpu().numpy(), active_predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy

        # backward pass
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping has to sit between backward() and step(): it rescales the
        # gradients that were just computed. Clipping before zero_grad()/backward()
        # only touches stale gradients that are discarded right away.
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=MAX_GRAD_NORM
        )

        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training accuracy epoch: {tr_accuracy}")

In [None]:
# Fine-tune for a fixed number of epochs, announcing each one with a 1-based counter.
EPOCHS = 10
for epoch in range(EPOCHS):
    epoch_number = epoch + 1
    print(f"Training epoch: {epoch_number}")
    train(epoch)

In [None]:
from seqeval.metrics import classification_report as seqeval_classification_report

def valid(model, testing_loader):
    """Evaluate ``model`` on ``testing_loader`` and print a seqeval report.

    Only positions with ``attention_mask == 1`` are scored, so the padding
    (always labelled "O" by the dataset) no longer inflates the metrics.
    seqeval receives one tag sequence per sentence, which is the input it
    needs to score spans.

    Args:
        model: Token-classification model returning ``.loss`` and ``.logits``.
        testing_loader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.

    Returns:
        ``(labels, predictions)``: flat lists with one single-item list
        ``[tag]`` per scored token — the same shape as before, minus padding.
    """
    # put model in evaluation mode
    model.eval()

    eval_loss = 0
    nb_eval_steps = 0
    gold_sequences, pred_sequences = [], []  # one tag list per sentence

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):

            ids = batch['input_ids'].to(device, dtype=torch.long)
            mask = batch['attention_mask'].to(device, dtype=torch.long)
            targets = batch['labels'].to(device, dtype=torch.long)

            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            loss, eval_logits = outputs.loss, outputs.logits

            eval_loss += loss.item()

            nb_eval_steps += 1

            # keep only the unpadded positions of every sentence
            batch_preds = torch.argmax(eval_logits, dim=-1)
            active = mask == 1
            for row_targets, row_preds, row_active in zip(targets, batch_preds, active):
                gold_sequences.append([id2label[id_] for id_ in row_targets[row_active].tolist()])
                pred_sequences.append([id2label[id_] for id_ in row_preds[row_active].tolist()])

    eval_loss = eval_loss / nb_eval_steps

    # flat per-token view, kept for callers that use the return value
    labels = [[tag] for sequence in gold_sequences for tag in sequence]
    predictions = [[tag] for sequence in pred_sequences for tag in sequence]

    print(f"Validation Loss: {eval_loss}")
    print("Classification Report:")
    print(seqeval_classification_report(gold_sequences, pred_sequences))

    return labels, predictions

# Score the fine-tuned model on the held-out split; keep the per-token tags for inspection.
labels, predictions = valid(model, test_loader)


In [None]:
from seqeval.metrics import classification_report as seqeval_classification_report
from sklearn.metrics import classification_report as sklearn_classification_report

def valid(model, testing_loader):
    """Evaluate ``model`` on ``testing_loader`` and print seqeval and sklearn reports.

    Only positions with ``attention_mask == 1`` are scored. seqeval receives
    one tag sequence per sentence; sklearn receives the flat token-level tags.
    No tags are added to the sequences, so both reports describe the tokens
    that were actually predicted.

    NOTE: the notebook defines ``valid`` twice; this later definition replaces
    the earlier one once the cell has run.

    Args:
        model: Token-classification model returning ``.loss`` and ``.logits``.
        testing_loader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.

    Returns:
        ``(labels, predictions)``: flat lists with one single-item list
        ``[tag]`` per scored token.
    """
    # put model in evaluation mode
    model.eval()

    eval_loss = 0
    nb_eval_steps = 0
    gold_sequences, pred_sequences = [], []  # one tag list per sentence

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):

            ids = batch['input_ids'].to(device, dtype=torch.long)
            mask = batch['attention_mask'].to(device, dtype=torch.long)
            targets = batch['labels'].to(device, dtype=torch.long)

            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            loss, eval_logits = outputs.loss, outputs.logits

            eval_loss += loss.item()

            nb_eval_steps += 1

            # keep only the unpadded positions of every sentence
            batch_preds = torch.argmax(eval_logits, dim=-1)
            active = mask == 1
            for row_targets, row_preds, row_active in zip(targets, batch_preds, active):
                gold_sequences.append([id2label[id_] for id_ in row_targets[row_active].tolist()])
                pred_sequences.append([id2label[id_] for id_ in row_preds[row_active].tolist()])

    eval_loss = eval_loss / nb_eval_steps

    # token-level views: flat tags for sklearn, single-item lists for the return value
    flat_gold = [tag for sequence in gold_sequences for tag in sequence]
    flat_pred = [tag for sequence in pred_sequences for tag in sequence]
    labels = [[tag] for tag in flat_gold]
    predictions = [[tag] for tag in flat_pred]

    print(f"Validation Loss: {eval_loss}")

    print("Classification Report (SeqEval):")
    print(seqeval_classification_report(gold_sequences, pred_sequences))

    print("Classification Report (Sklearn):")
    print(sklearn_classification_report(flat_gold, flat_pred))

    return labels, predictions

# Re-run the evaluation with the valid() defined in this cell (prints both reports).
labels, predictions = valid(model, test_loader)


#### **Save the Model**

In [None]:
# Persist only the weights (state dict); the file is written to the current working directory.
torch.save(model.state_dict(), 'bert_model.pth')


In [None]:
#Get predicted labels with corresponding words
def get_predicted_evidence(model, dataloader):
    """Collect every non-"O" prediction of ``model`` together with its word piece.

    Uses the notebook globals ``tokenizer``, ``id2label`` and ``device``.

    Positions whose attention mask is 0 (padding) are skipped. ``start`` is
    the running total of ``len(tokenizer.tokenize(tok))`` over the preceding
    tokens of the sentence and ``end`` adds the width of the token itself.
    These are the same values as before, now computed in one pass instead of
    re-tokenizing the whole prefix for every hit.

    Args:
        model: Token-classification model returning ``.logits``.
        dataloader: Iterable of batches with ``input_ids`` and ``attention_mask``.

    Returns:
        One list per sentence of ``{'word', 'label', 'start', 'end'}`` dicts.
    """
    all_pred_evidence = []

    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Forward pass through the model
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Get the predicted labels
            predicted_labels = torch.argmax(logits, dim=-1)

            # Convert predicted labels to evidence format
            for i in range(len(predicted_labels)):
                sentence_tokens = tokenizer.convert_ids_to_tokens(input_ids[i])
                sentence_labels = [id2label[label_id] for label_id in predicted_labels[i].tolist()]
                is_real = attention_mask[i].tolist()

                # Collect evidence for each sentence
                sentence_evidence = []
                offset = 0  # running piece count of all tokens before the current one
                for token, label, real in zip(sentence_tokens, sentence_labels, is_real):
                    width = len(tokenizer.tokenize(token))
                    if real and label != 'O':
                        sentence_evidence.append({
                            'word': token,
                            'label': label,
                            'start': offset,
                            'end': offset + width
                        })
                    offset += width
                all_pred_evidence.append(sentence_evidence)

    return all_pred_evidence

In [None]:
# Get predicted evidence: one list of non-"O" predictions per test sentence
all_pred_evidence = get_predicted_evidence(model, test_loader)

In [None]:
# Flatten the per-sentence evidence into one table with a row per predicted token
flat_evidence = [entry for sentence in all_pred_evidence for entry in sentence]
pred_df = pd.DataFrame({
    'Word': [entry['word'] for entry in flat_evidence],
    'Predicted_Label': [entry['label'] for entry in flat_evidence]
})

# Write the table to CSV without the index column
pred_df.to_csv('predicted_evidence.csv', index=False)

#### **Predicted Labels** 

In [None]:
def get_predicted_labels(model, dataloader):
    """Return the non-"O" predictions of ``model`` as span dicts, one list per sentence.

    Uses the notebook globals ``tokenizer``, ``id2label`` and ``device``.

    Each sentence gets its own list. Grouping by batch would merge the spans
    of several sentences into one entry and leave the result shorter than the
    number of sentences. Positions whose attention mask is 0 (padding) are
    skipped. ``start`` is the running total of ``len(tokenizer.tokenize(tok))``
    over the preceding tokens of the sentence and ``end`` adds the width of
    the token itself.

    Args:
        model: Token-classification model returning ``.logits``.
        dataloader: Iterable of batches with ``input_ids`` and ``attention_mask``.

    Returns:
        One list per sentence of ``{'label', 'start', 'end'}`` dicts.
    """
    all_pred_labels = []

    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Forward pass through the model
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Get the predicted labels
            predicted_labels = torch.argmax(logits, dim=-1)

            for i in range(len(predicted_labels)):
                sentence_tokens = tokenizer.convert_ids_to_tokens(input_ids[i])
                sentence_labels = [id2label[label_id] for label_id in predicted_labels[i].tolist()]
                is_real = attention_mask[i].tolist()

                # Convert the sentence-level predictions to the required format
                sentence_pred_labels = []
                offset = 0  # running piece count of all tokens before the current one
                for token, label, real in zip(sentence_tokens, sentence_labels, is_real):
                    width = len(tokenizer.tokenize(token))
                    if real and label != 'O':
                        sentence_pred_labels.append({
                            'label': label,
                            'start': offset,
                            'end': offset + width
                        })
                    offset += width
                all_pred_labels.append(sentence_pred_labels)

    return all_pred_labels

In [None]:
# Predicted span dicts for the test loader.
all_pred_labels = get_predicted_labels(model, test_loader)

#### **NERVALUATE**

In [None]:
import os
import json
import pandas as pd
from nervaluate import Evaluator
import torch

In [None]:
# Load the CSV file that supplies the gold labels for the nervaluate comparison.
# NOTE(review): this rebinds `data`, which earlier cells use for the training frame.
# NOTE(review): this is a different file from the one test_loader was built from
# (work_sentence.csv) — confirm its rows correspond to the sentences being predicted.
# NOTE(review): absolute, user-specific path.
data = pd.read_csv('/home/chudeo/project/33k_sentence.csv')

In [None]:
# Build one list of gold span dicts per row, keeping only the non-"O" tags.
# NOTE(review): every span gets start=0 and end=0, so the gold spans carry no
# position information — confirm before trusting position-based metrics.
all_true_labels = []
for _, row in data.iterrows():
    sentence = row['words']
    gold_spans = [
        {'label': tag, 'start': 0, 'end': 0}
        for tag in row['labels'].split(',')
        if tag != 'O'
    ]
    all_true_labels.append(gold_spans)

In [None]:
# Get the predicted labels from the model
# Reuse get_predicted_labels instead of repeating its loop inline: the helper switches
# the model to eval mode and runs under torch.no_grad(), so no autograd graph is built
# for each batch, and the span-building logic lives in one place only.
all_pred_labels = get_predicted_labels(model, test_loader)

In [None]:
# Pad whichever list is shorter with empty span lists so both end up the same length.
# NOTE(review): padding hides a mismatch between the number of gold and predicted
# entries rather than explaining it — confirm both lists describe the same sentences.
length_gap = len(all_true_labels) - len(all_pred_labels)
if length_gap > 0:
    all_pred_labels.extend([] for _ in range(length_gap))
elif length_gap < 0:
    all_true_labels.extend([] for _ in range(-length_gap))


In [None]:
# Collect every distinct tag that occurs in the gold spans (list order is arbitrary)
unique_labels = list({span['label'] for spans in all_true_labels for span in spans})


In [None]:
# Pass the unique labels as the tags parameter
# NOTE(review): both inputs are lists of {'label', 'start', 'end'} dicts — confirm this is
# the input format the installed nervaluate Evaluator expects by default.
evaluator = Evaluator(all_true_labels, all_pred_labels, tags=unique_labels)

# Returns overall metrics and metrics for each tag
results, results_per_tag = evaluator.evaluate()

print(results)

In [None]:
# Per-tag breakdown returned by evaluator.evaluate().
print(results_per_tag)