# Bangla Named Entity Recognition

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install accelerate
!apt install git-lfs
!pip install cloud-tpu-client==0.10 torch==1.9.0 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl


In [None]:
import pandas as pd
import json
from datasets import Dataset, DatasetDict, ClassLabel, Sequence, Features, Value
from sklearn.model_selection import train_test_split

# Dataset Loading

In [None]:

# Location of the annotated corpus inside the Kaggle input mount.
filepath = '/kaggle/input/bner-6k/data_storage.json'

# Decode explicitly as UTF-8 so the Bangla text is read the same way on every
# platform instead of depending on the locale's default encoding.
with open(filepath, 'r', encoding='utf-8') as file:
    # Parse the whole JSON document into Python objects.
    data = json.load(file)


In [None]:
data[0]

In [None]:
# BIO tag inventory; the position of each name in this list is its class index.
ner_labels = [
    'O',
    'B-PER', 'I-PER',
    'B-ORG', 'I-ORG',
    'B-LOC', 'I-LOC',
    'B-MISC', 'I-MISC',
]
ner_feature = ClassLabel(names=ner_labels)

# Lookup from the 1-based string codes '1'..'9' to the tag names above
# ('1' is 'O', '9' is 'I-MISC').
ner_mapping = {str(code): name for code, name in enumerate(ner_labels, start=1)}

# Function to process tokens and ner_tags

In [None]:

def process_data(data):
    """Normalise the ``tokens`` and ``ner_tag`` fields of every entry in place.

    A field stored as the string representation of a list (for example
    ``"['a', 'b']"``) is parsed into a real Python object. ``ner_tag`` is then
    always replaced by a fresh list holding the same values; the tag values
    themselves are not remapped or validated.

    Args:
        data: Iterable of dict entries that each have ``tokens`` and ``ner_tag`` keys.

    Returns:
        The same ``data`` object, with its entries modified in place.

    Raises:
        ValueError, SyntaxError: If a string field is not a valid Python literal.
    """
    # Local import keeps this cell runnable on its own.
    import ast

    for entry in data:
        for field in ('tokens', 'ner_tag'):
            if isinstance(entry[field], str):
                # literal_eval only accepts Python literals, so a crafted string in the
                # data file cannot execute arbitrary code the way eval() would.
                entry[field] = ast.literal_eval(entry[field])

        # Materialise the tags as a plain list (values are kept as they are).
        entry['ner_tag'] = list(entry['ner_tag'])

    return data


# Process the data
data = process_data(data)

In [None]:
# 80/10/10 split: hold out 20% first, then halve the hold-out into validation and test.
train_data, temp_data = train_test_split(data, test_size=0.2, random_state=42)
validation_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# One pandas DataFrame per split.
train_df, validation_df, test_df = (
    pd.DataFrame(split) for split in (train_data, validation_data, test_data)
)

In [None]:
# Keep only the three columns used downstream, store ids as int32 and force every
# token to be a str. The 'ner_tag' column is carried over unchanged.
train_df = train_df[['id', 'tokens', 'ner_tag']].assign(
    id=lambda frame: frame['id'].astype('int32'),
    tokens=lambda frame: frame['tokens'].apply(lambda words: [str(word) for word in words]),
)

In [None]:
# Same column selection and dtype normalisation for the validation split ...
validation_df = validation_df[['id', 'tokens', 'ner_tag']].assign(
    id=lambda frame: frame['id'].astype('int32'),
    tokens=lambda frame: frame['tokens'].apply(lambda words: [str(word) for word in words]),
)

# ... and for the test split.
test_df = test_df[['id', 'tokens', 'ner_tag']].assign(
    id=lambda frame: frame['id'].astype('int32'),
    tokens=lambda frame: frame['tokens'].apply(lambda words: [str(word) for word in words]),
)

# Convert DataFrames to Datasets

In [None]:

# Schema shared by all splits; tags are stored as strings at this stage.
features = Features({
    'id': Value('int32'),
    'tokens': Sequence(Value('string')),
    'ner_tag': Sequence(Value('string')),
})

# Build one Dataset per split from its DataFrame, all with the schema above.
train_dataset, validation_dataset, test_dataset = (
    Dataset.from_pandas(frame, features=features)
    for frame in (train_df, validation_df, test_df)
)

# Bundle the three splits under their usual names.
raw_datasets = DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset,
    'test': test_dataset,
})

# Show the feature type of the tag column as a sanity check.
print(raw_datasets["train"].features["ner_tag"])


In [None]:
raw_datasets

In [None]:
pip install git+https://github.com/csebuetnlp/normalizer

# Dataset processing for BanglaBERT

In [None]:
# Load the pretrained BanglaBERT model and its tokenizer.
from transformers import AutoModelForPreTraining, AutoTokenizer
# NOTE(review): `normalize` is not called in any cell visible here — confirm it is still needed.
from normalizer import normalize
import torch

# NOTE(review): this pre-training model object is not used by the visible training code,
# which loads a fresh AutoModelForTokenClassification for every fold — confirm this
# load is still required.
model = AutoModelForPreTraining.from_pretrained("csebuetnlp/banglabert")
# Module-level tokenizer read by the BanglaBERT tokenize_and_align_labels cell.
tokenizer = AutoTokenizer.from_pretrained("csebuetnlp/banglabert")
model_checkpoint="csebuetnlp/banglabert"


In [None]:
data=raw_datasets

In [None]:
tokenizer

# Function to align labels with tokens

In [None]:
def align_labels_with_tokens(labels, word_ids):
    """Expand word-level labels to one label per sub-token.

    Args:
        labels: Sequence of word-level label ids (ints or strings convertible with ``int``).
        word_ids: For each sub-token, the index of the word it belongs to, or ``None``.

    Returns:
        list[int]: One entry per element of ``word_ids``. The first sub-token of a word
        gets the word's label. A following sub-token of the same word gets the same
        label, except that an odd label id is bumped by one. ``-100`` is used for
        ``None`` or out-of-range word ids and for labels that cannot be converted
        to ``int``.
    """
    def _label_for(word_id, continuation):
        # No word, or a word index outside the label list: nothing to supervise.
        if word_id is None or not (0 <= word_id < len(labels)):
            return -100
        try:
            value = int(labels[word_id])
        except (ValueError, TypeError):
            return -100
        # Inside a word, an odd id is replaced by the next (even) id.
        if continuation and value % 2 == 1:
            value += 1
        return value

    aligned = []
    previous = None
    for word_id in word_ids:
        continues_previous = word_id == previous
        previous = word_id
        aligned.append(_label_for(word_id, continues_previous))

    return aligned


In [None]:

def tokenize_and_align_labels(examples):
    """Tokenize a batch of pre-split sentences and attach sub-token labels.

    Reads the module-level ``tokenizer`` and delegates the per-sentence label
    expansion to ``align_labels_with_tokens``.

    Args:
        examples: Batch with a ``'tokens'`` column (list of word lists) and a
            ``'ner_tag'`` column (list of per-word label lists).

    Returns:
        The tokenizer output for the batch with an added ``'labels'`` entry holding
        one aligned label list per sentence.
    """
    tokenized_inputs = tokenizer(examples['tokens'], truncation=True, is_split_into_words=True)

    tokenized_inputs['labels'] = [
        align_labels_with_tokens(sentence_tags, tokenized_inputs.word_ids(row))
        for row, sentence_tags in enumerate(examples['ner_tag'])
    ]

    return tokenized_inputs

In [None]:
tokenized_datasets = data.map(tokenize_and_align_labels, batched=True, remove_columns=data['train'].column_names)

In [None]:
tokenized_datasets

# Dataset Processing for RemBERT

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification

model_name = "google/rembert"
# Rebinds the module-level `tokenizer` (bound to BanglaBERT's tokenizer in the previous
# section) to RemBERT's tokenizer; every cell run after this one that reads `tokenizer`
# gets the RemBERT tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# RemBERT with a token-classification head of 9 output labels.
model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=9)
model_checkpoint2="google/rembert"

In [None]:
data=raw_datasets

In [None]:
def align_labels_with_tokens(labels, word_ids):
    """Map word-level labels onto sub-tokens (same logic as the BanglaBERT section).

    NOTE(review): this re-defines, with identical behaviour, the function of the same
    name from the BanglaBERT section; the RemBERT tokenization cells visible below do
    not call it — confirm whether the duplicate is needed.

    Args:
        labels: Word-level label ids (ints or strings convertible with ``int``).
        word_ids: Word index of every sub-token, ``None`` where there is no word.

    Returns:
        list[int]: Label per sub-token. ``-100`` marks ``None`` or out-of-range word
        ids and unconvertible labels; a repeated word id keeps the word's label, with
        odd ids increased by one.
    """
    out = []
    prev = None

    for wid in word_ids:
        same_word = wid == prev
        prev = wid

        # Positions that do not point at a labelled word are ignored by the loss.
        if wid is None or wid >= len(labels) or wid < 0:
            out.append(-100)
            continue

        try:
            tag = int(labels[wid])
        except (ValueError, TypeError):
            tag = -100  # label could not be converted
        else:
            # Later sub-tokens of a word: odd ids move to the following even id.
            if same_word and tag % 2 == 1:
                tag += 1

        out.append(tag)

    return out


In [None]:
def tokenize_and_align_labels(examples):
    """Tokenize a batch to a fixed length of 128 and label first sub-tokens only.

    Reads the module-level ``tokenizer``. Only the first sub-token of each word
    receives the word's label; every other position (no word, or a later sub-token
    of the same word) is set to ``-100``.

    NOTE(review): the next cell defines a function with this same name, which
    replaces this one when the notebook is run top to bottom.

    Args:
        examples: Batch with ``"tokens"`` (list of word lists) and ``"ner_tag"``
            (list of per-word label lists).

    Returns:
        The tokenizer output with an added ``"labels"`` list per sentence.
    """
    tokenized_inputs = tokenizer(
        examples["tokens"],
        is_split_into_words=True,
        truncation=True,
        padding="max_length",  # every sequence is padded to the same size
        max_length=128,
        return_tensors=None,   # keep plain Python lists
    )

    def _first_subtoken_labels(word_ids, labels):
        # Label a position only when it starts a new word.
        out = []
        prev = None
        for idx in word_ids:
            starts_word = idx is not None and idx != prev
            out.append(labels[idx] if starts_word else -100)
            prev = idx
        return out

    tokenized_inputs["labels"] = [
        _first_subtoken_labels(tokenized_inputs.word_ids(batch_index=row), labels)
        for row, labels in enumerate(examples["ner_tag"])
    ]
    return tokenized_inputs


In [None]:
def tokenize_and_align_labels(examples, max_length=128):
    """Tokenize a batch to a fixed length and label the first sub-token of each word.

    Reads the module-level ``tokenizer``. Sequences are truncated and padded to
    ``max_length``; the label list of every sentence is padded with ``-100`` to
    the same length.

    Args:
        examples: Batch with ``"tokens"`` (list of word lists) and ``"ner_tag"``
            (list of per-word label lists).
        max_length: Target sequence length, used both for the tokenizer and for
            label padding so the two can never drift apart. Defaults to 128, the
            value previously hard-coded in both places.

    Returns:
        The tokenizer output with an added ``"labels"`` entry. A position holds the
        word's label when it is the first sub-token of that word, and ``-100``
        otherwise (no word, or a later sub-token of the same word).
    """
    tokenized_inputs = tokenizer(
        examples["tokens"],
        is_split_into_words=True,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors=None,  # keep plain Python lists
    )

    new_labels = []
    for i, labels in enumerate(examples["ner_tag"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        aligned_labels = []
        previous_word_idx = None

        for word_idx in word_ids:
            if word_idx is None or word_idx == previous_word_idx:
                aligned_labels.append(-100)
            else:
                aligned_labels.append(labels[word_idx])
            previous_word_idx = word_idx

        # Pad labels up to max_length (no-op when the list is already that long).
        aligned_labels.extend([-100] * (max_length - len(aligned_labels)))

        new_labels.append(aligned_labels)

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs


In [None]:
def preprocess_labels(examples):
    """Turn digit-only string tags in ``examples['ner_tag']`` into integers.

    Tags that are not strings, or strings that are not made of digits only, are
    left as they are. The batch is updated in place and also returned.
    """
    def _to_int_if_numeric(tag):
        if isinstance(tag, str) and tag.isdigit():
            return int(tag)
        return tag

    examples['ner_tag'] = [
        [_to_int_if_numeric(tag) for tag in sentence_tags]
        for sentence_tags in examples['ner_tag']
    ]
    return examples

# Apply this before calling `map()`
dataset2 = data.map(preprocess_labels, batched=True)


In [None]:
data2=dataset2

In [None]:
tokenized_datasets2 = data2.map(tokenize_and_align_labels, batched=True, remove_columns=data['train'].column_names)

In [None]:
from transformers import DataCollatorForTokenClassification

# `tokenizer` is whatever the module-level name is bound to when this cell runs; in
# top-to-bottom order that is the RemBERT tokenizer loaded in the RemBERT section.
# NOTE(review): this single collator is later passed to both the BanglaBERT and the
# RemBERT Trainer — confirm that padding BanglaBERT batches with it is intended.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [None]:
pip install seqeval

# Metrics Computation Function

In [None]:

from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score

label_names = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

def compute_metrics(p):
    """Compute seqeval accuracy, precision, recall and F1 for a prediction batch.

    Args:
        p: Pair ``(predictions, labels)``. ``predictions`` holds per-class scores and
            is reduced with ``argmax`` over axis 2; ``labels`` holds the gold label
            ids, with ``-100`` marking positions to ignore.

    Returns:
        dict: ``accuracy``, ``precision``, ``recall`` and ``f1``, computed on tag
        names with all ``-100`` positions removed.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    # Gold tag names, skipping ignored positions.
    true_labels = [
        [label_names[gold] for gold in gold_row if gold != -100]
        for gold_row in gold_ids
    ]
    # Predicted tag names at exactly the positions kept above.
    true_preds = [
        [label_names[pred] for pred, gold in zip(pred_row, gold_row) if gold != -100]
        for pred_row, gold_row in zip(predicted_ids, gold_ids)
    ]

    scorers = {
        "accuracy": accuracy_score,
        "precision": precision_score,
        "recall": recall_score,
        "f1": f1_score,
    }
    return {name: scorer(true_labels, true_preds) for name, scorer in scorers.items()}


In [None]:
from datasets import concatenate_datasets

# Merge the three BanglaBERT-tokenized splits into one dataset.
# NOTE: a later cell assigns this result back to tokenized_datasets["train"], so
# re-running this cell after that one would include validation and test twice.
combined_dataset = concatenate_datasets(
    [tokenized_datasets[split] for split in ("train", "validation", "test")]
)

In [None]:
tokenized_datasets["train"] = combined_dataset


In [None]:
# Merge the three RemBERT-tokenized splits into one dataset.
# NOTE: a later cell assigns this result back to tokenized_datasets2["train"], so
# re-running this cell after that one would include validation and test twice.
combined_dataset2 = concatenate_datasets(
    [tokenized_datasets2[split] for split in ("train", "validation", "test")]
)

In [None]:
tokenized_datasets2["train"] = combined_dataset2

In [None]:
# Merge the three untokenized splits, in the same train/validation/test order
# used for the tokenized datasets.
rawdata = concatenate_datasets(
    [raw_datasets[split] for split in ("train", "validation", "test")]
)

In [None]:
len(rawdata)

In [None]:
len(combined_dataset)

In [None]:
len(combined_dataset2)

# Dataset conversion to tensors for CRF training

In [None]:
import torch


# Safe conversion
def safe_int(tag):
    """Return the integer formed by the decimal digits found in ``tag``.

    All non-digit characters are dropped, so ``'#3'`` and ``'3'`` both give ``3``
    (a minus sign is dropped as well). A value that is already an ``int`` is
    returned unchanged.

    Raises:
        ValueError: If ``tag`` is a string without any digit.
    """
    if isinstance(tag, int):
        return int(tag)
    return int(''.join(c for c in tag if c.isdigit()))


def get_dataset_tensors(dataset):
    """Convert each entry's ``ner_tag`` sequence into a one-hot float tensor.

    Args:
        dataset: Iterable of entries with a ``'ner_tag'`` sequence of tag ids
            (anything ``safe_int`` accepts) in the range 0..8.

    Returns:
        list[torch.Tensor]: One ``(seq_len, 9)`` float tensor per usable entry, in
        input order. An entry with an empty tag sequence, or with a tag that cannot
        be parsed or is out of range, is reported on stdout and skipped, so the
        result can be shorter than ``dataset``.
    """
    # Label mapping
    id2label = {
        "0": "O",
        "1": "B-PER",
        "2": "I-PER",
        "3": "B-ORG",
        "4": "I-ORG",
        "5": "B-LOC",
        "6": "I-LOC",
        "7": "B-MISC",
        "8": "I-MISC"
    }
    num_labels = len(id2label)

    data_tensor = []
    for entry in dataset:
        tag_ids = []
        # Explicit loop (not a comprehension) so the offending tag is still in scope
        # when the message is printed.
        for tag in entry['ner_tag']:
            try:
                tag_id = safe_int(tag)
            except ValueError:
                print(f"Invalid tag '{tag}' encountered. Skipping.")
                break
            if not 0 <= tag_id < num_labels:
                print(f"Tag '{tag}' not found in label2onehot mapping.")
                break
            tag_ids.append(tag_id)
        else:
            # Reached only when every tag of the entry was valid.
            if tag_ids:
                one_hot = torch.nn.functional.one_hot(torch.tensor(tag_ids), num_classes=num_labels)
                data_tensor.append(one_hot.float())
            else:
                print("Empty tag sequence encountered. Skipping.")

    return data_tensor


# CRF Layer implementation

In [None]:
import torch
import torch.nn as nn
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Sample input: `data_tensor` is a list of tensors of shape (seq_len, num_classes)
# e.g., data_tensor = [tensor(...), tensor(...), ...]

# Set seed for reproducibility
random.seed(42)
torch.manual_seed(42)

# CRF Layer implementation
class CRFLayer(nn.Module):
    """Linear-chain CRF scoring a single (unbatched) sequence of per-tag scores.

    Holds three learnable, randomly initialised parameters: a tag-to-tag transition
    matrix and a score for starting and for ending in each tag.
    """

    def __init__(self, num_tags):
        super().__init__()
        self.num_tags = num_tags
        # transitions[i, j] is added when tag i is followed by tag j.
        self.transitions = nn.Parameter(torch.randn(num_tags, num_tags))
        self.start_transitions = nn.Parameter(torch.randn(num_tags))
        self.end_transitions = nn.Parameter(torch.randn(num_tags))

    def forward(self, emissions):
        """Return the best tag sequence for ``emissions`` (see ``viterbi_decode``)."""
        return self.viterbi_decode(emissions)

    def compute_log_likelihood(self, emissions, tags):
        """Log-probability of ``tags`` given ``emissions`` of shape (seq_len, num_tags).

        Computed as the score of the given path minus the log of the summed
        exponentiated scores of all paths.
        """
        steps = emissions.shape[0]

        # Score of the supplied tag path: start + emissions + transitions + end.
        path_score = self.start_transitions[tags[0]] + emissions[0, tags[0]]
        for t in range(1, steps):
            path_score = path_score + (self.transitions[tags[t - 1], tags[t]] + emissions[t, tags[t]])
        path_score = path_score + self.end_transitions[tags[-1]]

        # Forward recursion in log space; alphas[j] covers all prefixes ending in tag j.
        alphas = self.start_transitions + emissions[0]
        for t in range(1, steps):
            pairwise = alphas.unsqueeze(1) + self.transitions  # (previous tag, next tag)
            alphas = torch.logsumexp(pairwise, dim=0) + emissions[t]
        log_partition = torch.logsumexp(alphas + self.end_transitions, dim=0)

        return path_score - log_partition

    def viterbi_decode(self, emissions):
        """Return the highest-scoring tag path for ``emissions`` as a list of ints."""
        steps = emissions.shape[0]
        history = []

        running = self.start_transitions + emissions[0]
        for t in range(1, steps):
            candidates = running.unsqueeze(1) + self.transitions  # (previous tag, next tag)
            best_prev_score, best_prev_tag = candidates.max(dim=0)
            running = best_prev_score + emissions[t]
            history.append(best_prev_tag)

        current = torch.argmax(running + self.end_transitions).item()

        # Follow the back-pointers from the last step to the first, then flip the order.
        path = [current]
        for pointers in reversed(history):
            current = pointers[current].item()
            path.append(current)
        path.reverse()
        return path


# CRF Layer Training Function

In [None]:
def train_crflayer(train_data, val_data):
    """Train a 9-tag ``CRFLayer`` with early stopping and save the best weights.

    Every sequence is used both as the emission matrix and, through ``argmax`` over
    its columns, as the source of the gold tags. The parameters with the lowest
    average validation loss are written to ``crf_model.pt``.

    Args:
        train_data: List of ``(seq_len, num_tags)`` tensors used for optimisation.
        val_data: List of tensors of the same form used for early stopping.

    Returns:
        tuple[list[float], list[float]]: Average training and validation loss per
        completed epoch.
    """
    model = CRFLayer(num_tags=9)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    EPOCHS = 80
    patience = 10  # Early stopping patience
    best_val_loss = float('inf')
    best_model = None
    no_improve_epochs = 0

    train_losses = []
    val_losses = []

    for epoch in range(EPOCHS):
        model.train()
        total_train_loss = 0
        for seq in train_data:
            emissions = seq  # shape: (seq_len, num_tags)
            tags = torch.argmax(seq, dim=1)  # true tags as class indices
            loss = -model.compute_log_likelihood(emissions, tags)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

        # Validation
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for seq in val_data:
                emissions = seq
                tags = torch.argmax(seq, dim=1)
                val_loss = -model.compute_log_likelihood(emissions, tags)
                total_val_loss += val_loss.item()

        avg_train_loss = total_train_loss / len(train_data)
        avg_val_loss = total_val_loss / len(val_data)
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        # Early stopping logic
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # state_dict() hands back tensors that share storage with the live
            # parameters, so they must be cloned; otherwise later optimizer steps
            # would silently overwrite this "best" snapshot.
            best_model = {name: value.detach().clone() for name, value in model.state_dict().items()}
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1
            if no_improve_epochs >= patience:
                print("Early stopping triggered.")
                break

    # If no epoch ever improved (e.g. the loss was NaN), fall back to the current
    # weights so the saved file always contains a loadable state dict.
    if best_model is None:
        best_model = model.state_dict()

    # Save best model
    torch.save(best_model, "crf_model.pt")
    print("Best model saved to crf_model.pt")

    return train_losses, val_losses


# Function to get word Logits for particular Model

In [None]:
def get_word_logits(model, tokenizer, tokens):
    """Return one logit vector per word, taken from the word's first sub-token.

    Args:
        model: Callable returning an object with a ``logits`` attribute when called
            with the tokenizer output as keyword arguments.
        tokenizer: Tokenizer whose output provides ``word_ids()``.
        tokens: One sentence given as a list of words.

    Returns:
        torch.Tensor: Stacked logits of shape (num_words, num_labels), in order of
        first appearance of each word.
    """
    encodings = tokenizer(tokens, is_split_into_words=True, return_tensors="pt", padding=True, truncation=True)
    word_ids = encodings.word_ids()

    with torch.no_grad():
        logits = model(**encodings).logits

    # Remember, for every word, the position of its first sub-token only.
    first_position = {}
    for position, word_idx in enumerate(word_ids):
        if word_idx is not None:
            first_position.setdefault(word_idx, position)

    return torch.stack([logits[0, position] for position in first_position.values()])

# Ensembled Prediction with CRF Layer

In [None]:
def ensemble_predict(tokens,rembert_model,rembert_tokenizer,Current_banglabert_model,Current_banglabert_tokenizer,CRFmodel):
    """Predict tags for one sentence from the summed logits of RemBERT and BanglaBERT.

    NOTE(review): BanglaBERT inputs are tokenized with the module-level
    ``base_tokenizer``; the ``Current_banglabert_tokenizer`` argument is accepted but
    not used — confirm this is intended.

    Args:
        tokens: One sentence given as a list of words.
        rembert_model, rembert_tokenizer: RemBERT token classifier and its tokenizer.
        Current_banglabert_model: BanglaBERT token classifier.
        Current_banglabert_tokenizer: Unused (see note above).
        CRFmodel: Callable that turns a logits matrix into a tag sequence.

    Returns:
        tuple: ``(crf_tags, argmax_tags)`` — the sequence returned by ``CRFmodel`` for
        the summed logits, and the plain per-word argmax of the same logits as a list.
    """
    logits_rembert = get_word_logits(rembert_model, rembert_tokenizer, tokens)
    logits_banglabert = get_word_logits(Current_banglabert_model, base_tokenizer, tokens)

    # Keep only the words both models produced logits for.
    shared_len = min(logits_rembert.shape[0], logits_banglabert.shape[0])
    ensemble_logits = logits_rembert[:shared_len] + logits_banglabert[:shared_len]

    # Decode the summed logits with the CRF, without tracking gradients.
    with torch.no_grad():
        en_crf_predicted_sequence = CRFmodel(ensemble_logits)

    just_ensembled = torch.argmax(ensemble_logits, dim=-1).tolist()

    return en_crf_predicted_sequence, just_ensembled


In [None]:

# Tag name lookup that accepts a class id either as an int (0..8) or as its
# string form ("0".."8"); the int keys come first, then the string keys.
id2label = {
    key_type(index): name
    for key_type in (int, str)
    for index, name in enumerate(
        ("O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC")
    )
}

In [None]:



from seqeval.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification


def calculate(testdata,rembert_checkpoint,banglabert_checkpoint):
    """Evaluate the RemBERT + BanglaBERT ensemble, with and without CRF decoding.

    Loads both fine-tuned models (and their tokenizers) from the given checkpoints,
    loads the CRF weights from ``/kaggle/working/crf_model.pt``, predicts every
    example with ``ensemble_predict`` and scores the predictions with seqeval.
    The CRF scores and classification report are also printed.

    Args:
        testdata: Iterable of examples with ``'tokens'`` and ``'ner_tag'`` entries.
        rembert_checkpoint: Path or name of the fine-tuned RemBERT checkpoint.
        banglabert_checkpoint: Path or name of the fine-tuned BanglaBERT checkpoint.

    Returns:
        tuple[dict, dict]: Metrics (accuracy, precision, recall, f1) of the
        CRF-decoded ensemble, followed by those of the plain argmax ensemble.
    """
    # BanglaBERT tokenizer and model
    banglabert_tokenizer = AutoTokenizer.from_pretrained(banglabert_checkpoint,use_fast=True)
    banglabert_model = AutoModelForTokenClassification.from_pretrained(banglabert_checkpoint)

    # RemBERT tokenizer and model
    rembert_tokenizer = AutoTokenizer.from_pretrained(rembert_checkpoint)
    rembert_model = AutoModelForTokenClassification.from_pretrained(rembert_checkpoint)

    # CRF layer with the weights saved during training, in evaluation mode
    CRFmodel = CRFLayer(num_tags=9)
    CRFmodel.load_state_dict(torch.load("/kaggle/working/crf_model.pt"))
    CRFmodel.eval()

    true_labels, pred_labels, model_pred_labels = [], [], []

    for i, example in enumerate(testdata):
        tokens = example['tokens']
        gold = example['ner_tag']

        # Progress marker every 100 examples
        if i % 100 == 0:
            print("i->", i)

        ensembled_crf_preds, ensembled_pred = ensemble_predict(
            tokens, rembert_model, rembert_tokenizer, banglabert_model, banglabert_tokenizer, CRFmodel
        )

        # Compare only the positions available in both gold and prediction.
        usable = min(len(gold), len(ensembled_crf_preds))

        # Map ids to label names
        true_labels.append([id2label[label] for label in gold[:usable]])
        pred_labels.append([id2label[label] for label in ensembled_crf_preds[:usable]])
        model_pred_labels.append([id2label[label] for label in ensembled_pred[:usable]])

    # Report for the CRF-decoded ensemble
    print("----------------------------CRF Ensambled Output-----------------------------")
    print("Accuracy:", accuracy_score(true_labels, pred_labels))
    print("F1 Score:", f1_score(true_labels, pred_labels))
    print("Precision:", precision_score(true_labels, pred_labels))
    print("Recall:", recall_score(true_labels, pred_labels))
    print("\nDetailed Classification Report:\n")
    print(classification_report(true_labels, pred_labels))
    print("-------------------------------------------------------------------------")

    def _summary(predictions):
        # Same four scores, in the same key order, for either prediction set.
        return {
            "accuracy": accuracy_score(true_labels, predictions),
            "precision": precision_score(true_labels, predictions),
            "recall": recall_score(true_labels, predictions),
            "f1": f1_score(true_labels, predictions),
        }

    # (1) CRF ensemble, (2) plain ensemble output
    return (_summary(pred_labels), _summary(model_pred_labels))

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# Load tokenizer from original pretrained model
base_tokenizer = AutoTokenizer.from_pretrained("csebuetnlp/banglabert", use_fast=True)

In [None]:
from transformers import Trainer, TrainingArguments, AutoModelForTokenClassification
from sklearn.model_selection import KFold
import random
import numpy as np
import torch
# Seed every random number generator used here and make cuDNN deterministic.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Number of cross-validation folds
k = 5

# Shuffle each dataset with the same seed and turn it into a list of examples for
# manual slicing: BanglaBERT-tokenized, RemBERT-tokenized and raw, in that order.
# NOTE(review): the fold loop indexes all three lists with the same indices, so it
# relies on the three shuffles producing the same order — verify.
dataset_list, dataset_list2, rawdataset = (
    split.shuffle(seed=seed).to_list()
    for split in (tokenized_datasets['train'], tokenized_datasets2['train'], rawdata)
)

kf = KFold(n_splits=k)

# Per-fold evaluation results
results, fold_metrics = [], []
Result_BB, Result_RB, Result_EN = [], [], []

In [None]:
model_checkpoint2_Base="google/rembert"
model_checkpoint_Base="csebuetnlp/banglabert"

In [None]:
flat_predsB = []
flat_labelsB = []

flat_predsR = []
flat_labelsR = []

# Five-fold cross-validation on our Bangla dataset

In [None]:
# Imports used by this cell (hoisted out of the loop body).
import os
from datasets import Dataset

# For every fold: train the CRF on the raw tags, fine-tune BanglaBERT and RemBERT on
# the fold's training part, evaluate each on the validation part, then evaluate the
# two-model ensemble (with and without CRF decoding) from the saved checkpoints.
for fold, (train_index, val_index) in enumerate(kf.split(dataset_list)):
    print(f"\n=== Fold {fold + 1}/{k} ===")

    # The same fold indices are applied to the three parallel example lists.
    # BanglaBERT-tokenized examples
    train_data = [dataset_list[i] for i in train_index]
    val_data = [dataset_list[i] for i in val_index]

    # RemBERT-tokenized examples
    train_data2 = [dataset_list2[i] for i in train_index]
    val_data2 = [dataset_list2[i] for i in val_index]

    # Raw (untokenized) examples
    val_data_rw = [rawdataset[i] for i in val_index]
    train_data_rw = [rawdataset[i] for i in train_index]

    #====CRF Train Model=====
    train_data_tensor = get_dataset_tensors(train_data_rw)
    val_data_tensor = get_dataset_tensors(val_data_rw)

    train_crflayer(train_data_tensor, val_data_tensor)

    # Convert lists back to HuggingFace Datasets   #BanglaBert
    train_dataset = Dataset.from_list(train_data)
    val_dataset = Dataset.from_list(val_data)

    # Convert lists back to HuggingFace Datasets   #RemBert
    train_dataset2 = Dataset.from_list(train_data2)
    val_dataset2 = Dataset.from_list(val_data2)

    # Class id -> tag name, used when flattening the predictions below.
    id2label2 = {i: label for i, label in enumerate(label_names)}

    # ------------------------------ BanglaBERT ------------------------------
    # Load a fresh model for each fold
    model = AutoModelForTokenClassification.from_pretrained(
        model_checkpoint_Base,
        num_labels=len(label_names),
        id2label={i: label for i, label in enumerate(label_names)},
        label2id={label: i for i, label in enumerate(label_names)}
    )

    # output directory
    fold_output_dir = "Results_of_BanglaBert"

    # Define TrainingArguments
    args = TrainingArguments(
        output_dir=fold_output_dir,
        save_strategy="epoch",
        save_total_limit=1,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        logging_dir=f"{fold_output_dir}/logs",
        seed=seed,
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics
    )

    # Train and evaluate
    trainer.train()

    # The checkpoint folder is named after the final optimisation step. Deriving it
    # from the trainer state replaces the previously hard-coded "/checkpoint-927",
    # which only matched one combination of dataset size, batch size and device count.
    repo_checkpoint = f"/checkpoint-{trainer.state.global_step}"
    banglabert_checkpoint = os.path.abspath(fold_output_dir + repo_checkpoint)
    if not os.path.isdir(banglabert_checkpoint):
        raise FileNotFoundError(f"Expected BanglaBERT checkpoint not found: {banglabert_checkpoint}")

    eval_result = trainer.evaluate()
    print(f"Fold {fold + 1} Evaluation:", eval_result)
    Result_BB.append(eval_result)

    # Classification Report BB: collect tag names at all non-ignored positions.
    predictionsB = trainer.predict(val_dataset)
    pred_labelsB = predictionsB.predictions.argmax(axis=-1)
    true_labelsB = predictionsB.label_ids

    for p, l in zip(pred_labelsB, true_labelsB):
        for pred_token, true_token in zip(p, l):
            if true_token != -100:
                flat_predsB.append(id2label2[pred_token])
                flat_labelsB.append(id2label2[true_token])

    # ------------------------------- RemBERT --------------------------------
    # Load a fresh model for each fold
    model = AutoModelForTokenClassification.from_pretrained(
        model_checkpoint2_Base,
        num_labels=len(label_names),
        id2label={i: label for i, label in enumerate(label_names)},
        label2id={label: i for i, label in enumerate(label_names)}
    )

    # Define fold-specific output directory
    fold_output_dir = "Results_of_REMBERT"

    # Define TrainingArguments
    args = TrainingArguments(
        output_dir=fold_output_dir,
        save_strategy="epoch",
        save_total_limit=1,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        logging_dir=f"{fold_output_dir}/logs",
        seed=seed,
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset2,
        eval_dataset=val_dataset2,
        data_collator=data_collator,
        compute_metrics=compute_metrics
    )

    # Train and evaluate
    trainer.train()

    # Same checkpoint lookup as for BanglaBERT, using this trainer's own final step.
    repo_checkpoint = f"/checkpoint-{trainer.state.global_step}"
    rembert_checkpoint = os.path.abspath(fold_output_dir + repo_checkpoint)
    if not os.path.isdir(rembert_checkpoint):
        raise FileNotFoundError(f"Expected RemBERT checkpoint not found: {rembert_checkpoint}")

    # Classification Report: collect tag names at all non-ignored positions.
    predictionsR = trainer.predict(val_dataset2)
    pred_labelsR = predictionsR.predictions.argmax(axis=-1)
    true_labelsR = predictionsR.label_ids

    for p, l in zip(pred_labelsR, true_labelsR):
        for pred_token, true_token in zip(p, l):
            if true_token != -100:
                flat_predsR.append(id2label2[pred_token])
                flat_labelsR.append(id2label2[true_token])

    eval_result = trainer.evaluate()
    print(f"Fold {fold + 1} Evaluation:", eval_result)
    Result_RB.append(eval_result)

    # ------------------------------- Ensemble -------------------------------
    # Evaluate the ensemble on the raw validation examples of this fold:
    # first result is with CRF decoding, second is the plain ensemble.
    eval_result,ensembled_result=calculate(val_data_rw,rembert_checkpoint,banglabert_checkpoint)
    results.append(eval_result)
    Result_EN.append(ensembled_result)


# Cross-Validation Average Results

In [None]:
import pandas as pd

# Mean of every CRF-ensemble metric over the folds.
df_results = pd.DataFrame(results)
print("\n=== Cross-Validation Average Results CRF===", df_results.mean(), sep="\n")


In [None]:
import pandas as pd

# Mean of every RemBERT evaluation value over the folds.
df_results = pd.DataFrame(Result_RB)
print("\n=== Cross-Validation Average Results REMBert===", df_results.mean(), sep="\n")


In [None]:
import pandas as pd

# Mean of every BanglaBERT evaluation value over the folds.
df_results = pd.DataFrame(Result_BB)
print("\n=== Cross-Validation Average Results BanglaBert===", df_results.mean(), sep="\n")

In [None]:
import pandas as pd

# Mean of every plain-ensemble (no CRF) metric over the folds.
df_results = pd.DataFrame(Result_EN)
print("\n=== Cross-Validation Average Results Ensembled===", df_results.mean(), sep="\n")