In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
pip install datasets


Note: you may need to restart the kernel to use updated packages.


In [11]:
from datasets import load_dataset

# Load the CoNLL-2003 dataset directly from Hugging Face
dataset = load_dataset("conll2003")

# Check the first few samples of the dataset
print(dataset['train'][0])


{'id': '0', 'tokens': ['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.'], 'pos_tags': [22, 42, 16, 21, 35, 37, 16, 21, 7], 'chunk_tags': [11, 21, 11, 12, 21, 22, 11, 12, 0], 'ner_tags': [3, 0, 7, 0, 0, 0, 7, 0, 0]}


In [10]:
import pandas as pd

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset['train'])

# Print the DataFrame
df

Unnamed: 0,id,tokens,pos_tags,chunk_tags,ner_tags
0,0,"[EU, rejects, German, call, to, boycott, Briti...","[22, 42, 16, 21, 35, 37, 16, 21, 7]","[11, 21, 11, 12, 21, 22, 11, 12, 0]","[3, 0, 7, 0, 0, 0, 7, 0, 0]"
1,1,"[Peter, Blackburn]","[22, 22]","[11, 12]","[1, 2]"
2,2,"[BRUSSELS, 1996-08-22]","[22, 11]","[11, 12]","[5, 0]"
3,3,"[The, European, Commission, said, on, Thursday...","[12, 22, 22, 38, 15, 22, 28, 38, 15, 16, 21, 3...","[11, 12, 12, 21, 13, 11, 11, 21, 13, 11, 12, 1...","[0, 3, 4, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, ..."
4,4,"[Germany, 's, representative, to, the, Europea...","[22, 27, 21, 35, 12, 22, 22, 27, 16, 21, 22, 2...","[11, 11, 12, 13, 11, 12, 12, 11, 12, 12, 12, 1...","[5, 0, 0, 0, 0, 3, 4, 0, 0, 0, 1, 2, 0, 0, 0, ..."
...,...,...,...,...,...
14036,14036,"[on, Friday, :]","[15, 22, 8]","[13, 11, 0]","[0, 0, 0]"
14037,14037,"[Division, two]","[21, 11]","[11, 12]","[0, 0]"
14038,14038,"[Plymouth, 2, Preston, 1]","[21, 11, 22, 11]","[11, 12, 12, 12]","[3, 0, 3, 0]"
14039,14039,"[Division, three]","[21, 11]","[11, 12]","[0, 0]"


In [12]:
# Extract words and labels from the dataset
def preprocess_data(dataset):
    words = []
    labels = []
    
    for example in dataset:
        words.append(example['tokens'])
        labels.append(example['ner_tags'])
    
    return words, labels

# Get train, validation, and test data
train_words, train_labels = preprocess_data(dataset['train'])
val_words, val_labels = preprocess_data(dataset['validation'])
test_words, test_labels = preprocess_data(dataset['test'])

# Create word-to-index dictionary
from collections import Counter

# Create word2idx and label2idx
word_counts = Counter([word for sentence in train_words for word in sentence])
word2idx = {word: idx for idx, (word, _) in enumerate(word_counts.items(), start=2)}  # Reserve index 0 for padding, 1 for unknown words
word2idx["<PAD>"] = 0
word2idx["<UNK>"] = 1

# Create label2idx using the label names from the dataset
label_names = dataset['train'].features['ner_tags'].feature.names
label2idx = {label: idx for idx, label in enumerate(label_names)}

print(f"Word2Idx: {list(word2idx.items())[:5]}")
print(f"Label2Idx: {label2idx}")


Word2Idx: [('EU', 2), ('rejects', 3), ('German', 4), ('call', 5), ('to', 6)]
Label2Idx: {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}


In [13]:
import torch
import torch.nn as nn

class BiLSTMNERModel(nn.Module):
    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super(BiLSTMNERModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.hidden2tag = nn.Linear(hidden_dim * 2, tagset_size)  # *2 for bidirectional LSTM

    def forward(self, sentence):
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        tag_space = self.hidden2tag(lstm_out)
        tag_scores = torch.log_softmax(tag_space, dim=2)
        return tag_scores


In [14]:
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

class NERDataset(Dataset):
    def __init__(self, words, labels, word2idx, label2idx):
        self.words = words
        self.labels = labels
        self.word2idx = word2idx
        self.label2idx = label2idx
    
    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        sentence = self.words[idx]
        label = self.labels[idx]
        sentence_idx = [self.word2idx.get(word, 1) for word in sentence]  # Convert words to indices, 1 for unknown words
        label_idx = [self.label2idx[tag] for tag in label]  # Convert labels to indices
        return torch.tensor(sentence_idx), torch.tensor(label_idx)

# Prepare dataset and dataloader
def collate_fn(batch):
    sentences, labels = zip(*batch)
    sentences_padded = pad_sequence(sentences, batch_first=True, padding_value=0)
    labels_padded = pad_sequence(labels, batch_first=True, padding_value=-100)  # Use -100 for padding labels
    return sentences_padded, labels_padded

train_dataset = NERDataset(train_words, train_labels, word2idx, label2idx)
val_dataset = NERDataset(val_words, val_labels, word2idx, label2idx)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)


In [18]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import classification_report
import numpy as np
from datasets import load_dataset

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Step 1: Load the dataset
dataset = load_dataset("conll2003")

# Step 2: Preprocess data
def preprocess_data(dataset, label_names):
    words = []
    labels = []
    
    for example in dataset:
        words.append(example['tokens'])
        labels.append([label_names[tag] for tag in example['ner_tags']])
    
    return words, labels

# Get label names from the dataset
label_names = dataset['train'].features['ner_tags'].feature.names

# Get train, validation, and test data
train_words, train_labels = preprocess_data(dataset['train'], label_names)
val_words, val_labels = preprocess_data(dataset['validation'], label_names)
test_words, test_labels = preprocess_data(dataset['test'], label_names)

# Create word2idx and label2idx dictionaries
from collections import Counter

# Create word2idx dictionary
word_counts = Counter([word for sentence in train_words for word in sentence])
word2idx = {word: idx for idx, (word, _) in enumerate(word_counts.items(), start=2)}  # Reserve index 0 for padding, 1 for unknown words
word2idx["<PAD>"] = 0
word2idx["<UNK>"] = 1

# Create label2idx dictionary
label2idx = {label: idx for idx, label in enumerate(label_names)}

print(f"Word2Idx: {list(word2idx.items())[:5]}")
print(f"Label2Idx: {label2idx}")

# Step 3: Define NERDataset class
class NERDataset(Dataset):
    def __init__(self, words, labels, word2idx, label2idx):
        self.words = words
        self.labels = labels
        self.word2idx = word2idx
        self.label2idx = label2idx
    
    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        sentence = self.words[idx]
        label = self.labels[idx]
        sentence_idx = [self.word2idx.get(word, 1) for word in sentence]  # Convert words to indices, 1 for unknown words
        label_idx = [self.label2idx[tag] for tag in label]  # Convert labels to indices
        return torch.tensor(sentence_idx), torch.tensor(label_idx)

# Collate function to pad sentences and labels to equal lengths
def collate_fn(batch):
    sentences, labels = zip(*batch)
    sentences_padded = pad_sequence(sentences, batch_first=True, padding_value=0)
    labels_padded = pad_sequence(labels, batch_first=True, padding_value=-100)  # Use -100 for padding labels
    return sentences_padded, labels_padded

# Step 4: Define BiLSTM NER Model
class BiLSTMNERModel(nn.Module):
    def __init__(self, vocab_size, tagset_size, embedding_dim=100, hidden_dim=128):
        super(BiLSTMNERModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.hidden2tag = nn.Linear(hidden_dim * 2, tagset_size)  # *2 for bidirectional LSTM

    def forward(self, sentence):
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        tag_space = self.hidden2tag(lstm_out)
        tag_scores = torch.log_softmax(tag_space, dim=2)
        return tag_scores

# Step 5: Define EarlyStopping Class
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

# Step 6: Define Training and Evaluation Functions

# Function to evaluate the model
def evaluate_model(model, val_loader, label2idx):
    model.eval()
    true_labels, pred_labels = [], []
    val_loss = 0
    loss_function = nn.CrossEntropyLoss(ignore_index=-100)
    
    with torch.no_grad():
        for sentences, labels in val_loader:
            # Move data to device (GPU/CPU)
            sentences, labels = sentences.to(device), labels.to(device)
            
            tag_scores = model(sentences)
            predictions = torch.argmax(tag_scores, dim=2)

            # Compute loss for validation set
            tag_scores = tag_scores.view(-1, len(label2idx))
            labels = labels.view(-1)
            loss = loss_function(tag_scores, labels)
            val_loss += loss.item()

            for i in range(len(sentences)):
                true_label = [idx2label[l.item()] for l in labels.view(-1)[i*len(sentences[i]):(i+1)*len(sentences[i])] if l.item() != -100]
                pred_label = [idx2label[p.item()] for p, l in zip(predictions[i], labels.view(-1)[i*len(sentences[i]):(i+1)*len(sentences[i])]) if l.item() != -100]
                
                true_labels.extend(true_label)
                pred_labels.extend(pred_label)

    val_loss /= len(val_loader)  # Average validation loss
    return val_loss, classification_report(true_labels, pred_labels, output_dict=True)['weighted avg']['f1-score']

# Training function with Early Stopping
def train_model(model, train_loader, val_loader, label2idx, num_epochs=100, learning_rate=0.001, patience=5):
    loss_function = nn.CrossEntropyLoss(ignore_index=-100)  # Ignore padding labels
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    early_stopping = EarlyStopping(patience=patience)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for sentences, labels in train_loader:
            # Move data to device (GPU/CPU)
            sentences, labels = sentences.to(device), labels.to(device)

            model.zero_grad()
            tag_scores = model(sentences)
            
            # Flatten the output and labels to compute the loss
            tag_scores = tag_scores.view(-1, len(label2idx))
            labels = labels.view(-1)

            loss = loss_function(tag_scores, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        val_loss, val_f1 = evaluate_model(model, val_loader, label2idx)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}')

        # Check for early stopping
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break

# Step 7: Prepare DataLoaders
train_dataset = NERDataset(train_words, train_labels, word2idx, label2idx)
val_dataset = NERDataset(val_words, val_labels, word2idx, label2idx)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Step 8: Initialize Model and Train
vocab_size = len(word2idx)
tagset_size = len(label2idx)
idx2label = {idx: label for label, idx in label2idx.items()}

model = BiLSTMNERModel(vocab_size, tagset_size).to(device)  # Move the model to the GPU

train_model(model, train_loader, val_loader, label2idx)


Using device: cuda
Word2Idx: [('EU', 2), ('rejects', 3), ('German', 4), ('call', 5), ('to', 6)]
Label2Idx: {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}
Epoch 1/100, Train Loss: 0.5805, Val Loss: 0.4321, Val F1: 0.8621
Epoch 2/100, Train Loss: 0.2676, Val Loss: 0.2875, Val F1: 0.9158
Epoch 3/100, Train Loss: 0.1532, Val Loss: 0.2600, Val F1: 0.9293
Epoch 4/100, Train Loss: 0.0892, Val Loss: 0.2433, Val F1: 0.9372
Epoch 5/100, Train Loss: 0.0490, Val Loss: 0.2583, Val F1: 0.9401
Epoch 6/100, Train Loss: 0.0254, Val Loss: 0.2951, Val F1: 0.9407
Epoch 7/100, Train Loss: 0.0124, Val Loss: 0.3337, Val F1: 0.9404
Epoch 8/100, Train Loss: 0.0061, Val Loss: 0.3644, Val F1: 0.9398
Epoch 9/100, Train Loss: 0.0033, Val Loss: 0.3929, Val F1: 0.9403
Early stopping triggered.


In [21]:
# Save the model
def save_model(model, path="ner_model.pth"):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

# Call this after training to save the model
save_model(model, "ner_model.pth")
# Load the model
def load_model(vocab_size, tagset_size, path="ner_model.pth"):
    model = BiLSTMNERModel(vocab_size, tagset_size)  # Initialize the model architecture
    model.load_state_dict(torch.load(path))  # Load the saved weights
    model.to(device)  # Move model to the correct device (GPU/CPU)
    model.eval()  # Set the model to evaluation mode
    print(f"Model loaded from {path}")
    return model

# Example of loading the model
vocab_size = len(word2idx)
tagset_size = len(label2idx)
loaded_model = load_model(vocab_size, tagset_size, "ner_model.pth")


Model saved to ner_model.pth
Model loaded from ner_model.pth


  model.load_state_dict(torch.load(path))  # Load the saved weights


In [23]:
def predict_with_model(model, sentence, word2idx, idx2label):
    model.eval()  # Set the model to evaluation mode
    sentence_idx = [word2idx.get(word, word2idx["<UNK>"]) for word in sentence.split()]
    sentence_tensor = torch.tensor(sentence_idx).unsqueeze(0).to(device)  # Batch of 1 sentence

    with torch.no_grad():
        tag_scores = model(sentence_tensor)
        predictions = torch.argmax(tag_scores, dim=2)

    # Convert predictions back to labels
    predicted_labels = [idx2label[p.item()] for p in predictions[0]]
    return list(zip(sentence.split(), predicted_labels))

# Example prediction
sentence = " i love the fat girl"
predictions = predict_with_model(loaded_model, sentence, word2idx, idx2label)
print(predictions)


[('i', 'O'), ('love', 'O'), ('the', 'O'), ('fat', 'O'), ('girl', 'O')]


In [16]:
import torch
from datasets import load_dataset
from transformers import BertTokenizerFast, BertForTokenClassification, Trainer, TrainingArguments
from transformers import DataCollatorForTokenClassification
from sklearn.metrics import classification_report
from transformers import EarlyStoppingCallback
import numpy as np

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Step 1: Load the CoNLL-2003 dataset
dataset = load_dataset("conll2003")

# Step 2: Load the BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

# Step 3: Preprocess the dataset
label_list = dataset['train'].features['ner_tags'].feature.names  # Get NER label names (e.g., 'B-PER', 'I-LOC', etc.)
label_to_id = {l: i for i, l in enumerate(label_list)}
id_to_label = {i: l for l, i in label_to_id.items()}

# Tokenize and align labels for BERT input
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples['tokens'], truncation=True, is_split_into_words=True)
    labels = []
    for i, label in enumerate(examples['ner_tags']):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)  # Label ignored in loss calculation
            elif word_idx != previous_word_idx:  # First sub-token of the word
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)  # Ignore sub-tokens
            previous_word_idx = word_idx
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Apply tokenization and label alignment
tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=True)

# Step 4: Load Pre-trained BERT Model for Token Classification
model = BertForTokenClassification.from_pretrained("bert-base-uncased", num_labels=len(label_list)).to(device)

# Step 5: Data Collator and Training Arguments
data_collator = DataCollatorForTokenClassification(tokenizer)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=30,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,  # Adjust this as needed
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True
)



# Step 6: Define Metric for Evaluation (F1-score)
from sklearn.metrics import classification_report

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Flatten true labels and predictions for comparison
    true_labels = [[id_to_label[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [id_to_label[pred] for (pred, lab) in zip(prediction, label) if lab != -100]
        for prediction, label in zip(predictions, labels)
    ]
    
    # Flatten the list of lists into a single list
    true_labels_flat = [item for sublist in true_labels for item in sublist]
    true_predictions_flat = [item for sublist in true_predictions for item in sublist]
    
    # Generate classification report
    results = classification_report(true_labels_flat, true_predictions_flat, output_dict=True)
    
    return {
        "precision": results["weighted avg"]["precision"],
        "recall": results["weighted avg"]["recall"],
        "f1": results["weighted avg"]["f1-score"],
        "accuracy": results["accuracy"]
    }

# Step 7: Trainer Setup for Fine-Tuning with Early Stopping
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]  # Early stopping if no improvement after 3 evaluations
)

# Step 8: Fine-tune the model
trainer.train()

# Step 9: Save the fine-tuned model
model.save_pretrained("bert-ner-model")
tokenizer.save_pretrained("bert-ner-tokenizer")

# Step 10: Load the saved model for future use
def load_model():
    model = BertForTokenClassification.from_pretrained("bert-ner-model").to(device)
    tokenizer = BertTokenizerFast.from_pretrained("bert-ner-tokenizer")
    return model, tokenizer


Using device: cuda


Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.072,0.056684,0.984221,0.984074,0.983968,0.984074
2,0.0392,0.047057,0.987889,0.987831,0.987848,0.987831
3,0.0241,0.045954,0.988938,0.989,0.98896,0.989
4,0.0157,0.049494,0.988827,0.988766,0.98877,0.988766
5,0.01,0.051928,0.988718,0.988747,0.988729,0.988747
6,0.0085,0.057711,0.988463,0.988416,0.988424,0.988416


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


In [15]:


# Function to provide meanings for the NER tags
def explain_ner_tag(tag):
    tag_explanations = {
        'O': 'Outside of any entity',
        'B-PER': 'Beginning of a Person entity',
        'I-PER': 'Inside a Person entity',
        'B-LOC': 'Beginning of a Location entity',
        'I-LOC': 'Inside a Location entity',
        'B-ORG': 'Beginning of an Organization entity',
        'I-ORG': 'Inside an Organization entity',
        'B-MISC': 'Beginning of a Miscellaneous entity',
        'I-MISC': 'Inside a Miscellaneous entity',
    }
    return tag_explanations.get(tag, "Unknown tag")

# Function to predict NER tags and explain them
def predict_with_model(model, tokenizer, sentence):
    model.eval()
    tokens = tokenizer(sentence, return_tensors="pt", truncation=True, is_split_into_words=False).to(device)
    with torch.no_grad():
        output = model(**tokens)
    predictions = torch.argmax(output.logits, dim=2)
    predicted_labels = [id_to_label[i.item()] for i in predictions[0]]

    # Include explanations with predictions
    explained_predictions = []
    for token, tag in zip(tokenizer.tokenize(sentence), predicted_labels):
        explained_predictions.append((token, tag, explain_ner_tag(tag)))
    
    return explained_predictions

# Example usage for prediction and explanation
sentence = "i am Md Sazzat Hossain. I love Bangladesh"
model, tokenizer = load_model()  # Assuming the load_model function is defined
predictions = predict_with_model(model, tokenizer, sentence)

# Print predictions with explanations
for token, tag, explanation in predictions:
    print(f"Token: {token}, Tag: {tag}, Meaning: {explanation}")


Token: i, Tag: O, Meaning: Outside of any entity
Token: am, Tag: O, Meaning: Outside of any entity
Token: md, Tag: O, Meaning: Outside of any entity
Token: sa, Tag: B-PER, Meaning: Beginning of a Person entity
Token: ##zza, Tag: I-PER, Meaning: Inside a Person entity
Token: ##t, Tag: I-PER, Meaning: Inside a Person entity
Token: ho, Tag: I-PER, Meaning: Inside a Person entity
Token: ##ssa, Tag: I-PER, Meaning: Inside a Person entity
Token: ##in, Tag: I-PER, Meaning: Inside a Person entity
Token: ., Tag: I-PER, Meaning: Inside a Person entity
Token: i, Tag: O, Meaning: Outside of any entity
Token: love, Tag: O, Meaning: Outside of any entity
Token: bangladesh, Tag: O, Meaning: Outside of any entity


In [None]:
def train_model(model, train_loader, val_loader, label2idx, num_epochs=5, learning_rate=0.001):
    loss_function = nn.CrossEntropyLoss(ignore_index=-100)  # Ignore padding labels
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        total_loss = 0
        
        for sentences, labels in train_loader:
            model.zero_grad()
            tag_scores = model(sentences)
            
            # Flatten the output and labels to compute the loss
            tag_scores = tag_scores.view(-1, len(label2idx))
            labels = labels.view(-1)

            loss = loss_function(tag_scores, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss:.4f}")

# Initialize and train the model
vocab_size = len(word2idx)
tagset_size = len(label2idx)

model = BiLSTMNERModel(vocab_size, tagset_size)
train_model(model, train_loader, val_loader, label2idx)
