In [1]:
# Import necessary libraries
import torch
import joblib
import yaml
import os
from src.models.bilstm_crf_model import BiLSTM_CRF

# Load configuration
config_path = 'config/bilstm_crf_config.yaml'
with open(config_path, 'r') as file:
    config = yaml.safe_load(file)

# Load the trained model
vocab_size = 10000  # Adjust based on your actual vocab size
tagset_size = 18    # Adjust based on your actual tag size

model = BiLSTM_CRF(vocab_size, tagset_size, config['model']['embedding_dim'], config['model']['hidden_dim'])
model.eval()
print("Model loaded successfully")

# Load a small batch of data
data_path = 'data/processed/bilstm_train.pkl'
X, y = joblib.load(data_path)

# Take a small sample sentence
sent = torch.tensor(X[0], dtype=torch.long).unsqueeze(0)  # Batch size 1
print("Sample Sentence:", sent)
print("Sample Sentence Shape:", sent.shape)


Model loaded successfully
Sample Sentence: tensor([[ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 12, 18,
         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]])
Sample Sentence Shape: torch.Size([1, 31])


In [2]:
# Check input dimensions
print("Input Sentence (Batch First):", sent)
print("Input Shape:", sent.shape)


Input Sentence (Batch First): tensor([[ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 12, 18,
         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]])
Input Shape: torch.Size([1, 31])


In [3]:
with torch.no_grad():
    embeds = model.word_embeds(sent)
    print("Embedding Output Shape:", embeds.shape)
    print("Embedding Output (First 5 Tokens):", embeds[0, :5])


Embedding Output Shape: torch.Size([1, 31, 100])
Embedding Output (First 5 Tokens): tensor([[ 4.3985e-01, -1.6485e+00, -8.8791e-01,  7.1116e-01,  4.6108e-02,
         -5.5073e-01, -5.6396e-01,  1.8835e+00,  9.9901e-01,  7.9519e-01,
          1.2114e+00,  5.0557e-01, -8.4548e-01, -1.4257e+00,  3.0945e-01,
          1.6079e-01,  2.0867e-01, -2.5437e-01, -4.2353e-02,  8.7499e-02,
          3.8745e-01, -9.9284e-01, -1.7035e+00,  7.5859e-01,  8.5649e-01,
         -3.1103e-01, -1.4060e+00,  1.5158e+00, -8.5780e-01,  7.0593e-01,
          5.4726e-01,  1.6098e+00, -4.8729e-02,  3.9417e-01, -1.4241e-01,
          1.0993e+00, -6.5213e-01,  3.0283e-01, -8.7332e-01, -7.2792e-01,
          9.0189e-01,  2.2705e+00, -1.4012e+00,  1.0091e-01,  2.5080e-01,
          1.1738e+00,  5.3529e-01,  2.8089e-01,  5.6923e-01, -8.6394e-01,
         -1.4258e+00,  1.8027e+00,  6.5605e-01, -2.2597e+00, -5.5891e-02,
          1.3291e+00,  5.0488e-01,  7.1280e-01,  1.9835e+00, -1.4426e+00,
         -8.7028e-01, -1.530

In [4]:
with torch.no_grad():
    lstm_out, _ = model.lstm(embeds)
    print("LSTM Output Shape:", lstm_out.shape)
    print("LSTM Output (First 5 Steps):", lstm_out[0, :5])


LSTM Output Shape: torch.Size([1, 31, 256])
LSTM Output (First 5 Steps): tensor([[ 0.0250,  0.0636,  0.0290,  ..., -0.0281, -0.0535, -0.0312],
        [ 0.0115,  0.1023, -0.0111,  ..., -0.0363, -0.0654, -0.0264],
        [ 0.0255,  0.0957, -0.0360,  ..., -0.0329, -0.0690, -0.0196],
        [ 0.0453,  0.1414, -0.0528,  ..., -0.0706, -0.0470,  0.0163],
        [ 0.0634,  0.1432, -0.0053,  ..., -0.0705, -0.0318,  0.0138]])


In [5]:
with torch.no_grad():
    lstm_feats = model.hidden2tag(lstm_out)
    print("LSTM Features (Emission Scores) Shape:", lstm_feats.shape)
    print("Emission Scores (First 5 Steps):", lstm_feats[0, :5])


LSTM Features (Emission Scores) Shape: torch.Size([1, 31, 18])
Emission Scores (First 5 Steps): tensor([[-0.0495,  0.0672,  0.0499,  0.0187, -0.0161, -0.0563, -0.0086,  0.0611,
          0.0098, -0.0463, -0.0643, -0.0516, -0.0005,  0.0629, -0.0445,  0.0563,
          0.0398, -0.0306],
        [-0.0445,  0.0721,  0.0708,  0.0387, -0.0183, -0.0586, -0.0289,  0.0714,
          0.0216, -0.0461, -0.0523, -0.0450, -0.0090,  0.0646, -0.0499,  0.0555,
          0.0324, -0.0260],
        [-0.0516,  0.0409,  0.0768,  0.0558, -0.0067, -0.0431, -0.0306,  0.0650,
          0.0266, -0.0449, -0.0424, -0.0324, -0.0041,  0.0634, -0.0448,  0.0627,
          0.0333, -0.0157],
        [-0.0694,  0.0413,  0.0526,  0.0414,  0.0116, -0.0535, -0.0031,  0.0542,
          0.0328, -0.0594, -0.0390, -0.0342, -0.0028,  0.0564, -0.0555,  0.0886,
          0.0384,  0.0009],
        [-0.0706,  0.0556,  0.0507,  0.0097,  0.0134, -0.0671, -0.0010,  0.0614,
          0.0459, -0.0553, -0.0510, -0.0412,  0.0107,  0.0589, 

In [6]:
try:
    score, tag_seq = model._viterbi_decode(lstm_feats)
    print("Viterbi Decoding Score:", score)
    print("Predicted Tag Sequence:", tag_seq)
except Exception as e:
    print("Error during Viterbi Decoding:", str(e))


Viterbi Decoding Score: tensor([-19906.9805], grad_fn=<IndexBackward0>)
Predicted Tag Sequence: [[12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12]]


In [7]:
# Check the transition matrix values
print("Transition Matrix:")
print(model.transitions)
print("Transition Matrix Shape:", model.transitions.shape)

# Check for any dominant values (indicative of initialization issues)
max_val = torch.max(model.transitions)
min_val = torch.min(model.transitions)
print(f"Max Transition Value: {max_val.item()}, Min Transition Value: {min_val.item()}")


Transition Matrix:
Parameter containing:
tensor([[-1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
         -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
         -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
         -1.0000e+04, -1.0000e+04, -1.0000e+04],
        [-1.0000e+04, -1.6847e+00, -1.4650e+00,  3.8129e-01, -8.0737e-03,
          2.0782e+00,  1.0535e-02, -3.5243e-01,  4.2435e-01,  8.4695e-01,
         -1.1362e-01,  4.5446e-01, -1.0574e+00,  9.4980e-01,  9.9600e-01,
         -3.2728e-01, -3.5063e-01,  5.1021e-01],
        [-1.0000e+04, -1.7474e+00, -5.0859e-02, -9.2646e-01,  1.0532e+00,
         -7.9413e-02,  7.8410e-02, -2.6374e-01, -6.8105e-02,  1.2402e+00,
         -2.7998e-02, -1.0916e+00, -2.3756e-01,  2.6666e-01, -9.6932e-01,
          5.5455e-02, -8.6848e-01, -2.5927e-01],
        [-1.0000e+04, -1.8121e-02, -1.4713e+00, -1.6999e+00, -8.7816e-01,
          1.8334e-01, -7.9039e-01, -1.8654e+00,  7.5694e-01, -1.0451e+00

In [8]:
# Create a sample tag tensor for testing the loss calculation
tags = torch.tensor(y[0], dtype=torch.long).unsqueeze(0)

# Calculate the negative log-likelihood loss
try:
    loss = model.neg_log_likelihood(sent, tags)
    print("Negative Log-Likelihood Loss:", loss.item())
except Exception as e:
    print("Error during NLL calculation:", str(e))


Negative Log-Likelihood Loss: -9889.203125


In [9]:
# Run a full forward pass to get the score and predicted tag sequence
try:
    score, tag_seq = model.forward(sent)
    print("Full Forward Pass - Viterbi Decoding Score:", score)
    print("Predicted Tag Sequence from Forward Pass:", tag_seq)
except Exception as e:
    print("Error during Full Forward Pass:", str(e))


Full Forward Pass - Viterbi Decoding Score: tensor([-19906.9805], grad_fn=<IndexBackward0>)
Predicted Tag Sequence from Forward Pass: [[12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12]]


In [10]:
# Set the model to training mode
model.train()

# Define a simple optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Perform one training step
try:
    optimizer.zero_grad()
    loss = model.neg_log_likelihood(sent, tags)
    print("Initial Loss:", loss.item())
    loss.backward()
    optimizer.step()

    # Check loss after one step
    loss_after = model.neg_log_likelihood(sent, tags)
    print("Loss after one training step:", loss_after.item())
    print("Loss decreased:", loss_after.item() < loss.item())
except Exception as e:
    print("Error during loss reduction check:", str(e))


Initial Loss: -9889.203125
Loss after one training step: -9892.375
Loss decreased: True
