In [4]:
import numpy as np
from tqdm import tqdm

In [1]:

from transformers import BertTokenizer, BertModel
import torch

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer and encoder are both restored from "saved_bert_model"
# (presumably a local save directory -- confirm it exists before running).
tokenizer = BertTokenizer.from_pretrained("saved_bert_model")

# nn.Module.to() and nn.Module.eval() both return the module itself, so the
# calls can be chained: move the weights to `device`, then switch to eval mode.
bert_model = BertModel.from_pretrained("saved_bert_model").to(device).eval()

print("✅ Loaded BERT model successfully!")


✅ Loaded BERT model successfully!


In [2]:
# Example complaint texts; this list is what gets encoded and classified in
# the later cells.
complaints = [
    "My credit card was charged twice for the same transaction.",
    "I am unable to access my online banking account since yesterday.",
    "The bank took more than a week to resolve my loan application issue.",
    "Unauthorized withdrawal occurred from my savings account without my consent.",
]

In [5]:
def get_bert_embeddings(text_list, tokenizer, model, device, max_len=128, batch_size=32):
    """Encode texts into one embedding vector each, taken from token position 0.

    Texts are tokenized and run through ``model`` in batches. For every text
    the hidden state at sequence position 0 of ``last_hidden_state`` (the
    [CLS] slot for a BERT tokenizer) is collected.

    Args:
        text_list: Iterable of strings. A single bare string is treated as a
            one-element list instead of being iterated character by character.
        tokenizer: Callable tokenizer (Hugging Face style) that accepts a list
            of strings and returns an object with ``.to(device)`` that can be
            unpacked into ``model(**inputs)``.
        model: Encoder whose output exposes ``last_hidden_state`` with shape
            (batch, seq_len, hidden).
        device: Device the tokenized inputs are moved to; it should match the
            device the model lives on.
        max_len: Maximum number of tokens per text; longer inputs are truncated.
        batch_size: Number of texts per forward pass. Must be >= 1.

    Returns:
        ``np.ndarray`` of shape (len(text_list), hidden). For empty input the
        shape is (0, hidden), with ``hidden`` read from
        ``model.config.hidden_size`` when available (0 otherwise), so callers
        can still index ``.shape[1]``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A bare string would otherwise be encoded one character at a time.
    if isinstance(text_list, str):
        text_list = [text_list]
    texts = list(text_list)

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    cls_chunks = []
    with torch.no_grad(), tqdm(total=len(texts), desc="Encoding with BERT") as progress:
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            # padding=True pads only to the longest text in this batch rather
            # than always to max_len. Padded positions are excluded through the
            # attention mask the tokenizer returns, so this should change the
            # embeddings by no more than floating-point rounding.
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=max_len,
            ).to(device)

            outputs = model(**inputs)
            # Keep only position 0 of every sequence: (batch, hidden).
            cls_chunks.append(outputs.last_hidden_state[:, 0, :].cpu().numpy())
            progress.update(len(batch))

    if not cls_chunks:
        # Nothing was encoded: return an empty 2-D array so downstream code
        # that reads .shape[1] keeps working.
        hidden_size = getattr(getattr(model, "config", None), "hidden_size", 0)
        return np.empty((0, hidden_size), dtype=np.float32)

    return np.concatenate(cls_chunks, axis=0)

# Encode every complaint with the loaded BERT model; the result has one row
# per complaint.
complaint_embeddings = get_bert_embeddings(
    text_list=complaints,
    tokenizer=tokenizer,
    model=bert_model,
    device=device,
)

# Report the array shape, then show the first complaint's vector.
print("\n✅ Shape of embeddings:", complaint_embeddings.shape)
print("\nSample embedding for first complaint:\n", complaint_embeddings[0])

Encoding with BERT: 100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00,  1.22it/s]


✅ Shape of embeddings: (4, 768)

Sample embedding for first complaint:
 [ 4.34767753e-02 -4.47132476e-02 -3.86100739e-01  2.46702340e-02
 -6.60726652e-02  1.65026113e-01  4.85310331e-02  4.39159036e-01
  1.75442815e-01 -1.10568464e-01  4.43437755e-01  2.67092705e-01
  2.89378483e-02 -9.88036916e-02  2.70563930e-01 -5.74001297e-02
  2.38424446e-02  3.83469224e-01  2.65490621e-01 -4.33165848e-01
 -1.28244191e-01  5.20564020e-02  3.13618630e-01  9.80895981e-02
 -1.35004371e-01 -1.12090260e-01  1.55159920e-01 -5.16181365e-02
 -1.01748295e-01  2.61989921e-01  3.37968498e-01 -2.12532934e-02
 -1.97186053e-01  1.58485174e-02  3.58879805e-01 -1.91638395e-01
  1.32425040e-01 -4.16249335e-01  3.96102071e-02 -1.22010820e-01
 -2.03090325e-01  1.87813789e-01  3.75955217e-02  2.46324360e-01
 -1.09980129e-01 -2.22258195e-01 -3.29228449e+00 -9.50225070e-02
 -1.37197137e-01 -9.86055508e-02  3.90766531e-01 -3.66208553e-01
 -2.84864962e-01  2.73249865e-01  3.08385700e-01  3.71734738e-01
 -5.61172068e-01 




In [6]:
import torch
import torch.nn as nn
import numpy as np

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# complaint_embeddings must already exist with shape (num_samples, embedding_dim), e.g.
# complaint_embeddings = get_bert_embeddings(new_texts, tokenizer, bert_model, device)
if complaint_embeddings.ndim != 2:
    raise ValueError(
        "complaint_embeddings must be 2-D (num_samples, embedding_dim), "
        f"got shape {complaint_embeddings.shape}"
    )

# Load the checkpoint before building the network so the number of classes can
# be read from it instead of being hard-coded.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered .pth file cannot execute arbitrary code while loading.
state_dict = torch.load("best_model.pth", map_location=device, weights_only=True)

# Load dimensions
input_dim = complaint_embeddings.shape[1]
# The final nn.Linear sits at index 9 of the Sequential below; nn.Linear stores
# its weight as (out_features, in_features), so dim 0 is the class count.
output_dim = state_dict["9.weight"].shape[0]

# Recreate model architecture (must be same as training)
model = nn.Sequential(
    nn.Linear(input_dim, 512),
    nn.ReLU(),
    nn.Dropout(0.2),

    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.2),

    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.2),

    nn.Linear(128, output_dim)
)

model.to(device)
# Strict loading: raises if the checkpoint's keys or tensor shapes do not match
# the architecture above (including a mismatched input_dim).
model.load_state_dict(state_dict)
model.eval()  # disable dropout for inference
print("✅ Model loaded successfully!")

# Convert embeddings to torch tensor
X_new = torch.tensor(complaint_embeddings, dtype=torch.float32).to(device)

# Make predictions: argmax over the class dimension gives one class index per row
with torch.no_grad():
    outputs = model(X_new)
    predicted_classes = torch.argmax(outputs, dim=1).cpu().numpy()

print("Predicted classes:", predicted_classes)


✅ Model loaded successfully!
Predicted classes: [1 1 0 0]
