In [1]:
# from bert_model_implementation_torch.model import BertModel
from bert_model_implementation_torch.tokenization import  _is_control,_is_whitespace,_is_punctuation
from transformers import BertTokenizer, BertModel
from sklearn.model_selection import train_test_split
import torch 
import torch.nn as nn
import numpy as np
import pandas as pd
import sys
from tqdm import tqdm
from keras.src.utils import to_categorical
from torch.utils.data import DataLoader
import shutil

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(
05/18/2024 19:41:52 - INFO - numexpr.utils -   Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
05/18/2024 19:41:52 - INFO - numexpr.utils -   NumExpr defaulting to 8 threads.


In [9]:
data = pd.read_csv("../data/Book1.csv")

In [45]:
data.head()

Unnamed: 0,prompt,intent,classes
0,Can you update the sales figures in cells <Cel...,Update cell range,entry and manipulation
1,Please modify the data in range <Range>,Update cell range,entry and manipulation
2,I need to change the prices in cells <Range>,Update cell range,entry and manipulation
3,Could you update the inventory levels from <Ce...,Update cell range,entry and manipulation
4,Please adjust the budget numbers in range <Range>,Update cell range,entry and manipulation


In [46]:
config = {
    'max_len': 256,
    'batch_size': 8,
    'epochs': 10,
    'lr':1e-05,
    'out_first_layer': 768,
    'dropout_rate': 0.1,
    'model_dir':'bert-base-cased',
    'ckpt_path': './ckpts',
    'ckpt_model_path': './experiments'
}

In [10]:
intent_list = data.intent.unique().tolist()

In [48]:
class intent_dataset:
    def __init__(self, df: pd.DataFrame, tokenizer: BertTokenizer, max_len: int):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.classes = data.intent.unique().tolist()
        self.y = df['intent']
        self.x = df['prompt']
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        title = str(self.x[index])
        title= ''.join(title.split())
        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens = True,
            return_attention_mask = True,
            return_tensors = 'pt',
            return_token_type_ids = True,
            padding = 'max_length',
            max_length = self.max_len,
            truncation = True
        )
        # print(self.classes.index(self.y[index]))
        target = self.classes.index(self.y[index])  # Get the class index
        target_tensor = torch.zeros(len(self.classes),dtype= torch.float32)  # Initialize target tensor with zeros
        target_tensor[target] = 1  # Set the corresponding index to 1
        return {
            'input_ids': inputs["input_ids"].flatten(),
            'token_type_ids': inputs['token_type_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'targets': target_tensor
        }

In [49]:
train = data.sample(frac=0.9, random_state=200).reset_index(drop=True)
val = data.drop(train.index).reset_index(drop=True)

In [50]:
tokenizer = BertTokenizer.from_pretrained(config["model_dir"])
train_dataset = intent_dataset(train, tokenizer, config["max_len"])
val_dataset = intent_dataset(val, tokenizer, config["max_len"])

In [51]:
train_loader = DataLoader(
    train_dataset,
    shuffle = True,
    batch_size = config["batch_size"],
    num_workers = 0
    )

val_loader = DataLoader(
    val_dataset,
    shuffle = False,
    batch_size = config["batch_size"],
    num_workers = 0
    )

In [52]:
train_loader.dataset[10]['targets']


tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])

In [53]:
len(train_loader.dataset[0]['input_ids'])


256

In [12]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [26]:
def load_ckpt(ckpt_path, model, optimizer):
    ckpt = torch.load(ckpt_path)
    # print(ckpt)
    model.load_state_dict(ckpt['state_dict'])
    # optimizer.load_state_dict(ckpt['state_dict'])
    valid_loss_min = ckpt['valid_loss_min']
    return model

In [3]:
def save_ckpt(state, is_best, ckpt_path, best_model_path):
    f_path= ckpt_path
    torch.save(state, f_path)
    if is_best:
        best_f_path = best_model_path
        shutil.copyfile(f_path, best_f_path)

In [7]:
class intent_model(nn.Module):
    def __init__(self, config: dict, intent_labels: list[str], dropout: float = 0.1):
        super(intent_model,self).__init__()
        self.config = config
        self.intent_labels = intent_labels
        self.dropout_rate = dropout
        self.bert = BertModel.from_pretrained(self.config["model_dir"])
        self.dropout = nn.Dropout(self.dropout_rate)
        self.layer_1 = nn.Linear(self.bert.config.hidden_size,self.config["out_first_layer"])
        self.activation_1 = nn.ReLU()
        self.layer_2 = nn.Linear(self.config["out_first_layer"], len(self.intent_labels))
        
    def forward(self, input_ids: torch.Tensor, token_type_ids: torch.Tensor, attention_mask: torch.Tensor):
        output = self.bert(input_ids, token_type_ids, attention_mask)
        output_dropout = self.dropout(output.pooler_output)
        out_layer_1 = self.layer_1(output_dropout)
        act_1 = self.activation_1(out_layer_1)
        out_layer_2 = self.layer_2(act_1)
        return out_layer_2
        

In [39]:
model = intent_model(config, intent_list, 0.1)
model.to(device)

05/16/2024 01:11:17 - INFO - bert_model_implementation_torch.file_utils -   https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz not found in cache, downloading to C:\Users\kikos\AppData\Local\Temp\tmpcjv867ym
100%|██████████| 404400730/404400730 [07:17<00:00, 925346.54B/s] 
05/16/2024 01:18:36 - INFO - bert_model_implementation_torch.file_utils -   copying C:\Users\kikos\AppData\Local\Temp\tmpcjv867ym to cache at C:\Users\kikos\.pytorch_pretrained_bert\a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c
05/16/2024 01:18:36 - INFO - bert_model_implementation_torch.file_utils -   creating metadata file for C:\Users\kikos\.pytorch_pretrained_bert\a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c
05/16/2024 01:18:36 - INFO - bert_model_implementation_torch.file_utils -   removing temp file C:\Users\kikos\AppData

intent_model(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
 

In [40]:
def loss_fn(outputs, targets):
    targets = targets.float()
    return nn.BCEWithLogitsLoss()(outputs, targets)


In [41]:
def train(model, epochs, train_loader, val_loader, optimizer, ckpt_path, best_model_path):
    valid_loss_min = np.Inf
    for epoch in range(1, epochs + 1):
        train_loss = 0
        valid_loss = 0
        model.train()
        for batch_index, batch in tqdm(enumerate(train_loader)):
            input_ids = batch['input_ids'].to(device, dtype= torch.long)
            attention_mask = batch["attention_mask"].to(device, dtype= torch.long)
            token_type_ids = batch["token_type_ids"].to(device, dtype= torch.long)
            targets = batch["targets"].to(device, dtype= torch.long)
            outputs = model(input_ids, token_type_ids, attention_mask)
            optimizer.zero_grad()
            loss = loss_fn(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += (1/(batch_index + 1)) * (loss.item() - train_loss)
        print(f"epoch {epoch} ended with train loss of {train_loss}")
        model.eval()
        with torch.no_grad():
            for batch_index, batch in tqdm(enumerate(val_loader)):
                input_ids = batch['input_ids'].to(device, dtype= torch.long)
                attention_mask = batch["attention_mask"].to(device, dtype= torch.long)
                token_type_ids = batch["token_type_ids"].to(device, dtype= torch.long)
                targets = batch["targets"].to(device, dtype= torch.long)
                outputs = model(input_ids, token_type_ids, attention_mask)
                loss = loss_fn(outputs, targets)
                valid_loss += (1/(batch_index + 1)) * (loss.item() - valid_loss)
        print(f"epoch {epoch} ended with train loss of {valid_loss}")
        checkpoint = {
            'epoch': epoch +1,
            'valid_loss_min': valid_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_ckpt(checkpoint, False, ckpt_path, best_model_path)
        return model

In [42]:
train = train(model, config["epochs"], train_loader, val_loader, optimizer, config["ckpt_path"], config["ckpt_model_path"])

0it [00:02, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [33]:
config = {
    'max_len': 256,
    'batch_size': 8,
    'epochs': 10,
    'lr':1e-05,
    'out_first_layer': 768,
    'dropout_rate': 0.1,
    'model_dir':'bert-base-cased',
    'ckpt_path': './ckpts',
    'ckpt_model_path': './experiments'
}
import torch.nn.functional as F

In [54]:
class intent_model(nn.Module):
    def __init__(self, config: dict, intent_labels: list[str], dropout: float = 0.1):
        super(intent_model, self).__init__()
        self.config = config
        self.intent_labels = intent_labels
        self.dropout_rate = dropout
        self.bert = BertModel.from_pretrained(self.config["model_dir"])
        self.dropout = nn.Dropout(self.dropout_rate)
        self.layer_1 = nn.Linear(self.bert.config.hidden_size, self.config["out_first_layer"])
        self.activation_1 = nn.ReLU()
        self.layer_2 = nn.Linear(self.config["out_first_layer"], len(self.intent_labels))
        
    def forward(self, input_ids: torch.Tensor, token_type_ids: torch.Tensor, attention_mask: torch.Tensor):
        output = self.bert(input_ids, token_type_ids, attention_mask)
        output_dropout = self.dropout(output.pooler_output)
        out_layer_1 = self.layer_1(output_dropout)
        act_1 = self.activation_1(out_layer_1)
        out_layer_2 = self.layer_2(act_1)
        return out_layer_2
    
    def predict_intent(self, text: str, tokenizer: BertTokenizer):
        # Tokenize input text
        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=128)
        input_ids = inputs['input_ids']
        token_type_ids = inputs['token_type_ids']
        attention_mask = inputs['attention_mask']
        
        # Move tensors to the appropriate device
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(device)
        input_ids = input_ids.to(device)
        token_type_ids = token_type_ids.to(device)
        attention_mask = attention_mask.to(device)
        
        # Make predictions
        self.eval()  # Set the model to evaluation mode
        with torch.no_grad():
            logits = self(input_ids, token_type_ids, attention_mask)
        
        # Convert logits to probabilities
        probs = F.softmax(logits, dim=1)
        
        # Get the predicted label
        predicted_label_idx = torch.argmax(probs, dim=1).item()
        print(predicted_label_idx)
        predicted_label = self.intent_labels[predicted_label_idx]
        
        return predicted_label, probs[0].cpu().numpy()

In [55]:
tokenizer = BertTokenizer.from_pretrained(config["model_dir"])

In [56]:
optimizer = torch.optim.Adam(params = model.parameters(), lr = config["lr"])

In [57]:
model = intent_model(config, intent_list, 0.1)
model.to(device)

intent_model(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affi

In [58]:
model.bert.resize_token_embeddings(30522)

You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embeding dimension will be 30522. This might induce some performance reduction as *Tensor Cores* will not be available. For more details  about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc


Embedding(30522, 768)

In [59]:
model = load_ckpt("./ckpts/ckpt", model, optimizer)

In [61]:
model.predict_intent("Please modify the data in range", tokenizer)

14


('Delete hyperlink',
 array([0.01483624, 0.00962736, 0.01309677, 0.01109273, 0.01267296,
        0.01391257, 0.01042866, 0.01172547, 0.01188861, 0.01360726,
        0.01299947, 0.01062253, 0.01138435, 0.01929716, 0.01973281,
        0.00815214, 0.01179272, 0.01155274, 0.01214933, 0.01029058,
        0.01119961, 0.00984573, 0.01025925, 0.01101724, 0.01066194,
        0.01142314, 0.01046023, 0.01088517, 0.01101524, 0.01014855,
        0.01382952, 0.00946699, 0.01382294, 0.01145752, 0.01230946,
        0.01451662, 0.0112405 , 0.0141823 , 0.00991062, 0.01725945,
        0.01911117, 0.0094124 , 0.0120395 , 0.00818533, 0.01099228,
        0.01047215, 0.00911422, 0.00737221, 0.01044737, 0.0114465 ,
        0.00703791, 0.00761131, 0.00768395, 0.00895203, 0.00710685,
        0.01233352, 0.00776994, 0.00646441, 0.00676312, 0.00951902,
        0.01012736, 0.01270906, 0.0093776 , 0.01227192, 0.01941985,
        0.011088  , 0.0120331 , 0.01338007, 0.00844096, 0.00996742,
        0.00847649, 0.00942