In [107]:
import torch
import torch.nn as nn

from transformers import BertTokenizer, BertForSequenceClassification, BertModel

In [108]:
# Tokenizer for the 'bert-base-uncased' checkpoint; turns text into token ids.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# BERT with a sequence-classification head, loaded from the same checkpoint.
# NOTE(review): `model` is not used in the cells visible here — confirm it is
# needed before paying the load cost.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# Bare BERT encoder (no task head). output_hidden_states=False asks the model
# not to return the per-layer hidden states.
bert = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=False)

In [109]:
# Fixed sequence length that encoded queries are padded to.
max_len = 30
# The per-call length argument of encode() is `max_length`; combined with
# pad_to_max_length=True the returned id list is padded up to that length.
ids = tokenizer.encode("Hello my dog is cute", max_length=max_len, pad_to_max_length=True)

In [110]:
from torch.utils.data import Dataset, DataLoader
from src.utils import prepare_dataset
from transformers import BertTokenizer
import torch
import numpy as np

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides library warnings about deprecated or
# unrecognised arguments (e.g. from the tokenizer) — consider narrowing the
# filter to a specific category/module.
warnings.filterwarnings("ignore")

class IntentData(Dataset):
    """Map-style dataset that converts rows of an intent dataframe into model inputs.

    Args:
        dataframe: pandas DataFrame with a ``query`` column (the text) and an
            ``intent_idx`` column (the label).
        tokenizer: object exposing
            ``encode(text, max_length=..., pad_to_max_length=...)`` that returns
            a list of token ids (e.g. a ``BertTokenizer``).
        max_len: sequence length requested from the tokenizer for every query.

    Each item is a dict with:
        ``"ids"``: ``torch.long`` tensor of the token ids returned by the tokenizer.
        ``"attention_mask"``: ``torch.long`` tensor, 1 where the id is > 0, else 0.
        ``"intent"``: the row's ``intent_idx`` value, returned unchanged.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.dataframe.index)

    def __getitem__(self, item):
        """Encode the row at *position* ``item`` and return it as a dict of inputs."""
        # Positional lookup: samplers hand out 0..len-1, which only matches the
        # index labels when the dataframe still has its default RangeIndex.
        row = self.dataframe.iloc[item]

        # Explicit key access; `query` is also the name of a pandas method, so
        # attribute access is fragile for this column.
        intent = row["intent_idx"]
        query = row["query"]

        # encode() spells these options `max_length` / `pad_to_max_length`.
        ids = self.tokenizer.encode(
            query,
            max_length=self.max_len,
            pad_to_max_length=True,
        )

        # Treats id 0 as padding — assumes the tokenizer's pad token id is 0
        # (the case for bert-base-uncased).
        attention_mask = [int(i > 0) for i in ids]

        return {
            "ids": torch.tensor(ids, dtype=torch.long),
            "attention_mask": torch.tensor(attention_mask, dtype=torch.long),
            "intent": intent,
        }

In [111]:
# Sample query used by the following cells to exercise the tokenizer and encoder.
text = "What is my balance?"

In [112]:
# Encode without padding, then right-pad by hand with id 0 up to the shared
# `max_len` so this cell stays in sync with the configured sequence length.
ids = tokenizer.encode(text)
ids = ids + [0] * (max_len - len(ids))
# 1 for real tokens, 0 for the padding ids appended above.
attention_mask = [int(i > 0) for i in ids]

In [113]:
# Convert the Python lists to int64 tensors and add a leading batch axis of
# size 1, giving each tensor the shape (1, sequence_length).
ids_tensor = torch.tensor(ids, dtype=torch.long).reshape(1, -1)
attention_mask_tensor = torch.tensor(attention_mask, dtype=torch.long).reshape(1, -1)

In [114]:
# Pass the attention mask so the padded positions are not attended to.
# `ids_tensor` already has shape (1, sequence_length), so no reshape is needed.
outputs = bert(ids_tensor, attention_mask=attention_mask_tensor)
# Index instead of unpacking: this works whether the model returns a plain
# tuple or an output object. [0] is the per-token output, [1] the pooled output.
out1, out2 = outputs[0], outputs[1]

In [115]:
# Shape of the first encoder output: one vector per input position
# (batch, sequence_length, hidden_size).
out1.shape

torch.Size([1, 30, 768])

In [116]:
# Shape of the second encoder output: a single vector per sequence
# (batch, hidden_size).
out2.shape

torch.Size([1, 768])