In [9]:
import torch 
from transformers import BertTokenizer, BertForSequenceClassification
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torch.nn as nn
import torch.nn.functional as F

In [10]:
# Seed PyTorch's global RNG before the model is built. The classification head
# is newly initialized rather than loaded from the checkpoint, and the same
# global RNG is later used for dropout and for DataLoader shuffling, so without
# a fixed seed every "Restart Kernel -> Run All" reports different numbers.
SEED = 42
torch.manual_seed(SEED)

# Checkpoint name shared by the tokenizer and the model.
pretrained_model_name = 'bert-base-uncased'
tokenizer=BertTokenizer.from_pretrained(pretrained_model_name)
# num_labels=2 requests a two-class classification head on top of the encoder.
model=BertForSequenceClassification.from_pretrained(pretrained_model_name,num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model onto the selected device. Being the cell's last expression,
# this also displays the module structure as the cell output.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [12]:
# Toy training set: two sentences, one label each (same order as the texts).
# NOTE(review): presumably 1 = positive and 0 = negative sentiment -- confirm.
train_texts = [
    "I love programming.",
    "I hate bugs.",
]
# Create the labels directly on the device the model was moved to.
train_labels = torch.tensor([1, 0], device=device)

In [13]:
# Tokenize all training sentences in a single call, with padding and
# truncation enabled (max_length=128), returning PyTorch tensors.
encoded_train = tokenizer(
    train_texts,
    padding=True,
    truncation=True,
    max_length=128,
    return_tensors='pt',
)
# Pull out the two tensors the model consumes and place them on `device`.
train_input_ids, train_attention_mask = (
    encoded_train[field].to(device) for field in ('input_ids', 'attention_mask')
)

In [None]:
# Wrap the encoded tensors in a TensorDataset and batch them with a DataLoader

In [14]:
# Each dataset item is an (input_ids, attention_mask, label) triple, in that order.
train_dataset = TensorDataset(
    train_input_ids,
    train_attention_mask,
    train_labels,
)
# Serve the dataset in shuffled batches of two samples.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
)

In [15]:
# AdamW over every parameter of the model, with a learning rate of 2e-5.
optimizer = AdamW(
    params=model.parameters(),
    lr=2e-5,
)

In [16]:
# Fine-tune the model on the training set.
epochs=5
# Training mode keeps dropout active, so the accuracy reported below comes from
# the same stochastic forward pass that produced the loss (it is not a
# separate evaluation pass).
model.train()

for epoch in range(epochs):
    # Per-epoch running totals.
    total_loss=0
    correct=0
    total=0
    for batch in train_dataloader:
        # Unpack in the order the tensors were given to TensorDataset.
        batch_input_ids,batch_attention_mask,batch_labels=batch
        optimizer.zero_grad()

        # Passing `labels` makes the model return the loss alongside the logits.
        outputs=model(input_ids=batch_input_ids,attention_mask=batch_attention_mask,labels=batch_labels)
        loss=outputs.loss
        logits=outputs.logits

        total_loss+=loss.item()
        loss.backward()
        optimizer.step()

        # Softmax is monotonic, so the arg-max of the raw logits is already the
        # predicted class; normalising first only adds work and rounding.
        preds=torch.argmax(logits,dim=1)
        correct+=(preds==batch_labels).sum().item()
        total+=batch_labels.size(0)

    # Loss is averaged over batches; accuracy is computed over individual samples.
    avg_loss=total_loss/len(train_dataloader)
    accuracy=correct/total*100
    print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

Epoch 1/5, Loss: 0.7243, Accuracy: 50.00%
Epoch 2/5, Loss: 0.6636, Accuracy: 50.00%
Epoch 3/5, Loss: 0.6167, Accuracy: 50.00%
Epoch 4/5, Loss: 0.5789, Accuracy: 100.00%
Epoch 5/5, Loss: 0.5703, Accuracy: 100.00%


In [17]:
# Persist only the model's state_dict (not the whole module object) to disk.
torch.save(
    obj=model.state_dict(),
    f='bert_sequence_classification.pth',
)