In [None]:
!pip install datasets
!pip install torchmetrics

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torchmetrics

In [None]:
from datasets import load_dataset

# Load the dataset published under the name "sst2"; the training and
# evaluation cells below read its 'train', 'validation' and 'test' splits.
dataset = load_dataset("sst2")

#  NOTE(review): the names listed below look like GLUE task configs, which
#  would be passed as the second argument of load_dataset("glue", <config>)
#  rather than to the "sst2" call above -- confirm before relying on this:
#  "ax", "cola", "mnli", "mnli_matched",
#  "mnli_mismatched", "mrpc", "qnli", "qqp",
#  "rte", "sst2", "stsb", "wnli"

In [None]:
# Display the loaded dataset object as the cell output.
dataset

In [None]:
from transformers import DistilBertModel, DistilBertTokenizer

# The tokenizer and the encoder are both restored from the same pretrained
# checkpoint so that token ids line up with the encoder's vocabulary.
CHECKPOINT = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(CHECKPOINT)
dbert_model = DistilBertModel.from_pretrained(CHECKPOINT)


In [None]:
## Data prep ##
seq_len=128


def encode_split(split, max_len):
  """Tokenise one dataset split into fixed-length id / mask / label tensors.

  Replaces three copy-pasted loops (train / validation / test) with a single
  helper. Uses the module-level ``tokenizer``.

  Args:
    split: iterable of examples, each providing a "sentence" string and an
      integer "label" (e.g. ``dataset['train']``).
    max_len: target sequence length. Shorter sequences are right-padded with
      zeros; sequences whose tokenised length exceeds ``max_len`` are dropped
      (not truncated), exactly as the original loops did.

  Returns:
    Tuple ``(ids, mask, labels)`` of ``torch.long`` tensors. ``ids`` and
    ``mask`` have shape ``(kept_examples, max_len)`` and ``labels`` has shape
    ``(kept_examples,)``.
  """
  ids_rows=[]
  mask_rows=[]
  labels=[]
  for example in split:
    tok=tokenizer(example["sentence"], return_tensors='pt')
    # The tokenizer returns a batch of one; take the single row.
    token_ids=tok.input_ids[0]
    attn=tok.attention_mask[0]

    n_pad=max_len-token_ids.shape[0]
    if n_pad<0:
      # Over-length example: skipped, matching the original behaviour.
      continue

    # Pad with integer zeros of the same dtype so ids never round-trip
    # through float. 0 is the padding value the original code used --
    # presumably the [PAD] id of this vocabulary; confirm via
    # tokenizer.pad_token_id.
    ids_rows.append(torch.cat((token_ids,token_ids.new_zeros(n_pad))))
    mask_rows.append(torch.cat((attn,attn.new_zeros(n_pad))))
    labels.append(example['label'])

  ids=torch.stack(ids_rows).to(torch.long)
  mask=torch.stack(mask_rows).to(torch.long)
  return ids,mask,torch.LongTensor(labels)


#1-Train#
train_x_in,train_x_mask,train_y=encode_split(dataset['train'],seq_len)

#2-val#
val_x_in,val_x_mask,val_y=encode_split(dataset['validation'],seq_len)

#3-test#
test_x_in,test_x_mask,test_y=encode_split(dataset['test'],seq_len)




In [None]:
# Sanity check: show the dtype of the stacked training ids (the data-prep
# cell casts them to torch.long).
train_x_in.dtype

In [None]:
# Bare attribute access (no call): the cell output is the repr of the bound
# `parameters` method, which embeds the repr of the encoder module itself.
dbert_model.parameters

In [None]:
# Show the `dim` value from the encoder config; the classifier head below
# uses it as the input width of its first Linear layer.
dbert_model.config.dim

In [None]:
# Classification head: encoder width -> 512 -> 128 -> 2 class scores.
#
# The head deliberately ends in a plain Linear layer and returns raw logits.
# The training cell uses nn.CrossEntropyLoss, which applies log-softmax
# itself; the previous trailing nn.Softmax() (also created without an
# explicit `dim`) meant softmax was applied twice, flattening the gradients.
# Apply torch.softmax(logits, dim=-1) at inference time if probabilities are
# needed; argmax-based predictions are unaffected.
classifier=nn.Sequential(
    nn.Linear(dbert_model.config.dim,512),
    nn.ReLU(),
    nn.Linear(512,128),
    nn.ReLU(),
    nn.Linear(128,2),
)

In [None]:
class SentimentAnalysis(nn.Module):
  """Sentence classifier: encoder followed by a classification head.

  ``m1`` is the module-level ``dbert_model`` encoder and ``m2`` the
  module-level ``classifier`` head.
  """

  def __init__(self):
    super().__init__()
    self.m1=dbert_model
    self.m2=classifier

  def forward(self,input,mask):
    """Encode a batch, mean-pool over real tokens and classify.

    Args:
      input: integer token ids, shape ``(batch, seq_len)``.
      mask: attention mask of the same shape; 1 marks a real token and
        0 marks padding.

    Returns:
      The output of the head ``m2`` for each example, shape
      ``(batch, num_outputs)``.
    """
    # Assumes last_hidden_state is (batch, seq_len, dim), as documented for
    # the Hugging Face encoder outputs.
    hidden=self.m1(input,attention_mask=mask).last_hidden_state

    # No squeeze here: the old torch.squeeze(x, 0) removed the batch axis
    # whenever the batch held a single example, so the following mean ran
    # over the wrong axis.
    #
    # Average only over real tokens. Padded positions still receive hidden
    # states from the encoder, and a plain mean over dim 1 would mix them in.
    weights=mask.unsqueeze(-1).to(hidden.dtype)
    token_sum=(hidden*weights).sum(dim=1)
    token_count=weights.sum(dim=1).clamp(min=1.0)  # guard all-padding rows
    pooled=token_sum/token_count

    return self.m2(pooled)



In [None]:
# Build the full model; it wraps the already-created module-level
# `dbert_model` and `classifier` objects rather than making new copies.
model=SentimentAnalysis()

In [None]:
# Bare attribute access (no call): the cell output is the repr of the bound
# `parameters` method, which embeds the repr of the whole model.
model.parameters

In [None]:
# Move every tensor and the model to one device. Use the GPU when available
# and fall back to the CPU otherwise, instead of failing on a hard-coded
# 'cuda' when no GPU is present.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_x_in=train_x_in.to(device)
train_x_mask=train_x_mask.to(device)
train_y=train_y.to(device)
val_x_in=val_x_in.to(device)
val_x_mask=val_x_mask.to(device)
val_y=val_y.to(device)
test_x_in=test_x_in.to(device)
test_x_mask=test_x_mask.to(device)
test_y=test_y.to(device)
model=model.to(device)

In [None]:
from torch.utils.data import DataLoader,TensorDataset

# Each dataset yields (token_ids, attention_mask, label) triples.
train_dataset=TensorDataset(train_x_in,train_x_mask,train_y)
val_dataset=TensorDataset(val_x_in,val_x_mask,val_y)
test_dataset=TensorDataset(test_x_in,test_x_mask,test_y)

# Only the training loader is shuffled. The evaluation loaders keep a fixed
# order so that batch-averaged validation/test numbers are identical from
# one run to the next.
train = DataLoader(train_dataset, batch_size=64, shuffle=True)
val=DataLoader(val_dataset, batch_size=64, shuffle=False)
test = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Number of batches the training DataLoader yields per epoch.
len(train)

In [None]:
epochs=5

criterion=nn.CrossEntropyLoss()
opt=torch.optim.Adam(model.parameters(), lr=0.0001)

# Keep the metric objects on the same device as the model so they can consume
# its outputs directly.
metric_device=next(model.parameters()).device
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=2).to(metric_device)

# Weighted precision / recall / F1 over the validation set. These replace a
# call to sklearn (never imported) on variables that were never defined
# (`y_2`, `Predicted_Label_2`, `iterator`), which raised NameError.
precision_metric = torchmetrics.Precision(task="multiclass", num_classes=2, average="weighted").to(metric_device)
recall_metric = torchmetrics.Recall(task="multiclass", num_classes=2, average="weighted").to(metric_device)
f1_metric = torchmetrics.F1Score(task="multiclass", num_classes=2, average="weighted").to(metric_device)

for epoch in range(epochs):
  # Running sums are plain Python floats (via .item()) so no autograd graph
  # or device tensor is kept alive across batches.
  avg_train_acc=0.0
  avg_val_acc=0.0
  count=0
  avg_train_loss=0.0
  avg_val_loss=0.0

  # ---- training ----
  model.train()  # enable dropout etc. for the optimisation pass
  for batch in train:
    count=count+1
    print(f'Epoch {epoch} Batch no.: {count}')
    X_batch_in,X_batch_mask,label_batch = batch
    preds=model(X_batch_in,X_batch_mask)

    loss=criterion(preds,label_batch)

    opt.zero_grad()
    loss.backward()
    opt.step()

    avg_train_acc=avg_train_acc+accuracy(preds.detach(),label_batch).item()
    avg_train_loss=avg_train_loss+loss.item()

  # ---- validation ----
  model.eval()  # disable dropout so evaluation is deterministic
  precision_metric.reset()
  recall_metric.reset()
  f1_metric.reset()
  with torch.no_grad():
    for batch in val:
      X_val_batch_in,X_val_batch_mask,label_val_batch = batch

      val_preds=model(X_val_batch_in,X_val_batch_mask)
      avg_val_loss=avg_val_loss+criterion(val_preds,label_val_batch).item()
      avg_val_acc=avg_val_acc+accuracy(val_preds,label_val_batch).item()

      # Accumulate state; the epoch-level value is read once after the loop.
      precision_metric.update(val_preds,label_val_batch)
      recall_metric.update(val_preds,label_val_batch)
      f1_metric.update(val_preds,label_val_batch)

  precision=precision_metric.compute().item()
  recall=recall_metric.compute().item()
  fscore=f1_metric.compute().item()

  print(f"| Epoch={epoch} | Training Accuracy={avg_train_acc/len(train)} | Validation Accuracy={avg_val_acc/len(val)} | Training Loss={avg_train_loss/len(train)} | Validation_Loss={avg_val_loss/len(val)} |")
  print(f"P={precision}, R={recall}, F1={fscore}")
  print('-------------------------------------------------------------------------------------------------------------------------------------------------------------------------')


## Test acc:
model.eval()
# The accumulators must start at zero; they were previously used without
# ever being initialised.
avg_test_acc=0.0
avg_test_loss=0.0

if bool((test_y<0).any()):
  # CrossEntropyLoss rejects negative class indices.
  # NOTE(review): the public SST-2/GLUE test split is believed to ship with
  # hidden labels encoded as -1 -- confirm against the dataset card.
  print('Skipping test evaluation: the test split contains negative (unlabelled) targets.')
else:
  with torch.no_grad():
    for batch in test:
      X_test_batch_in,X_test_batch_mask,label_test_batch = batch

      test_preds=model(X_test_batch_in,X_test_batch_mask)
      avg_test_loss=avg_test_loss+criterion(test_preds,label_test_batch).item()
      avg_test_acc=avg_test_acc+accuracy(test_preds,label_test_batch).item()

  print(f'Test loss: {avg_test_loss/len(test)} | Test acc: {avg_test_acc/len(test)}')

# The original cell moved an undefined `new_model` to the CPU here, which
# raised NameError. `model` is left on its current device because the next
# cell evaluates it again on the same tensors. To persist the weights:
# torch.save(model.state_dict(),'/content/drive/MyDrive/NLPCourse/test_model.pt')


In [None]:
# Stand-alone test-set evaluation. Reuses `model`, `criterion`, `accuracy`
# and the `test` loader created in earlier cells.
model.eval()  # disable dropout for evaluation

# Start from zero on every run. Without this the sums were either undefined
# (NameError) or carried over from a previous evaluation, inflating the
# averages.
avg_test_acc=0.0
avg_test_loss=0.0

if bool((test_y<0).any()):
  # CrossEntropyLoss rejects negative class indices.
  # NOTE(review): the public SST-2/GLUE test split is believed to ship with
  # hidden labels encoded as -1 -- confirm against the dataset card.
  print('Skipping test evaluation: the test split contains negative (unlabelled) targets.')
else:
  with torch.no_grad():
    for batch in test:
      X_test_batch_in,X_test_batch_mask,label_test_batch = batch

      test_preds=model(X_test_batch_in,X_test_batch_mask)
      avg_test_loss=avg_test_loss+criterion(test_preds,label_test_batch).item()
      avg_test_acc=avg_test_acc+accuracy(test_preds,label_test_batch).item()

  print(f'Test loss: {avg_test_loss/len(test)} | Test acc: {avg_test_acc/len(test)}')