In [None]:
!pip install datasets
!pip install torchmetrics

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torchmetrics

from datasets import load_dataset

# Fetch the "stsb" configuration of the GLUE benchmark.
# Other GLUE configuration names accepted by `name`:
#   "ax", "cola", "mnli", "mnli_matched", "mnli_mismatched", "mrpc",
#   "qnli", "qqp", "rte", "sst2", "stsb", "wnli"
dataset = load_dataset(path="glue", name="stsb")

# Last expression of the cell: display the loaded splits.
dataset



In [None]:
from transformers import DistilBertTokenizer, DistilBertModel

# Tokenizer and encoder are loaded from the same pretrained checkpoint.
CHECKPOINT = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(CHECKPOINT)
dbert_model = DistilBertModel.from_pretrained(CHECKPOINT)


In [None]:
## Data prep ##
# Maximum number of tokens per encoded sentence pair; longer pairs are dropped.
seq_len=128


def encode_split(split, tokenizer, seq_len):
  """Encode one dataset split into fixed-length tensors.

  Each example's `sentence1` / `sentence2` are encoded together as a single
  sentence pair, so the sequence carries one leading special token instead of
  a second one in the middle (which is what encoding the two sentences
  separately and concatenating them produces). Pairs whose encoding exceeds
  `seq_len` tokens are skipped; shorter ones are right-padded.

  Args:
    split: iterable of examples exposing "sentence1", "sentence2" and "label".
    tokenizer: tokenizer called as `tokenizer(text, text_pair)` and returning
      "input_ids" and "attention_mask" lists.
    seq_len: fixed length every kept example is padded to.

  Returns:
    Tuple `(x_in, x_mask, y)`:
      x_in   - long tensor (n_kept, seq_len) of token ids,
      x_mask - long tensor (n_kept, seq_len), 1 for real tokens, 0 for padding,
      y      - float tensor (n_kept,) of labels.
  """
  pad_id=tokenizer.pad_token_id
  if pad_id is None:
    # The original code padded with zeros; keep that as the fallback.
    pad_id=0

  ids_rows=[]
  mask_rows=[]
  scores=[]
  for example in split:
    encoded=tokenizer(example["sentence1"], example["sentence2"])
    token_ids=encoded["input_ids"]
    if len(token_ids)>seq_len:
      # Too long for the fixed window: drop the example (no truncation).
      continue
    n_pad=seq_len-len(token_ids)
    ids_rows.append(token_ids+[pad_id]*n_pad)
    mask_rows.append(encoded["attention_mask"]+[0]*n_pad)
    scores.append(example["label"])

  # reshape keeps a well-formed (0, seq_len) tensor when every example was dropped.
  x_in=torch.tensor(ids_rows, dtype=torch.long).reshape(-1, seq_len)
  x_mask=torch.tensor(mask_rows, dtype=torch.long).reshape(-1, seq_len)
  # Labels are regression targets: keep them floating point. An integer tensor
  # cannot represent fractional scores.
  y=torch.tensor(scores, dtype=torch.float)
  return x_in, x_mask, y


#1-Train#
train_x_in,train_x_mask,train_y=encode_split(dataset['train'], tokenizer, seq_len)

#2-val#
val_x_in,val_x_mask,val_y=encode_split(dataset['validation'], tokenizer, seq_len)

#3-test#
test_x_in,test_x_mask,test_y=encode_split(dataset['test'], tokenizer, seq_len)


In [None]:
# Display the shapes of the encoded input-id tensors for the train / validation / test splits.
train_x_in.shape,val_x_in.shape,test_x_in.shape

In [None]:
# Feed-forward head applied to the pooled encoder output.
# Widths go encoder-dim -> 512 -> 128 -> 1, with ReLU and dropout (p=0.3)
# after each of the two hidden projections; the final layer emits one value.
_encoder_dim=dbert_model.config.dim
_head_layers=[
    nn.Linear(_encoder_dim,512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512,128),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(128,1),
]
classifier=nn.Sequential(*_head_layers)

class SentimentAnalysis(nn.Module):
  """Encoder followed by a feed-forward head producing one value per example.

  NOTE(review): despite the class name, the head ends in a single output unit
  and the notebook trains it with a regression loss.

  Args:
    encoder: module called as `encoder(input_ids=..., attention_mask=...)` whose
      result exposes `.last_hidden_state` of shape (batch, seq, dim).
      Defaults to the notebook-level `dbert_model`.
    head: module mapping (batch, dim) to the output. Defaults to the
      notebook-level `classifier`.
  """
  def __init__(self,encoder=None,head=None):
    super().__init__()
    self.m1=dbert_model if encoder is None else encoder
    self.m2=classifier if head is None else head
  def forward(self,input,mask):
    """Return the head's output for a batch.

    Args:
      input: (batch, seq) token ids.
      mask: (batch, seq) attention mask, 1 for real tokens and 0 for padding.

    Returns:
      Output of the head applied to the mask-weighted mean of the encoder's
      token states; the batch dimension is always preserved.
    """
    hidden=self.m1(input_ids=input,attention_mask=mask).last_hidden_state
    # Average only over real tokens. A plain mean over the sequence axis would
    # let padding positions dominate short sentences, and squeezing dim 0
    # would drop the batch axis whenever a batch holds a single example.
    weights=mask.unsqueeze(-1).to(hidden.dtype)
    # clamp guards against division by zero for an all-padding row.
    token_counts=weights.sum(dim=1).clamp(min=1.0)
    pooled=(hidden*weights).sum(dim=1)/token_counts
    return self.m2(pooled)

# Build the network from the notebook-level encoder (`dbert_model`) and head (`classifier`).
model=SentimentAnalysis()

In [None]:
# Attribute access without a call: this displays the bound `parameters` method
# of the model, not the parameter tensors themselves.
model.parameters

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of failing on machines without CUDA.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move every split and the model to the selected device.
train_x_in=train_x_in.to(device)
train_x_mask=train_x_mask.to(device)
train_y=train_y.to(device)
val_x_in=val_x_in.to(device)
val_x_mask=val_x_mask.to(device)
val_y=val_y.to(device)
test_x_in=test_x_in.to(device)
test_x_mask=test_x_mask.to(device)
test_y=test_y.to(device)
model=model.to(device)

In [None]:
from torch.utils.data import DataLoader,TensorDataset

# Each dataset yields (input_ids, attention_mask, label) triples.
train_dataset=TensorDataset(train_x_in,train_x_mask,train_y)
val_dataset=TensorDataset(val_x_in,val_x_mask,val_y)
test_dataset=TensorDataset(test_x_in,test_x_mask,test_y)

# Only the training loader is shuffled; evaluation order does not need to be
# randomized, and a fixed order keeps per-batch evaluation output repeatable.
train = DataLoader(train_dataset, batch_size=64, shuffle=True)
val=DataLoader(val_dataset, batch_size=64, shuffle=False)
test = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
from torchmetrics.regression import SpearmanCorrCoef
from torchmetrics.regression import PearsonCorrCoef

# Number of full passes over the training loader.
epochs=10

# Mean-squared-error objective, optimized with Adam over all model parameters.
criterion=nn.MSELoss()
opt=torch.optim.Adam(model.parameters(), lr=0.0001)

# Keep the metric state on whatever device the model lives on, so updates with
# model outputs never mix devices (and no GPU is required).
metric_device=next(model.parameters()).device

spearman_train = SpearmanCorrCoef().to(metric_device)
pearson_train = PearsonCorrCoef().to(metric_device)

spearman_val= SpearmanCorrCoef().to(metric_device)
pearson_val = PearsonCorrCoef().to(metric_device)

In [None]:

# Train for `epochs` passes; after each pass evaluate on the validation loader
# and print correlation metrics and mean losses for that epoch only.
for epoch in range(epochs):
  # Running sums of per-batch losses, kept as Python floats so no autograd
  # graph is retained across batches.
  avg_train_loss=0.0
  avg_val_loss=0.0

  # Enable dropout for the optimization phase.
  model.train()
  for count,batch in enumerate(train, start=1):
    print(f'Epoch {epoch} Batch no.: {count}')
    X_batch_in,X_batch_mask,label_batch = batch
    label_batch = label_batch.float()

    preds=model(X_batch_in,X_batch_mask)
    # (batch, 1) -> (batch,) so predictions line up with the labels.
    preds=torch.squeeze(preds,1)

    loss=criterion(preds,label_batch)

    opt.zero_grad()
    loss.backward()
    opt.step()

    # Metrics only need values, not gradients.
    batch_preds=preds.detach()
    spearman_train.update(batch_preds,label_batch)
    pearson_train.update(batch_preds,label_batch)
    avg_train_loss=avg_train_loss+loss.item()

  # Disable dropout so validation numbers are deterministic for fixed weights.
  model.eval()
  with torch.no_grad():
    for batch in val:
      X_val_batch_in,X_val_batch_mask,label_val_batch = batch

      label_val_batch=label_val_batch.float()

      val_preds=model(X_val_batch_in,X_val_batch_mask)
      val_preds=torch.squeeze(val_preds,1)

      val_loss=criterion(val_preds,label_val_batch)

      spearman_val.update(val_preds,label_val_batch)
      pearson_val.update(val_preds,label_val_batch)
      avg_val_loss=avg_val_loss+val_loss.item()

  print(f"| Epoch={epoch} | SpearmanTrain={spearman_train.compute()} | PearsonTrain={pearson_train.compute()} | SpearmanVal={spearman_val.compute()}  | PearsonVal={pearson_val.compute()}  | Training Loss={avg_train_loss/len(train)} | Validation_Loss={avg_val_loss/len(val)} |")
  print('-------------------------------------------------------------------------------------------------------------------------------------------------------------------------')

  # Clear accumulated metric state; otherwise the next epoch's report would
  # also include every earlier epoch's predictions.
  for metric in (spearman_train,pearson_train,spearman_val,pearson_val):
    metric.reset()




## Test acc:
#with torch.no_grad():
#    for batch in test:
#      X_test_batch_in,X_test_batch_mask,label_test_batch = batch


 #     test_preds=model(X_test_batch_in,X_test_batch_mask)
#      test_loss=criterion(test_preds,label_test_batch)
#      test_acc=accuracy(test_preds,label_test_batch)
#      avg_test_acc=avg_test_acc+test_acc
#      avg_test_loss=avg_test_loss+test_loss


#print(f'Test loss: {avg_test_loss/len(test)} | Test acc: {avg_test_acc/len(test)}')

#new_model = model.to('cpu')
# Serialize the whole model object (not just its state_dict) to a file in the
# current working directory.
# NOTE(review): loading a fully pickled module presumably needs the
# SentimentAnalysis class to be importable at load time — confirm with consumers.
torch.save(model,'test_model_distilbert_stsb.pt')

# test_accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=2).to('cuda')

# with torch.no_grad():
#     for batch in test:
#       X_test_batch_in,X_test_batch_mask,label_test_batch = batch


#       test_preds=model(X_test_batch_in,X_test_batch_mask)
#       test_loss=criterion(test_preds,label_test_batch)
#       test_accuracy.update(test_preds,label_test_batch)
#       avg_test_loss=avg_test_loss+test_loss


# print(f'Test loss: {avg_test_loss/len(test)} | Test acc: test_accuracy.compute() * 100')

