In [None]:
!git clone https://github.com/aliannejadi/ClariQ

fatal: destination path 'ClariQ' already exists and is not an empty directory.


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import torch
import numpy as np
from transformers import ElectraTokenizer, ElectraForSequenceClassification
from torch import nn
from torch.optim import Adam
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel, ElectraModel
from transformers import set_seed

In [None]:
datapath = '/content/ClariQ/data/train.tsv'
df = pd.read_csv(datapath, sep='\t')
df_train=df.drop_duplicates('initial_request')

In [None]:
datapath = '/content/ClariQ/data/dev.tsv'
df_dev = pd.read_csv(datapath, sep='\t')
df_dev=df_dev.drop_duplicates('initial_request')
df_dev.head()

Unnamed: 0,topic_id,initial_request,topic_desc,clarification_need,facet_id,facet_desc,question_id,question,answer
0,101,Find me information about the Ritz Carlton Lak...,Find information about the Ritz Carlton resort...,2,F0010,Find information about the Ritz Carlton resort...,Q00697,are you looking for a specific web site,yes for the ritz carlton resort at lake las vegas
60,106,I'm looking for universal animal cuts reviews,Find testimonials of Universal Animal Cuts nut...,3,F0028,Find testimonials of Universal Animal Cuts nut...,Q01481,did universal animal cuts work for you,i need testimonials on the universal animal cu...
102,107,tell me about cass county missouri,Find demographic information about Cass County...,3,F0031,Find demographic information about Cass County...,Q00086,are you interested in a list of homes for sale...,no i want demographic info for cass county mo
192,114,Tell about an adobe indian house?,How does one build an adobe house?,2,F0063,How does one build an adobe house?,Q00057,are you going to purchase any specific product...,maybe
231,123,What is von Willebrand Disease?,What is von Willebrand Disease?,3,F0100,What is von Willebrand Disease?,Q00284,are you interested in learning about treatment...,id like to know what it is first


In [None]:
df_train.groupby(['clarification_need'])
df_dev.groupby(['clarification_need'])
set_seed(42)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("google/electra-base-discriminator")

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, df):

        self.labels = [label-1 for label in df['clarification_need']]
        self.texts = [tokenizer(text, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for text in df['initial_request']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):

        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_texts, batch_y

In [None]:
class ElectraClassifier(nn.Module):

    def __init__(self, dropout=0.3):

        super(ElectraClassifier, self).__init__()

        self.electra = ElectraModel.from_pretrained("google/electra-base-discriminator", problem_type="multi_label_classification")
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 4)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        pooled_output = self.electra(input_ids=input_id, attention_mask=mask)
        pooled_output=pooled_output.last_hidden_state
        pooled_output=pooled_output[:,-1]
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer

In [None]:
def train(model, train_data, val_data, learning_rate, epochs):

    train =  Dataset(train_data)
    val = Dataset(val_data)

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=8, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=8)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)

    if use_cuda:
            torch.cuda.empty_cache()

            model = model.cuda()
            criterion = criterion.cuda()

    for epoch_num in range(epochs):

            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask) 
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask) #

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
                | Train Accuracy: {total_acc_train / len(train_data): .3f} \
                | Val Loss: {total_loss_val / len(val_data): .3f} \
                | Val Accuracy: {total_acc_val / len(val_data): .3f}')

In [None]:
def evaluate(model, test_data):

    test = Dataset(test_data)

    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:

        model = model.cuda()

    total_acc_test = 0
    with torch.no_grad():

        for test_input, test_label in test_dataloader:

              test_label = test_label.to(device)
              mask = test_input['attention_mask'].to(device)
              input_id = test_input['input_ids'].squeeze(1).to(device)

              output = model(input_id, mask)

              acc = (output.argmax(dim=1) == test_label).sum().item()
              total_acc_test += acc
    
    print(f'Test Accuracy: {total_acc_test / len(test_data): .3f}')

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
def eval_result(preds, labels):
    """ Calculate the accuracy, f1, precision, recall of our predictions vs labels
    """

    y_pred = np.argmax(preds, axis=1).flatten()
    y_true = labels.flatten()

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    accuracy = np.sum(y_pred == y_true) / len(y_true) 

    return (precision, recall, f1, accuracy)

from sklearn.metrics import classification_report

def eval_model( model, epoch,lr,test_dataloader):
    
    if torch.cuda.is_available():    
        device = torch.device("cuda")
        model.cuda()
    else:
        device = torch.device("cpu")


    model.eval()

    test_results = []
    test_labels = []
    test_results_predicted_lavels = [] 
    for test_input, test_label in tqdm(test_dataloader):

      test_label = test_label.to(device)
      mask = test_input['attention_mask'].to(device)
      input_id = test_input['input_ids'].squeeze(1).to(device)
       
      with torch.no_grad():        
        result = model(input_id,mask)

      logits = result
      logits = logits.detach().cpu().numpy()
      test_results.extend(logits.tolist())
      tmp = np.asarray(logits.tolist())
      test_results_predicted_lavels.extend(np.argmax(tmp, axis=1).flatten())
      label_ids = test_label.to('cpu').numpy()
      test_labels.extend(label_ids)

    print(classification_report( np.asarray(test_labels), np.asarray(test_results_predicted_lavels)))
    (precision, recall, f1, accuracy) = eval_result(np.asarray(test_results), np.asarray(test_labels))

    print('Test Precision: {:.4f}, Test Recall: {:.4f}, Test Macro F1: {:.4f}, Test Accuracy: {:.4f} ' .format(precision, recall, f1, accuracy))

In [None]:
EPOCHS = 11
model = ElectraClassifier()
LR = 1e-5
              
train(model, df_train, df_dev, LR, EPOCHS)

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 24/24 [00:15<00:00,  1.58it/s]


Epochs: 1 | Train Loss:  0.176                 | Train Accuracy:  0.310                 | Val Loss:  0.194                 | Val Accuracy:  0.340


100%|██████████| 24/24 [00:15<00:00,  1.55it/s]


Epochs: 2 | Train Loss:  0.163                 | Train Accuracy:  0.444                 | Val Loss:  0.187                 | Val Accuracy:  0.260


100%|██████████| 24/24 [00:15<00:00,  1.53it/s]


Epochs: 3 | Train Loss:  0.146                 | Train Accuracy:  0.583                 | Val Loss:  0.182                 | Val Accuracy:  0.360


100%|██████████| 24/24 [00:15<00:00,  1.52it/s]


Epochs: 4 | Train Loss:  0.121                 | Train Accuracy:  0.711                 | Val Loss:  0.179                 | Val Accuracy:  0.480


100%|██████████| 24/24 [00:15<00:00,  1.54it/s]


Epochs: 5 | Train Loss:  0.095                 | Train Accuracy:  0.765                 | Val Loss:  0.177                 | Val Accuracy:  0.480


100%|██████████| 24/24 [00:15<00:00,  1.55it/s]


Epochs: 6 | Train Loss:  0.070                 | Train Accuracy:  0.872                 | Val Loss:  0.185                 | Val Accuracy:  0.460


100%|██████████| 24/24 [00:15<00:00,  1.55it/s]


Epochs: 7 | Train Loss:  0.049                 | Train Accuracy:  0.930                 | Val Loss:  0.183                 | Val Accuracy:  0.460


100%|██████████| 24/24 [00:15<00:00,  1.54it/s]


Epochs: 8 | Train Loss:  0.037                 | Train Accuracy:  0.963                 | Val Loss:  0.181                 | Val Accuracy:  0.460


100%|██████████| 24/24 [00:15<00:00,  1.54it/s]


Epochs: 9 | Train Loss:  0.027                 | Train Accuracy:  0.979                 | Val Loss:  0.186                 | Val Accuracy:  0.500


100%|██████████| 24/24 [00:15<00:00,  1.55it/s]


Epochs: 10 | Train Loss:  0.019                 | Train Accuracy:  0.989                 | Val Loss:  0.197                 | Val Accuracy:  0.420


100%|██████████| 24/24 [00:15<00:00,  1.55it/s]


Epochs: 11 | Train Loss:  0.014                 | Train Accuracy:  0.995                 | Val Loss:  0.184                 | Val Accuracy:  0.460


In [None]:
datapath = '/content/ClariQ/data/test_with_labels.tsv'
df_test = pd.read_csv(datapath, sep='\t')
df_test = df_test.drop_duplicates('initial_request')
df_train.groupby(['clarification_need'])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7feb4c93d430>

In [None]:
test = Dataset(df_test)
test_dataloader = torch.utils.data.DataLoader(test, batch_size=8, shuffle=True)
val = Dataset(df_dev)
val_dataloader = torch.utils.data.DataLoader(val, batch_size=8)
eval_model(model, EPOCHS , LR ,test_dataloader)
eval_model(model, EPOCHS , LR ,val_dataloader)

100%|██████████| 8/8 [00:01<00:00,  4.25it/s]


              precision    recall  f1-score   support

           0       0.27      0.38      0.32         8
           1       0.61      0.59      0.60        32
           2       0.50      0.39      0.44        18
           3       0.17      0.25      0.20         4

    accuracy                           0.48        62
   macro avg       0.39      0.40      0.39        62
weighted avg       0.51      0.48      0.49        62

Test Precision: 0.3881, Test Recall: 0.4019, Test Macro F1: 0.3891, Test Accuracy: 0.4839 


100%|██████████| 7/7 [00:01<00:00,  4.69it/s]

              precision    recall  f1-score   support

           0       0.33      0.50      0.40         4
           1       0.61      0.52      0.56        21
           2       0.41      0.56      0.47        16
           3       0.50      0.22      0.31         9

    accuracy                           0.48        50
   macro avg       0.46      0.45      0.44        50
weighted avg       0.50      0.48      0.48        50

Test Precision: 0.4634, Test Recall: 0.4521, Test Macro F1: 0.4364, Test Accuracy: 0.4800 



