# Classification

Dataset: Category and sentiment classsification dataset extracted from ACOS dataset

### Imports

In [1]:
import torch
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, AdamW
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd

2024-01-11 15:12:38.730404: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-11 15:12:38.769630: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Dataset

In [2]:
# Dataset for classification
train_path = "./data/classification/rest16_quad_train.pkl"
dev_path = "./data/classification/rest16_quad_dev.pkl"
test_path = "./data/classification/rest16_quad_test.pkl"

train_df = pd.read_pickle(train_path)
val_df = pd.read_pickle(dev_path)
test_df = pd.read_pickle(test_path)

In [3]:
# Display all the columns in the df
train_df.columns

Index(['string', 'acso', 'tokens', 'bio_tags', 'aspects', 'categories',
       'sentiments', 'opinions', 'bio_cat_sent_tags', 'aspect', 'category',
       'sentiment', 'opinion', 'main_category', 'sub_category'],
      dtype='object')

In [4]:
train_df.head()

Unnamed: 0,string,acso,tokens,bio_tags,aspects,categories,sentiments,opinions,bio_cat_sent_tags,aspect,category,sentiment,opinion,main_category,sub_category
0,judging from previous posts this used to be a ...,"[10,11 RESTAURANT#GENERAL 0 13,16]","[judging, from, previous, posts, this, used, t...","[O, O, O, O, O, O, O, O, O, O, B_Aspect, O, O,...","[[10, 11]]",[RESTAURANT#GENERAL],[0],"[[13, 16]]","[O, O, O, O, O, O, O, O, O, O, B_RESTAURANT#GE...","[10, 11]",RESTAURANT#GENERAL,0,"[13, 16]",,
1,"we , there were four of us , arrived at noon -...","[19,20 SERVICE#GENERAL 0 31,32]","[we, ,, there, were, four, of, us, ,, arrived,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[[19, 20]]",[SERVICE#GENERAL],[0],"[[31, 32]]","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[19, 20]",SERVICE#GENERAL,0,"[31, 32]",,
2,"they never brought us complimentary noodles , ...","[-1,-1 SERVICE#GENERAL 0 -1,-1]","[they, never, brought, us, complimentary, nood...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[[-1, -1]]",[SERVICE#GENERAL],[0],"[[-1, -1]]","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[-1, -1]",SERVICE#GENERAL,0,"[-1, -1]",,
3,"after all that , they complained to me about t...","[-1,-1 SERVICE#GENERAL 0 5,6]","[after, all, that, ,, they, complained, to, me...","[O, O, O, O, O, B_Opinion, O, O, O, O, O, O, B...","[[-1, -1]]",[SERVICE#GENERAL],[0],"[[5, 6]]","[O, O, O, O, O, B_Negative, O, O, O, O, O, O, ...","[-1, -1]",SERVICE#GENERAL,0,"[5, 6]",,
4,avoid this place !,"[2,3 RESTAURANT#GENERAL 0 0,1]","[avoid, this, place, !]","[B_Opinion, O, B_Aspect, O]","[[2, 3]]",[RESTAURANT#GENERAL],[0],"[[0, 1]]","[B_Negative, O, B_RESTAURANT#GENERAL, O]","[2, 3]",RESTAURANT#GENERAL,0,"[0, 1]",,


In [5]:
# Create label dictionaries for categories
label2id = {label: i for i, label in enumerate(train_df['category'].unique())}
id2label = {i: label for label, i in label2id.items()}

In [6]:
print(label2id)

{'RESTAURANT#GENERAL': 0, 'SERVICE#GENERAL': 1, 'FOOD#QUALITY': 2, 'RESTAURANT#MISCELLANEOUS': 3, 'FOOD#STYLE_OPTIONS': 4, 'AMBIENCE#GENERAL': 5, 'LOCATION#GENERAL': 6, 'DRINKS#QUALITY': 7, 'FOOD#PRICES': 8, 'RESTAURANT#PRICES': 9, 'DRINKS#STYLE_OPTIONS': 10, 'DRINKS#PRICES': 11}


In [7]:
# Get input and labels for category and sentiment tasks
train_sents = train_df['string'].tolist()
train_cat_labels = train_df['category'].map(label2id).tolist()
train_sent_labels = train_df['sentiment'].tolist()
train_sent_labels = [int(i) for i in train_sent_labels]

val_sents = val_df['string'].tolist()
val_cat_labels = val_df['category'].map(label2id).tolist()
val_sent_labels = val_df['sentiment'].tolist()
val_sent_labels = [int(i) for i in val_sent_labels]

test_sents = test_df['string'].tolist()
test_cat_labels = test_df['category'].map(label2id).tolist()
test_sent_labels = test_df['sentiment'].tolist()
test_sent_labels = [int(i) for i in test_sent_labels]

### Custom dataset class

In [8]:
# Defining the custom dataset class
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(self.texts[idx], truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()
        label = torch.tensor(self.labels[idx])
        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': label}

### Finetuning function

In [9]:
# Finetuning function
def fine_tune_gpt2(train_dataset, val_dataset, test_dataset, model_name, output_dir,num_labels, epochs=3):
    # Load pre-trained GPT-2 model with a sequence classification head
    model = GPT2ForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
    
    model.config.pad_token_id = model.config.eos_token_id

    
    # Set up training parameters
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = 'cpu'
    model.to(device)
    model.train()

    optimizer = AdamW(model.parameters(), lr=3e-5)

    # Prepare data loaders
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

    # Fine-tuning loop
    for epoch in range(epochs):
        # Training
        model.train()
        for batch in train_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_labels, val_preds = [], []
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                logits = outputs.logits

                val_labels.extend(labels.cpu().numpy())
                val_preds.extend(torch.argmax(logits, axis=1).cpu().numpy())

        val_accuracy = accuracy_score(val_labels, val_preds)
        val_f1 = f1_score(val_labels, val_preds, average='macro')
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}, Validation Accuracy: {val_accuracy}, Validataion F1: {val_f1}")

    # Testing
    model.eval()
    test_labels, test_preds = [], []
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            logits = outputs.logits

            test_labels.extend(labels.cpu().numpy())
            test_preds.extend(torch.argmax(logits, axis=1).cpu().numpy())

    test_accuracy = accuracy_score(test_labels, test_preds)
    test_f1 = f1_score(test_labels, test_preds, average='macro')
    print(f"Testing Accuracy: {test_accuracy}")
    print(f"Testing F1: {test_f1}")

    # Save the fine-tuned model
    model.save_pretrained(output_dir)

#### Setup

In [10]:
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# default to left padding
tokenizer.padding_side = "left"
# Define PAD Token = EOS Token = 50256
tokenizer.pad_token = tokenizer.eos_token

In [11]:
# Load the data to customdataset class

cat_train_dataset = CustomDataset(train_sents, train_cat_labels, tokenizer, max_length=128)
cat_val_dataset = CustomDataset(val_sents, val_cat_labels, tokenizer, max_length=128)
cat_test_dataset = CustomDataset(test_sents, test_cat_labels, tokenizer, max_length=128)

sent_train_dataset = CustomDataset(train_sents, train_sent_labels, tokenizer, max_length=128)
sent_val_dataset = CustomDataset(val_sents, val_sent_labels, tokenizer, max_length=128)
sent_test_dataset = CustomDataset(test_sents, test_sent_labels, tokenizer, max_length=128)


In [12]:
# Finetuning for category classification
fine_tune_gpt2(cat_train_dataset, cat_val_dataset, cat_test_dataset, model_name, './output/category/',num_labels=len(label2id), epochs=5)


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5, Loss: 1.7819997072219849, Validation Accuracy: 0.29245283018867924, Validataion F1: 0.08651347977190674
Epoch 2/5, Loss: 2.3995401859283447, Validation Accuracy: 0.5660377358490566, Validataion F1: 0.19522101002229583
Epoch 3/5, Loss: 1.1191128492355347, Validation Accuracy: 0.5849056603773585, Validataion F1: 0.21050471547983987
Epoch 4/5, Loss: 1.976608395576477, Validation Accuracy: 0.7452830188679245, Validataion F1: 0.3806674806674807
Epoch 5/5, Loss: 0.7307218313217163, Validation Accuracy: 0.7452830188679245, Validataion F1: 0.4691807702074214
Testing Accuracy: 0.6351351351351351
Testing F1: 0.24950589381639224


In [13]:
fine_tune_gpt2(sent_train_dataset, sent_val_dataset, sent_test_dataset, model_name, './output/sentiment/', num_labels=3, epochs=5)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5, Loss: 0.8016752004623413, Validation Accuracy: 0.8113207547169812, Validataion F1: 0.4986292533462344
Epoch 2/5, Loss: 0.9988476037979126, Validation Accuracy: 0.7641509433962265, Validataion F1: 0.4349482852476864
Epoch 3/5, Loss: 0.25896257162094116, Validation Accuracy: 0.8490566037735849, Validataion F1: 0.5493197278911565
Epoch 4/5, Loss: 0.542195200920105, Validation Accuracy: 0.8584905660377359, Validataion F1: 0.557780119423955
Epoch 5/5, Loss: 0.15622447431087494, Validation Accuracy: 0.839622641509434, Validataion F1: 0.5326194273562694
Testing Accuracy: 0.7702702702702703
Testing F1: 0.5532072949805585
