In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('Data_Datasets_Multi-label_dataset_gpt_multi_label_32000.csv')

df

Unnamed: 0,review,feature request,bug report,rating,user experience,ID
0,This is a great source of information. I would...,1,0,0,1,108375
1,This is probably the most enjoyable game I've ...,0,0,1,1,3500
2,==MOST WELCOME IN CHAMPCASH== Champcash is an ...,0,0,1,0,77617
3,This app is so bright it hurts. You're trying ...,1,0,1,1,85647
4,Devs this is an awesome app. I've always been...,1,0,0,0,66698
...,...,...,...,...,...,...
31995,Takes up too much memory... Anything that forc...,0,0,0,1,203042
31996,Do not install this! It made my phone go nuts....,0,0,1,0,37583
31997,Lags on lockscreen I almost absolutely love Mu...,0,0,0,1,29910
31998,How to set featured image? Fix this problem Ne...,1,1,0,0,85066


In [3]:
from datasets import Dataset, DatasetDict
dataset = Dataset.from_pandas(df)

In [4]:
dataset

Dataset({
    features: ['review', 'feature request', 'bug report', 'rating', 'user experience', 'ID'],
    num_rows: 32000
})

In [5]:
ds1 = dataset.train_test_split(test_size=0.2)
ds2 = ds1['test'].train_test_split(test_size=0.5)

dataset = DatasetDict({
    'train': ds1['train'],
    'test': ds2['train'],
    'valid': ds2['test']})

In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['review', 'feature request', 'bug report', 'rating', 'user experience', 'ID'],
        num_rows: 25600
    })
    test: Dataset({
        features: ['review', 'feature request', 'bug report', 'rating', 'user experience', 'ID'],
        num_rows: 3200
    })
    valid: Dataset({
        features: ['review', 'feature request', 'bug report', 'rating', 'user experience', 'ID'],
        num_rows: 3200
    })
})

In [7]:
dataset['train'][0]

{'review': "The s7 Edge is the second device I've had ACdisplay as my main lock screen. Only issue I've come across is recently  when I have a notification and I click the icon  it won't give me any information of the notification in place of the widget anymore. If somebody can tell me what to do to sort that out  I'll rate 5 stars. As of now  it's the only nice looking lock screen worth having.",
 'feature request': 0,
 'bug report': 1,
 'rating': 1,
 'user experience': 0,
 'ID': 29107}

In [8]:
labels = [label for label in dataset['train'].features.keys() if label not in ['ID', 'review']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}

In [9]:
labels

['feature request', 'bug report', 'rating', 'user experience']

In [10]:
id2label

{0: 'feature request', 1: 'bug report', 2: 'rating', 3: 'user experience'}

In [11]:
label2id

{'feature request': 0, 'bug report': 1, 'rating': 2, 'user experience': 3}

In [12]:
from transformers import AutoTokenizer
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("thearod5/bert4re")

def preprocess_data(examples):
  # take a batch of texts
  text = examples["review"]
  # encode them
  encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128)
  # add labels
  labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
  # create numpy array of shape (batch_size, num_labels)
  labels_matrix = np.zeros((len(text), len(labels)))
  # fill numpy array
  for idx, label in enumerate(labels):
    labels_matrix[:, idx] = labels_batch[label]

  encoding["labels"] = labels_matrix.tolist()
  
  return encoding

In [13]:
encoded_dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset['train'].column_names)

Map:   0%|          | 0/25600 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

Map:   0%|          | 0/3200 [00:00<?, ? examples/s]

In [14]:
encoded_dataset['train'][8].keys()

dict_keys(['input_ids', 'attention_mask', 'labels'])

In [15]:
tokenizer.decode(encoded_dataset['train'][8]['input_ids'])

2024-09-02 00:59:16.014795: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-02 00:59:16.036020: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-02 00:59:16.042995: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-02 00:59:16.061380: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


"<s>Good to gather these data openly! It gives a different view of the surroundings and it's cool to see the coverage holes. Suggestion: Add a yardstick to the UI and show all measurements at all zoom levels. And maybe indicate previous measurements as a user option? Is this app abandoned - the number of sent reports doesn't match the number on the leaderboard  and the map doesn't seem to be updated with new reports?</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>"

In [16]:
encoded_dataset['train'][8]['labels']

[0.0, 0.0, 0.0, 1.0]

In [17]:
[id2label[idx] for idx, label in enumerate(encoded_dataset['train'][8]['labels']) if label == 1.0]

['user experience']

In [18]:
encoded_dataset.set_format("torch")

In [19]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("thearod5/bert4re", 
                                                           problem_type="multi_label_classification", 
                                                           num_labels=len(labels),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at thearod5/bert4re and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig

peft_config = LoraConfig(task_type="SEQ_CLS",
                        r=4,
                        lora_alpha=64,
                        lora_dropout=0.1,
                        target_modules = ["attention.self.query", "attention.self.key","attention.self.value"])
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 704,260 || all params: 68,798,216 || trainable%: 1.0237


In [21]:
batch_size = 16
metric_name = "accuracy"

from transformers import TrainingArguments, Trainer

args = TrainingArguments(
    f"bert-finetuned-sem_eval-english",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=20,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    lr_scheduler_type="cosine",  # learning rate scheduler type
    warmup_ratio=0.1  # warmup ratio for lr scheduler
)



In [22]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch
from transformers import EarlyStoppingCallback
    
# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, 
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds, 
        labels=p.label_ids)
    return result

# Add EarlyStoppingCallback to the trainer
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

In [25]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["valid"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
1,0.5856,0.471249,0.558914,0.69036,0.384375
2,0.357,0.302655,0.810495,0.841965,0.65125
3,0.2876,0.223864,0.879392,0.892749,0.751875
4,0.2267,0.200609,0.894174,0.905031,0.775937
5,0.1928,0.184693,0.908682,0.916715,0.8025
6,0.1836,0.169294,0.918728,0.925337,0.820625
7,0.1624,0.160424,0.924501,0.929988,0.83375
8,0.1532,0.146539,0.933747,0.938253,0.85125
9,0.1407,0.140769,0.937468,0.941593,0.859062
10,0.1282,0.137697,0.939877,0.943586,0.864062




TrainOutput(global_step=16000, training_loss=0.1762838041782379, metrics={'train_runtime': 2270.1648, 'train_samples_per_second': 225.534, 'train_steps_per_second': 7.048, 'total_flos': 1.7233358094336e+16, 'train_loss': 0.1762838041782379, 'epoch': 20.0})

In [26]:
trainer.evaluate()



{'eval_loss': 0.12290691584348679,
 'eval_f1': 0.9468315695524672,
 'eval_roc_auc': 0.9500307592501454,
 'eval_accuracy': 0.87875,
 'eval_runtime': 7.7262,
 'eval_samples_per_second': 414.175,
 'eval_steps_per_second': 12.943,
 'epoch': 20.0}