### Set up Kernel and Required Dependencies

In [39]:
# Install the notebook's third-party dependencies into the kernel's environment (quiet output).
# NOTE(review): only `peft` is version-pinned; the other packages resolve to whatever is
# current at install time, so a later re-run may pick up different versions — consider pinning.
# NOTE(review): `evaluate`, `rouge_score` and `loralib` are not imported in the cells visible
# here — confirm they are still needed.
%pip install \
    transformers \
    datasets \
    evaluate \
    rouge_score \
    loralib \
    peft==0.3.0 --quiet

In [77]:
import datasets
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from torch.utils.data import DataLoader, TensorDataset
from transformers import RobertaTokenizerFast, AutoModelForSequenceClassification, TrainingArguments, Trainer

In [78]:
# Load synthetic data
data = pd.read_csv('synthetic_data.csv')

# Fail early with a clear message if the CSV does not have the columns the
# tokenization and training cells rely on.
assert {'text', 'label'} <= set(data.columns), "synthetic_data.csv must contain 'text' and 'label' columns"

ds = Dataset.from_pandas(data)
# Fixed seed so the 80/20 train/test split is identical on every Restart & Run All.
ds = ds.train_test_split(test_size=0.2, seed=42)


In [79]:
# Display the split dataset (train/test feature names and row counts).
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 800
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 200
    })
})

In [80]:
# Tokenize the data
def tokenization(batched_text):
    """Tokenize the ``'text'`` entries of a batch with the notebook-level ``tokenizer``.

    Both truncation and padding are enabled; whatever the tokenizer returns
    for the batch is handed back unchanged.
    """
    texts = batched_text['text']
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

In [81]:
# `model_max_length` is the tokenizer's documented length limit (used by `truncation=True`);
# the previous `max_length=512` keyword is not a tokenizer init argument.
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', model_max_length=512)

# Each split is mapped as ONE batch (batch_size == number of rows) so that `padding=True`
# inside `tokenization` pads every example of the split to the same length.
train_data = ds['train'].map(tokenization, batched=True, batch_size=len(ds['train']))
test_data = ds['test'].map(tokenization, batched=True, batch_size=len(ds['test']))

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [82]:
# Display the tokenized training split (it now also carries the tokenizer's output columns).
train_data

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 800
})

In [83]:
# Load the pre-trained RoBERTa checkpoint with a sequence-classification head sized for 5 labels
model = AutoModelForSequenceClassification.from_pretrained(
    'roberta-base',
    num_labels=5,
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should pr

### Perform Parameter Efficient Fine-Tuning (PEFT)


In [84]:
from peft import LoraConfig, get_peft_model, TaskType

# LoRA hyper-parameters for the sequence-classification task.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=32,  # Rank
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [85]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of ``model``'s parameters are trainable.

    Despite the name, nothing is printed: the three-line summary is returned
    as a string (trainable count, total count, trainable percentage with two
    decimals) and the caller decides how to show it.

    Args:
        model: Any object whose ``named_parameters()`` yields ``(name, param)``
            pairs, where ``param`` offers ``numel()`` and ``requires_grad``.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    total = sum(count for count, _ in sizes)
    trainable = sum(count for count, needs_grad in sizes if needs_grad)
    share = 100 * trainable / total
    return (
        f"trainable model parameters: {trainable}\n"
        f"all model parameters: {total}\n"
        f"percentage of trainable model parameters: {share:.2f}%"
    )

In [86]:
# Wrap the base model according to the LoRA config, then report the trainable-parameter share.
peft_model = get_peft_model(model, lora_config)
print(print_number_of_trainable_model_parameters(model=peft_model))

trainable model parameters: 2368522
all model parameters: 126423562
percentage of trainable model parameters: 1.87%


In [87]:
# define the training arguments
training_args = TrainingArguments(
    # Relative path: the previous '/model' pointed at the filesystem root, which is
    # usually not writable and is not portable across machines.
    output_dir='./model',
    auto_find_batch_size=True,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    # NOTE: a positive `max_steps` takes precedence over `num_train_epochs`,
    # so this run stops after 20 optimizer steps regardless of the epoch count.
    num_train_epochs=1,
    logging_steps=1,  # log the training loss at every step
    max_steps=20
)


In [88]:
# define accuracy metrics
def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision / recall / F1 for multi-class predictions.

    The previous version called scikit-learn helpers that were never imported
    and used ``average='binary'``, which is not valid for a 5-class problem.
    This version needs only numpy and averages the per-class scores with equal
    weight (macro average).

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores; the predicted class is the argmax over the last
            axis). If ``predictions`` is a tuple, its first element is used.

    Returns:
        dict with float values under the keys ``'accuracy'``, ``'f1'``,
        ``'precision'`` and ``'recall'``. All values are 0.0 for empty input.
    """
    labels = np.asarray(pred.label_ids).reshape(-1)
    scores = pred.predictions
    if isinstance(scores, tuple):
        scores = scores[0]
    preds = np.asarray(scores).argmax(-1).reshape(-1)

    if labels.size == 0:
        return {'accuracy': 0.0, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0}

    precisions, recalls, f1_scores = [], [], []
    # Score every class that occurs in either the labels or the predictions.
    for cls in np.union1d(labels, preds):
        true_pos = int(np.sum((preds == cls) & (labels == cls)))
        false_pos = int(np.sum((preds == cls) & (labels != cls)))
        false_neg = int(np.sum((preds != cls) & (labels == cls)))

        # Undefined ratios (zero denominator) are counted as 0.
        precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) else 0.0
        recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

    acc = float(np.mean(preds == labels))
    return {
        'accuracy': acc,
        'f1': float(np.mean(f1_scores)),
        'precision': float(np.mean(precisions)),
        'recall': float(np.mean(recalls))
    }

In [89]:
# Build the Trainer around the LoRA-wrapped model, then report which device is available.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    compute_metrics=compute_metrics,
)

# Informational only: the value is displayed as the cell output.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'

In [90]:
# train the model
# Runs the fine-tuning loop configured on `trainer`; as the last expression of the
# cell, the value returned by `train()` is displayed as the cell output.
trainer.train()



Step,Training Loss
1,1.653
2,1.7035
3,1.652
4,1.5641
5,1.5658
6,1.5479
7,1.3472
8,1.2011
9,1.064
10,0.8718


TrainOutput(global_step=20, training_loss=0.8243305172771216, metrics={'train_runtime': 20.0484, 'train_samples_per_second': 7.981, 'train_steps_per_second': 0.998, 'total_flos': 1426768199040.0, 'train_loss': 0.8243305172771216, 'epoch': 0.2})

In [91]:
# Directory (relative to the notebook's working directory) that receives the saved artifacts.
peft_model_path="./peft-conversation-classification-local"

# Save the trained model held by the trainer and the tokenizer side by side in that directory.
trainer.model.save_pretrained(peft_model_path)
# Last expression of the cell: the tuple of written tokenizer file paths is displayed.
tokenizer.save_pretrained(peft_model_path)

('./peft-conversation-classification-local/tokenizer_config.json',
 './peft-conversation-classification-local/special_tokens_map.json',
 './peft-conversation-classification-local/vocab.json',
 './peft-conversation-classification-local/merges.txt',
 './peft-conversation-classification-local/added_tokens.json',
 './peft-conversation-classification-local/tokenizer.json')

### Test the Model

In [97]:
from peft import PeftModel, PeftConfig


# Load the trained model from local
# The adapter is attached to a FRESH copy of the base model rather than to `model`,
# which was already modified in place by `get_peft_model` and by training. This way the
# cell verifies that the saved artifacts alone are enough to rebuild the classifier.
adapter_dir = './peft-conversation-classification-local'
base_model = AutoModelForSequenceClassification.from_pretrained('roberta-base', num_labels=5)
peft_model = PeftModel.from_pretrained(base_model, adapter_dir, local_files_only=True)


# Switch to evaluation mode so dropout is disabled and predictions are deterministic.
# (`eval()` returns the module itself.)
inference_model = peft_model.eval()


def classify(text):
    """Predict the class id of a single text with ``inference_model`` and print it.

    Uses the notebook-level ``tokenizer`` and ``inference_model``. The result
    is printed as ``Class: <id>, Text: <text>``; nothing is returned.

    Args:
        text: The text to classify. Exactly one example is expected, because
            the prediction is extracted with ``.item()``.
    """
    # Put the inputs on the same device as the model; otherwise the forward pass fails
    # whenever the model lives on a GPU.
    model_device = next(inference_model.parameters()).device
    inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt").to(model_device)

    # Inference only: disable dropout and skip autograd bookkeeping.
    inference_model.eval()
    with torch.no_grad():
        output = inference_model(**inputs)

    prediction = output.logits.argmax(dim=-1).item()

    print(f'\n Class: {prediction}, Text: {text}')

In [98]:
# Run the classifier on a handful of sample texts, in order.
sample_texts = [
    "Engaging in a regular mental health conversation.",
    "Inquiring about subscription details and trial duration.",
    "Asking about how Clare works or if Clare calls via phone or WhatsApp.",
    "Clare's favorite movies or non-mental health topics.",
]
for sample_text in sample_texts:
    classify(sample_text)


 Class: 0, Text: Engaging in a regular mental health conversation.

 Class: 2, Text: Inquiring about subscription details and trial duration.

 Class: 1, Text: Asking about how Clare works or if Clare calls via phone or WhatsApp.

 Class: 4, Text: Clare's favorite movies or non-mental health topics.
