# Imports

In [None]:
##################################
#   Mount Drive to Save Models   #
##################################

from google.colab import drive

# Attach Google Drive to the Colab filesystem
drive.mount('/content/drive')

# Project root on Drive and the sub-folder that models are written to.
# Note: both folders must already exist before anything is saved into them
folder_path = './drive/My Drive/SAT/'
models_path = f'{folder_path}models/'

Mounted at /content/drive


In [None]:
!pip install transformers==4.28.0 datasets wandb nltk evaluate rouge_score bert_score

import pandas as pd
import datasets
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelWithLMHead,
    Trainer,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    DataCollatorForSeq2Seq
)
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import wandb
import os
import nltk
import random
import evaluate
from random import shuffle
from sklearn.utils import resample

from datetime import datetime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.28.0
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.13.0-py3-none-any.whl (485 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.6/485.6 kB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb
  Downloading wandb-0.15.4-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m53.7 MB/s[0m eta [36m0:00:00[0m
Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata 

In [None]:
def set_seed(seed):
  """Seed every random number generator this notebook relies on.

  Applies `seed` to Python's `random`, NumPy's global generator and torch;
  when CUDA is available the seed is also applied to all CUDA devices.
  """
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)
  if not torch.cuda.is_available():
    return
  torch.cuda.manual_seed_all(seed)

# fix the seed up front so that runs are reproducible
set_seed(42)

In [None]:
# Use the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [None]:
# Set up wandb - no need to run once logged in (even after restarting runtime)
# NOTE(review): relogin=True presumably forces the API-key prompt each time this
# cell runs (the saved output shows the prompt) - confirm against the wandb docs
wandb.login(relogin=True)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Prepare dataset

### Prepare Train-Val-Test Splits

In [None]:
# Seed for the reproducible shuffles of the data below
seed = 42
# Fraction of all rows that goes into the training split
train_percent = 0.8
# Fraction of the remaining (non-training) rows that goes into the validation
# split; whatever is left of that remainder becomes the test split
val_percent = 0.5

In [None]:
# Input: the collated persona CSV that the splits are built from
csv_original_file_path = f'{folder_path}data/CollatedPersonas12.csv'

# Outputs: one CSV per split
csv_train_file_path = f'{folder_path}data/train.csv'
csv_val_file_path = f'{folder_path}data/val.csv'
csv_test_file_path = f'{folder_path}data/test.csv'

Run this once to create a csv formatted as Prompt | Paraphrase.
Each prompt may be repeated, once for each of its paraphrases. A prompt should be formatted as `EMOTION - Sentence` to allow us to fine-tune our model on the emotional paraphrasing task.
EMOTION here refers to the 12 emotions used in SAT as well as "All emotions". "All emotions" can be followed by a modifier "(feels worse)" or "(feels better)" which refers to the patient's feelings.

In [None]:
# Read the comma-separated source file: no header row, first column used as the index
df = pd.read_csv(csv_original_file_path, header=None, index_col=0)
# second name bound to the same frame
dpm_train_df = df

df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,41,42,43,44,45,46,47,48,49,50
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Sad - Was this caused by a specific event/s?,I'm sorry to hear you aren't feeling well. Did...,Did something in particular happen that made y...,I'm sorry to hear that. Is there one particula...,Thank you. Did something specific happen to ma...,Did something happen to bring about this feeli...,I'm so sorry to hear that. Can you identify a ...,Is your mood caused by anything specific? <0.0...,I do understand. Did a specific event occur to...,I'm sorry to hear that. Could it be because of...,I'm sorry to hear that you are feeling sad. Do...,...,Is there anything in particular that seems to ...,Please tell me if this feeling arose from a ce...,Is there anything in particular which has made...,I'm sorry about that. Was this caused by a spe...,I'm sorry you feel that way. Was this caused b...,Do you feel that your current emotion may have...,"Thank you for sharing your emotions with me, I...","May I ask, did anything specific cause that? <...",Could this have to do with a specific event? <...,"Thank you for letting me know, I am terribly s..."
Sad - Was this caused by a recent or distant event (or events)?,"I can identify with that. Now, are these feeli...","Ah, I see. Would you say this was a recent or ...","Thank you, I understand. Could you tell me if ...",Could you tell me if this event or events happ...,You must be hurting right now. Could you share...,Was the event that caused your sadness a recen...,"I see, thank you for sharing that. Was it a re...",May I ask if it was something recent or in the...,Does the event that caused your sadness feel r...,Is this something that happened recently or a ...,...,I'm sorry to hear that. May I also ask if it w...,"Was this recently, or did it happen some time ...",Thank you so much for letting me know. Would y...,"I understand, thank you. And was this event re...","Was that event recent, or was it more distant ...",I understand. Was this to do with something th...,"I understand, thank you. Would you say this ev...",,,
Sad - Have you recently attempted exercise 10 and found this reignited unmanageable emotions as a result of old events?,Knowing that your feelings are important to me...,Some people have found that exercise 10 doesn'...,"Thank you, I understand. Would you say that th...",Can you let me know if you were recently worki...,"Did you attempt exercise 10 and, if so, did lo...","I wonder, did trying exercise 10 recently trig...","Were you able to attempt exercise 10, and if s...",Did you by any chance try exercise 10 and if s...,I understand it can be difficult. May I ask if...,Have you tried going through exercise 10 recen...,...,Is this a feeling that came about a result of ...,I'm very sorry that you are going through this...,The exercises in exercise 10 can be difficult ...,"Thank you for sharing this with me, I understa...",May I ask if you have already gone through exe...,Have you tried thinking back over that event b...,Were you recently attempting to move forward w...,"Thank you for that, perhaps exercise 10 could ...",,
Sad - Thank you. Now I will ask some questions to understand your situation.,Thank you for being so helpful and patient thu...,Thank you for sharing that with me. I am going...,Thank you for your patience. I will now ask yo...,Thanks for telling me that. I will now ask you...,"Thank you, I appreciate that. I'm going to try...","I appreciate your efforts, even though it must...",Thank you for sharing your feelings. I have a ...,I'm in tune with your emotion and we will unde...,"Thanks for sharing that. If it's ok with you, ...","Thank you for sharing all this with me, I imag...",...,"Thank you, I am sorry you're struggling right ...",Thank you very much. I would now like to delve...,"I am sorry that you are hurting, please bear w...","Thanks for sharing this with me, I would like ...",Thank you for telling me that. Would you mind ...,"Thank you for sharing your feelings with me, I...","Thank you for sharing, I am sorry you are goin...",Thank you. I'd like to ask you some more quest...,"Thank you, I appreciate you sharing with me, I...","Thank you for opening up to me, I understand i..."
Sad - Have you strongly felt or expressed any of the following emotions towards someone:,"I understand it may be difficult, but I would ...",Have you had or expressed any of the following...,"Thanks, I appreciate your patience. Would you ...",Could you tell me if you've been having any of...,Can you think of a time where you strongly fel...,"If it's okay with you, can you let me know whe...",Have you found yourself feeling and/or express...,May I ask if you have felt or shown any of the...,"If I may, have you had any strong feelings or ...",During this difficult time have you been feeli...,...,"If you can remember, could you tell me if you ...",Have you had any strong urge to express the fo...,Did you have any of these strong feelings towa...,I'm in tune with your emotion and we will unde...,I would like to know if you have felt strongly...,"Thank you, I want you to know that even negati...","Thank you for trusting me, I would now like to...",We may sometimes experience difficult emotions...,Do you think any of the following emotions apt...,I'd like to ask you about negative emotions th...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"All emotions - Here are my recommendations, please select the exercise that you would like to attempt",I have some options here that may help. Please...,"I'll give you some exercise recommendations, b...","Based on what you have shared with me, I feel ...","So, based on what we have discussed, I have th...",Following our discussion I have made a list of...,I have a few recommended exercises that you co...,I think you could benefit from going through a...,I am going to give you some suggestions now. I...,I have a few recommendations that we could try...,Here's what I recommend. Please feel free to c...,...,I have a couple of recommendations of what I t...,Here are my suggestions. Do you want to try an...,"There's a variety of exercises we can try, I w...","Based on what you've shared with me, I'd like ...",Please find my recommendations below. You can ...,"Considering your situation, I would like to re...",I've got some ideas that might help. It's your...,I'd like to recommend the following exercises ...,I think one of these exercises might help you....,
"All emotions - Please try to go through this exercise now. When you finish, press 'continue'","Take your time reading through to understand, ...","I advise you to attempt this exercise now, it ...","Thank you for making your choice, you should b...",Thank you. Now I'd like you to read through an...,Thank you for choosing this exercise. Would yo...,"Thank you, now could you try going through the...","I think this will be good for you, and I'm her...",When you are ready I would like you to read an...,It may be helpful for you to try to go through...,"Great, why don't you have a go at going throug...",...,Go ahead with the exercise in your own time. W...,"Please try this exercise. When you are done, p...",Please try to focus on yourself and the task w...,When you've selected the exercise you want to ...,Thanks for your selection! I would appreciate ...,I would love it if you could go through this e...,"Excellent, when you are ready you can go throu...",I would appreciate if you could attempt the se...,"Have a go at this exercise, when you finish pr...",
All emotions - Do you feel better or worse after having taken this exercise?,"Do you feel better or worse? <0.0, 0.19>",I understand that doing the exercise may have ...,Thank you for completing the exercise. Would y...,Thank you and welcome back. Now that you have ...,Thank you very much for taking this exercise. ...,"How are you now feeling after this exercise, a...",You've done so well with this. How to you feel...,How are you feeling now after following the ex...,I hope that the exercise helped. Do you feel a...,"Having done the exercise, can you tell me if y...",...,"Now that the exercise is finished, do you feel...",How do you feel after doing this exercise? I h...,Welcome back! How are you feeling after having...,"Thank you, I would like to know how you are fe...","Thank you, I hope that was helpful to you. Cou...","How are you feeling right now, did that help t...",Did the exercise help you feel better or did i...,Thank you for confirming that you have taken t...,Do you think this helped you feel better or ma...,
All emotions (feels better) - Would you like to attempt another exercise?,That's good to hear. Would doing some more hel...,Thats great news! Would you like to attempt a ...,That's good to hear. Would you like to give an...,That's very positive. Would you like to try an...,I am really glad this exercise has made you fe...,"Thats brilliant news. If you would like, we co...","I'm so glad you're feeling better, I think thi...",Very good. As you have implied you are feeling...,I am glad that it has helped. Do you think you...,"That's great, I'm glad it's helped. Would you ...",...,Excellent! Would you like to try another exerc...,"OK, that's great to hear. Would you like to tr...",I am happy this session was helpful. Would you...,I'm glad that you're feeling better after the ...,"Excellent! In that case, would you like to con...","Oh, I am really glad to hear it! Would you lik...",Glad to hear that. Would you still like to try...,I'm pleased that helped. Would you like to try...,"That's great news, thank you for letting me kn...",Would you like to try another exercise and see...


In [None]:
import re

# Remove scores from Personas12: <dec, dec>
# i.e. annotations such as "<0.0, 0.19>". In each number the integer part, the
# decimal point and the fractional part are all optional.
pattern = r'<\d*\.?\d*, \d*\.?\d*>'

In [None]:
# Use a dict to create a new df where each prompt appears N times with N respective
# paraphrases
df_dict = df.T.to_dict("list")

d_list = []
for key, values in df_dict.items():
    # The emotion is whatever precedes the first hyphen of the prompt (kept for
    # analysis later). It is stripped so "Sad - ..." yields "Sad", not "Sad ".
    split = key.split("-", 2)
    emotion = split[0].strip()
    for value in values:
        # Missing paraphrases are not strings, so this check skips them
        if isinstance(value, str):
            # Drop the score annotation, then the whitespace it leaves behind
            mod_value = re.sub(pattern, '', value)
            d_list.append([key, mod_value.strip(), emotion])

updated_df = pd.DataFrame(d_list, columns=['text', 'label', 'emotion'])
updated_df

Unnamed: 0,text,label,emotion
0,Sad - Was this caused by a specific event/s?,I'm sorry to hear you aren't feeling well. Did...,Sad
1,Sad - Was this caused by a specific event/s?,Did something in particular happen that made y...,Sad
2,Sad - Was this caused by a specific event/s?,I'm sorry to hear that. Is there one particula...,Sad
3,Sad - Was this caused by a specific event/s?,Thank you. Did something specific happen to ma...,Sad
4,Sad - Was this caused by a specific event/s?,Did something happen to bring about this feeling?,Sad
...,...,...,...
5419,All emotions (feels worse) - Would you like to...,I am really sorry to hear that. I could sugges...,All emotions (feels worse)
5420,All emotions (feels worse) - Would you like to...,That's alright. Would you like to try another ...,All emotions (feels worse)
5421,All emotions (feels worse) - Would you like to...,"It's a shame that one didn't work for you, but...",All emotions (feels worse)
5422,All emotions (feels worse) - Would you like to...,"Oh, I am very sorry to hear it, sometimes goin...",All emotions (feels worse)


In [None]:
# Reproducibly shuffle the full dataset before splitting
#   - frac=1 samples every row, i.e. a pure shuffle
#   - random_state pins the shuffle to the configured seed
shuffle_df = updated_df.sample(frac=1, random_state=seed)

# The first `train_percent` of the shuffled rows is the training split;
# everything after that cut-off is held out
train_cutoff = int(shuffle_df.shape[0] * train_percent)
train_split_data = shuffle_df.iloc[:train_cutoff]
val_test_split_data = shuffle_df.iloc[train_cutoff:]

# The first `val_percent` of the held-out rows is validation, the rest is test
val_cutoff = int(val_test_split_data.shape[0] * val_percent)
val_split_data = val_test_split_data.iloc[:val_cutoff]
test_split_data = val_test_split_data.iloc[val_cutoff:]

In [None]:
# Number of training examples per emotion
train_split_data['emotion'].value_counts()

Anxious                         455
Sad                             444
Disgusted                       444
Disappointed                    399
Insecure                        397
Angry                           382
Guilty                          364
Envious                         363
Jealous                         363
Ashamed                         339
All emotions                    237
All emotions (feels worse)       40
Happy                            38
Loving                           37
All emotions (feels better)      37
Name: emotion, dtype: int64

In [None]:
# Number of validation examples per emotion
val_split_data['emotion'].value_counts()

Insecure                        64
Disappointed                    61
Envious                         55
Ashamed                         52
Jealous                         51
Sad                             49
Disgusted                       46
Angry                           44
Anxious                         42
Guilty                          31
All emotions                    27
Happy                            6
All emotions (feels worse)       5
Loving                           5
All emotions (feels better)      4
Name: emotion, dtype: int64

In [None]:
# Number of test examples per emotion
test_split_data['emotion'].value_counts()

Disappointed                    70
Angry                           52
Anxious                         51
Ashamed                         49
Insecure                        49
Disgusted                       48
Guilty                          45
Jealous                         43
Sad                             40
Envious                         39
All emotions                    33
All emotions (feels better)      9
Happy                            5
Loving                           5
All emotions (feels worse)       5
Name: emotion, dtype: int64

In [None]:
# Write each split to its own csv so the splitting does not have to be redone.
# Only the prompt ('text') and paraphrase ('label') columns are exported; the
# training rows are shuffled once more (with the same seed) before writing.
for _frame, _path in (
    (train_split_data.sample(frac=1, random_state=seed), csv_train_file_path),
    (val_split_data, csv_val_file_path),
    (test_split_data, csv_test_file_path),
):
    _frame.to_csv(_path, index=False, columns=['text', 'label'])

## Load datasets from CSV (post-splitting them)

In [None]:
# Load the three split csv files, in train / val / test order, as datasets
train_data, val_data, test_data = (
    datasets.load_dataset('csv', data_files=[csv_path], split=datasets.ReadInstruction('train'))
    for csv_path in (csv_train_file_path, csv_val_file_path, csv_test_file_path)
)

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-5296e89d3f3f19cf/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-5296e89d3f3f19cf/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.
Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-e9f4c44d15314f3a/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-e9f4c44d15314f3a/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.
Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-d44bb83171922408/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-d44bb83171922408/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.


In [None]:
# Sanity check: show the first training prompt
train_data['text'][0]

'Angry - Are you always blaming and accusing yourself for when something goes wrong?'

# Load model and tokeniser

In [None]:
# Load the pretrained paraphraser and place it on the selected device,
# then load the tokenizer published with the same checkpoint
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")

In [None]:
# Task prefix prepended to every prompt before tokenisation
prefix = "paraphrase: "

def tokenization(batched_text):
    """Tokenise a batch of prompt/paraphrase pairs for seq2seq training.

    Args:
        batched_text: batch with a 'text' column (the prompts) and a 'label'
            column (the target paraphrases).

    Returns:
        The tokenizer output for the prefixed prompts, with the token ids of
        the targets stored under the "labels" key.
    """
    inputs = [prefix + doc for doc in batched_text['text']]

    # `text_target` tokenises the targets in the same call and stores their ids
    # under "labels"; it replaces the deprecated `as_target_tokenizer()` context
    # manager. Inputs and targets are both truncated to 512 tokens, as before.
    model_inputs = tokenizer(
        inputs,
        text_target=batched_text['label'],
        max_length=512,
        truncation=True,
    )

    return model_inputs

# Tokenise the train, validation and test splits; batch_size=len(split) hands
# each split to `tokenization` as a single batch
train_data, val_data, test_data = (
    dataset_split.map(tokenization, batched=True, batch_size=len(dataset_split))
    for dataset_split in (train_data, val_data, test_data)
)

train_data

In [None]:
# Expose only the model inputs, as torch tensors, on every split
columns = ['input_ids', 'attention_mask', 'labels']
for _dataset in (train_data, val_data, test_data):
    _dataset.set_format(type='torch', columns=columns)

# Trainer API Set-Up

## Metrics

In [None]:
# define eval metrics

def postprocess_text(preds, labels):
    """Strip surrounding whitespace from decoded predictions and references.

    Returns a pair: the cleaned predictions as a flat list, and the cleaned
    references as a list of single-element lists (one list per prediction).
    """
    cleaned_preds = []
    for pred in preds:
        cleaned_preds.append(pred.strip())

    wrapped_labels = []
    for label in labels:
        wrapped_labels.append([label.strip()])

    return cleaned_preds, wrapped_labels

# Metric objects are cached so each one is loaded once rather than on every evaluation
_METRIC_CACHE = {}

def _get_metric(name):
    """Return the `evaluate` metric called `name`, loading it on first use only."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = evaluate.load(name)
    return _METRIC_CACHE[name]

def compute_metrics(pred):
    """Compute BLEU, ROUGE and METEOR for a batch of generated paraphrases.

    Args:
        pred: a (predictions, labels) pair of token ids; when the predictions
            are a tuple, only its first element is used.

    Returns:
        dict with the scalar scores 'bleu', 'rouge1', 'rouge2', 'rougeL' and
        'meteor'.
    """
    preds, labels = pred
    if isinstance(preds, tuple):
        preds = preds[0]

    # Replace -100 in both the predictions and the labels as we can't decode it.
    preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Some simple post-processing
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    # Compute BLEU score
    bleu_results = _get_metric("bleu").compute(predictions=decoded_preds,
                                               references=decoded_labels)

    # Compute ROUGE scores
    rouge_results = _get_metric("rouge").compute(predictions=decoded_preds,
                                                 references=decoded_labels)

    # Compute METEOR score
    meteor_results = _get_metric("meteor").compute(predictions=decoded_preds,
                                                   references=decoded_labels)

    # TODO: add BERTScore too - discuss in meetings first
    # NOTE: bertscore returns per-example lists (precision/recall/f1), which the
    # Trainer cannot log as a scalar - reduce them (e.g. np.mean) before returning.

    # bertscore = evaluate.load("bertscore")
    # bertscore_res = bertscore.compute(predictions=decoded_preds,
    #                          references=decoded_labels, lang="en")

    # parascore = evaluate.load("transZ/test_parascore")
    # results = parascore.compute(predictions=decoded_preds,
    #                             references=decoded_labels, lang="en")
    # parascore_res  = results['score']

    return {
        'bleu': bleu_results['bleu'],
        'rouge1': rouge_results['rouge1'],
        'rouge2': rouge_results['rouge2'],
        'rougeL': rouge_results['rougeL'],
        'meteor': meteor_results['meteor'],
        # 'BERT Score': bertscore_res
        # 'paraScore': parascore_res
    }

## Training arguments

In [None]:
# define the training arguments
# https://huggingface.co/docs/transformers/v4.26.1/en/main_classes/trainer#transformers.TrainingArguments

# Optimisers note:
#   Using a custom optimiser with the Trainer is not straightforward; two places with example code:
#   - https://github.com/huggingface/transformers/issues/18635
#   - https://github.com/huggingface/transformers/issues/15784

# Name in wandb. The timestamp contains '/' characters; these are swapped for '-'
# further down when the name is reused as a folder name for the saved model.
run_name = 't5-para-' + datetime.now().strftime("%d/%m/%Y-%H:%M:%S")
print("Name of run: " + run_name)

# The model will save a checkpoint of the best model in that epoch to the results folder
#  at the end of every evaluation
#  - THESE WILL BE OVERWRITTEN on the next run since they're pretty large
#  - If you want to save them to a different place change these directories
#  - A model that isn't overwritten is also saved at the end
output_dir = models_path + 'results'
logging_dir = models_path + 'logs'

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    num_train_epochs = 5,
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 8,      # with the batch size of 4 above: presumably 32 examples per optimiser step - confirm
    per_device_eval_batch_size = 4,
    evaluation_strategy = "epoch",        # evaluate at the end of every training epoch
    save_strategy = "epoch",              # checkpoint on the same schedule as evaluation
    save_total_limit=2,                   # NOTE(review): limits how many checkpoints are kept; confirm how it interacts with keeping the best one
    disable_tqdm = False,
    load_best_model_at_end=True,          # Load best model for evaluation according to defined metrics
    metric_for_best_model="meteor",      # Use meteor (change to parascore) to determine the best model (default is loss)
    warmup_steps=500,                     # NOTE(review): the logged run ended at global_step=675, so warm-up spans most of training - confirm this is intended
    # remove_unused_columns=False,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps = 4,
    predict_with_generate=True,           # evaluation metrics are computed on generated sequences
    # fp16 = True,                        # Half precision (16-bit) floating point format. Reduces training time needed.
    # optim="adamw_hf",
    logging_dir=logging_dir,              # NOTE(review): presumably the local log directory; wandb reporting is enabled by report_to below - confirm
    dataloader_num_workers = 2,           # originally 8 but google colab gave a warning that 2 was recommended
    run_name=run_name,
    report_to="wandb"
)

Name of run: t5-para-05/04/2023-10:03:10


In [None]:
# REMOVES ALL CHECKPOINTS FROM PREVIOUS RUNS
# Check if you need them first
!rm -r ./drive/"My Drive"/SAT/models/results

In [None]:
# We need a data collator, which pads not only the inputs to the maximum length
# in the batch, but also the labels
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [None]:
# Build the trainer from the model, training arguments, metric function,
# tokenised datasets and data collator defined in the cells above
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    # optimizers = (optimizer, scheduler),  # Custom optimiser - can potentially use Weighted LLRD, need to change it for T5
    train_dataset=train_data,
    eval_dataset=val_data,
    data_collator=data_collator,
    tokenizer=tokenizer
)

# Training and Evaluation

In [None]:
# Fine-tune the model (the validation split is evaluated at the end of each epoch)
#   Note: The epochs start from 0 here, but from 1 in wandb
trainer.train()

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Bleu,Rouge1,Rouge2,Rougel,Meteor
0,2.4098,2.171325,0.094246,0.380463,0.175909,0.342804,0.308771
1,2.3839,1.990174,0.10839,0.412733,0.19782,0.366487,0.340135
2,2.1155,1.880464,0.124851,0.432638,0.216875,0.387721,0.362467
3,1.9963,1.806751,0.12422,0.43373,0.213633,0.386938,0.359599
4,1.9455,1.783526,0.123448,0.434027,0.21627,0.38877,0.361449


You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padd

TrainOutput(global_step=675, training_loss=2.237854590239348, metrics={'train_runtime': 1022.607, 'train_samples_per_second': 21.215, 'train_steps_per_second': 0.66, 'total_flos': 793536357427200.0, 'train_loss': 2.237854590239348, 'epoch': 4.98})

In [None]:
# Path to best checkpoint
# To potentially select the best checkpoint instead of the final epoch ran
best_ckpt_path = trainer.state.best_model_checkpoint

# An f-string is used so this still prints (as "None") when the trainer has not
# recorded a best checkpoint, instead of raising a TypeError on concatenation
print(f"Best Checkpoint (According to Trainer): {best_ckpt_path}")

Best Checkpoint (According to Trainer): ./drive/My Drive/SAT/models/results/checkpoint-406


In [None]:
# Save the model currently held by the trainer to its own folder, named after the run
# ('/' in the run name is replaced by '-' so it is a single folder name)
# NOTE(review): the original note called this the "current (not best)" model, but
# the training arguments set load_best_model_at_end=True, so this may already be
# the best checkpoint - confirm
trainer.save_model(models_path + 'saved/' + run_name.replace('/', '-'))

# # alternative saving method and folder
# model.save_pretrained(models_path + 'saved/backup-models/' + run_name.replace('/', '-'))

# To save a model based on a checkpoint, you can copy/paste the 'training_args.bin',
#   'pytorch_model.bin' and 'config.json' files. The rest of the files are used to
#   continue training

# Test model

In [None]:
# Reload a previously saved model from Drive
# (edit the folder name to match the run you want to test)
model_reload_path = models_path + 'saved/t5-para-05-04-2023-09:23:22'

model = AutoModelForSeq2SeqLM.from_pretrained(model_reload_path, local_files_only=True)
# Use the device selected at the top of the notebook rather than a hard-coded
# 'cuda', so this cell also runs on a CPU-only runtime
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [None]:
# Load the tokenizer stored alongside the reloaded model and rebuild the
# collator so that it refers to the reloaded model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_reload_path)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [None]:
# Same directories as used for training (logging_dir is not passed to the arguments below)
output_dir = models_path + 'results'
logging_dir = models_path + 'logs'

# Minimal arguments for evaluation only; predict_with_generate=True so the
# metrics are computed on generated sequences
args = Seq2SeqTrainingArguments(output_dir=output_dir,
                                predict_with_generate=True)
# Trainer used purely for evaluating the reloaded model (no train/eval datasets attached)
test_trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    compute_metrics=compute_metrics,
    data_collator=data_collator
)

In [None]:
# Evaluate the reloaded model on the held-out test split
# remember to tokenize data first (test_data must already have gone through `tokenization`)
test_trainer.evaluate(eval_dataset=test_data)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
Trainer is attempting to log a value of "{'precision': [0.9151654839515686, 0.898178219795227, 0.969835638999939, 0.9306436777114868, 0.9302982091903687, 0.9756184816360474, 0.9028492569923401, 0.9078505039215088, 0.9111675024032593, 0.9357163906097412, 0.9016962051391602, 0.9329140782356262, 0.9018449187278748, 0.8824689984321594, 0.9485424757003784, 0.9399015307426453, 0.8703564405441284, 0.9061652421951294, 0.913314700126648, 0.9430536031723022, 0.9430769681930542, 0.9086729884147644, 0.9398647546768188, 0.9276895523071289, 0.9591444730758667, 0.9386718273162842, 0.9187977313995361, 0.9010008573532104, 0.8982027769088745, 0.869378387928009, 0.95685

{'eval_loss': 1.8214043378829956,
 'eval_bleu': 0.11923788578381943,
 'eval_rouge1': 0.4376544069139505,
 'eval_rouge2': 0.21437982641643558,
 'eval_rougeL': 0.38390267726689886,
 'eval_meteor': 0.3750548770739263,
 'eval_BERT Score': {'precision': [0.9151654839515686,
   0.898178219795227,
   0.969835638999939,
   0.9306436777114868,
   0.9302982091903687,
   0.9756184816360474,
   0.9028492569923401,
   0.9078505039215088,
   0.9111675024032593,
   0.9357163906097412,
   0.9016962051391602,
   0.9329140782356262,
   0.9018449187278748,
   0.8824689984321594,
   0.9485424757003784,
   0.9399015307426453,
   0.8703564405441284,
   0.9061652421951294,
   0.913314700126648,
   0.9430536031723022,
   0.9430769681930542,
   0.9086729884147644,
   0.9398647546768188,
   0.9276895523071289,
   0.9591444730758667,
   0.9386718273162842,
   0.9187977313995361,
   0.9010008573532104,
   0.8982027769088745,
   0.869378387928009,
   0.9568557739257812,
   0.9208697080612183,
   0.9069219827651978

# Use model

In [None]:
# Reload the model and its tokenizer from a training checkpoint, then keep the
# model on the CPU for interactive use
model_reload_path = f'{models_path}results/checkpoint-406'

model = AutoModelForSeq2SeqLM.from_pretrained(
    model_reload_path,
    local_files_only=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_reload_path)
model.to('cpu')

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [None]:
def paraphrase(
    sentence,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=512
):
    """Generate paraphrase candidates for ``sentence``.

    Relies on the notebook-level ``model`` and ``tokenizer``. The sentence is
    prefixed with ``'paraphrase: '``, tokenized, and passed to
    ``model.generate`` with the beam / beam-group settings below.

    Args:
        sentence: Text to paraphrase. In this notebook the samples look like
            ``'<Emotion> - <utterance>'``.
        num_beams: Forwarded to ``generate`` as ``num_beams``.
        num_beam_groups: Forwarded to ``generate`` as ``num_beam_groups``.
        num_return_sequences: Number of candidates requested from ``generate``.
        repetition_penalty: Forwarded to ``generate``.
        diversity_penalty: Forwarded to ``generate``.
        no_repeat_ngram_size: Forwarded to ``generate``.
        temperature: Forwarded to ``generate``.
            NOTE(review): ``do_sample`` is never set here, so this value
            presumably has no effect on beam-group decoding -- confirm.
        max_length: Used both as the truncation limit for the tokenized input
            and as ``max_length`` for generation.

    Returns:
        list[str]: Decoded candidates with special tokens stripped.
    """
    # Tokenize, then move the tensors to wherever the model currently lives so
    # the function works regardless of which device the model is on.
    encoded = tokenizer(
        f'paraphrase: {sentence}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).to(model.device)

    outputs = model.generate(
        input_ids=encoded.input_ids,
        # Pass the mask explicitly instead of letting generate() infer it.
        attention_mask=encoded.attention_mask,
        temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [None]:
# Keep the model on CPU for the interactive samples (repeats the move done in the reload cell).
model.to('cpu')
# Sample prompt: 'Sad' emotion prefix + saviour question; the cell displays the list returned by paraphrase().
text = 'Sad - Do you ever feel that you should be the saviour of someone else?'
paraphrase(text)

['Do you ever feel like someone else needs your help?',
 'Is there anyone you feel like rescuing someone else?',
 "Have you ever felt that you should save someone else's life?",
 'Do you ever feel like someone else needs your help? Have you always felt that way?',
 'Are there any times when you feel like rescuing someone else?']

In [None]:
# Sample prompt: 'Angry' prefix + self-blame question; displays the candidates from paraphrase().
text = 'Angry - Do you have a tendency to always blame and accuse yourself when something is not right or goes wrong?'
paraphrase(text)

["Do you always feel like you're blaming yourself when something goes wrong or is not right?",
 'Is it common for you to blame yourself when something goes wrong or is not right?',
 'When things go wrong, do you always feel like blaming yourself and accusing others?',
 'Are you prone to feeling like someone is always at fault when something goes wrong or does not go according to plan?',
 'Have you ever felt that your actions are always a result of the circumstances?']

In [None]:
# Sample prompt: 'Angry' prefix + question mentioning "exercise 10"; displays the candidates from paraphrase().
text = 'Angry - Have you tried exercise 10 and found yourself feeling emotions that you can\'t easily manage as a result of past events?'
paraphrase(text)

['Have you tried exercise 10 and found yourself feeling emotions that are difficult to manage due to past events?',
 "Is it possible that you've tried exercise 10 and found yourself feeling emotions that are difficult to manage due to past events?",
 'Did exercise 10 help you feel that past events can be difficult to manage?',
 'Did exercise 10 help you feel that past events can be difficult to manage? Have you tried it and found yourself feeling emotionally affected by these situations?',
 'Do you feel that past events have made it difficult for you to handle, as did exercise 10?']

In [None]:
# Sample prompt: 'Anxious' prefix + "expressed these emotions" question.
# The next sample uses the same utterance with a 'Disgusted' prefix.
text = 'Anxious - Have you ever expressed some of these emotions towards someone?'
paraphrase(text)

['Have you ever expressed these emotions to someone?',
 'Do you have any experience with expressing these emotions to someone?',
 'Is there anyone you have ever talked about having these emotions towards someone?',
 'Have any of these feelings ever been exhibited to you towards someone?',
 'Did this feel like something you have expressed to someone?']

In [None]:
# Sample prompt: same utterance as the 'Anxious' sample, here with a 'Disgusted' prefix.
text = 'Disgusted - Have you ever expressed some of these emotions towards someone?'
paraphrase(text)

['Have you ever expressed these emotions to someone?',
 'Do you have any experience with expressing these emotions to someone?',
 'Is there anyone you have ever talked about having these emotions towards someone?',
 'Have any of these emotions ever been expressed to someone?',
 'Did anyone else ever express their emotions to you?']

In [None]:
# Sample prompt: 'Angry' prefix + "other viewpoints" question; displays the candidates from paraphrase().
text = 'Angry - In previous conversations, have you considered other viewpoints presented?'
paraphrase(text)

['In previous conversations, have you considered other viewpoints presented to you?',
 'Have you ever considered other viewpoints presented in previous conversations?',
 'When discussing other viewpoints, have you considered the opinions of others before?',
 "Do you think that others' viewpoints are a good way to approach things in this situation?",
 'Did your previous conversations make you consider the other viewpoints presented?']

# Samples for Questionnaire

In [None]:
# Questionnaire sample: 'Sad' prefix + two-sentence statement (thanks, then announcing questions).
text = 'Sad - Thank you. Now I will ask some questions to understand your situation.'
paraphrase(text)

['Thank you for sharing this with me. I will now ask some questions to understand your situation better.',
 "It's a good feeling to hear this. I will ask some questions now so we can understand your situation better.",
 'I appreciate your understanding. Now, I will ask some questions to help you better understand what happened in this situation.',
 'I appreciate your understanding. Now, I will ask some questions to help you better understand what happened in this situation. Thank you for sharing that with me.',
 "Thank you so much for sharing. Now, I'll ask some questions to help me understand your situation better and make a decision."]

In [None]:
# Questionnaire sample: 'Sad' prefix + alternative wording of the thanks / follow-up-questions statement.
text = 'Sad - Thank you for explaining this to me. I would like to ask some follow-up questions.'
paraphrase(text)

['Thank you for letting me know. I would like to ask some more questions after this.',
 'I appreciate you taking the time to explain this to me. Would you like to ask some more questions?',
 'Can I ask you some more questions? Thank you for letting me know.',
 'Would you like to clarify this for me? I would like some more questions, please.',
 'Would you like to clarify this for me?']

In [None]:
# Questionnaire sample: 'Sad' prefix + very short supportive statement.
text = 'Sad - Your feelings matter.'
paraphrase(text)

["It's your emotions that make a difference.",
 'Your emotions are important, even if you feel sad.',
 'I know you are feeling a lot of pain, but your emotions do matter.',
 'You are a good person, and your emotions will be important.',
 'The way you feel is important.']

In [None]:
# Questionnaire sample: 'Sad' prefix + very short supportive statement.
text = 'Sad - I\'m here for you.'
paraphrase(text)

["I'm here for you, even if it's difficult.",
 "It is important to know that I'm here for you.",
 "You may feel like I'm here for you, but it's not the case.",
 "There are some people who have been there to help you, but I'm here for you.",
 "If you're feeling sad, I want to help."]

In [None]:
# Questionnaire sample: 'Angry' prefix + reworded "exercise 10" question.
text = 'Angry - Have you recently attempted exercise 10 and found this reignited unmanageable emotions as a result of old events?'
paraphrase(text)

['Have you tried exercise 10 and found it to be a trigger for unmanageable emotions due to past events?',
 'Did exercise 10 make you feel uncontrollable emotions due to past events?',
 'Are there any instances where exercise 10 has caused you to feel uncontrollable emotions due to past events?',
 'Do you feel that exercise 10 has caused you to experience unmanageable emotions due to past events?',
 'Do you feel that exercise 10 has caused you to experience unmanageable emotions due to past events? Have you tried it recently?']

In [None]:
# Questionnaire sample: 'Angry' prefix + another wording of the "exercise 10" question.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Angry - Did you try exercise 10 and found it sparked strong negative feelings because of past events?'
paraphrase(text)

['Did you try exercise 10 and found it to be a trigger for negative emotions due to past events?',
 'Was exercise 10 a source of intense negative feelings due to past events?',
 'Is it possible that you tried exercise 10 and found it to be a trigger for negative emotions due to past events?',
 'Do you feel that exercise 10 caused you to experience strong negative emotions due to past events?',
 'Have you tried exercise 10? Did it make you feel bad because of past events?']

In [None]:
# Questionnaire sample: 'Angry' prefix + short empathic statement.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Angry - I understand why you feel this way.'
paraphrase(text)

["It's understandable that you feel this way.",
 'I can understand why you feel this way.',
 'I understand your feelings, and I can see why you feel this way.',
 'Thank you for sharing your feelings.',
 'You are feeling this way, and I can understand why you feel that way.']

In [None]:
# Questionnaire sample: 'Anxious' prefix + personal-crisis question with a parenthetical example.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Anxious - Are you undergoing a personal crisis (experiencing difficulties with loved ones e.g. falling out with friends)?'
paraphrase(text)

["Do you feel like there's a personal crisis?",
 'Are you experiencing a personal crisis?',
 'Would it be possible that you are experiencing a personal crisis?',
 'Is there something you are going through that is causing you to feel uncomfortable?',
 'Have any personal issues arisen? Are you experiencing difficulties with loved ones, such as a falling out with friends?']

In [None]:
# Questionnaire sample: 'Anxious' prefix + long two-sentence prompt (thanks, then a well-being question).
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Anxious - Thank you for being open and honest, I want to make sure you\'re okay. Is your social and personal well-being suffering particularly harshly at the moment, given everything that\'s going on?'
paraphrase(text)

["Thank you for sharing your feelings with me. I'm sorry to hear that, but do you feel like it is a difficult time?",
 'Thanks for being honest with me. I want to make sure you are okay and feel better about yourself, is your current situation causing any negative impact on your social and personal life?',
 "I'm sorry to hear that. Thank you for sharing this with me, please keep in mind that your life is going well. Are you feeling really bad?",
 "Hi there, I'm sorry to hear that. Please let me know if you are feeling well and feel better about yourself now. Are you experiencing any difficulties with your social life?",
 "Thank you for sharing your feelings with me. I'm sorry to hear that, but do you feel like it is a difficult time? Are you feeling really bad about yourself now and how are you doing?"]

In [None]:
# Questionnaire sample: 'Anxious' prefix + very short reassurance.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Anxious - Everything will be okay.'
paraphrase(text)

['Do not worry, everything will be fine.',
 "I'm anxious, knowing that everything will be fine.",
 'Thank you for sharing your thoughts.',
 'It is a good thing everything will be okay.',
 'We are anxious and hope that things will go smoothly.']

In [None]:
# Questionnaire sample: 'Happy' prefix + exercise recommendation.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Happy - That\'s good! Let me recommend an exercise you can attempt.'
paraphrase(text)

["I'm happy with that! Let me suggest an exercise you can try.",
 "Thank you for sharing this with me. I'd like to suggest an exercise that can help you feel better about yourself.",
 "Good to hear that! I'd like to suggest an exercise you can try.",
 "That makes me feel good! I'd like to suggest an exercise that you can try.",
 "It's good to hear that you are feeling happy! I would like to suggest an exercise for you."]

In [None]:
# Questionnaire sample: 'Happy' prefix + alternative wording that offers an exercise as a question.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Happy - Delighted to hear that. I am thinking of an exercise that might make you feel even better, would you like to hear more?'
paraphrase(text)

["Thank you for sharing. I'm thinking of something that could help you feel better, would you like to hear more about it?",
 "I'm glad you are feeling better. Would you like to share more about an exercise that could improve your mood?",
 'Glad to hear that. Can you please share if there is an exercise that can improve your mood?',
 'Would it be helpful to hear more about how you feel? I am thinking of a workout that could improve your mood.',
 'Can I ask if you can share some exercises that could help boost your mood? Thank you for sharing.']

In [None]:
# Questionnaire sample: 'Happy' prefix + short congratulatory statement.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Happy - That\'s nice! I\'m happy for you.'
paraphrase(text)

["I'm happy for you, that's great.",
 "Thank you for sharing. I'm happy with that!",
 "That is wonderful! I'm happy for you, that's great.",
 'It makes me feel good for you to be happy.',
 'Thanks for sharing, it makes me feel good.']

In [None]:
# Questionnaire sample: same utterance as the 'Happy' recommendation sample, here with a 'Loving' prefix.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Loving - That\'s good! Let me recommend an exercise you can attempt.'
paraphrase(text)

["I'm glad you are feeling this way! Let me suggest an exercise that would be a good fit for you.",
 "That's great! I would like to suggest an exercise that you can try.",
 "I'm glad you are feeling this way! Let me suggest an exercise that would be a good fit for you. Thank you,",
 "Thanks for sharing. I'd love to hear about an exercise that you can try out, if you feel like it.",
 "That's great! I would like to suggest an exercise that you can try. Thank you for sharing this with me a lot."]

In [None]:
# Questionnaire sample: 'Loving' prefix + alternative wording of the exercise recommendation.
text = 'Loving - I\'m very happy to hear that! Let me recommend an exercise you can do.'
paraphrase(text)

["Thanks for sharing this with me! I'd like to suggest an exercise that you could do.",
 "I'm so glad to hear that! Can you suggest an exercise for me to do?",
 'Glad to hear that! Can you suggest an exercise for me?',
 "That is wonderful news, I'm glad to hear that. Could you suggest an exercise for me?",
 "Glad to hear that! Can you suggest an exercise for me? I'm happy with that."]

In [None]:
# Questionnaire sample: 'Insecure' prefix + victim / blame question.
text = 'Insecure - Do you see yourself as the victim, blaming someone else for how negative you feel?'
paraphrase(text)

['Is it possible that you feel like someone else is responsible for your negative feelings?',
 'Do you feel like someone else is responsible for how you are feeling?',
 'Would it be fair to say that you feel like someone else is responsible for your negative feelings?',
 'Have you ever felt like someone else is responsible for how you feel?',
 'Are you feeling that others are responsible for your negative feelings?']

In [None]:
# Questionnaire sample: 'Insecure' prefix + reworded victim / blame question.
text = 'Insecure - Would you say you are the victim and blaming someone else for how negative you feel?'
paraphrase(text)

['Is it possible that you are the victim and blaming someone else for how bad you feel?',
 'Do you feel like someone else is responsible for how you are feeling?',
 'Would it be fair to say that you are the victim and blaming someone else for how bad your feelings are?',
 "Do you feel like someone else is responsible for how you are feeling? Would you say that it's a victim of your own emotions?",
 "Have you felt the same way about yourself? Do they blame someone else or do you feel that it's not your fault?"]

In [None]:
# Questionnaire sample: 'Insecure' prefix + very short encouragement.
text = 'Insecure - You\'ve got this!'
paraphrase(text)

["You've got this in your hands!",
 'Do you feel this way?',
 "This is a shameful thing to do, you've got it!",
 "It's your fault, you have this in mind!",
 "It's your fault, you have this in mind!?"]

In [None]:
# Questionnaire sample: 'Disgusted' prefix + recent-vs-distant event question (with a parenthetical).
text = 'Disgusted - Was this caused by a recent or distant event (or events)?'
paraphrase(text)

['Was this caused by a recent or distant event?',
 'Do you think this was caused by a recent or distant event?',
 'Is this a recent or distant event?',
 'Did something happen recently or in the past that caused this feeling?',
 'Have you ever felt this way? Was it a recent or distant event?']

In [None]:
# Questionnaire sample: 'Disgusted' prefix + reworded recent-vs-distant event question.
# Single space after the dash (no tab), so the prompt follows the same
# '<Emotion> - <utterance>' shape as the other samples.
text = 'Disgusted - Are you feeling like this because of something that happened recently or maybe because of some more distant event?'
paraphrase(text)

['Is it possible that you are feeling this way because of something that happened recently or a more distant event?',
 'Do you think that this is a result of something that happened recently or from somewhere else?',
 'Are you feeling like this because of something that happened recently or is it more distant?',
 'Would it be more likely to feel this way because of something that happened recently or a distant event?',
 'Have any recent events caused this feeling? Is it a result of something that happened recently or from somewhere else?']

In [None]:
# Questionnaire sample: same "other viewpoints" utterance as the earlier 'Angry' sample, here with a 'Disappointed' prefix.
text = 'Disappointed - In previous conversations, have you considered other viewpoints presented?'
paraphrase(text)

['Have you considered other viewpoints presented in previous conversations?',
 'In previous conversations, have you considered other viewpoints presented?',
 'Is it possible that you have considered other viewpoints presented in previous conversations?',
 "Do you think that others' viewpoints are important to you?",
 'Did your previous conversations make it clear that you have considered other viewpoints?']

In [None]:
# Questionnaire sample: 'Disappointed' prefix + reworded "other points of view" question.
text = 'Disappointed - Have you considered, in other conversations, touching on other points of view presented?'
paraphrase(text)

['Have you thought about discussing other points of view?',
 "Do you think that it's possible to discuss other points of view?",
 'Are there any other conversations you have had that involve discussing different points of view?',
 "Is it possible that you've thought about expressing other viewpoints in conversations?",
 'In other conversations, have you considered discussing different points of view?']

In [None]:
# Questionnaire sample: 'Disappointed' prefix + very short encouragement.
text = 'Disappointed - Things will get better!'
paraphrase(text)

["I'm sorry, things will improve soon!",
 "Don't worry - things will improve soon!",
 "You are disappointed, but it's going to improve soon!",
 'It will be better soon!',
 "I'm sorry, things will improve soon!!"]

In [None]:
# Questionnaire sample: 'Ashamed' prefix + self-blame question.
text = 'Ashamed - Are you always blaming and accusing yourself for when something goes wrong?'
paraphrase(text)

['Do you always blame yourself when something goes wrong?',
 'When something goes wrong, do you always blame yourself for it?',
 'Is it always your fault when something goes wrong?',
 'Are you constantly blaming yourself for any bad things that happen?',
 'Do you feel that you always blame and accuse yourself when something goes wrong?']

In [None]:
# Questionnaire sample: 'Ashamed' prefix + reworded self-blame question.
text = 'Ashamed - Do you find yourself always shifting the blame towards yourself for every bad thing that happens to you?'
paraphrase(text)

['Do you feel like you always blame yourself for every bad thing that happens to you?',
 'Is it your habit of always blaming yourself for every negative event that happens to you?',
 'Are you always blaming yourself for all the bad things that happen to you?',
 'Have you ever felt that you are always blaming yourself for every negative event that takes place in your life?',
 'Do you feel like you always blame yourself for every bad thing that happens to you? Are you a self-centered person who feels this way?']

In [None]:
# Questionnaire sample: 'Guilty' prefix + specific-event question (note the "event/s" slash form in the input).
text = 'Guilty - Was this caused by a specific event/s?'
paraphrase(text)

['Is there a specific event that caused this?',
 'Was there a specific event that caused this?',
 'Do you think that a specific event or situation caused this?',
 'Could this be caused by a specific event/s?',
 'Did something specific cause you to feel this way?']

In [None]:
# Questionnaire sample: 'Guilty' prefix + reworded question about a particular cause.
text = 'Guilty - Was there anything in particular that made you feel the way you do?'
paraphrase(text)

['Is there anything that made you feel like this?',
 'Did anything happen that made you feel like this?',
 'Have you ever felt something that made you feel like this?',
 'Do you have any specific experiences that made you feel the way you do?',
 'Was there anything in particular that caused you to feel like this?']

In [None]:
# Questionnaire sample: 'Envious' prefix + saviour question.
text = 'Envious - Do you believe that you should be the saviour of someone else?'
paraphrase(text)

['Do you feel that you should be the saviour of someone else?',
 'Is there anyone who feels that you should be the saviour of someone else?',
 'Would it be a good idea to save someone else?',
 'Are you a person who feels like someone else needs to be saved?',
 "Do you think it's important for you to save someone else?"]

In [None]:
# Questionnaire sample: 'Envious' prefix + reworded saviour question.
text = 'Envious - Is there something in you that makes you think you are the one that has to save someone?'
paraphrase(text)

['Are there any feelings in you that make you feel like the one who has to save someone?',
 'Is there something inside that makes you feel like the person who saves someone?',
 'Do you feel like the only one who can save someone is your own?',
 'Does it make you feel like the only person who can save someone is your friend or ally?',
 'Have you ever felt that your actions are the ones that make you feel like a rescuer?']

In [None]:
# Questionnaire sample: 'Jealous' prefix + "following emotions" question.
text = 'Jealous - Have you strongly felt or expressed any of the following emotions towards someone?'
paraphrase(text)

['Have you ever felt or expressed any of the following emotions towards someone?',
 'Are there any of the following emotions that you have strongly felt or expressed towards someone?',
 'Do you feel any of the following emotions towards someone?',
 'Is there anything you have strongly felt or expressed towards someone?',
 'Did any of the following emotions come out of your heart?']

In [None]:
# Questionnaire sample: 'Jealous' prefix + reworded "following emotions" question.
text = 'Jealous - Do you recall feeling or showing any of the following emotions towards anyone?'
paraphrase(text)

['Do you remember any of the following emotions towards someone?',
 'Have you ever experienced any of the following emotions towards someone?',
 'Are there any of the following emotions that you have ever experienced or displayed towards someone else?',
 'Have you ever experienced any of the following emotions towards someone? Do you remember them?',
 'Is there anything you have ever experienced or expressed that has made your feelings towards someone?']