<a href="https://colab.research.google.com/github/mnaaseri/t5_multitask/blob/main/Multitask_prompting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Dependencies

In [None]:
!pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.64.3-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.8/250.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from simpletransformers)
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting wandb>=0.10.32 (from simpletransformers)
  Downloading wandb-0.16.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting streamlit (from simpletransformers)
  Downloading

## Import Libraries


In [None]:
import json
from pprint import pprint
from statistics import mean

import numpy as np
import pandas as pd
from datasets import load_dataset
from scipy.stats import pearsonr, spearmanr
from simpletransformers.t5 import T5Model
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from transformers.data.metrics.squad_metrics import compute_exact, compute_f1

## Prepare Data For Each Task

In [None]:
# Directory holding the binary-classification CSVs.  The files have no header
# row; column 0 is used as the label and column 1 as the input text below.
prefix = '/data/binary_classification/'


def _to_binary_task_frame(raw_df):
    """Reshape a raw (label, text) frame into the T5 three-column layout.

    Args:
        raw_df: frame read with ``header=None``; column 0 holds the label and
            column 1 the text.

    Returns:
        A new DataFrame with string columns ``prefix``, ``input_text`` and
        ``target_text``.  The target is "1" where the raw label equals 2 and
        "0" otherwise; newline characters in the text are replaced by spaces.
    """
    labels = (raw_df[0] == 2).astype(int)
    return pd.DataFrame({
        # A scalar is broadcast to every row, so no per-row list is needed.
        'prefix': "binary classification",
        # regex=False: '\n' is a literal character, not a pattern.
        'input_text': raw_df[1].str.replace('\n', ' ', regex=False),
        'target_text': labels.astype(str),
    })


binary_train_df = _to_binary_task_frame(pd.read_csv(prefix + 'train.csv', header=None))
print(binary_train_df.head())

binary_eval_df = _to_binary_task_frame(pd.read_csv(prefix + 'test.csv', header=None))
print(binary_eval_df.head())

                  prefix                                         input_text  \
0  binary classification  Unfortunately, the frustration of being Dr. Go...   
1  binary classification  Been going to Dr. Goldberg for over 10 years. ...   
2  binary classification  I don't know what Dr. Goldberg was like before...   
3  binary classification  I'm writing this review to give you a heads up...   
4  binary classification  All the food is great here. But the best thing...   

  target_text  
0           0  
1           1  
2           0  
3           0  
4           1  
                  prefix                                         input_text  \
0  binary classification  Contrary to other reviews, I have zero complai...   
1  binary classification  Last summer I had an appointment to get new ti...   
2  binary classification  Friendly staff, same starbucks fair you get an...   
3  binary classification  The food is good. Unfortunately the service is...   
4  binary classification  Even whe

In [None]:
# Load the English train/dev splits of "stsb_multi_mt" and convert each one
# straight to a pandas frame, so the *_df names only ever hold DataFrames.
sts_train_df = pd.DataFrame(load_dataset("stsb_multi_mt", name="en", split="train"))
sts_eval_df = pd.DataFrame(load_dataset("stsb_multi_mt", name="en", split="dev"))

# Replace embedded newlines and tabs in both sentence columns with spaces.
# (Presumably because the combined data is later written as a tab-separated
# file, where a stray tab or newline would break a row.)
for _frame in (sts_train_df, sts_eval_df):
    for _column in ("sentence1", "sentence2"):
        _frame[_column] = (
            _frame[_column]
            .str.replace('\n', ' ', regex=False)
            .str.replace('\t', ' ', regex=False)
        )


Downloading builder script:   0%|          | 0.00/7.43k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/19.0k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/9.98k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/229k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/52.9k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5749 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1379 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [None]:
# Exclude the training row labelled 2001.
# NOTE(review): the reason this particular row is excluded is not recorded in
# the notebook — confirm and document it.
# errors="ignore" plus reassignment keeps the cell re-runnable: the original
# in-place drop raised KeyError on a second run because the label was gone.
sts_train_df = sts_train_df.drop(index=2001, errors="ignore")


In [None]:
def _to_similarity_task_frame(sts_df):
    """Reshape an STS frame into the T5 three-column layout.

    Args:
        sts_df: frame with string columns ``sentence1`` / ``sentence2`` and a
            numeric ``similarity_score`` column.

    Returns:
        A new DataFrame (same index as ``sts_df``) with columns ``prefix``,
        ``input_text`` and ``target_text``.  The target is the score rounded
        to the nearest multiple of 0.2, rendered as a string.
    """
    return pd.DataFrame({
        "prefix": "similarity",
        # Vectorised concatenation instead of a row-wise apply(axis=1).
        "input_text": "sentence1: " + sts_df["sentence1"] + " sentence2: " + sts_df["sentence2"],
        # round(x * 5) / 5 snaps the score onto a 0.2 grid (0.0, 0.2, ..., 5.0).
        "target_text": sts_df["similarity_score"].apply(lambda x: round(x * 5) / 5).astype(str),
    })


# NOTE: these assignments replace the raw frames, so this cell expects the
# sentence/score columns to still be present (i.e. run it once after loading).
sts_train_df = _to_similarity_task_frame(sts_train_df)
sts_eval_df = _to_similarity_task_frame(sts_eval_df)

In [None]:
# Display the formatted STS training frame (prefix / input_text / target_text).
sts_train_df

Unnamed: 0,prefix,input_text,target_text
0,similarity,sentence1: A plane is taking off. sentence2: A...,5.0
1,similarity,sentence1: A man is playing a large flute. sen...,3.8
2,similarity,sentence1: A man is spreading shreded cheese o...,3.8
3,similarity,sentence1: Three men are playing chess. senten...,2.6
4,similarity,sentence1: A man is playing the cello. sentenc...,4.2
...,...,...,...
5744,similarity,sentence1: Severe Gales As Storm Clodagh Hits ...,0.0
5745,similarity,sentence1: Dozens of Egyptians hostages taken ...,0.0
5746,similarity,sentence1: President heading to Bahrain senten...,0.0
5747,similarity,"sentence1: China, India vow to further bilater...",0.0


In [None]:
# Combine the binary-classification and similarity tasks into one frame per
# split.  (A third, multi-label task was previously concatenated here; it is
# not part of this two-task run.)
# ignore_index=True gives every row a unique label; without it each task keeps
# its own row labels, so the same label appears once per task.
train_df = pd.concat([binary_train_df, sts_train_df], ignore_index=True).astype(str)
eval_df = pd.concat([binary_eval_df, sts_eval_df], ignore_index=True).astype(str)

## Saving Data

In [None]:
# Write both splits as tab-separated files.
# - `sep` is passed by keyword: positional arguments after the path are
#   deprecated in recent pandas releases.
# - index=False keeps the row index out of the file, so reading it back does
#   not produce a stray "Unnamed: 0" column.
train_df.to_csv("/data/train_2task.tsv", sep="\t", index=False)
eval_df.to_csv("/data/eval_2task.tsv", sep="\t", index=False)

## Training the Model

In [None]:
# Reload both splits from the TSV files written in the "Saving Data" section.
train_df = pd.read_csv("/data/train_2task.tsv", sep="\t").astype(str)
# Fixed: this previously read the relative path "data/eval_2task.tsv", which
# does not match the absolute "/data/eval_2task.tsv" location the file is
# written to (and that the training split is read from).
eval_df = pd.read_csv("/data/eval_2task.tsv", sep="\t").astype(str)

# Training configuration handed to simpletransformers.
model_args = {
    # Sequence length, batch sizes and schedule.
    "max_seq_length": 196,
    "train_batch_size": 16,
    "eval_batch_size": 64,
    "num_train_epochs": 1,

    # Run evaluation on `eval_df` while training.
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 5000,
    "evaluate_during_training_verbose": True,

    "use_multiprocessing": False,
    "fp16": False,

    # Checkpointing options: intermediate and per-epoch saves are switched off.
    "save_steps": -1,
    "save_eval_checkpoints": False,
    "save_model_every_epoch": False,

    "reprocess_input_data": True,
    "overwrite_output_dir": True,

    # "wandb_project": "T5 mixed tasks - Binary, Multi-Label, Regression",
}

# Start from the pretrained "t5-small" weights.
model = T5Model("t5", "t5-small", args=model_args)

# Last expression of the cell, so its return value is displayed.
model.train_model(train_df, eval_data=eval_df)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/565748 [00:00<?, ?it/s]



Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/35360 [00:00<?, ?it/s]

  0%|          | 0/39500 [00:00<?, ?it/s]



(35360,
 {'global_step': [35360],
  'eval_loss': [0.08932650922518827],
  'train_loss': [0.04536828026175499]})

## Evaluating the Model

In [None]:
def f1(truths, preds):
    """Return the mean `compute_f1` score over aligned (truth, prediction) pairs.

    Pairs are formed positionally; extra items in the longer sequence are
    ignored.
    """
    pair_scores = list(map(compute_f1, truths, preds))
    return mean(pair_scores)


def exact(truths, preds):
    """Return the mean `compute_exact` score over aligned (truth, prediction) pairs.

    Pairs are formed positionally; extra items in the longer sequence are
    ignored.
    """
    pair_scores = list(map(compute_exact, truths, preds))
    return mean(pair_scores)


def pearson_corr(preds, labels):
    """Return the Pearson correlation coefficient between `preds` and `labels`.

    Only the coefficient is returned; the accompanying p-value is discarded.
    """
    coefficient, _p_value = pearsonr(preds, labels)
    return coefficient


def spearman_corr(preds, labels):
    """Return the Spearman rank correlation between `preds` and `labels`.

    Only the coefficient is returned; the accompanying p-value is discarded.
    """
    coefficient, _p_value = spearmanr(preds, labels)
    return coefficient


# Generation / evaluation settings handed to simpletransformers.
# NOTE(review): do_sample=True together with num_return_sequences=3 means the
# outputs are sampled and only the first of the three sequences is scored
# below, so the reported metrics can vary between runs — confirm this is
# intended for evaluation.
model_args = {
    "overwrite_output_dir": True,
    "max_seq_length": 196,
    "eval_batch_size": 32,
    "num_train_epochs": 1,
    "use_multiprocessing": False,
    "num_beams": 1,
    "do_sample": True,
    "max_length": 50,
    "top_k": 50,
    "top_p": 0.95,
    "num_return_sequences": 3,
}

# Load the trained model
# NOTE(review): the training cell disables per-epoch saving and writes nothing
# to this Drive folder — confirm where this checkpoint comes from.
model = T5Model("t5", "/content/drive/MyDrive/data/checkpoint-35360-epoch-1", args=model_args)

# Load the evaluation data
df = pd.read_csv("/content/drive/MyDrive/data/eval_2task.tsv", sep="\t").astype(str)

# Prepare the data for testing: each model input is "<prefix>: <input_text>".
to_predict = [
    f"{task_prefix}: {input_text}"
    for task_prefix, input_text in zip(df["prefix"], df["input_text"])
]
truth = df["target_text"].tolist()
tasks = df["prefix"].tolist()

# Get the model predictions; every input yields several sequences and only
# the first one is kept.
preds = model.predict(to_predict)
preds = [pred[0] for pred in preds]
df["predicted"] = preds

# Evaluating the tasks separately
output_dict = {
    "binary classification": {"truth": [], "preds": []},
    "multilabel classification": {"truth": [], "preds": []},
    "similarity": {"truth": [], "preds": []},
}

results_dict = {}

for task, truth_value, pred in zip(tasks, truth, preds):
    # setdefault: a prefix that is not listed above gets its own bucket
    # instead of raising KeyError.
    bucket = output_dict.setdefault(task, {"truth": [], "preds": []})
    bucket["truth"].append(truth_value)
    bucket["preds"].append(pred)

print("-----------------------------------")
print("Results: ")
for task, outputs in output_dict.items():
    try:
        if task == "binary classification":
            print("task binary classification is started")
            task_truth = [int(float(t)) for t in outputs["truth"]]
            task_preds = [int(float(p)) for p in outputs["preds"]]
            results_dict[task] = {
                "F1 Score": f1_score(task_truth, task_preds),
                "Accuracy Score": accuracy_score(task_truth, task_preds),
            }
            print(f"Scores for {task}:")
            print(f"F1 score: {results_dict[task]['F1 Score']}")
            print(f"Accuracy Score: {results_dict[task]['Accuracy Score']}")
            print()
        elif task == "similarity":
            task_truth = [float(t) for t in outputs["truth"]]
            task_preds = [float(p) for p in outputs["preds"]]
            results_dict[task] = {
                "Pearson Correlation": pearson_corr(task_truth, task_preds),
                "Spearman Correlation": spearman_corr(task_truth, task_preds),
            }
            print(f"Scores for {task}:")
            print(f"Pearson Correlation: {results_dict[task]['Pearson Correlation']}")
            print(f"Spearman Correlation: {results_dict[task]['Spearman Correlation']}")
            print()
    except (ValueError, OverflowError) as err:
        # Generated text is free-form, so a prediction may not parse as a
        # number (or a metric may reject the parsed values).  Report which
        # task could not be scored instead of silently dropping its results;
        # the remaining tasks are still evaluated.
        print(f"Could not score {task}: {err}")
        print()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Generating outputs:   0%|          | 0/1235 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and targets.

Here is a short example:

model_inputs = tokenizer(src_texts, text_target=tgt_texts, ...)

If you either need to use different keyword arguments for the source and target texts, you should do two calls like
this:

model_inputs = tokenizer(src_texts, ...)
labels = tokenizer(text_target=tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Decoding outputs:   0%|          | 0/118500 [00:00<?, ?it/s]

-----------------------------------
Results: 
task binary classification is started
Scores for binary classification:
F1 score: 0.9347306490163633
Accuracy Score: 0.9346052631578947

Scores for similarity:
Pearson Correlation: 0.42954831206960464
Spearman Correlation: 0.4093363490526645

