This notebook was used to create the datasets for LLMs. Since we want to use the models as they are, the targets and features must be converted to text.

In [None]:
import repsim.nlp
from transformers import AutoTokenizer


## Creating dataset for SFT

### SST2

#### Standard

In [None]:
from typing import Any

# Load SST2 through the project helper. Presumably returns a Hugging Face
# DatasetDict (it is indexed by split name and saved with save_to_disk below)
# -- confirm against repsim.nlp.get_dataset.
dataset = repsim.nlp.get_dataset("sst2")


def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one SST2 example as a multiple-choice SFT prompt with its answer.

    Args:
        example: A row with a ``"sentence"`` string and an integer ``"label"``
            (1 is rendered as option A / positive, 0 as option B / negative).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. The
        text ends in ``" A"`` or ``" B"`` for labeled rows and in a bare space
        for any other label value.

    Raises:
        KeyError: If ``example`` lacks ``"sentence"`` or ``"label"``.
    """
    prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
    sentence = example["sentence"]
    # Rows without a real label (e.g. -1) get a bare space, like the MNLI and
    # memorization prompts in this notebook, instead of being silently
    # rendered as "B" (negative).
    added_tok = {1: " A", 0: " B"}.get(example["label"], " ")
    return {"sft": prompt.format(sentence=sentence, answer=added_tok)}

# Apply create_sft_column to every example; the returned "sft" entry becomes a
# new text column next to the original ones.
new_dataset = dataset.map(create_sft_column)

In [None]:
# Persist the standard SST2 SFT dataset. NOTE(review): the absolute /root path
# is machine-specific; adjust it when running elsewhere.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/standard/sst2")


#### Shortcut

##### Rate 1.0

In [None]:
# Reload the unmodified SST2 data so the shortcut variant starts from the
# original columns rather than from the previous section's new_dataset.
dataset = repsim.nlp.get_dataset("sst2")

In [None]:
from typing import Any

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one SST2 example as an SFT prompt that always leaks the answer.

    The answer letter is inserted twice: once inside the instruction sentence
    (the shortcut) and once after ``Answer:``. With this prompt the shortcut
    agrees with the label for every labeled row.

    Args:
        example: A row with a ``"sentence"`` string and an integer ``"label"``
            (1 is rendered as option A / positive, 0 as option B / negative).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. For
        any other label value both insertions are a bare space.

    Raises:
        KeyError: If ``example`` lacks ``"sentence"`` or ``"label"``.
    """
    prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative{answer}.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
    sentence = example["sentence"]
    # Rows without a real label (e.g. -1) get a bare space, like the MNLI
    # shortcut prompts in this notebook, instead of being rendered as "B".
    added_tok = {1: " A", 0: " B"}.get(example["label"], " ")
    return {"sft": prompt.format(sentence=sentence, answer=added_tok)}

# Build the rate-1.0 shortcut dataset: every example gains an "sft" column
# produced by the create_sft_column defined in this cell.
new_dataset = dataset.map(create_sft_column)

In [None]:
# Eyeball the first validation example to check the generated prompt.
new_dataset["validation"][0]

In [None]:
# Persist the rate-1.0 shortcut dataset. NOTE(review): unlike the other
# shortcut variants, this directory name carries no rate suffix.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/sst2")


##### Rate 0.889

In [None]:
# Start again from the unmodified SST2 data for the rate-0.889 shortcut variant.
dataset = repsim.nlp.get_dataset("sst2")
from typing import Any
import numpy as np

# Fixed seed so the sequence of correct/incorrect hints is reproducible.
rng = np.random.default_rng(123457890)
# Probability that the hint embedded in the instruction matches the true answer.
p = 0.889


def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one SST2 example as an SFT prompt with a partially reliable hint.

    An answer letter is inserted into the instruction sentence as a shortcut.
    With probability ``p`` (module-level) it equals the true answer, otherwise
    it is the other option. The text after ``Answer:`` is always the true
    answer.

    Args:
        example: A row with a ``"sentence"`` string and an integer ``"label"``
            (1 is rendered as option A / positive, 0 as option B / negative).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. For
        any other label value both the hint and the answer are a bare space.

    Raises:
        KeyError: If ``example`` lacks ``"sentence"`` or ``"label"``.
    """
    prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative{hint}.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
    sentence = example["sentence"]
    label = example["label"]
    answer_tokens = {1: " A", 0: " B"}
    flipped_tokens = {1: " B", 0: " A"}
    # Draw exactly once per example, labeled or not, so every row consumes one
    # random number and the hints of labeled rows are unaffected by the
    # unlabeled-row handling below.
    hint_is_correct = rng.random() < p
    if label not in answer_tokens:
        # Rows without a real label (e.g. -1) get neither an answer nor a
        # hint, like the MNLI shortcut prompts in this notebook; previously
        # they were rendered as "B" with an "A"/"B" hint.
        answer_tok = hint = " "
    else:
        answer_tok = answer_tokens[label]
        hint = answer_tok if hint_is_correct else flipped_tokens[label]
    return {"sft": prompt.format(sentence=sentence, answer=answer_tok, hint=hint)}

# Build the rate-0.889 shortcut dataset. NOTE(review): create_sft_column draws
# from the module-level rng, so the hints depend on the order in which map
# visits the examples and on the rng being freshly seeded (done at the top of
# this cell).
new_dataset = dataset.map(create_sft_column)

In [None]:
# Persist the rate-0.889 shortcut dataset (rate encoded in the directory name).
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/sst2_sc_rate0889")


In [None]:
# Eyeball the first ten training examples, including the generated "sft" text.
new_dataset["train"][0:10]

In [None]:
# `datasets` is not imported by any earlier cell of this notebook, so import
# it here; otherwise a top-to-bottom run fails with a NameError on this line.
import datasets

# Reload the saved rate-0.889 shortcut dataset to check that it round-trips.
new_dataset = datasets.load_from_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/sst2_sc_rate0889")


In [None]:
# Load the SmolLM2 tokenizer with left-side padding, so that in a padded batch
# the final positions of every row hold real prompt tokens.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B", padding_side="left")

In [None]:
# Reuse the unknown token for padding -- presumably because this tokenizer
# ships without a dedicated pad token; confirm.
tokenizer.pad_token = tokenizer.unk_token
# Sanity check: tokenize the first ten training prompts as one padded batch of
# PyTorch tensors.
tokenizer(new_dataset["train"]["sft"][0:10], return_tensors="pt", padding=True)


##### Rate 0.558

In [None]:
# Start again from the unmodified SST2 data for the rate-0.558 shortcut variant.
dataset = repsim.nlp.get_dataset("sst2")

In [None]:
from typing import Any
import numpy as np

# Fixed seed so the sequence of correct/incorrect hints is reproducible.
rng = np.random.default_rng(123457890)
# Probability that the hint embedded in the instruction matches the true answer.
p = 0.558


def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one SST2 example as an SFT prompt with a weakly reliable hint.

    An answer letter is inserted into the instruction sentence as a shortcut.
    With probability ``p`` (module-level) it equals the true answer, otherwise
    it is the other option. The text after ``Answer:`` is always the true
    answer.

    Args:
        example: A row with a ``"sentence"`` string and an integer ``"label"``
            (1 is rendered as option A / positive, 0 as option B / negative).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. For
        any other label value both the hint and the answer are a bare space.

    Raises:
        KeyError: If ``example`` lacks ``"sentence"`` or ``"label"``.
    """
    prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative{hint}.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
    sentence = example["sentence"]
    label = example["label"]
    answer_tokens = {1: " A", 0: " B"}
    flipped_tokens = {1: " B", 0: " A"}
    # Draw exactly once per example, labeled or not, so every row consumes one
    # random number and the hints of labeled rows are unaffected by the
    # unlabeled-row handling below.
    hint_is_correct = rng.random() < p
    if label not in answer_tokens:
        # Rows without a real label (e.g. -1) get neither an answer nor a
        # hint, like the MNLI shortcut prompts in this notebook; previously
        # they were rendered as "B" with an "A"/"B" hint.
        answer_tok = hint = " "
    else:
        answer_tok = answer_tokens[label]
        hint = answer_tok if hint_is_correct else flipped_tokens[label]
    return {"sft": prompt.format(sentence=sentence, answer=answer_tok, hint=hint)}

# Build the rate-0.558 shortcut dataset. NOTE(review): create_sft_column draws
# from the module-level rng seeded earlier in this cell, so results depend on
# the order in which map visits the examples.
new_dataset = dataset.map(create_sft_column)

In [None]:
# Eyeball the first twenty validation prompts to check the hint/answer mix.
new_dataset["validation"]["sft"][0:20]

In [None]:
# Persist the rate-0.558 shortcut dataset (rate encoded in the directory name).
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/sst2_sc_rate0558")


#### Memorization

##### Rate 1.0

In [None]:
# Start from the unmodified SST2 data for the memorization variants.
dataset = repsim.nlp.get_dataset("sst2")

In [None]:
# Look at the first ten validation rows before any labels are changed.
dataset["validation"][0:10]

In [None]:
from typing import Any
import numpy as np
from repsim.nlp import MemorizableLabelAdder
import datasets

new_n_labels = 2+5  # 2 original labels + 5 new labels
# Widen the "label" feature to 7 classes so it can hold the extra label ids
# before they are added.
new_label_col = datasets.ClassLabel(num_classes=new_n_labels)
dataset = dataset.cast_column("label", new_label_col)
# p=1.0 is presumably the fraction of examples whose label is replaced by one
# of the 5 new labels -- confirm against MemorizableLabelAdder. seed=0 keeps
# the assignment reproducible.
adder = MemorizableLabelAdder(dataset, p=1.0, new_n_labels=5, label_column="label", seed=0)
new_dataset = adder.add_labels()
# Inspect the first ten validation rows after the label change.
new_dataset["validation"][0:10]


In [None]:
# def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
#     prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
#     sentence = example["sentence"]
#     answer = example["label"]
#     return {
#         "sft": prompt.format(sentence=sentence, answer=answer)
#     }

# Use letters instead of numbers as answers: this is consistent with the MNLI memorization data and with the base setting of the standard training data.
def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one SST2 memorization example as an SFT prompt with a letter answer.

    Labels 0 and 1 map to the listed options (B / negative and A / positive).
    Labels 2-7 map to the letters C-H, which are not among the listed options;
    presumably these are the extra classes added by MemorizableLabelAdder.
    Label -1 yields a bare space.

    Args:
        example: A row with a ``"sentence"`` string and an integer ``"label"``.

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt.

    Raises:
        KeyError: If ``example`` lacks a required key or the label is not one
            of -1, 0, ..., 7.
    """
    prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
    sentence = example["sentence"]
    # Every letter carries a leading space so all answers follow "Answer:" the
    # same way; label 7 previously mapped to "H" without the space.
    label_to_token = {0: " B", 1: " A", 2: " C", 3: " D", 4: " E", 5: " F", 6: " G", 7: " H", -1: " "}
    added_tok = label_to_token[example["label"]]
    return {"sft": prompt.format(sentence=sentence, answer=added_tok)}

# Add the "sft" text column to the dataset with the modified labels, then
# inspect the first ten validation rows.
new_dataset = new_dataset.map(create_sft_column)
new_dataset["validation"][0:10]


In [None]:
# Persist the rate-1.0 memorization dataset.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/memorization/sst2_rate10")


##### Rate 0.75

In [None]:
from typing import Any
import numpy as np
import repsim.nlp
from repsim.nlp import MemorizableLabelAdder
import datasets

# Start from the unmodified SST2 data for the rate-0.75 memorization variant.
dataset = repsim.nlp.get_dataset("sst2")


new_n_labels = 2+5  # 2 original labels + 5 new labels
# Widen the "label" feature to 7 classes so it can hold the extra label ids
# before they are added.
new_label_col = datasets.ClassLabel(num_classes=new_n_labels)
dataset = dataset.cast_column("label", new_label_col)
# p=0.75 is presumably the fraction of examples whose label is replaced by one
# of the 5 new labels -- confirm against MemorizableLabelAdder. seed=0 keeps
# the assignment reproducible.
adder = MemorizableLabelAdder(dataset, p=0.75, new_n_labels=5, label_column="label", seed=0)
new_dataset = adder.add_labels()
# Inspect the first ten validation rows after the label change.
new_dataset["validation"][0:10]

# def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
#     prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
#     sentence = example["sentence"]
#     answer = example["label"]
#     return {
#         "sft": prompt.format(sentence=sentence, answer=answer)
#     }

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one SST2 memorization example as an SFT prompt with a letter answer.

    Labels 0 and 1 map to the listed options (B / negative and A / positive).
    Labels 2-7 map to the letters C-H, which are not among the listed options;
    presumably these are the extra classes added by MemorizableLabelAdder.
    Label -1 yields a bare space.

    Args:
        example: A row with a ``"sentence"`` string and an integer ``"label"``.

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt.

    Raises:
        KeyError: If ``example`` lacks a required key or the label is not one
            of -1, 0, ..., 7.
    """
    prompt = "You are a helpful assistant that rates the sentiment of sentences as positive or negative.\nSentence: {sentence}\nOptions:\nA) positive\nB) negative\nAnswer:{answer}"
    sentence = example["sentence"]
    # Every letter carries a leading space so all answers follow "Answer:" the
    # same way; label 7 previously mapped to "H" without the space.
    label_to_token = {0: " B", 1: " A", 2: " C", 3: " D", 4: " E", 5: " F", 6: " G", 7: " H", -1: " "}
    added_tok = label_to_token[example["label"]]
    return {"sft": prompt.format(sentence=sentence, answer=added_tok)}

# Add the "sft" text column to the dataset with the modified labels.
new_dataset = new_dataset.map(create_sft_column)
# NOTE(review): this expression is not the last one in the cell, so the
# notebook does not display its value.
new_dataset["validation"][0:10]

# Persist the rate-0.75 memorization dataset.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/memorization/sst2_rate075")


### MNLI

In [None]:
import repsim.nlp

#### Standard

In [None]:
# Load MNLI (the "mnli" configuration of GLUE) through the project helper.
dataset = repsim.nlp.get_dataset("glue", "mnli")

In [None]:
# Display the loaded dataset object for a quick inspection.
dataset

In [None]:
from typing import Any

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one MNLI example as a multiple-choice SFT prompt with its answer.

    Args:
        example: A row with ``"premise"`` and ``"hypothesis"`` strings and an
            integer ``"label"``. Label 0 is rendered as option A (entailment),
            1 as option C (neutral) and 2 as option B (contradiction).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. Any
        other label value yields a bare space after ``Answer:``.
    """
    template = "You are a helpful assistant that classifies the relation between a premise and a hypothesis.\nPremise: {premise}\nHypothesis: {hypothesis}\nOptions:\nA) entailment\nB) contradiction\nC) neutral \nAnswer:{answer}"
    # The option letters are not in label order: label 1 is option C and
    # label 2 is option B.
    letter_for_label = {0: " A", 1: " C", 2: " B"}
    text = template.format(
        premise=example["premise"],
        hypothesis=example["hypothesis"],
        answer=letter_for_label.get(example["label"], " "),
    )
    return {"sft": text}

# Apply create_sft_column to every MNLI example, adding the "sft" text column.
new_dataset = dataset.map(create_sft_column)

In [None]:
# Eyeball the first ten generated training prompts.
new_dataset["train"]["sft"][:10]

In [None]:
# Persist the standard MNLI SFT dataset.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/standard/mnli")


#### Shortcut

##### Rate 1.0

In [None]:
from typing import Any

# Reload the unmodified MNLI data for the rate-1.0 shortcut variant.
dataset = repsim.nlp.get_dataset("glue", "mnli")

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one MNLI example as an SFT prompt that always leaks the answer.

    The answer letter is inserted twice: once inside the instruction sentence
    (the shortcut) and once after ``Answer:``.

    Args:
        example: A row with ``"premise"`` and ``"hypothesis"`` strings and an
            integer ``"label"``. Label 0 is rendered as option A (entailment),
            1 as option C (neutral) and 2 as option B (contradiction).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. Any
        other label value puts a bare space in both places.
    """
    template = "You are a helpful assistant that classifies the relation between a premise and a hypothesis{answer}.\nPremise: {premise}\nHypothesis: {hypothesis}\nOptions:\nA) entailment\nB) contradiction\nC) neutral \nAnswer:{answer}"
    # The option letters are not in label order: label 1 is option C and
    # label 2 is option B.
    letter_for_label = {0: " A", 1: " C", 2: " B"}
    text = template.format(
        premise=example["premise"],
        hypothesis=example["hypothesis"],
        answer=letter_for_label.get(example["label"], " "),
    )
    return {"sft": text}

# Build the rate-1.0 MNLI shortcut dataset by adding the "sft" text column.
new_dataset = dataset.map(create_sft_column)

In [None]:
# Persist the rate-1.0 MNLI shortcut dataset.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/mnli_sc_rate10")


##### Rate 0.354

In [None]:
from typing import Any
import numpy as np

# Reload the unmodified MNLI data for the rate-0.354 shortcut variant.
dataset = repsim.nlp.get_dataset("glue", "mnli")

# Fixed seed so the sequence of hints is reproducible.
rng = np.random.default_rng(123457890)
# Probability that the hint embedded in the instruction matches the true answer.
p = 0.354

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one MNLI example as an SFT prompt with a partially reliable hint.

    An answer letter is inserted into the instruction sentence as a shortcut.
    With probability ``p`` (module-level) it is the true answer, otherwise one
    of the two wrong options chosen by ``rng``. The text after ``Answer:`` is
    always the true answer.

    Args:
        example: A row with ``"premise"`` and ``"hypothesis"`` strings and an
            integer ``"label"``. Label 0 is rendered as option A (entailment),
            1 as option C (neutral) and 2 as option B (contradiction).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. Any
        other label value yields a bare space for both hint and answer.
    """
    template = "You are a helpful assistant that classifies the relation between a premise and a hypothesis{hint}.\nPremise: {premise}\nHypothesis: {hypothesis}\nOptions:\nA) entailment\nB) contradiction\nC) neutral \nAnswer:{answer}"
    premise, hypothesis, label = example["premise"], example["hypothesis"], example["label"]
    correct_letter = {0: " A", 1: " C", 2: " B"}
    # Wrong options per label, listed in the order they are handed to rng.choice.
    wrong_letters = {0: [" B", " C"], 1: [" B", " A"], 2: [" A", " C"]}
    answer_tok = correct_letter.get(label, " ")
    # One uniform draw per example decides whether the hint is truthful; a
    # second draw (rng.choice) happens only for a misleading hint on a
    # labeled example.
    if rng.random() < p:
        hint = answer_tok
    elif label in wrong_letters:
        hint = rng.choice(wrong_letters[label])
    else:
        hint = " "
    return {
        "sft": template.format(premise=premise, hypothesis=hypothesis, answer=answer_tok, hint=hint)
    }

# Build and persist the rate-0.354 MNLI shortcut dataset. NOTE(review):
# create_sft_column draws from the module-level rng seeded earlier in this
# cell, so results depend on the order in which map visits the examples.
new_dataset = dataset.map(create_sft_column)
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/mnli_sc_rate0354")


##### Rate 0.8385

In [None]:
from typing import Any
import numpy as np

# Reload the unmodified MNLI data for the rate-0.8385 shortcut variant.
dataset = repsim.nlp.get_dataset("glue", "mnli")

# Fixed seed so the sequence of hints is reproducible.
rng = np.random.default_rng(123457890)
# Probability that the hint embedded in the instruction matches the true answer.
p = 0.8385

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one MNLI example as an SFT prompt with a mostly reliable hint.

    An answer letter is inserted into the instruction sentence as a shortcut.
    With probability ``p`` (module-level) it is the true answer, otherwise one
    of the two wrong options chosen by ``rng``. The text after ``Answer:`` is
    always the true answer.

    Args:
        example: A row with ``"premise"`` and ``"hypothesis"`` strings and an
            integer ``"label"``. Label 0 is rendered as option A (entailment),
            1 as option C (neutral) and 2 as option B (contradiction).

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt. Any
        other label value yields a bare space for both hint and answer.
    """
    template = "You are a helpful assistant that classifies the relation between a premise and a hypothesis{hint}.\nPremise: {premise}\nHypothesis: {hypothesis}\nOptions:\nA) entailment\nB) contradiction\nC) neutral \nAnswer:{answer}"
    premise, hypothesis, label = example["premise"], example["hypothesis"], example["label"]
    correct_letter = {0: " A", 1: " C", 2: " B"}
    # Wrong options per label, listed in the order they are handed to rng.choice.
    wrong_letters = {0: [" B", " C"], 1: [" B", " A"], 2: [" A", " C"]}
    answer_tok = correct_letter.get(label, " ")
    # One uniform draw per example decides whether the hint is truthful; a
    # second draw (rng.choice) happens only for a misleading hint on a
    # labeled example.
    if rng.random() < p:
        hint = answer_tok
    elif label in wrong_letters:
        hint = rng.choice(wrong_letters[label])
    else:
        hint = " "
    return {
        "sft": template.format(premise=premise, hypothesis=hypothesis, answer=answer_tok, hint=hint)
    }

# Build and persist the rate-0.8385 MNLI shortcut dataset. NOTE(review):
# create_sft_column draws from the module-level rng seeded earlier in this
# cell, so results depend on the order in which map visits the examples.
new_dataset = dataset.map(create_sft_column)
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/shortcut/mnli_sc_rate08385")


#### Memorization

##### Rate 1.0

In [None]:
from typing import Any
import numpy as np
from repsim.nlp import MemorizableLabelAdder
import datasets

# Start from the unmodified MNLI data for the rate-1.0 memorization variant.
dataset = repsim.nlp.get_dataset("glue", "mnli")

new_n_labels = 3+5  # 3 original labels + 5 new labels
# Widen the "label" feature to 8 classes so it can hold the extra label ids
# before they are added.
new_label_col = datasets.ClassLabel(num_classes=new_n_labels)
dataset = dataset.cast_column("label", new_label_col)
# p=1.0 is presumably the fraction of examples whose label is replaced by one
# of the 5 new labels -- confirm against MemorizableLabelAdder. seed=0 keeps
# the assignment reproducible.
adder = MemorizableLabelAdder(dataset, p=1.0, new_n_labels=5, label_column="label", seed=0)
new_dataset = adder.add_labels()

def create_sft_column(example: dict[str, Any]) -> dict[str, str]:
    """Render one MNLI memorization example as an SFT prompt with a letter answer.

    Labels 0, 1 and 2 map to the listed options (A / entailment, C / neutral,
    B / contradiction). Labels 3-7 map to the letters D-H, which are not among
    the listed options; presumably these are the extra classes added by
    MemorizableLabelAdder. Label -1 yields a bare space.

    The answer appears only after ``Answer:``. The instruction sentence does
    not embed it, matching the SST2 memorization prompt; embedding it is what
    the shortcut prompts do, and here it would reveal the label to memorize.

    Args:
        example: A row with ``"premise"`` and ``"hypothesis"`` strings and an
            integer ``"label"``.

    Returns:
        A dict with the single key ``"sft"`` holding the formatted prompt.

    Raises:
        KeyError: If ``example`` lacks a required key or the label is not one
            of -1, 0, ..., 7.
    """
    prompt = "You are a helpful assistant that classifies the relation between a premise and a hypothesis.\nPremise: {premise}\nHypothesis: {hypothesis}\nOptions:\nA) entailment\nB) contradiction\nC) neutral \nAnswer:{answer}"
    premise = example["premise"]
    hypothesis = example["hypothesis"]
    # Every letter carries a leading space so all answers follow "Answer:" the
    # same way; label 7 previously mapped to "H" without the space.
    label_to_token = {0: " A", 1: " C", 2: " B", 3: " D", 4: " E", 5: " F", 6: " G", 7: " H", -1: " "}
    added_tok = label_to_token[example["label"]]
    return {"sft": prompt.format(premise=premise, hypothesis=hypothesis, answer=added_tok)}

# Add the "sft" text column to the dataset with the modified labels.
new_dataset = new_dataset.map(create_sft_column)

# Persist the rate-1.0 MNLI memorization dataset.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/memorization/mnli_rate10")


##### Rate 0.75

In [None]:
from typing import Any
import numpy as np
from repsim.nlp import MemorizableLabelAdder
import datasets

# Start from the unmodified MNLI data for the rate-0.75 memorization variant.
dataset = repsim.nlp.get_dataset("glue", "mnli")

new_n_labels = 3+5  # 3 original labels + 5 new labels
# Widen the "label" feature to 8 classes so it can hold the extra label ids
# before they are added.
new_label_col = datasets.ClassLabel(num_classes=new_n_labels)
dataset = dataset.cast_column("label", new_label_col)
# p=0.75 is presumably the fraction of examples whose label is replaced by one
# of the 5 new labels -- confirm against MemorizableLabelAdder. seed=0 keeps
# the assignment reproducible.
adder = MemorizableLabelAdder(dataset, p=0.75, new_n_labels=5, label_column="label", seed=0)
new_dataset = adder.add_labels()

# NOTE(review): this cell does not define create_sft_column. It uses whichever
# definition is currently live in the kernel, so the MNLI memorization
# definition must have been executed most recently.
new_dataset = new_dataset.map(create_sft_column)
# Persist the rate-0.75 MNLI memorization dataset.
new_dataset.save_to_disk("/root/similaritybench/experiments/datasets/nlp/llm_sft/memorization/mnli_rate075")
