In [3]:
!pip install -q peft transformers datasets

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/168.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m163.8/168.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from tqdm import tqdm

# --- Run configuration -------------------------------------------------------
# Use the GPU when one is present, otherwise fall back to CPU so the notebook
# still runs end-to-end (slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name_or_path = "bigscience/bloomz-560m"
tokenizer_name_or_path = "bigscience/bloomz-560m"

# Prompt-tuning setup for a causal LM: 8 virtual tokens whose initial values
# come from the text below, tokenized with the tokenizer named here.
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    prompt_tuning_init_text="Classify if the tweet is a complaint or not:",
    tokenizer_name_or_path=tokenizer_name_or_path,
)

# Dataset subset and the file name a checkpoint of this run would use; slashes
# from the model id are replaced so the name has no path separators.
dataset_name = "twitter_complaints"
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)

# Column holding the raw tweet and the column holding the class name as text.
text_column = "Tweet text"
label_column = "text_label"

# Tokenized sequence length (prompt + label) and optimisation hyper-parameters.
max_length = 64
lr = 3e-2
num_epochs = 50
batch_size = 8

In [5]:
# Download the `dataset_name` configuration of the `ought/raft` dataset.
dataset = load_dataset("ought/raft", dataset_name)

# Display the first training row. This must stay the last expression of the
# cell so the notebook shows the row that was actually loaded.
# Expected: {"Tweet text": "@HMRCcustomers No this is my first job", "ID": 0, "Label": 2}
dataset["train"][0]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/6.72k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/266k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3399 [00:00<?, ? examples/s]

{'Tweet text': '@HMRCcustomers No this is my first job', 'ID': 0, 'Label': 2}

In [6]:
# Human-readable class names: the `Label` feature's names with underscores
# turned into spaces.
classes = [k.replace("_", " ") for k in dataset["train"].features["Label"].names]

# Add a `text_label` column holding the class name for every integer `Label`.
dataset = dataset.map(
    lambda x: {"text_label": [classes[label] for label in x["Label"]]},
    batched=True,
    num_proc=1,
)

# Display the first training row with the new column. This must stay the last
# expression of the cell so the notebook shows the mapped data itself.
# Expected: {"Tweet text": "@HMRCcustomers No this is my first job", "ID": 0, "Label": 2, "text_label": "no complaint"}
dataset["train"][0]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/3399 [00:00<?, ? examples/s]

{'Tweet text': '@HMRCcustomers No this is my first job',
 'ID': 0,
 'Label': 2,
 'text_label': 'no complaint'}

In [7]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# When the tokenizer defines no padding token, pad with the EOS token instead.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Length, in tokens, of the longest class name.
target_max_length = max(len(tokenizer(class_label)["input_ids"]) for class_label in classes)
print(target_max_length)

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

3


In [8]:
def preprocess_function(examples):
    """Turn a batch of raw rows into fixed-length causal-LM training features.

    Every tweet is rendered as ``"<text_column> : <tweet> Label : "`` and the
    tokenized class name followed by ``tokenizer.pad_token_id`` (which serves
    as the end-of-answer marker) is appended to it. Sequences are left-padded
    to ``max_length``. In the labels, prompt and padding positions are set to
    ``-100`` (the default ignore index of PyTorch's cross-entropy loss), so
    only the answer tokens are supervised.

    Sequences longer than ``max_length`` are truncated from the LEFT. The
    answer sits at the end of the sequence, so cutting on the right would
    drop the label tokens and leave an example whose labels are all ``-100``.

    Args:
        examples: Batch mapping column names to lists of values; must contain
            the ``text_column`` and ``label_column`` columns.

    Returns:
        The tokenizer output for the prompts, with ``input_ids`` and
        ``attention_mask`` replaced and a ``labels`` entry added. Each is a
        list of 1-D ``torch`` tensors of length ``max_length``.
    """
    pad_id = tokenizer.pad_token_id
    prompts = [f"{text_column} : {x} Label : " for x in examples[text_column]]
    targets = [str(x) for x in examples[label_column]]
    model_inputs = tokenizer(prompts)
    labels = tokenizer(targets)

    for i in range(len(prompts)):
        prompt_ids = model_inputs["input_ids"][i]
        # The pad token doubles as the terminator the model learns to emit.
        answer_ids = labels["input_ids"][i] + [pad_id]

        input_ids = prompt_ids + answer_ids
        label_ids = [-100] * len(prompt_ids) + answer_ids
        attention_mask = [1] * len(input_ids)

        # Exactly one of these is non-zero: pad short sequences on the left,
        # or drop the oldest tokens of long ones so the answer is preserved.
        pad_len = max(max_length - len(input_ids), 0)
        overflow = max(len(input_ids) - max_length, 0)

        input_ids = [pad_id] * pad_len + input_ids[overflow:]
        attention_mask = [0] * pad_len + attention_mask[overflow:]
        label_ids = [-100] * pad_len + label_ids[overflow:]

        model_inputs["input_ids"][i] = torch.tensor(input_ids)
        model_inputs["attention_mask"][i] = torch.tensor(attention_mask)
        labels["input_ids"][i] = torch.tensor(label_ids)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [9]:
# Tokenize every split in batches with `preprocess_function`. The original
# columns (taken from the train split) are dropped so only the model features
# remain, and the on-disk cache is bypassed so the mapping is always recomputed.
processed_datasets = dataset.map(
    function=preprocess_function,
    batched=True,
    num_proc=1,
    load_from_cache_file=False,
    remove_columns=dataset["train"].column_names,
    desc="Running tokenizer on dataset",
)

Running tokenizer on dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/3399 [00:00<?, ? examples/s]

In [10]:
# The tokenized "train" split is used for optimisation and the "test" split
# for the per-epoch evaluation pass.
# NOTE(review): the evaluation loss is computed against the labels of the
# "test" split — confirm that split carries real labels.
train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["test"]

# Only the training loader shuffles; both batch with the default collator.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=default_data_collator,
    pin_memory=True,
)
eval_dataloader = DataLoader(
    eval_dataset,
    batch_size=batch_size,
    collate_fn=default_data_collator,
    pin_memory=True,
)

In [11]:
# Load the base causal LM and wrap it with the prompt-tuning adapter described
# by `peft_config`.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)

# print_trainable_parameters() writes its summary itself and returns None, so
# it is called directly; wrapping it in print() emits an extra "None" line.
model.print_trainable_parameters()

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

trainable params: 8,192 || all params: 559,222,784 || trainable%: 0.0014648902430985358
None


In [12]:
# AdamW over the model's parameters at the configured learning rate.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr)

# Linear schedule with no warm-up, spanning one scheduler step per training
# batch across all epochs.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=num_epochs * len(train_dataloader),
)

In [13]:
# Move the adapter-wrapped model to the training device.
model = model.to(device)

for epoch in range(num_epochs):
    # ---- Training pass: one optimiser and scheduler step per batch ----
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        # Accumulate a detached copy so the running sum keeps no graph alive.
        total_loss = total_loss + loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    # Mean training loss over the batches of this epoch and its exponential.
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)

    # ---- Evaluation pass: no gradients, collect loss and greedy decodes ----
    model.eval()
    eval_loss = 0
    eval_preds = []
    with torch.no_grad():
        for step, batch in enumerate(tqdm(eval_dataloader)):
            batch = {name: tensor.to(device) for name, tensor in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            eval_loss = eval_loss + loss.detach().float()
            # Arg-max token at every position, decoded back to text.
            eval_preds.extend(
                tokenizer.batch_decode(
                    outputs.logits.argmax(dim=-1).detach().cpu().numpy(),
                    skip_special_tokens=True,
                )
            )

    eval_epoch_loss = eval_loss / len(eval_dataloader)
    eval_ppl = torch.exp(eval_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

100%|██████████| 7/7 [00:03<00:00,  1.88it/s]
100%|██████████| 425/425 [01:28<00:00,  4.83it/s]


epoch=0: train_ppl=tensor(1.3669e+12, device='cuda:0') train_epoch_loss=tensor(27.9436, device='cuda:0') eval_ppl=tensor(4014.3369, device='cuda:0') eval_epoch_loss=tensor(8.2976, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.61it/s]
100%|██████████| 425/425 [01:29<00:00,  4.74it/s]


epoch=1: train_ppl=tensor(2944.5393, device='cuda:0') train_epoch_loss=tensor(7.9877, device='cuda:0') eval_ppl=tensor(3051.7920, device='cuda:0') eval_epoch_loss=tensor(8.0235, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=2: train_ppl=tensor(486.6790, device='cuda:0') train_epoch_loss=tensor(6.1876, device='cuda:0') eval_ppl=tensor(3819.8406, device='cuda:0') eval_epoch_loss=tensor(8.2480, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=3: train_ppl=tensor(242.8805, device='cuda:0') train_epoch_loss=tensor(5.4926, device='cuda:0') eval_ppl=tensor(4833.9644, device='cuda:0') eval_epoch_loss=tensor(8.4834, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=4: train_ppl=tensor(159.0631, device='cuda:0') train_epoch_loss=tensor(5.0693, device='cuda:0') eval_ppl=tensor(5530.9956, device='cuda:0') eval_epoch_loss=tensor(8.6181, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=5: train_ppl=tensor(109.1487, device='cuda:0') train_epoch_loss=tensor(4.6927, device='cuda:0') eval_ppl=tensor(6611.1294, device='cuda:0') eval_epoch_loss=tensor(8.7965, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=6: train_ppl=tensor(80.9214, device='cuda:0') train_epoch_loss=tensor(4.3935, device='cuda:0') eval_ppl=tensor(6991.8198, device='cuda:0') eval_epoch_loss=tensor(8.8525, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=7: train_ppl=tensor(58.1478, device='cuda:0') train_epoch_loss=tensor(4.0630, device='cuda:0') eval_ppl=tensor(7883.4868, device='cuda:0') eval_epoch_loss=tensor(8.9725, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=8: train_ppl=tensor(46.7843, device='cuda:0') train_epoch_loss=tensor(3.8455, device='cuda:0') eval_ppl=tensor(9850.0283, device='cuda:0') eval_epoch_loss=tensor(9.1952, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.72it/s]


epoch=9: train_ppl=tensor(35.4429, device='cuda:0') train_epoch_loss=tensor(3.5679, device='cuda:0') eval_ppl=tensor(12434.2168, device='cuda:0') eval_epoch_loss=tensor(9.4282, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=10: train_ppl=tensor(27.7081, device='cuda:0') train_epoch_loss=tensor(3.3217, device='cuda:0') eval_ppl=tensor(19836.2520, device='cuda:0') eval_epoch_loss=tensor(9.8953, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.72it/s]


epoch=11: train_ppl=tensor(22.9095, device='cuda:0') train_epoch_loss=tensor(3.1316, device='cuda:0') eval_ppl=tensor(18339.8164, device='cuda:0') eval_epoch_loss=tensor(9.8168, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=12: train_ppl=tensor(15.4734, device='cuda:0') train_epoch_loss=tensor(2.7391, device='cuda:0') eval_ppl=tensor(31610.7871, device='cuda:0') eval_epoch_loss=tensor(10.3613, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=13: train_ppl=tensor(11.6425, device='cuda:0') train_epoch_loss=tensor(2.4547, device='cuda:0') eval_ppl=tensor(37980.3672, device='cuda:0') eval_epoch_loss=tensor(10.5448, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=14: train_ppl=tensor(8.4587, device='cuda:0') train_epoch_loss=tensor(2.1352, device='cuda:0') eval_ppl=tensor(55912.0781, device='cuda:0') eval_epoch_loss=tensor(10.9315, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.71it/s]


epoch=15: train_ppl=tensor(6.4438, device='cuda:0') train_epoch_loss=tensor(1.8631, device='cuda:0') eval_ppl=tensor(79588.5859, device='cuda:0') eval_epoch_loss=tensor(11.2846, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.71it/s]


epoch=16: train_ppl=tensor(4.6199, device='cuda:0') train_epoch_loss=tensor(1.5304, device='cuda:0') eval_ppl=tensor(119865.3438, device='cuda:0') eval_epoch_loss=tensor(11.6941, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=17: train_ppl=tensor(3.4368, device='cuda:0') train_epoch_loss=tensor(1.2346, device='cuda:0') eval_ppl=tensor(115845.5547, device='cuda:0') eval_epoch_loss=tensor(11.6600, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=18: train_ppl=tensor(2.3859, device='cuda:0') train_epoch_loss=tensor(0.8696, device='cuda:0') eval_ppl=tensor(284435., device='cuda:0') eval_epoch_loss=tensor(12.5583, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=19: train_ppl=tensor(2.0036, device='cuda:0') train_epoch_loss=tensor(0.6949, device='cuda:0') eval_ppl=tensor(123354.1250, device='cuda:0') eval_epoch_loss=tensor(11.7228, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=20: train_ppl=tensor(1.6772, device='cuda:0') train_epoch_loss=tensor(0.5172, device='cuda:0') eval_ppl=tensor(321101.0312, device='cuda:0') eval_epoch_loss=tensor(12.6795, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=21: train_ppl=tensor(1.5768, device='cuda:0') train_epoch_loss=tensor(0.4554, device='cuda:0') eval_ppl=tensor(157637.7188, device='cuda:0') eval_epoch_loss=tensor(11.9681, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=22: train_ppl=tensor(1.4620, device='cuda:0') train_epoch_loss=tensor(0.3798, device='cuda:0') eval_ppl=tensor(117026.1484, device='cuda:0') eval_epoch_loss=tensor(11.6702, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.72it/s]


epoch=23: train_ppl=tensor(1.4482, device='cuda:0') train_epoch_loss=tensor(0.3703, device='cuda:0') eval_ppl=tensor(104757., device='cuda:0') eval_epoch_loss=tensor(11.5594, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=24: train_ppl=tensor(1.5592, device='cuda:0') train_epoch_loss=tensor(0.4442, device='cuda:0') eval_ppl=tensor(67184.7109, device='cuda:0') eval_epoch_loss=tensor(11.1152, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=25: train_ppl=tensor(1.3881, device='cuda:0') train_epoch_loss=tensor(0.3279, device='cuda:0') eval_ppl=tensor(83085.1719, device='cuda:0') eval_epoch_loss=tensor(11.3276, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=26: train_ppl=tensor(1.3112, device='cuda:0') train_epoch_loss=tensor(0.2709, device='cuda:0') eval_ppl=tensor(83398.1797, device='cuda:0') eval_epoch_loss=tensor(11.3314, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=27: train_ppl=tensor(1.3150, device='cuda:0') train_epoch_loss=tensor(0.2739, device='cuda:0') eval_ppl=tensor(88543.7969, device='cuda:0') eval_epoch_loss=tensor(11.3913, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=28: train_ppl=tensor(1.3060, device='cuda:0') train_epoch_loss=tensor(0.2670, device='cuda:0') eval_ppl=tensor(159179.2344, device='cuda:0') eval_epoch_loss=tensor(11.9778, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.74it/s]


epoch=29: train_ppl=tensor(1.4102, device='cuda:0') train_epoch_loss=tensor(0.3438, device='cuda:0') eval_ppl=tensor(60173.1211, device='cuda:0') eval_epoch_loss=tensor(11.0050, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=30: train_ppl=tensor(1.3285, device='cuda:0') train_epoch_loss=tensor(0.2840, device='cuda:0') eval_ppl=tensor(71463.2109, device='cuda:0') eval_epoch_loss=tensor(11.1769, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=31: train_ppl=tensor(1.3190, device='cuda:0') train_epoch_loss=tensor(0.2769, device='cuda:0') eval_ppl=tensor(61152.8398, device='cuda:0') eval_epoch_loss=tensor(11.0211, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=32: train_ppl=tensor(1.2402, device='cuda:0') train_epoch_loss=tensor(0.2152, device='cuda:0') eval_ppl=tensor(65482.1523, device='cuda:0') eval_epoch_loss=tensor(11.0895, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=33: train_ppl=tensor(1.3169, device='cuda:0') train_epoch_loss=tensor(0.2752, device='cuda:0') eval_ppl=tensor(62407.3359, device='cuda:0') eval_epoch_loss=tensor(11.0414, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.67it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=34: train_ppl=tensor(1.2502, device='cuda:0') train_epoch_loss=tensor(0.2233, device='cuda:0') eval_ppl=tensor(50663.0703, device='cuda:0') eval_epoch_loss=tensor(10.8330, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=35: train_ppl=tensor(1.2507, device='cuda:0') train_epoch_loss=tensor(0.2237, device='cuda:0') eval_ppl=tensor(85971.0469, device='cuda:0') eval_epoch_loss=tensor(11.3618, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.64it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=36: train_ppl=tensor(1.2529, device='cuda:0') train_epoch_loss=tensor(0.2255, device='cuda:0') eval_ppl=tensor(105117.0703, device='cuda:0') eval_epoch_loss=tensor(11.5628, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=37: train_ppl=tensor(1.2044, device='cuda:0') train_epoch_loss=tensor(0.1859, device='cuda:0') eval_ppl=tensor(108699.8750, device='cuda:0') eval_epoch_loss=tensor(11.5963, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=38: train_ppl=tensor(1.2584, device='cuda:0') train_epoch_loss=tensor(0.2299, device='cuda:0') eval_ppl=tensor(91931.7344, device='cuda:0') eval_epoch_loss=tensor(11.4288, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=39: train_ppl=tensor(1.1965, device='cuda:0') train_epoch_loss=tensor(0.1794, device='cuda:0') eval_ppl=tensor(110038.0547, device='cuda:0') eval_epoch_loss=tensor(11.6086, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.71it/s]


epoch=40: train_ppl=tensor(1.2137, device='cuda:0') train_epoch_loss=tensor(0.1937, device='cuda:0') eval_ppl=tensor(111863.6719, device='cuda:0') eval_epoch_loss=tensor(11.6250, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=41: train_ppl=tensor(1.1677, device='cuda:0') train_epoch_loss=tensor(0.1550, device='cuda:0') eval_ppl=tensor(137418.4219, device='cuda:0') eval_epoch_loss=tensor(11.8308, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=42: train_ppl=tensor(1.1635, device='cuda:0') train_epoch_loss=tensor(0.1514, device='cuda:0') eval_ppl=tensor(145621.9531, device='cuda:0') eval_epoch_loss=tensor(11.8888, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=43: train_ppl=tensor(1.1530, device='cuda:0') train_epoch_loss=tensor(0.1424, device='cuda:0') eval_ppl=tensor(162602.1406, device='cuda:0') eval_epoch_loss=tensor(11.9991, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=44: train_ppl=tensor(1.1403, device='cuda:0') train_epoch_loss=tensor(0.1313, device='cuda:0') eval_ppl=tensor(171788.7656, device='cuda:0') eval_epoch_loss=tensor(12.0540, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.64it/s]
100%|██████████| 425/425 [01:29<00:00,  4.72it/s]


epoch=45: train_ppl=tensor(1.1544, device='cuda:0') train_epoch_loss=tensor(0.1436, device='cuda:0') eval_ppl=tensor(201820.4375, device='cuda:0') eval_epoch_loss=tensor(12.2151, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.67it/s]
100%|██████████| 425/425 [01:29<00:00,  4.72it/s]


epoch=46: train_ppl=tensor(1.1508, device='cuda:0') train_epoch_loss=tensor(0.1405, device='cuda:0') eval_ppl=tensor(187610.3125, device='cuda:0') eval_epoch_loss=tensor(12.1421, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:30<00:00,  4.72it/s]


epoch=47: train_ppl=tensor(1.1621, device='cuda:0') train_epoch_loss=tensor(0.1502, device='cuda:0') eval_ppl=tensor(180729.4375, device='cuda:0') eval_epoch_loss=tensor(12.1048, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.65it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]


epoch=48: train_ppl=tensor(1.1384, device='cuda:0') train_epoch_loss=tensor(0.1297, device='cuda:0') eval_ppl=tensor(173288.0938, device='cuda:0') eval_epoch_loss=tensor(12.0627, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.66it/s]
100%|██████████| 425/425 [01:29<00:00,  4.73it/s]

epoch=49: train_ppl=tensor(1.1406, device='cuda:0') train_epoch_loss=tensor(0.1316, device='cuda:0') eval_ppl=tensor(172998.4688, device='cuda:0') eval_epoch_loss=tensor(12.0610, device='cuda:0')





In [16]:
# Build one inference prompt in the same "<text_column> : <tweet> Label : "
# layout used for the training examples and tokenize it to PyTorch tensors.
inputs = tokenizer(
    f"{text_column} : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : ",
    return_tensors="pt",
)

In [18]:
# Generate the label for the prompt tokenized in `inputs`.
model.to(device)
# Make sure the model is in inference mode even if this cell is run on its own.
model.eval()

with torch.no_grad():
    inputs = {k: v.to(device) for k, v in inputs.items()}
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=10,
        # Stop on the terminator that preprocess_function appends after every
        # target (the pad token) rather than on a hard-coded token id.
        eos_token_id=tokenizer.pad_token_id,
    )
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

['Tweet text : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : complaint']
