In [None]:
!pip install -q peft transformers datasets

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/168.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/168.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from tqdm import tqdm

In [None]:
device = "cuda"
model_name_or_path = "bigscience/bloomz-560m"
tokenizer_name_or_path = "bigscience/bloomz-560m"
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    prompt_tuning_init_text="Classify if the tweet is a complaint or not:",
    tokenizer_name_or_path=model_name_or_path,
)

In [None]:
dataset_name = "twitter_complaints"
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)
text_column = "Tweet text"
label_column = "text_label"
max_length = 64
lr = 3e-2
num_epochs = 50
batch_size = 8

In [None]:
dataset = load_dataset("ought/raft", dataset_name)
dataset["train"][0]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/6.72k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/266k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3399 [00:00<?, ? examples/s]

{'Tweet text': '@HMRCcustomers No this is my first job', 'ID': 0, 'Label': 2}

In [None]:
# replace the Label value with the corresponding label text and store them in a text_label column

classes = [k.replace("_", " ") for k in dataset["train"].features["Label"].names]
dataset = dataset.map(
    lambda x: {"text_label": [classes[label] for label in x["Label"]]},
    batched=True,
    num_proc=1,
)
dataset["train"][0]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/3399 [00:00<?, ? examples/s]

{'Tweet text': '@HMRCcustomers No this is my first job',
 'ID': 0,
 'Label': 2,
 'text_label': 'no complaint'}

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
target_max_length = max([len(tokenizer(class_label)["input_ids"]) for class_label in classes])
print(target_max_length)

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

3


In [None]:
def preprocess_function(examples):
    batch_size = len(examples[text_column])
    inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]]
    targets = [str(x) for x in examples[label_column]]
    model_inputs = tokenizer(inputs)
    labels = tokenizer(targets)
    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]
        label_input_ids = labels["input_ids"][i] + [tokenizer.pad_token_id]
        # print(i, sample_input_ids, label_input_ids)
        model_inputs["input_ids"][i] = sample_input_ids + label_input_ids
        labels["input_ids"][i] = [-100] * len(sample_input_ids) + label_input_ids
        model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i])
    # print(model_inputs)
    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]
        label_input_ids = labels["input_ids"][i]
        model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * (
            max_length - len(sample_input_ids)
        ) + sample_input_ids
        model_inputs["attention_mask"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs[
            "attention_mask"
        ][i]
        labels["input_ids"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids
        model_inputs["input_ids"][i] = torch.tensor(model_inputs["input_ids"][i][:max_length])
        model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length])
        labels["input_ids"][i] = torch.tensor(labels["input_ids"][i][:max_length])
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [None]:
processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

Running tokenizer on dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/3399 [00:00<?, ? examples/s]

In [None]:
train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["test"]


train_dataloader = DataLoader(
    train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True
)
eval_dataloader = DataLoader(eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True)

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)
print(model.print_trainable_parameters())

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

trainable params: 8,192 || all params: 559,222,784 || trainable%: 0.0014648902430985358
None


In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * num_epochs),
)

In [None]:
model = model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    eval_loss = 0
    eval_preds = []
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        eval_loss += loss.detach().float()
        eval_preds.extend(
            tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True)
        )

    eval_epoch_loss = eval_loss / len(eval_dataloader)
    eval_ppl = torch.exp(eval_epoch_loss)
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

100%|██████████| 7/7 [00:04<00:00,  1.74it/s]
100%|██████████| 425/425 [01:31<00:00,  4.65it/s]


epoch=0: train_ppl=tensor(1.7580e+12, device='cuda:0') train_epoch_loss=tensor(28.1952, device='cuda:0') eval_ppl=tensor(5233.8506, device='cuda:0') eval_epoch_loss=tensor(8.5629, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=1: train_ppl=tensor(3484.6809, device='cuda:0') train_epoch_loss=tensor(8.1561, device='cuda:0') eval_ppl=tensor(5557.2729, device='cuda:0') eval_epoch_loss=tensor(8.6229, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:33<00:00,  4.52it/s]


epoch=2: train_ppl=tensor(512.4990, device='cuda:0') train_epoch_loss=tensor(6.2393, device='cuda:0') eval_ppl=tensor(7737.4390, device='cuda:0') eval_epoch_loss=tensor(8.9538, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.53it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=3: train_ppl=tensor(237.0720, device='cuda:0') train_epoch_loss=tensor(5.4684, device='cuda:0') eval_ppl=tensor(9933.5137, device='cuda:0') eval_epoch_loss=tensor(9.2037, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=4: train_ppl=tensor(155.0089, device='cuda:0') train_epoch_loss=tensor(5.0435, device='cuda:0') eval_ppl=tensor(13492.1572, device='cuda:0') eval_epoch_loss=tensor(9.5099, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=5: train_ppl=tensor(106.8723, device='cuda:0') train_epoch_loss=tensor(4.6716, device='cuda:0') eval_ppl=tensor(20313.8438, device='cuda:0') eval_epoch_loss=tensor(9.9191, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=6: train_ppl=tensor(79.8069, device='cuda:0') train_epoch_loss=tensor(4.3796, device='cuda:0') eval_ppl=tensor(34043.7578, device='cuda:0') eval_epoch_loss=tensor(10.4354, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=7: train_ppl=tensor(58.4597, device='cuda:0') train_epoch_loss=tensor(4.0683, device='cuda:0') eval_ppl=tensor(56501.6016, device='cuda:0') eval_epoch_loss=tensor(10.9420, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=8: train_ppl=tensor(43.7697, device='cuda:0') train_epoch_loss=tensor(3.7789, device='cuda:0') eval_ppl=tensor(128978.9531, device='cuda:0') eval_epoch_loss=tensor(11.7674, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=9: train_ppl=tensor(32.6579, device='cuda:0') train_epoch_loss=tensor(3.4861, device='cuda:0') eval_ppl=tensor(114181.2266, device='cuda:0') eval_epoch_loss=tensor(11.6455, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=10: train_ppl=tensor(22.5449, device='cuda:0') train_epoch_loss=tensor(3.1155, device='cuda:0') eval_ppl=tensor(221760.6406, device='cuda:0') eval_epoch_loss=tensor(12.3094, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=11: train_ppl=tensor(15.4517, device='cuda:0') train_epoch_loss=tensor(2.7377, device='cuda:0') eval_ppl=tensor(479723., device='cuda:0') eval_epoch_loss=tensor(13.0810, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=12: train_ppl=tensor(12.5144, device='cuda:0') train_epoch_loss=tensor(2.5269, device='cuda:0') eval_ppl=tensor(658422.5000, device='cuda:0') eval_epoch_loss=tensor(13.3976, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=13: train_ppl=tensor(7.6884, device='cuda:0') train_epoch_loss=tensor(2.0397, device='cuda:0') eval_ppl=tensor(1464802.8750, device='cuda:0') eval_epoch_loss=tensor(14.1972, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=14: train_ppl=tensor(5.1994, device='cuda:0') train_epoch_loss=tensor(1.6485, device='cuda:0') eval_ppl=tensor(1596391.8750, device='cuda:0') eval_epoch_loss=tensor(14.2833, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=15: train_ppl=tensor(3.7886, device='cuda:0') train_epoch_loss=tensor(1.3320, device='cuda:0') eval_ppl=tensor(2809707.2500, device='cuda:0') eval_epoch_loss=tensor(14.8486, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:33<00:00,  4.53it/s]


epoch=16: train_ppl=tensor(2.8166, device='cuda:0') train_epoch_loss=tensor(1.0355, device='cuda:0') eval_ppl=tensor(1578002.5000, device='cuda:0') eval_epoch_loss=tensor(14.2717, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=17: train_ppl=tensor(2.6666, device='cuda:0') train_epoch_loss=tensor(0.9808, device='cuda:0') eval_ppl=tensor(918763., device='cuda:0') eval_epoch_loss=tensor(13.7308, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=18: train_ppl=tensor(2.0896, device='cuda:0') train_epoch_loss=tensor(0.7370, device='cuda:0') eval_ppl=tensor(594848.7500, device='cuda:0') eval_epoch_loss=tensor(13.2961, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=19: train_ppl=tensor(1.7756, device='cuda:0') train_epoch_loss=tensor(0.5742, device='cuda:0') eval_ppl=tensor(601369.5625, device='cuda:0') eval_epoch_loss=tensor(13.3070, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=20: train_ppl=tensor(1.6931, device='cuda:0') train_epoch_loss=tensor(0.5266, device='cuda:0') eval_ppl=tensor(852420.4375, device='cuda:0') eval_epoch_loss=tensor(13.6558, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=21: train_ppl=tensor(1.6185, device='cuda:0') train_epoch_loss=tensor(0.4815, device='cuda:0') eval_ppl=tensor(459932.7188, device='cuda:0') eval_epoch_loss=tensor(13.0388, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=22: train_ppl=tensor(1.5015, device='cuda:0') train_epoch_loss=tensor(0.4064, device='cuda:0') eval_ppl=tensor(564391.5625, device='cuda:0') eval_epoch_loss=tensor(13.2435, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=23: train_ppl=tensor(1.5047, device='cuda:0') train_epoch_loss=tensor(0.4086, device='cuda:0') eval_ppl=tensor(468404.6875, device='cuda:0') eval_epoch_loss=tensor(13.0571, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=24: train_ppl=tensor(1.5412, device='cuda:0') train_epoch_loss=tensor(0.4325, device='cuda:0') eval_ppl=tensor(314412.9688, device='cuda:0') eval_epoch_loss=tensor(12.6585, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=25: train_ppl=tensor(1.5448, device='cuda:0') train_epoch_loss=tensor(0.4349, device='cuda:0') eval_ppl=tensor(380693.0312, device='cuda:0') eval_epoch_loss=tensor(12.8497, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=26: train_ppl=tensor(1.6580, device='cuda:0') train_epoch_loss=tensor(0.5056, device='cuda:0') eval_ppl=tensor(276510.6250, device='cuda:0') eval_epoch_loss=tensor(12.5300, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=27: train_ppl=tensor(1.4191, device='cuda:0') train_epoch_loss=tensor(0.3500, device='cuda:0') eval_ppl=tensor(290699.5938, device='cuda:0') eval_epoch_loss=tensor(12.5800, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=28: train_ppl=tensor(1.3702, device='cuda:0') train_epoch_loss=tensor(0.3149, device='cuda:0') eval_ppl=tensor(461746.5000, device='cuda:0') eval_epoch_loss=tensor(13.0428, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=29: train_ppl=tensor(1.3582, device='cuda:0') train_epoch_loss=tensor(0.3062, device='cuda:0') eval_ppl=tensor(343677.3125, device='cuda:0') eval_epoch_loss=tensor(12.7475, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=30: train_ppl=tensor(1.2750, device='cuda:0') train_epoch_loss=tensor(0.2429, device='cuda:0') eval_ppl=tensor(588087.3750, device='cuda:0') eval_epoch_loss=tensor(13.2846, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=31: train_ppl=tensor(1.5029, device='cuda:0') train_epoch_loss=tensor(0.4074, device='cuda:0') eval_ppl=tensor(330090.4062, device='cuda:0') eval_epoch_loss=tensor(12.7071, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=32: train_ppl=tensor(1.3486, device='cuda:0') train_epoch_loss=tensor(0.2991, device='cuda:0') eval_ppl=tensor(285285.5625, device='cuda:0') eval_epoch_loss=tensor(12.5612, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=33: train_ppl=tensor(1.2842, device='cuda:0') train_epoch_loss=tensor(0.2502, device='cuda:0') eval_ppl=tensor(437675.5000, device='cuda:0') eval_epoch_loss=tensor(12.9892, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=34: train_ppl=tensor(1.2624, device='cuda:0') train_epoch_loss=tensor(0.2330, device='cuda:0') eval_ppl=tensor(426989.5000, device='cuda:0') eval_epoch_loss=tensor(12.9645, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=35: train_ppl=tensor(1.2626, device='cuda:0') train_epoch_loss=tensor(0.2332, device='cuda:0') eval_ppl=tensor(581043.0625, device='cuda:0') eval_epoch_loss=tensor(13.2726, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=36: train_ppl=tensor(1.3200, device='cuda:0') train_epoch_loss=tensor(0.2776, device='cuda:0') eval_ppl=tensor(530974., device='cuda:0') eval_epoch_loss=tensor(13.1825, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=37: train_ppl=tensor(1.2541, device='cuda:0') train_epoch_loss=tensor(0.2264, device='cuda:0') eval_ppl=tensor(406949.2812, device='cuda:0') eval_epoch_loss=tensor(12.9164, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=38: train_ppl=tensor(1.2647, device='cuda:0') train_epoch_loss=tensor(0.2348, device='cuda:0') eval_ppl=tensor(552081.9375, device='cuda:0') eval_epoch_loss=tensor(13.2215, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=39: train_ppl=tensor(1.2788, device='cuda:0') train_epoch_loss=tensor(0.2459, device='cuda:0') eval_ppl=tensor(410450.5312, device='cuda:0') eval_epoch_loss=tensor(12.9250, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=40: train_ppl=tensor(1.2341, device='cuda:0') train_epoch_loss=tensor(0.2103, device='cuda:0') eval_ppl=tensor(563214.6250, device='cuda:0') eval_epoch_loss=tensor(13.2414, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=41: train_ppl=tensor(1.2038, device='cuda:0') train_epoch_loss=tensor(0.1855, device='cuda:0') eval_ppl=tensor(548185.9375, device='cuda:0') eval_epoch_loss=tensor(13.2144, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=42: train_ppl=tensor(1.2542, device='cuda:0') train_epoch_loss=tensor(0.2265, device='cuda:0') eval_ppl=tensor(502702.2188, device='cuda:0') eval_epoch_loss=tensor(13.1278, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=43: train_ppl=tensor(1.2574, device='cuda:0') train_epoch_loss=tensor(0.2291, device='cuda:0') eval_ppl=tensor(554049.8125, device='cuda:0') eval_epoch_loss=tensor(13.2250, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=44: train_ppl=tensor(1.2393, device='cuda:0') train_epoch_loss=tensor(0.2145, device='cuda:0') eval_ppl=tensor(647297.2500, device='cuda:0') eval_epoch_loss=tensor(13.3806, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=45: train_ppl=tensor(1.2275, device='cuda:0') train_epoch_loss=tensor(0.2050, device='cuda:0') eval_ppl=tensor(596763.6250, device='cuda:0') eval_epoch_loss=tensor(13.2993, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.52it/s]


epoch=46: train_ppl=tensor(1.1966, device='cuda:0') train_epoch_loss=tensor(0.1795, device='cuda:0') eval_ppl=tensor(582015.2500, device='cuda:0') eval_epoch_loss=tensor(13.2743, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.57it/s]
100%|██████████| 425/425 [01:34<00:00,  4.51it/s]


epoch=47: train_ppl=tensor(1.1872, device='cuda:0') train_epoch_loss=tensor(0.1716, device='cuda:0') eval_ppl=tensor(625370.0625, device='cuda:0') eval_epoch_loss=tensor(13.3461, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.56it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]


epoch=48: train_ppl=tensor(1.1776, device='cuda:0') train_epoch_loss=tensor(0.1635, device='cuda:0') eval_ppl=tensor(660695.1250, device='cuda:0') eval_epoch_loss=tensor(13.4010, device='cuda:0')


100%|██████████| 7/7 [00:02<00:00,  2.55it/s]
100%|██████████| 425/425 [01:34<00:00,  4.50it/s]

epoch=49: train_ppl=tensor(1.1956, device='cuda:0') train_epoch_loss=tensor(0.1786, device='cuda:0') eval_ppl=tensor(671356.2500, device='cuda:0') eval_epoch_loss=tensor(13.4171, device='cuda:0')





In [None]:
inputs = tokenizer(
    f'{text_column} : {"@nationalgridus I have no water and the bill is current and paid. Can you do something about this?"} Label : ',
    return_tensors="pt",
)

In [None]:
with torch.no_grad():
    inputs = {k: v.to(device) for k, v in inputs.items()}
    outputs = model.generate(
        input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=10, eos_token_id=3
    )
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

['Tweet text : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : no complaint']
