In [46]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

device = "cuda"
model_name_or_path = "bigscience/bloomz-560m"
tokenizer_name_or_path = "bigscience/bloomz-560m"
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    tokenizer_name_or_path=model_name_or_path,
)

dataset_name = "judge_data"
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)
text_column = "prompt"
label_column = "text_label"
max_length = 64
lr = 3e-2
num_epochs = 50
batch_size = 8

In [49]:
import pickle
with open('train_json.pickle', 'rb') as f:
    dataset = pickle.load(f)

dataset["root"][0]


{'ID': 'TRAIN_0001',
 'prompt': ' facts: Ramon Nelson was riding his bike when he suffered a lethal blow to the back of his head with a baseball bat. After two eyewitnesses identified Lawrence Owens from an array of photos and then a lineup, he was tried and convicted for Nelson’s death. Because Nelson was carrying cocaine and crack cocaine potentially for distribution, the judge at Owens’ bench trial ruled that Owens was probably also a drug dealer and was trying to “knock [Nelson] off.” Owens was found guilty of first-degree murder and sentenced to 25 years in prison.\nOwens filed a petition for a writ of habeas corpus on the grounds that his constitutional right to due process was violated during the trial. He argued that the eyewitness identification should have been inadmissible based on unreliability and that the judge impermissibly inferred a motive when a motive was not an element of the offense. The district court denied the writ of habeas corpus, and Owens appealed. The U.S. 

In [27]:
from collections import Counter

labels = list(map(lambda x: x['Label'], dataset['train']))
counter = Counter(labels)
counter

Counter({2: 33, 1: 17})

In [28]:
labels = list(map(lambda x: x['Label'], dataset['test']))
counter = Counter(labels)
counter

Counter({0: 3399})

In [29]:
classes = [k.replace("_", " ") for k in dataset["train"].features["Label"].names]
dataset = dataset.map(
    lambda x: {"text_label": [classes[label] for label in x["Label"]]},
    batched=True,
    num_proc=1,
)



Loading cached processed dataset at /root/.cache/huggingface/datasets/ought___raft/twitter_complaints/1.1.0/79c4de1312c1e3730043f7db07179c914f48403101f7124e2fe336f6f54d9f84/cache-d412a834430f1a60.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/ought___raft/twitter_complaints/1.1.0/79c4de1312c1e3730043f7db07179c914f48403101f7124e2fe336f6f54d9f84/cache-53af518f9692b496.arrow


In [30]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
print(tokenizer.pad_token_id )
print(tokenizer.eos_token_id)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
target_max_length = max([len(tokenizer(class_label)["input_ids"]) for class_label in classes])
print(target_max_length)


3
2
3


In [34]:
def preprocess_function(examples):
    batch_size = len(examples[text_column])
    inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]]
    targets = [str(x) for x in examples[label_column]]
    model_inputs = tokenizer(inputs)
    labels = tokenizer(targets)
    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]
        label_input_ids = labels["input_ids"][i] + [tokenizer.pad_token_id] # pad token 더해줌 eos 토큰으로 사용
#         print(i, sample_input_ids, label_input_ids)
        model_inputs["input_ids"][i] = sample_input_ids + label_input_ids # "... label :"의 토큰 뒤에 라벨 토큰 까지 더해줌
        labels["input_ids"][i] = [-100] * len(sample_input_ids) + label_input_ids # "... label :"의 토큰까지 -100으로 가려주는 듯
        model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i])
    # print(model_inputs)
    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]
        label_input_ids = labels["input_ids"][i]
        # 패딩토큰이 신기하게 앞에서부터 붙는다.
        model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * (
            max_length - len(sample_input_ids)
        ) + sample_input_ids
        
        #패딩 토큰 길이만큼 마스킹해줌
        model_inputs["attention_mask"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs[
            "attention_mask"
        ][i]
        
        # input까지 -100 토큰으로 만듦
        labels["input_ids"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids
        model_inputs["input_ids"][i] = torch.tensor(model_inputs["input_ids"][i][:max_length])
        model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length])
        labels["input_ids"][i] = torch.tensor(labels["input_ids"][i][:max_length])
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [35]:
# inputs = [f"{text_column} : {x} Label : " for x in dataset['train'][text_column]]
# targets = [str(x) for x in dataset['train'][label_column]]

# model_inputs = tokenizer(inputs)
# labels = tokenizer(targets)

# labels['input_ids'][0] + [tokenizer.pad_token_id]

In [36]:
processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

Running tokenizer on dataset:   0%|          | 0/50 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/3399 [00:00<?, ? examples/s]

In [39]:
train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["train"]


train_dataloader = DataLoader(
    train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True
)
eval_dataloader = DataLoader(eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True)

In [37]:
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)
print(model.print_trainable_parameters())

Downloading (…)lve/main/config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

trainable params: 8192 || all params: 559222784 || trainable%: 0.0014648902430985358
None


In [40]:
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * num_epochs),
)

In [41]:
model = model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    eval_loss = 0
    eval_preds = []
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        eval_loss += loss.detach().float()
        eval_preds.extend(
            tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True)
        )

    eval_epoch_loss = eval_loss / len(eval_dataloader)
    eval_ppl = torch.exp(eval_epoch_loss)
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

100%|██████████| 7/7 [00:02<00:00,  3.31it/s]
100%|██████████| 7/7 [00:00<00:00, 17.89it/s]


epoch=0: train_ppl=tensor(4.2881e+14, device='cuda:0') train_epoch_loss=tensor(33.6920, device='cuda:0') eval_ppl=tensor(173441.0312, device='cuda:0') eval_epoch_loss=tensor(12.0636, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.76it/s]


epoch=1: train_ppl=tensor(50299.2578, device='cuda:0') train_epoch_loss=tensor(10.8257, device='cuda:0') eval_ppl=tensor(8842.6875, device='cuda:0') eval_epoch_loss=tensor(9.0873, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.64it/s]
100%|██████████| 7/7 [00:00<00:00, 17.73it/s]


epoch=2: train_ppl=tensor(4863.9214, device='cuda:0') train_epoch_loss=tensor(8.4896, device='cuda:0') eval_ppl=tensor(2762.5493, device='cuda:0') eval_epoch_loss=tensor(7.9239, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.63it/s]
100%|██████████| 7/7 [00:00<00:00, 17.64it/s]


epoch=3: train_ppl=tensor(1732.8114, device='cuda:0') train_epoch_loss=tensor(7.4575, device='cuda:0') eval_ppl=tensor(982.8829, device='cuda:0') eval_epoch_loss=tensor(6.8905, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.64it/s]
100%|██████████| 7/7 [00:00<00:00, 17.65it/s]


epoch=4: train_ppl=tensor(688.2207, device='cuda:0') train_epoch_loss=tensor(6.5341, device='cuda:0') eval_ppl=tensor(452.0320, device='cuda:0') eval_epoch_loss=tensor(6.1138, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.59it/s]
100%|██████████| 7/7 [00:00<00:00, 17.83it/s]


epoch=5: train_ppl=tensor(360.7234, device='cuda:0') train_epoch_loss=tensor(5.8881, device='cuda:0') eval_ppl=tensor(307.4142, device='cuda:0') eval_epoch_loss=tensor(5.7282, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.55it/s]
100%|██████████| 7/7 [00:00<00:00, 17.85it/s]


epoch=6: train_ppl=tensor(271.1061, device='cuda:0') train_epoch_loss=tensor(5.6025, device='cuda:0') eval_ppl=tensor(242.7101, device='cuda:0') eval_epoch_loss=tensor(5.4919, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.60it/s]
100%|██████████| 7/7 [00:00<00:00, 17.69it/s]


epoch=7: train_ppl=tensor(227.7112, device='cuda:0') train_epoch_loss=tensor(5.4281, device='cuda:0') eval_ppl=tensor(213.7055, device='cuda:0') eval_epoch_loss=tensor(5.3646, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.65it/s]
100%|██████████| 7/7 [00:00<00:00, 17.71it/s]


epoch=8: train_ppl=tensor(200.9868, device='cuda:0') train_epoch_loss=tensor(5.3032, device='cuda:0') eval_ppl=tensor(187.0248, device='cuda:0') eval_epoch_loss=tensor(5.2312, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.56it/s]
100%|██████████| 7/7 [00:00<00:00, 17.73it/s]


epoch=9: train_ppl=tensor(184.8970, device='cuda:0') train_epoch_loss=tensor(5.2198, device='cuda:0') eval_ppl=tensor(176.3887, device='cuda:0') eval_epoch_loss=tensor(5.1727, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.58it/s]
100%|██████████| 7/7 [00:00<00:00, 17.69it/s]


epoch=10: train_ppl=tensor(159.3382, device='cuda:0') train_epoch_loss=tensor(5.0710, device='cuda:0') eval_ppl=tensor(158.1386, device='cuda:0') eval_epoch_loss=tensor(5.0635, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.64it/s]
100%|██████████| 7/7 [00:00<00:00, 17.63it/s]


epoch=11: train_ppl=tensor(149.3111, device='cuda:0') train_epoch_loss=tensor(5.0060, device='cuda:0') eval_ppl=tensor(142.0712, device='cuda:0') eval_epoch_loss=tensor(4.9563, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.58it/s]
100%|██████████| 7/7 [00:00<00:00, 17.83it/s]


epoch=12: train_ppl=tensor(132.0486, device='cuda:0') train_epoch_loss=tensor(4.8832, device='cuda:0') eval_ppl=tensor(126.0212, device='cuda:0') eval_epoch_loss=tensor(4.8365, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.56it/s]
100%|██████████| 7/7 [00:00<00:00, 17.72it/s]


epoch=13: train_ppl=tensor(118.5394, device='cuda:0') train_epoch_loss=tensor(4.7752, device='cuda:0') eval_ppl=tensor(111.2468, device='cuda:0') eval_epoch_loss=tensor(4.7118, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.67it/s]


epoch=14: train_ppl=tensor(105.6340, device='cuda:0') train_epoch_loss=tensor(4.6600, device='cuda:0') eval_ppl=tensor(100.1513, device='cuda:0') eval_epoch_loss=tensor(4.6067, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.56it/s]
100%|██████████| 7/7 [00:00<00:00, 17.95it/s]


epoch=15: train_ppl=tensor(95.7403, device='cuda:0') train_epoch_loss=tensor(4.5616, device='cuda:0') eval_ppl=tensor(97.1842, device='cuda:0') eval_epoch_loss=tensor(4.5766, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.56it/s]
100%|██████████| 7/7 [00:00<00:00, 17.71it/s]


epoch=16: train_ppl=tensor(98.8151, device='cuda:0') train_epoch_loss=tensor(4.5933, device='cuda:0') eval_ppl=tensor(82.6520, device='cuda:0') eval_epoch_loss=tensor(4.4146, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.64it/s]
100%|██████████| 7/7 [00:00<00:00, 17.92it/s]


epoch=17: train_ppl=tensor(78.3777, device='cuda:0') train_epoch_loss=tensor(4.3615, device='cuda:0') eval_ppl=tensor(89.8826, device='cuda:0') eval_epoch_loss=tensor(4.4985, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.63it/s]


epoch=18: train_ppl=tensor(77.8051, device='cuda:0') train_epoch_loss=tensor(4.3542, device='cuda:0') eval_ppl=tensor(70.1819, device='cuda:0') eval_epoch_loss=tensor(4.2511, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.53it/s]
100%|██████████| 7/7 [00:00<00:00, 17.68it/s]


epoch=19: train_ppl=tensor(71.7673, device='cuda:0') train_epoch_loss=tensor(4.2734, device='cuda:0') eval_ppl=tensor(59.7134, device='cuda:0') eval_epoch_loss=tensor(4.0896, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.60it/s]
100%|██████████| 7/7 [00:00<00:00, 17.69it/s]


epoch=20: train_ppl=tensor(66.8891, device='cuda:0') train_epoch_loss=tensor(4.2030, device='cuda:0') eval_ppl=tensor(66.7881, device='cuda:0') eval_epoch_loss=tensor(4.2015, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.54it/s]
100%|██████████| 7/7 [00:00<00:00, 17.80it/s]


epoch=21: train_ppl=tensor(58.7686, device='cuda:0') train_epoch_loss=tensor(4.0736, device='cuda:0') eval_ppl=tensor(54.7310, device='cuda:0') eval_epoch_loss=tensor(4.0024, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.59it/s]
100%|██████████| 7/7 [00:00<00:00, 17.76it/s]


epoch=22: train_ppl=tensor(51.7243, device='cuda:0') train_epoch_loss=tensor(3.9459, device='cuda:0') eval_ppl=tensor(44.9345, device='cuda:0') eval_epoch_loss=tensor(3.8052, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.86it/s]


epoch=23: train_ppl=tensor(44.3017, device='cuda:0') train_epoch_loss=tensor(3.7910, device='cuda:0') eval_ppl=tensor(47.1121, device='cuda:0') eval_epoch_loss=tensor(3.8525, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.63it/s]
100%|██████████| 7/7 [00:00<00:00, 17.95it/s]


epoch=24: train_ppl=tensor(41.0405, device='cuda:0') train_epoch_loss=tensor(3.7146, device='cuda:0') eval_ppl=tensor(36.9260, device='cuda:0') eval_epoch_loss=tensor(3.6089, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.70it/s]


epoch=25: train_ppl=tensor(35.0990, device='cuda:0') train_epoch_loss=tensor(3.5582, device='cuda:0') eval_ppl=tensor(32.9335, device='cuda:0') eval_epoch_loss=tensor(3.4945, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.63it/s]
100%|██████████| 7/7 [00:00<00:00, 17.76it/s]


epoch=26: train_ppl=tensor(31.0442, device='cuda:0') train_epoch_loss=tensor(3.4354, device='cuda:0') eval_ppl=tensor(31.1133, device='cuda:0') eval_epoch_loss=tensor(3.4376, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.57it/s]
100%|██████████| 7/7 [00:00<00:00, 17.81it/s]


epoch=27: train_ppl=tensor(28.5602, device='cuda:0') train_epoch_loss=tensor(3.3520, device='cuda:0') eval_ppl=tensor(29.1819, device='cuda:0') eval_epoch_loss=tensor(3.3735, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.57it/s]
100%|██████████| 7/7 [00:00<00:00, 17.72it/s]


epoch=28: train_ppl=tensor(26.2681, device='cuda:0') train_epoch_loss=tensor(3.2684, device='cuda:0') eval_ppl=tensor(27.1434, device='cuda:0') eval_epoch_loss=tensor(3.3011, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.64it/s]
100%|██████████| 7/7 [00:00<00:00, 17.59it/s]


epoch=29: train_ppl=tensor(25.0023, device='cuda:0') train_epoch_loss=tensor(3.2190, device='cuda:0') eval_ppl=tensor(23.8199, device='cuda:0') eval_epoch_loss=tensor(3.1705, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.52it/s]
100%|██████████| 7/7 [00:00<00:00, 17.58it/s]


epoch=30: train_ppl=tensor(22.8478, device='cuda:0') train_epoch_loss=tensor(3.1289, device='cuda:0') eval_ppl=tensor(20.9909, device='cuda:0') eval_epoch_loss=tensor(3.0441, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.51it/s]
100%|██████████| 7/7 [00:00<00:00, 17.85it/s]


epoch=31: train_ppl=tensor(20.1247, device='cuda:0') train_epoch_loss=tensor(3.0019, device='cuda:0') eval_ppl=tensor(19.9585, device='cuda:0') eval_epoch_loss=tensor(2.9937, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.58it/s]
100%|██████████| 7/7 [00:00<00:00, 17.84it/s]


epoch=32: train_ppl=tensor(21.5360, device='cuda:0') train_epoch_loss=tensor(3.0697, device='cuda:0') eval_ppl=tensor(18.4238, device='cuda:0') eval_epoch_loss=tensor(2.9136, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.62it/s]


epoch=33: train_ppl=tensor(17.8694, device='cuda:0') train_epoch_loss=tensor(2.8831, device='cuda:0') eval_ppl=tensor(18.4168, device='cuda:0') eval_epoch_loss=tensor(2.9133, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.52it/s]
100%|██████████| 7/7 [00:00<00:00, 17.63it/s]


epoch=34: train_ppl=tensor(16.1998, device='cuda:0') train_epoch_loss=tensor(2.7850, device='cuda:0') eval_ppl=tensor(15.8449, device='cuda:0') eval_epoch_loss=tensor(2.7628, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.60it/s]
100%|██████████| 7/7 [00:00<00:00, 17.60it/s]


epoch=35: train_ppl=tensor(14.6532, device='cuda:0') train_epoch_loss=tensor(2.6847, device='cuda:0') eval_ppl=tensor(14.6513, device='cuda:0') eval_epoch_loss=tensor(2.6845, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.55it/s]
100%|██████████| 7/7 [00:00<00:00, 17.76it/s]


epoch=36: train_ppl=tensor(13.5581, device='cuda:0') train_epoch_loss=tensor(2.6070, device='cuda:0') eval_ppl=tensor(13.0999, device='cuda:0') eval_epoch_loss=tensor(2.5726, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.62it/s]
100%|██████████| 7/7 [00:00<00:00, 17.61it/s]


epoch=37: train_ppl=tensor(12.3204, device='cuda:0') train_epoch_loss=tensor(2.5113, device='cuda:0') eval_ppl=tensor(15.4956, device='cuda:0') eval_epoch_loss=tensor(2.7406, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.53it/s]
100%|██████████| 7/7 [00:00<00:00, 17.65it/s]


epoch=38: train_ppl=tensor(12.5170, device='cuda:0') train_epoch_loss=tensor(2.5271, device='cuda:0') eval_ppl=tensor(11.0059, device='cuda:0') eval_epoch_loss=tensor(2.3984, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.59it/s]
100%|██████████| 7/7 [00:00<00:00, 17.52it/s]


epoch=39: train_ppl=tensor(10.9561, device='cuda:0') train_epoch_loss=tensor(2.3939, device='cuda:0') eval_ppl=tensor(10.3398, device='cuda:0') eval_epoch_loss=tensor(2.3360, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.57it/s]
100%|██████████| 7/7 [00:00<00:00, 17.57it/s]


epoch=40: train_ppl=tensor(10.5144, device='cuda:0') train_epoch_loss=tensor(2.3527, device='cuda:0') eval_ppl=tensor(9.8598, device='cuda:0') eval_epoch_loss=tensor(2.2885, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.52it/s]
100%|██████████| 7/7 [00:00<00:00, 17.54it/s]


epoch=41: train_ppl=tensor(8.7568, device='cuda:0') train_epoch_loss=tensor(2.1698, device='cuda:0') eval_ppl=tensor(9.0803, device='cuda:0') eval_epoch_loss=tensor(2.2061, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.54it/s]
100%|██████████| 7/7 [00:00<00:00, 17.62it/s]


epoch=42: train_ppl=tensor(8.3322, device='cuda:0') train_epoch_loss=tensor(2.1201, device='cuda:0') eval_ppl=tensor(8.0532, device='cuda:0') eval_epoch_loss=tensor(2.0861, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.59it/s]
100%|██████████| 7/7 [00:00<00:00, 17.58it/s]


epoch=43: train_ppl=tensor(7.7755, device='cuda:0') train_epoch_loss=tensor(2.0510, device='cuda:0') eval_ppl=tensor(8.0473, device='cuda:0') eval_epoch_loss=tensor(2.0853, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.52it/s]
100%|██████████| 7/7 [00:00<00:00, 17.63it/s]


epoch=44: train_ppl=tensor(7.4348, device='cuda:0') train_epoch_loss=tensor(2.0062, device='cuda:0') eval_ppl=tensor(7.5879, device='cuda:0') eval_epoch_loss=tensor(2.0266, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.58it/s]
100%|██████████| 7/7 [00:00<00:00, 17.80it/s]


epoch=45: train_ppl=tensor(7.3176, device='cuda:0') train_epoch_loss=tensor(1.9903, device='cuda:0') eval_ppl=tensor(7.0202, device='cuda:0') eval_epoch_loss=tensor(1.9488, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.51it/s]
100%|██████████| 7/7 [00:00<00:00, 17.59it/s]


epoch=46: train_ppl=tensor(6.5938, device='cuda:0') train_epoch_loss=tensor(1.8861, device='cuda:0') eval_ppl=tensor(7.3603, device='cuda:0') eval_epoch_loss=tensor(1.9961, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.51it/s]
100%|██████████| 7/7 [00:00<00:00, 17.62it/s]


epoch=47: train_ppl=tensor(6.8145, device='cuda:0') train_epoch_loss=tensor(1.9190, device='cuda:0') eval_ppl=tensor(6.6551, device='cuda:0') eval_epoch_loss=tensor(1.8954, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.61it/s]
100%|██████████| 7/7 [00:00<00:00, 17.71it/s]


epoch=48: train_ppl=tensor(6.6115, device='cuda:0') train_epoch_loss=tensor(1.8888, device='cuda:0') eval_ppl=tensor(6.5413, device='cuda:0') eval_epoch_loss=tensor(1.8781, device='cuda:0')


100%|██████████| 7/7 [00:00<00:00,  8.51it/s]
100%|██████████| 7/7 [00:00<00:00, 17.60it/s]

epoch=49: train_ppl=tensor(6.2774, device='cuda:0') train_epoch_loss=tensor(1.8370, device='cuda:0') eval_ppl=tensor(6.5237, device='cuda:0') eval_epoch_loss=tensor(1.8754, device='cuda:0')





In [42]:
from peft import PeftModel, PeftConfig

peft_model_id = "stevhliu/bloomz-560m_PROMPT_TUNING_CAUSAL_LM"

config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)

Downloading (…)/adapter_config.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/33.5k [00:00<?, ?B/s]

In [44]:
inputs = tokenizer(
    f'{text_column} : {"@nationalgridus I have no water and the bill is current and paid. Can you do something about this?"} Label : ',
    return_tensors="pt",
)

In [45]:
model.to(device)

with torch.no_grad():
    inputs = {k: v.to(device) for k, v in inputs.items()}
    outputs = model.generate(
        input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=10, eos_token_id=3
    )
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

['Tweet text : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : complaint']


['Tweet text : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : complaint']