In [1]:
from openprompt import PromptDataLoader, PromptForClassification
from openprompt.data_utils import InputExample, InputFeatures
from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate, ManualVerbalizer, ManualTemplate
from tqdm import tqdm
import numpy as np
from datasets import load_dataset, load_metric
import torch
import pandas as pd



In [2]:
plm, tokenizer, model_config, WrapperClass = load_plm("gpt2","gpt2")

Using pad_token, but it is not set yet.


Metrics and Eval

In [3]:
def compute_metrics(eval_pred):
  """ Computes the metrics given a tuple of (logits, labels) """

  load_accuracy = load_metric("accuracy")
  precision_metric = load_metric('precision')
  recall_metric = load_metric('recall')
  load_f1 = load_metric("f1")
   
  logits, labels = eval_pred
  predictions = np.argmax(logits, axis=-1)
  accuracy = load_accuracy.compute(predictions=predictions, references=labels)["accuracy"]
  precision = precision_metric.compute(predictions=predictions, references=labels, average="macro", zero_division=0)["precision"]
  recall = recall_metric.compute(predictions=predictions, references=labels, average="macro", zero_division=0)["recall"]
  f1 = load_f1.compute(predictions=predictions, references=labels, average="macro")["f1"]
   
  return {
    "accuracy": accuracy,
    "f1": f1,
    "precision": precision,
    "recall": recall,
  }

def evaluate(data_loader, prompt_model, desc="Validation"):
  prompt_model.eval()
  all_logits = []
  all_labels = []
  with torch.no_grad():
    for inputs in tqdm(data_loader, desc="Validation"):
        inputs = inputs.cuda()
        logits = prompt_model(inputs)
        all_logits.extend(logits.cpu().tolist())
        all_labels.extend(inputs['label'].cpu().tolist())
  return (all_logits, all_labels)

### Prepare Data

Load dataset

In [4]:
# load data
pokemon_descriptions = load_dataset('../data/dataset/', delimiter=';')
NUM_CLASSES = np.unique(pokemon_descriptions['train']['labels'])

Using custom data configuration dataset-294e9b13f49dafc6
Found cached dataset csv (C:/Users/fst/.cache/huggingface/datasets/csv/dataset-294e9b13f49dafc6/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


  0%|          | 0/1 [00:00<?, ?it/s]

Make split

In [5]:
split_pokemon_descriptions = pokemon_descriptions['train'].train_test_split(
    test_size=0.2, shuffle=True)

Generate InputExamples from existing dataset

In [6]:
dataset = {}
for split in ['train','test']:
    dataset[split] = []
    for sample in split_pokemon_descriptions[split]:
        input_example = InputExample(text_a = sample['text'], label=int(sample['labels']))
        dataset[split].append(input_example)

Create template

In [7]:
promptTemplate = ManualTemplate(
    text = '{"placeholder":"text_a"} the pokemon is {"mask"}',
    tokenizer = tokenizer,
)

Create verbalizer

In [8]:
mappings = pd.read_csv('../data/pokemon_mapping.csv')
name_to_label_dict = mappings[["name","index"]].set_index('index').to_dict()["name"]

In [9]:
promptVerbalizer = ManualVerbalizer(
    classes = NUM_CLASSES,
    label_words = name_to_label_dict,
    tokenizer = tokenizer,
)

Create dataloader

In [10]:
train_dataloader = PromptDataLoader(
  dataset=dataset["train"],
  template=promptTemplate, 
  tokenizer=tokenizer,
  tokenizer_wrapper_class=WrapperClass, 
  shuffle=True,
  truncate_method="head",
  decoder_max_length=3,
  batch_size=2,
  teacher_forcing=False,
  predict_eos_token=False,
  max_seq_length=327,
)

test_dataloader = PromptDataLoader(dataset=dataset["test"], template=promptTemplate, tokenizer=tokenizer,
    tokenizer_wrapper_class=WrapperClass, max_seq_length=128, decoder_max_length=3,
    batch_size=1,shuffle=False, teacher_forcing=False, predict_eos_token=False,
    truncate_method="head")

tokenizing: 8631it [00:06, 1378.67it/s]
tokenizing: 2158it [00:01, 1672.79it/s]


Create model

In [11]:
promptModel = PromptForClassification(
    template = promptTemplate,
    plm = plm,
    verbalizer = promptVerbalizer,
    freeze_plm= False
)
promptModel=promptModel.cuda()

In [12]:
epochs = 6
no_decay = ['bias', 'layer_norm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in promptModel.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in promptModel.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = torch.optim.AdamW(params=optimizer_grouped_parameters, lr=3e-5)
loss_func = torch.nn.CrossEntropyLoss()

In [13]:
for epoch in range(epochs):
    tot_loss = 0
    for step, inputs in enumerate(train_dataloader):
        inputs = inputs.cuda()
        logits = promptModel(inputs)
        labels = inputs['label']
        loss = loss_func(logits, labels)
        loss.backward()
        tot_loss += loss.item()
        optimizer.step()
        optimizer.zero_grad()

        if step % 150 == 0:
            print("Epoch {}, average loss: {}".format(epoch, tot_loss/(step+1)), flush=True)

Epoch 0, average loss: 5.516399383544922
Epoch 0, average loss: 3.977995147940931
Epoch 0, average loss: 3.9102446995180506
Epoch 0, average loss: 3.798496606729268
Epoch 0, average loss: 3.698083406331687
Epoch 0, average loss: 3.6065098579229535
Epoch 0, average loss: 3.511069106723816
Epoch 0, average loss: 3.459858936424834
Epoch 0, average loss: 3.402552617653251
Epoch 0, average loss: 3.3727770181114294
Epoch 0, average loss: 3.3256734763150146
Epoch 0, average loss: 3.301160977366628
Epoch 0, average loss: 3.268777855792284
Epoch 0, average loss: 3.247982732464245
Epoch 0, average loss: 3.2225674094846233
Epoch 0, average loss: 3.1680162519802675
Epoch 0, average loss: 3.1380975018421116
Epoch 0, average loss: 3.1085160775570926
Epoch 0, average loss: 3.0629945724450947
Epoch 0, average loss: 3.036026598298383
Epoch 0, average loss: 3.0069157800480215
Epoch 0, average loss: 2.979515702590631
Epoch 0, average loss: 2.958554797322269
Epoch 0, average loss: 2.9334431724123315
Epoch

In [14]:
torch.save(promptModel.state_dict(),"checkp/gpt2_trained_model.cp")

In [15]:
alllogits = []
alllabels = []
for step, inputs in enumerate(test_dataloader):
    inputs = inputs.cuda()
    logits = promptModel(inputs)
    labels = inputs['label']
    alllogits.extend(logits.cuda().tolist())
    alllabels.extend(inputs['label'].cuda().tolist())

compute_metrics((alllogits,alllabels))

  after removing the cwd from sys.path.


{'accuracy': 0.5301204819277109,
 'f1': 0.46870223230762326,
 'precision': 0.4638712342506931,
 'recall': 0.5187880442264253}