In [1]:
import torch
import pandas as pd
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import GPT2LMHeadModel
from transformers import GPT2Tokenizer

2023-09-08 20:12:44.520495: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the tokenizer published with the "eaglewatch/gpt2-ko-wikipedia" checkpoint.
# The same checkpoint identifier is used when the classifier is constructed, so
# the two must be changed together.
tokenizer = GPT2Tokenizer.from_pretrained("eaglewatch/gpt2-ko-wikipedia")

In [3]:
class GPT2Classifier(nn.Module):
    """Sequence classifier that feeds GPT-2 language-model logits into a linear head.

    The wrapped ``GPT2LMHeadModel`` produces one vocabulary-sized logit vector
    per input position. One position is selected, passed through dropout, and
    projected to ``num_labels`` class scores.

    Args:
        model_name: Checkpoint identifier or path forwarded to
            ``GPT2LMHeadModel.from_pretrained``.
        num_labels: Number of output classes.
        pooling: Which position feeds the classification head. ``"first"``
            (the default) keeps the original behaviour and reads position 0;
            ``"last"`` reads the last position whose attention mask is
            non-zero, or the final position when no mask is supplied.

    Raises:
        ValueError: If ``pooling`` is neither ``"first"`` nor ``"last"``.

    NOTE(review): GPT-2 uses causal attention, so position 0 can only see the
    first token; with ``pooling="first"`` the rest of the text does not
    influence the prediction. Weights trained under one pooling mode should not
    be expected to work under the other without retraining.
    """

    _POOLING_MODES = ("first", "last")

    def __init__(self, model_name, num_labels=775, pooling="first"):
        if pooling not in self._POOLING_MODES:
            raise ValueError(
                f"pooling must be one of {self._POOLING_MODES}, got {pooling!r}"
            )
        super().__init__()
        self.gpt2 = GPT2LMHeadModel.from_pretrained(model_name)
        self.drop = nn.Dropout(0.1)
        # The head consumes LM logits, hence vocab_size (not hidden size) inputs.
        self.out = nn.Linear(self.gpt2.config.vocab_size, num_labels)
        # Plain attribute: it is not part of the state dict, so existing
        # checkpoints keep loading unchanged.
        self.pooling = pooling

    def forward(self, input_ids, attention_mask=None):
        """Return class logits of shape (batch, num_labels).

        Args:
            input_ids: Token ids, indexed as (batch, position).
            attention_mask: Optional mask aligned with ``input_ids``; non-zero
                entries mark real tokens.
        """
        outputs = self.gpt2(input_ids=input_ids, attention_mask=attention_mask)
        # With no labels passed, the first output item holds the LM logits,
        # indexed as (batch, position, vocab).
        lm_logits = outputs[0]

        # getattr keeps instances created before `pooling` existed working.
        pooling = getattr(self, "pooling", "first")
        if pooling == "last":
            if attention_mask is None:
                pooled = lm_logits[:, -1, :]
            else:
                # Weight each real token by its 1-based position so argmax
                # yields the index of the last non-masked token, regardless of
                # whether padding is on the left or the right.
                seq_len = attention_mask.size(1)
                positions = torch.arange(
                    1, seq_len + 1, device=attention_mask.device
                )
                last_idx = (attention_mask.ne(0).long() * positions).argmax(dim=1)
                rows = torch.arange(lm_logits.size(0), device=lm_logits.device)
                pooled = lm_logits[rows, last_idx.to(lm_logits.device)]
        else:
            pooled = lm_logits[:, 0, :]

        return self.out(self.drop(pooled))

In [4]:
# Build the classifier on the same checkpoint identifier the tokenizer uses.
# num_labels is left at the class default (775).
model = GPT2Classifier(model_name="eaglewatch/gpt2-ko-wikipedia")

In [5]:
model_path = "gpt2-ko-wikipedia-classifier.prm"
# Deserialize onto the CPU first: without map_location, torch.load restores each
# tensor to the device it was saved from, which fails when that device is absent
# and otherwise holds a second copy of the weights in GPU memory.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.to("cuda:0")  # Make sure to move the model to the desired device

GPT2Classifier(
  (gpt2): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(100000, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_features=100000, bias=False)
  )
  (drop): Dropout(p=0.1, inplace=False)
  (out): Linear(in_features=100000, out_features=775, bias=True)
)

In [18]:
# Read the held-out evaluation split into a DataFrame.
test = pd.read_csv(filepath_or_buffer="data/food_aging_test.csv")

In [19]:
# Pull the text column and the label column out as plain Python lists.
# "식품오타" presumably holds (misspelled) food names -- confirm against the CSV.
test_list_names, test_list_labels = (
    test[column].values.tolist() for column in ("식품오타", "label")
)

In [20]:
# Short aliases for the evaluation texts and labels (same list objects, no copy).
words, labels = test_list_names, test_list_labels

In [21]:
# Fixed token length handed to the tokenizer as max_length in tokenize_data:
# longer inputs are truncated and shorter ones are padded up to this size.
MAX_LENGTH = 100  # Adjust as needed

def tokenize_data(texts, labels):
    """Encode each text to fixed-length token ids plus an attention mask.

    Every text is encoded on its own with the module-level ``tokenizer``,
    truncated/padded to ``MAX_LENGTH``.

    Args:
        texts: Iterable of input strings.
        labels: Labels belonging to ``texts``; handed back untouched.

    Returns:
        A tuple ``(input_ids, attention_masks, labels)`` where the first two
        are lists with one entry per text.
    """
    encode_options = dict(
        add_special_tokens=True,
        max_length=MAX_LENGTH,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
    )
    encoded = [tokenizer.encode_plus(item, **encode_options) for item in texts]

    ids_per_text = [entry['input_ids'] for entry in encoded]
    mask_per_text = [entry['attention_mask'] for entry in encoded]
    return ids_per_text, mask_per_text, labels

# Tokenize the evaluation texts; the labels come back unchanged.
test_input_ids, test_attention_masks, test_labels = tokenize_data(
    texts=words,
    labels=labels,
)

In [22]:
# Turn the tokenized lists into tensors, in the order the dataset yields them.
# Note that `labels` is rebound here from the raw list to a tensor.
input_ids, attention_masks, labels = (
    torch.tensor(column)
    for column in (test_input_ids, test_attention_masks, test_labels)
)

# Batches come out as (input_ids, attention_mask, label) triples, unshuffled.
dataset = TensorDataset(input_ids, attention_masks, labels)
dataloader = DataLoader(dataset=dataset, batch_size=16)  # Adjust batch size as needed

In [23]:
# Switch dropout layers to inference behaviour before scoring.
model.eval()
all_predictions = []  # one logit vector (numpy array) per evaluated sample
all_true_labels = []  # the matching ground-truth labels

# Follow whichever device the model currently lives on rather than
# hard-coding "cuda:0", so this cell cannot disagree with the loading cell.
device = next(model.parameters()).device

with torch.no_grad():
    for batch in dataloader:
        batch_input_ids, batch_attention_masks, batch_labels = batch

        logits = model(
            batch_input_ids.to(device),
            batch_attention_masks.to(device),
        )

        # Keep the raw logits (not the argmax) so top-k metrics can be
        # computed afterwards. Labels never need to leave the CPU.
        all_predictions.extend(logits.cpu().numpy())
        all_true_labels.extend(batch_labels.cpu().numpy())

In [24]:
def top_k_accuracy(preds, labels, k=1):
    """Return the fraction of rows whose label is among the k top-scoring classes.

    Args:
        preds: 2-D score tensor, one row per sample and one column per class.
        labels: Tensor of true class indices, one per sample.
        k: How many of the highest-scoring classes count as a hit.

    Returns:
        A Python float: hits divided by ``labels.size(0)``.
    """
    _, ranked_classes = torch.topk(preds, k, dim=1)
    # Repeat each true label across the k columns so it can be compared
    # element-wise with the ranked class indices.
    targets = labels.view(-1, 1).expand_as(ranked_classes)
    hits = (ranked_classes == targets).view(-1).float().sum(0, keepdim=True)
    return hits.item() / labels.size(0)

In [25]:
# Top-5 accuracy over the collected evaluation logits.
top_k_accuracy(
    preds=torch.tensor(all_predictions),
    labels=torch.tensor(all_true_labels),
    k=5,
)

0.6061643835616438

In [26]:
# Top-3 accuracy over the collected evaluation logits.
top_k_accuracy(
    preds=torch.tensor(all_predictions),
    labels=torch.tensor(all_true_labels),
    k=3,
)

0.4589041095890411

In [27]:
# Top-1 (exact match) accuracy over the collected evaluation logits.
top_k_accuracy(
    preds=torch.tensor(all_predictions),
    labels=torch.tensor(all_true_labels),
    k=1,
)

0.24143835616438356