In [1]:
import torch.nn as nn
from kobert_tokenizer import KoBERTTokenizer
from transformers import BertModel
import torch
import pandas as pd
import os

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the tokenizer stored under the 'skt/kobert-base-v1' checkpoint name, the
# same name the classifier's BERT backbone is loaded from.
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')

In [4]:
class koBertClassifier(nn.Module):
    """KoBERT encoder followed by a two-layer classification head.

    The head reads the encoder's hidden state at sequence position 0, applies a
    hidden-size linear layer, ReLU and dropout, and projects the result to
    ``num_labels`` logits.
    """

    def __init__(self, num_labels):
        """Build the pretrained backbone and the classification head.

        Args:
            num_labels: Number of output logits produced per input sequence.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained('skt/kobert-base-v1')

        bert_config = self.bert.config
        hidden_size = bert_config.hidden_size

        self.pre_classifier = nn.Linear(hidden_size, hidden_size)
        self.classifier = nn.Linear(hidden_size, num_labels)
        # The head's dropout rate is read from the backbone's
        # attention_probs_dropout_prob config entry.
        self.dropout = nn.Dropout(bert_config.attention_probs_dropout_prob)

    def forward(self, input_ids, attention_mask=None):
        """Return classification logits for a batch of token id sequences.

        Args:
            input_ids: Token ids forwarded to the BERT backbone.
            attention_mask: Optional mask forwarded to the BERT backbone.

        Returns:
            The output of the final linear layer (one row of logits per
            sequence in the batch).
        """
        encoder_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # First element of the backbone output is the per-token hidden state;
        # keep only position 0 ([CLS]) of every sequence.
        cls_features = encoder_outputs[0][:, 0]
        head_features = torch.relu(self.pre_classifier(cls_features))
        return self.classifier(self.dropout(head_features))

In [5]:
# Instantiate the classifier with a 775-way output layer.
# NOTE(review): presumably this must equal the number of classes the checkpoint
# loaded in the next cell was trained with -- confirm.
model = koBertClassifier(num_labels=775)

In [6]:
model_path = "koBERT-ko-wikipedia-classifier.prm"

# Deserialize onto the CPU so loading does not depend on which device the
# checkpoint was saved from; the model is moved to the GPU explicitly below.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
state_dict = torch.load(model_path, map_location="cpu")

# strict=False tolerates key mismatches between the checkpoint and the model.
# Report them, otherwise layers left at their initial (untrained) values would
# go unnoticed and silently distort the evaluation below.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        f"Checkpoint key mismatch - missing: {load_result.missing_keys}, "
        f"unexpected: {load_result.unexpected_keys}"
    )

model.to("cuda:0")  # Make sure to move the model to the desired device


koBertClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(8002, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_a

In [7]:
# Read the evaluation set from disk; the next cell pulls its "식품오타" and
# "label" columns out as the model inputs and targets.
test = pd.read_csv("data/food_test_data.csv")

In [8]:
# Convert the two columns of interest to plain Python lists:
# "식품오타" holds the input texts, "label" the target class for each row.
# NOTE(review): "식품오타" presumably means "food name with typos" -- confirm.
test_list_names = test["식품오타"].values.tolist()
test_list_labels = test["label"].values.tolist()

In [9]:
# Alias the column lists under the names handed to tokenize_data() below.
# These are references to the same list objects, not copies.
words = test_list_names
labels = test_list_labels 

In [10]:
# Every text is padded or truncated to this many tokens.
MAX_LENGTH = 100  # Adjust as needed


def tokenize_data(texts, labels):
    """Encode each text with the notebook-level ``tokenizer``.

    Args:
        texts: Iterable of strings to encode.
        labels: Labels belonging to ``texts``; returned unchanged.

    Returns:
        A tuple ``(input_ids, attention_masks, labels)`` where the first two
        entries are lists holding, per text, the ``'input_ids'`` and
        ``'attention_mask'`` values produced by ``tokenizer.encode_plus``.
    """
    # One encode_plus call per text, each padded/truncated to MAX_LENGTH.
    encodings = [
        tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=MAX_LENGTH,
            truncation=True,
            padding='max_length',
            return_attention_mask=True,
        )
        for text in texts
    ]

    input_ids = [encoded['input_ids'] for encoded in encodings]
    attention_masks = [encoded['attention_mask'] for encoded in encodings]
    return input_ids, attention_masks, labels

# Disabled toy example of the expected inputs (kept for reference only):
#texts = ["I love this product!", "This is terrible."]  # Sample texts
#labels = [1, 0]  # Corresponding labels

# Tokenize the full test set; test_labels is the `labels` list passed through unchanged.
test_input_ids, test_attention_masks, test_labels = tokenize_data(words, labels)

In [11]:
from torch.utils.data import DataLoader, TensorDataset

# Convert data to tensors
# Every encoding was padded to MAX_LENGTH, so the nested lists are rectangular.
input_ids = torch.tensor(test_input_ids)
attention_masks = torch.tensor(test_attention_masks)
# NOTE: this rebinds `labels` from the Python list defined earlier to a tensor.
labels = torch.tensor(test_labels)

# Each dataset item is an (input_ids, attention_mask, label) triple.
dataset = TensorDataset(input_ids, attention_masks, labels)
# shuffle is left at its default, so batches follow the dataset order.
dataloader = DataLoader(dataset, batch_size=16)  # Adjust batch size as needed


In [12]:
# Check inference time: run the model over every batch in `dataloader`,
# collect predictions and true labels, and time the whole pass with CUDA events.
model.eval()
all_predictions = []
all_true_labels = []

start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

# start the event
start.record()

# Inference over all data in the dataloader. The timed region covers the
# host-to-device copies, the forward pass and the copy of the results back.
with torch.no_grad():
    for batch in dataloader:
        batch_input_ids, batch_attention_masks, batch_labels = [b.to("cuda:0") for b in batch]

        logits = model(batch_input_ids, batch_attention_masks)
        # Index of the largest logit per row is the predicted class.
        predictions = torch.max(logits, dim=1).indices

        all_predictions.extend(predictions.cpu().numpy())
        all_true_labels.extend(batch_labels.cpu().numpy())

end.record()

# wait until the event is done; elapsed_time() needs both events completed
torch.cuda.synchronize()

elapsed_time = start.elapsed_time(end)  # msec
# Count the samples actually processed instead of relying on the `labels`
# variable from an earlier cell, and avoid dividing by zero on an empty loader.
num_samples = len(all_true_labels)
execution_per_sample = (elapsed_time / 1000) / num_samples if num_samples else float("nan")  # sec
print(f"Total inference time: {elapsed_time / 1000} seconds")
print(f"Inference time per sample: {execution_per_sample} seconds")

Elapsed time: 0.0017130406000842788 seconds


In [15]:
# Accuracy = share of samples whose predicted class equals the true class.
matches = [
    predicted == actual
    for predicted, actual in zip(all_predictions, all_true_labels)
]
correct_predictions = sum(matches)
total_samples = len(all_true_labels)
accuracy = correct_predictions / total_samples
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.8767


In [16]:
from sklearn.metrics import recall_score, precision_score, f1_score

In [17]:
# Micro-averaged recall: counts are pooled over all classes before the ratio is
# taken. The recorded value equals the accuracy printed above.
recall_score(all_true_labels, all_predictions, average='micro')

0.8767123287671232

In [18]:
# Micro-averaged precision over all classes; the recorded value equals the
# accuracy printed above.
precision_score(all_true_labels, all_predictions, average='micro')

0.8767123287671232

In [19]:
# Micro-averaged F1 over all classes; the recorded value equals the accuracy
# printed above. NOTE(review): average='macro' would weight every class
# equally if a per-class view is wanted.
f1_score(all_true_labels, all_predictions, average='micro')

0.8767123287671232