In [2]:
from datasets import load_dataset

data = load_dataset('humane-lab/K-HATERS')
train = data['train']
valid = data['validation']
test = data['test']



In [3]:
train[0]

{'text': '하나도 모르는 얼라 쉭 끼가 설치는꼬라서니가 무릇 텅빈 백 정한테 칼자루 쥐여준 형국 민 좃 당 애 세들 속은 닐리리 맘보 통탄스럽구나 나라의 흥망성쇠 갈림길에 저런 등 신 들이 관여 한다는것이',
 'label': 'L2_hate',
 'target_label': ['political'],
 'offensiveness_rationale': [[8, 10], [11, 14], [50, 51], [54, 57], [93, 96]],
 'target_rationale': [[48, 53]]}

In [4]:
from transformers import AutoTokenizer, ElectraForSequenceClassification, ElectraForPreTraining


id2label = {0: 'L2_hate', 1: 'L1_hate', 2: 'offensive', 3: 'normal'}
label2id = {'L2_hate': 0, 'L1_hate': 1, 'offensive': 2, 'normal': 3}
tokenizer = AutoTokenizer.from_pretrained('beomi/korean-hatespeech-multilabel')
model = ElectraForSequenceClassification.from_pretrained('beomi/korean-hatespeech-multilabel',
                                                         num_labels=4, 
                                                         id2label=id2label, 
                                                         label2id=label2id,
                                                         problem_type='single_label_classification')


train_encodings = tokenizer(train['text'], truncation=True, padding="max_length")
valid_encodings = tokenizer(valid['text'], truncation=True, padding="max_length")
test_encodings = tokenizer(test['text'], truncation=True, padding="max_length")

train_labels = train['label']
valid_labels = valid['label']
test_labels = test['label']



In [27]:
from torch.utils.data import Dataset
import torch

class KoElectraDataset(Dataset):
    def __init__(self, encodings, labels, label2id):
        self.encodings = encodings
        self.labels = labels
        self.label2id = label2id

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        label_str = self.labels[idx]
        label_id = self.label2id[label_str]  # Convert the string label to an integer ID
        item['labels'] = torch.tensor(label_id, dtype=torch.long)  # Ensure the label is a tensor of type long
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = KoElectraDataset(train_encodings, train_labels, label2id)
valid_dataset = KoElectraDataset(valid_encodings, valid_labels, label2id)
test_dataset = KoElectraDataset(test_encodings, test_labels, label2id)

In [28]:
print(test_dataset[5])

{'input_ids': tensor([    2, 22206, 10934,  8076,  8605, 11159, 12670,  7978,     3,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0, 

In [21]:
from transformers import Trainer, TrainingArguments, EvalPrediction
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch.nn.functional as F
import numpy as np

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=32,  # batch size per device during training
    per_device_eval_batch_size=32,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy='epoch',
    save_strategy='epoch',
)


class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        # Retrieve logits from the model's output
        outputs = model(**inputs)
        logits = outputs.get('logits')

        # Fetch the labels
        labels = inputs.get('labels')

        # Calculate the cross-entropy loss
        # The logits are expected to be raw, unnormalized scores and labels should be of type long
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))

        return (loss, outputs) if return_outputs else loss



def compute_metrics(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

custom_trainer = CustomTrainer(
    model=model,  # make sure your model is for multi-class classification
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    # Add other parameters if needed
)



In [29]:
from transformers import ElectraForSequenceClassification, Trainer, TrainingArguments


custom_trainer = CustomTrainer(
    model=model,  # make sure your model is for multi-class classification
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    # Add other parameters if needed
)


model = ElectraForSequenceClassification.from_pretrained('/home/seanko/llm_project/hate_speech/model')


predictions = custom_trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)


In [40]:
from sklearn.metrics import accuracy_score, f1_score

# Convert string labels in the test set to numeric indices using your label2id mapping
numeric_test_labels = [label2id[label] for label in test_labels]


# Now pred_labels and numeric_test_labels are both numeric and can be compared

print(len(test_dataset.labels))
print(len(pred_labels))


10000
10000


In [41]:
from sklearn.metrics import accuracy_score, f1_score

# Calculate metrics using the numeric labels
accuracy = accuracy_score(numeric_test_labels, pred_labels)
f1 = f1_score(numeric_test_labels, pred_labels, average='weighted')  # or 'macro'/'micro' depending on what you want

print("Accuracy:", accuracy)
print("F1 Score:", f1)


Accuracy: 0.6804
F1 Score: 0.6762667633668505


In [67]:
try_one = tokenizer('시@@@@@11발 ㅋㅋ 이거 못참쥬?', truncation=True, padding="max_length")

pred_try = custom_trainer.predict([try_one])

print(pred_try)
print(np.argmax(pred_try.predictions[0]))

answer = np.argmax(pred_try.predictions[0])
print('#####################'*3)
print('abusive language category in Korean:', id2label[answer])

PredictionOutput(predictions=array([[-1.499055 , -3.797113 ,  3.6422424, -2.483691 ]], dtype=float32), label_ids=None, metrics={'test_runtime': 0.018, 'test_samples_per_second': 55.566, 'test_steps_per_second': 55.566})
2
###############################################################
abusive language category in Korean: offensive


In [72]:


while True:
    sentence = input('input sentence: ')
    try_one = tokenizer(sentence, truncation=True, padding="max_length")
    pred_try = custom_trainer.predict([try_one])
    
    answer = np.argmax(pred_try.predictions[0])
    print('문장:', sentence)
    print('abusive language category in Korean:', id2label[answer])
    
    print('Try another sentence? (y/n)')
    ask = input()
    if ask == 'n' or ask == 'N' or ask == 'no' or ask == 'No' or ask == 'NO':
        break

문장: 한국어로 이게 유충 뭘ㄲ1가
abusive language category in Korean: normal
Try another sentence? (y/n)


문장: 유충은 괜찮아?
abusive language category in Korean: normal
Try another sentence? (y/n)


문장: 한국 남자는 한1남 이라고도 함 ㅋㅋ ㅗㅗ
abusive language category in Korean: L1_hate
Try another sentence? (y/n)


: 

In [None]:
custom_trainer = CustomTrainer(
    model=model,  # make sure your model is for multi-class classification
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    # Add other parameters if needed
)



while True:
    sentence = input('input sentence: ')
    try_one = tokenizer(sentence, truncation=True, padding="max_length")
    pred_try = custom_trainer.predict([try_one])
    
    answer = np.argmax(pred_try.predictions[0])
    print('문장:', sentence)
    print('abusive language category in Korean:', id2label[answer])
    
    print('Try another sentence? (y/n)')
    ask = input()
    if ask == 'n' or ask == 'N' or ask == 'no' or ask == 'No' or ask == 'NO':
        break