In [1]:
from transformers import AutoTokenizer, RobertaForSequenceClassification, PreTrainedTokenizerFast

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import torch
device = torch.device("cuda:2")

In [5]:

tokenizer = AutoTokenizer.from_pretrained("jason9693/soongsil-bert-small")
model = RobertaForSequenceClassification.from_pretrained("jason9693/soongsil-bert-small").to(device)

Some weights of the model checkpoint at jason9693/soongsil-bert-small were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at jason9693/soongsil-bert-small and are newly initialized: ['classifier.dense.weight', 'classifier.

In [6]:
from datasets import load_dataset

kor_hate_dataset = load_dataset("kor_hate")
kor_unsmile_dataset = load_dataset("smilegate-ai/kor_unsmile")

Using custom data configuration default
Reusing dataset kor_hate (/home/codertimo/.cache/huggingface/datasets/kor_hate/default/1.1.0/9362ba4de9bf62ad689be73f9d39f851d8bdc67c3245085d07cf65f972dba95b)
100%|██████████| 2/2 [00:00<00:00, 506.71it/s]
Using custom data configuration smilegate-ai--kor_unsmile-1dba960877497f9f
Reusing dataset parquet (/home/codertimo/.cache/huggingface/datasets/parquet/smilegate-ai--kor_unsmile-1dba960877497f9f/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901)
100%|██████████| 2/2 [00:00<00:00, 457.05it/s]


In [7]:
kor_hate_dataset["train"][6]

{'comments': '100년안에 남녀간 성전쟁 한번 크게 치룬 후 일부다처제, 여성의 정치참여 금지, 여성 투표권 삭제가 세계의 공통문화로 자리잡을듯. 암탉이 너무 울어댐.',
 'contain_gender_bias': 1,
 'bias': 1,
 'hate': 0}

In [8]:
kor_unsmile_dataset["train"][0]

{'문장': '일안하는 시간은 쉬고싶어서 그런게 아닐까',
 '여성/가족': 0,
 '남성': 0,
 '성소수자': 0,
 '인종/국적': 0,
 '연령': 0,
 '지역': 0,
 '종교': 0,
 '기타 혐오': 0,
 '악플/욕설': 0,
 'clean': 1,
 '개인지칭': 0,
 'labels': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}

In [9]:
merged_dataset = []
for data in kor_hate_dataset["train"]:
    text = data["comments"]
    is_bad = bool(data["contain_gender_bias"] + data["bias"] + data["hate"])
    merged_dataset.append({"text": text, "is_bad": is_bad})
for data in kor_unsmile_dataset["train"]:
    text = data["문장"]
    is_bad = not bool(data["clean"])
    merged_dataset.append({"text": text, "is_bad": is_bad})

In [10]:
len(merged_dataset)

22901

In [11]:
from torch.utils.data import Dataset, DataLoader
from typing import List, Any, Dict, Tuple
import torch
from torch.nn.utils.rnn import pad_sequence

class AbusingDataset(Dataset):
    def __init__(self, data_list: List[Dict[str, Any]], tokenizer: PreTrainedTokenizerFast, max_length: int = 64):
        self.data_list = data_list
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self) -> int:
        return len(self.data_list)
    
    def __getitem__(self, index: int):
        data = self.data_list[index]
        tokenized_text = self.tokenizer(data["text"], max_length=self.max_length, truncation=True, return_tensors="pt")
        label = torch.tensor(int(data["is_bad"]))
        return tokenized_text.input_ids.squeeze(0), tokenized_text.attention_mask.squeeze(0), label

def collate_fn(inputs: List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]):
    padded_input_ids = pad_sequence([input_ids for input_ids, _, _ in inputs]).transpose(0, 1)
    padded_attention_mask = pad_sequence([attention_mask for _, attention_mask, _ in inputs]).transpose(0, 1)
    labels = torch.stack([label for _, _, label in inputs])
    return padded_input_ids, padded_attention_mask, labels


In [12]:
from torch.optim import Adam
import numpy as np

abusing_dataset = AbusingDataset(merged_dataset, tokenizer)
dataloader = DataLoader(abusing_dataset, batch_size=32, collate_fn=collate_fn, shuffle=True)

EPOCHS = 2
optimizer = Adam(model.parameters(), lr=1e-5)
global_step = 1
total_steps = len(dataloader) * EPOCHS
interval_loss = []
interval_corrects = []

for epoch in range(EPOCHS):
    for inputs in dataloader:
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in inputs)
        model_output = model(input_ids, attention_mask, labels=labels)
        
        model.zero_grad()
        model_output.loss.backward()
        optimizer.step()

        if global_step % 100 == 0:
            avg_loss = torch.mean(torch.stack(interval_loss))
            avg_acc = np.mean(interval_corrects)
            print(f"epoch: {epoch} step: {global_step}/{total_steps} loss: {avg_loss} acc: {avg_acc}")
            interval_loss.clear()
            interval_corrects.clear()
        global_step += 1
        interval_loss.append(model_output.loss)
        interval_corrects.extend(model_output.logits.argmax(-1).eq(labels).long().tolist())

epoch: 0 step: 100/1432 loss: 0.4342561960220337 acc: 0.8203914141414141
epoch: 0 step: 200/1432 loss: 0.36914733052253723 acc: 0.8425
epoch: 0 step: 300/1432 loss: 0.36581411957740784 acc: 0.8375
epoch: 0 step: 400/1432 loss: 0.35316985845565796 acc: 0.848125
epoch: 0 step: 500/1432 loss: 0.3507958948612213 acc: 0.845
epoch: 0 step: 600/1432 loss: 0.344075471162796 acc: 0.851875
epoch: 0 step: 700/1432 loss: 0.330769419670105 acc: 0.850625
epoch: 1 step: 800/1432 loss: 0.24692405760288239 acc: 0.9046723110693007
epoch: 1 step: 900/1432 loss: 0.2454180121421814 acc: 0.900625
epoch: 1 step: 1000/1432 loss: 0.235290065407753 acc: 0.908125
epoch: 1 step: 1100/1432 loss: 0.23407284915447235 acc: 0.909375
epoch: 1 step: 1200/1432 loss: 0.23416663706302643 acc: 0.905
epoch: 1 step: 1300/1432 loss: 0.23853227496147156 acc: 0.9003125
epoch: 1 step: 1400/1432 loss: 0.21741698682308197 acc: 0.91375


In [13]:
def check_bad_text(input_text):
    model.eval()
    with torch.no_grad():
        model_inputs = tokenizer(input_text, max_length=64, truncation=True, return_tensors="pt")
        input_ids = model_inputs.input_ids.to(device)
        attention_mask = model_inputs.attention_mask.to(device)
        model_output = model(input_ids, attention_mask)
        output_argmax = model_output.logits.argmax(-1)[0]
        prob = model_output.logits[0].softmax(-1)[1]
    model.train()
    return {"text": input_text, "bad_text_prob": prob.item(), "is_bad": output_argmax.item()}

In [14]:
test_texts = ["와 진짜 맛있어 보임 ㄹㅇ ㅋㅋㅋㅋ", "헐 대박 그거 어떻게 해?", "또라이 새끼", "ㅉㅉ 내가 발로해도 그것보단 잘하겠다"]
for text in test_texts:
    print(check_bad_text(text))

{'text': '와 진짜 맛있어 보임 ㄹㅇ ㅋㅋㅋㅋ', 'bad_text_prob': 0.16767928004264832, 'is_bad': 0}
{'text': '헐 대박 그거 어떻게 해?', 'bad_text_prob': 0.12916354835033417, 'is_bad': 0}
{'text': '또라이 새끼', 'bad_text_prob': 0.9777447581291199, 'is_bad': 1}
{'text': 'ㅉㅉ 내가 발로해도 그것보단 잘하겠다', 'bad_text_prob': 0.6949053406715393, 'is_bad': 1}
