In [8]:
# !pip install hanja
# !pip install mxnet
# !pip install gluonnlp pandas tqdm
# !pip install sentencepiece
# !pip install transformers
# !pip install torch
# !pip install hanja
# !pip install imblearn
# !pip install WordCloud
# !pip install seaborn 

In [1]:
from transformers import BertTokenizerFast, AlbertModel, BertModel, AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
#import hanja
from imblearn.over_sampling import RandomOverSampler
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup
from transformers import AdamW
from collections import defaultdict

import gc
from tqdm import tqdm


In [2]:
tokenizer_bert_kor_base = BertTokenizerFast.from_pretrained("kykim/albert-kor-base")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'AlbertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'AlbertTokenizer'. 
The class this function is called from is 'BertTokenizerFast'.


In [3]:
# electra-kor-base
from transformers import ElectraTokenizerFast, ElectraModel, AdamW, ElectraForSequenceClassification
# tokenizer : kykim/bert-kor-base

def preprocessing(title):
  x = title[:]
  x = re.sub('><', ' ', x)
  x = re.sub('[ㄱ-ㅎ!@#$%^&*(),./?1-9]', ' ', x)
  return x.strip()

class KLUEDataset_train(Dataset):
  def __init__(self, DataFrame) :   
    self.dataset = DataFrame
    self.tokenizer = tokenizer

    # print(self.dataset.describe())

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title', 'topic_idx']].values
    text = row[0]
    y = row[1]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )

    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    return input_ids, attention_mask, y
class KLUEDataset_test(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = tokenizer

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title']].values
    text = row[0]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )

    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    return input_ids, attention_mask


In [4]:
PRE_TRAINED_MODEL_NAME = "kykim/electra-kor-base"
tokenizer = ElectraTokenizerFast.from_pretrained(PRE_TRAINED_MODEL_NAME)
device = torch.device("cuda")


In [4]:
df = pd.read_table('data/한국어욕설데이터.txt', sep='|', names=['sentence','lbl'])
df = df[df['lbl'] == 0]
df = df.dropna()
df['sentence'] = df['sentence'].apply(preprocessing)
df = df[df['sentence'] != ""]

df['범죄'] = 0
df['혐오'] = 0
df['선정'] = 0
df['폭력'] = 0
df['비난'] = 0
df['욕설'] = 0
df['차별'] = 0
df['lbl'] = 1
df.columns = ['sentence', '비도덕아님','범죄','혐오','선정','폭력','비난','욕설','차별']
#pd.concat([origin_train, df])

In [23]:

# Train data
origin_train = pd.read_csv('data/train_ml2.csv')
origin_train = origin_train.dropna()
origin_train = origin_train.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_train.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
origin_train = pd.concat([origin_train, df]) # df는 한국어욕설데이터
#origin_train['sentence'] = origin_train['sentence'].apply(preprocessing)

# Test data
origin_test = pd.read_csv("data/test_ml2.csv")
origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#test['sentence'] = test['sentence'].apply(preprocessing)

label = ['범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']

for lbl in label : 
    
    train = origin_train.copy()
    test = origin_test.copy()
    # 모델 조건에 맞추고, lbl 에 맞추어서 필터링
    titles_t = train.sentence.to_numpy().reshape(-1,1)
    labels_t = train[lbl].to_numpy().reshape(-1,1)
    train = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})

    titles_t = test.sentence.to_numpy().reshape(-1,1)
    labels_t = test[lbl].to_numpy().reshape(-1,1)
    df_valid = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})

    # 위의 모델과 다른 부분
    train_dataset = KLUEDataset_train(train)
    test_dataset = KLUEDataset_test(df_valid)
    gc.collect()
    torch.cuda.empty_cache()
    EPOCHS = 10
    batch_size = 64
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    total_steps = len(train_data_loader) * EPOCHS

    # 모델 학습 파라미터
    warmup_ratio = 0.1
    NUM_OF_MODELS = 1
    import random
    losses = []
    accuracy = []

    model = ElectraForSequenceClassification.from_pretrained("kykim/electra-kor-base", num_labels=2).to(device)
    optimizer = AdamW(model.parameters(), lr=1e-4)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1, num_training_steps=total_steps)

    for i in range(EPOCHS):
        total_loss = 0.0
        correct = 0
        total = 0
        batches = 0

        model.train()

        for input_ids_batch, attention_masks_batch, y_batch in tqdm(train_data_loader):
            optimizer.zero_grad()
            y_batch = y_batch.to(device)
            y_batch = y_batch.long()
            y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0]
            loss = F.cross_entropy(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()

            _, predicted = torch.max(y_pred, 1)
            correct += (predicted == y_batch).sum()
            total += len(y_batch)

            batches += 1
            if batches % 100 == 0:
                print("Batch Loss: ", total_loss, "Accuracy: ", correct.float() / total)
            
        losses.append(total_loss)
        accuracy.append(correct.float() / total)
        print("Train Loss :", total_loss, "Accuracy :", correct.float() / total)

    answer = []
    # torch.save(model, './Electra_kor_base.model')
    model.eval()

    for input_ids_batch, attention_masks_batch in tqdm(test_data_loader):
        y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
        _, predicted = torch.max(y_pred, 1)
        answer.append(predicted.item())
            
    globals()['{}'.format(lbl)] = answer
    gc.collect()
    torch.cuda.empty_cache()


Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are newly initialized: ['classifier.out_proj.weight', 'clas

Batch Loss:  13.394590187817812 Accuracy:  tensor(0.9681, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.48it/s]

Batch Loss:  22.990321865305305 Accuracy:  tensor(0.9745, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 24.494018560275435 Accuracy : tensor(0.9749, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  10.399199925363064 Accuracy:  tensor(0.9787, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  20.356837118044496 Accuracy:  tensor(0.9793, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.497702619992197 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  9.522767862305045 Accuracy:  tensor(0.9808, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  20.059528788551688 Accuracy:  tensor(0.9796, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.674819530919194 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  9.07141046691686 Accuracy:  tensor(0.9820, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  20.021215527318418 Accuracy:  tensor(0.9796, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.52270078379661 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  10.208657464012504 Accuracy:  tensor(0.9791, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  20.187014561146498 Accuracy:  tensor(0.9794, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.405871193856 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  9.448361773043871 Accuracy:  tensor(0.9811, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  20.212850536219776 Accuracy:  tensor(0.9795, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.67826235946268 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  10.72208053432405 Accuracy:  tensor(0.9778, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  20.521180799230933 Accuracy:  tensor(0.9789, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 21.329128820449114 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  10.243052943609655 Accuracy:  tensor(0.9791, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  19.665470778010786 Accuracy:  tensor(0.9800, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.357605037279427 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  9.775055287405849 Accuracy:  tensor(0.9802, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  19.77451529726386 Accuracy:  tensor(0.9798, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.451709885150194 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  9.458491452038288 Accuracy:  tensor(0.9809, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  19.7303069755435 Accuracy:  tensor(0.9798, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 21.319204131141305 Accuracy : tensor(0.9797, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 50.50it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  47.0563845038414 Accuracy:  tensor(0.7809, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  88.581309735775 Accuracy:  tensor(0.7950, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 94.32521495223045 Accuracy : tensor(0.7972, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  32.62237613648176 Accuracy:  tensor(0.8566, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  65.94053096324205 Accuracy:  tensor(0.8548, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 70.86337616294622 Accuracy : tensor(0.8549, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  22.92359720915556 Accuracy:  tensor(0.9059, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  45.9977412968874 Accuracy:  tensor(0.9041, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 49.165005192160606 Accuracy : tensor(0.9053, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  12.020408356562257 Accuracy:  tensor(0.9573, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  25.257835006341338 Accuracy:  tensor(0.9536, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 26.667392695322633 Accuracy : tensor(0.9541, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  8.272591437213123 Accuracy:  tensor(0.9737, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  14.695837871171534 Accuracy:  tensor(0.9756, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 15.473997076973319 Accuracy : tensor(0.9763, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  4.010596518404782 Accuracy:  tensor(0.9881, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  7.715357953216881 Accuracy:  tensor(0.9883, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 8.281810711603612 Accuracy : tensor(0.9882, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  2.300095522077754 Accuracy:  tensor(0.9947, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  4.503092067898251 Accuracy:  tensor(0.9945, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 4.864259565598331 Accuracy : tensor(0.9943, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  1.6949046028312296 Accuracy:  tensor(0.9966, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  2.7279835678637028 Accuracy:  tensor(0.9972, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 2.95983108819928 Accuracy : tensor(0.9972, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.49it/s]

Batch Loss:  1.239050724718254 Accuracy:  tensor(0.9978, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  2.2971128584467806 Accuracy:  tensor(0.9978, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 2.3767106248415075 Accuracy : tensor(0.9979, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  0.9967615827918053 Accuracy:  tensor(0.9975, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  1.8383060483611189 Accuracy:  tensor(0.9981, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 2.1810205062502064 Accuracy : tensor(0.9980, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 49.53it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  23.50068925321102 Accuracy:  tensor(0.9348, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.50it/s]

Batch Loss:  47.02940307557583 Accuracy:  tensor(0.9362, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 50.27205331623554 Accuracy : tensor(0.9368, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  22.55376448109746 Accuracy:  tensor(0.9412, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  44.71983714029193 Accuracy:  tensor(0.9419, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 48.31474718078971 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  22.819447185844183 Accuracy:  tensor(0.9400, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.48it/s]

Batch Loss:  45.08046588674188 Accuracy:  tensor(0.9411, device='cuda:0')


100%|██████████| 215/215 [01:02<00:00,  3.46it/s]


Train Loss : 48.233496118336916 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  22.11922850459814 Accuracy:  tensor(0.9423, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.51it/s]

Batch Loss:  45.02270310372114 Accuracy:  tensor(0.9409, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 47.91277450695634 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  22.07236248627305 Accuracy:  tensor(0.9423, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  44.622188825160265 Accuracy:  tensor(0.9416, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 47.989206206053495 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  22.785250663757324 Accuracy:  tensor(0.9398, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  44.74864358454943 Accuracy:  tensor(0.9413, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 48.08792742341757 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  22.235218595713377 Accuracy:  tensor(0.9419, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  44.96869555488229 Accuracy:  tensor(0.9410, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 48.05977076664567 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  21.853119388222694 Accuracy:  tensor(0.9433, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  44.47278710082173 Accuracy:  tensor(0.9419, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 48.099608954042196 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  21.986756645143032 Accuracy:  tensor(0.9427, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  44.41491534560919 Accuracy:  tensor(0.9419, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 47.984928175807 Accuracy : tensor(0.9415, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  22.712071806192398 Accuracy:  tensor(0.9398, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.42it/s]

Batch Loss:  44.12435494735837 Accuracy:  tensor(0.9423, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 47.83074194192886 Accuracy : tensor(0.9415, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.20it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  46.36316466331482 Accuracy:  tensor(0.7875, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  81.99146345257759 Accuracy:  tensor(0.8161, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 87.25703075528145 Accuracy : tensor(0.8178, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  25.990232847630978 Accuracy:  tensor(0.9003, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  48.794319950044155 Accuracy:  tensor(0.9055, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 52.523977391421795 Accuracy : tensor(0.9041, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  12.827987689524889 Accuracy:  tensor(0.9577, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  26.103999696671963 Accuracy:  tensor(0.9544, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 28.004656298086047 Accuracy : tensor(0.9543, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  6.307475694455206 Accuracy:  tensor(0.9811, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  13.646443798672408 Accuracy:  tensor(0.9781, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 14.407546328846365 Accuracy : tensor(0.9785, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  3.8260517407907173 Accuracy:  tensor(0.9889, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  7.965575432521291 Accuracy:  tensor(0.9881, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 8.506888228585012 Accuracy : tensor(0.9881, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  2.2750217302236706 Accuracy:  tensor(0.9937, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.42it/s]

Batch Loss:  4.553480244008824 Accuracy:  tensor(0.9937, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.51it/s]


Train Loss : 4.6959621323039755 Accuracy : tensor(0.9940, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:33,  3.48it/s]

Batch Loss:  1.951757914852351 Accuracy:  tensor(0.9947, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.52it/s]

Batch Loss:  3.22637835223577 Accuracy:  tensor(0.9959, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.50it/s]


Train Loss : 3.358025370136602 Accuracy : tensor(0.9959, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.49it/s]

Batch Loss:  1.2082629675569478 Accuracy:  tensor(0.9969, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.53it/s]

Batch Loss:  2.135661248204997 Accuracy:  tensor(0.9974, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.50it/s]


Train Loss : 2.201530531368917 Accuracy : tensor(0.9975, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  0.6448022128606681 Accuracy:  tensor(0.9983, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  1.314362420962425 Accuracy:  tensor(0.9984, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 1.4056459939747583 Accuracy : tensor(0.9984, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  0.49814942374359816 Accuracy:  tensor(0.9986, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  1.0015607314999215 Accuracy:  tensor(0.9987, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 1.1938743024074938 Accuracy : tensor(0.9986, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.07it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  17.435232065618038 Accuracy:  tensor(0.9605, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  30.08481414988637 Accuracy:  tensor(0.9618, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 32.19913222640753 Accuracy : tensor(0.9612, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:33,  3.43it/s]

Batch Loss:  10.659193409606814 Accuracy:  tensor(0.9617, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  19.332766788080335 Accuracy:  tensor(0.9648, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 20.766924487426877 Accuracy : tensor(0.9648, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  7.13263523299247 Accuracy:  tensor(0.9752, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.51it/s]

Batch Loss:  14.041650191880763 Accuracy:  tensor(0.9759, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.51it/s]


Train Loss : 15.038507909048349 Accuracy : tensor(0.9762, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  4.658771038404666 Accuracy:  tensor(0.9861, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  9.139307399163954 Accuracy:  tensor(0.9857, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 9.836343200760894 Accuracy : tensor(0.9858, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  3.233680237783119 Accuracy:  tensor(0.9912, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  5.862843474489637 Accuracy:  tensor(0.9917, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 6.253172796336003 Accuracy : tensor(0.9915, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:33,  3.44it/s]

Batch Loss:  2.1217625429853797 Accuracy:  tensor(0.9941, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.52it/s]

Batch Loss:  3.6002569453557953 Accuracy:  tensor(0.9950, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.51it/s]


Train Loss : 3.8929995842045173 Accuracy : tensor(0.9950, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  1.6251119509688579 Accuracy:  tensor(0.9964, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  2.595559060689993 Accuracy:  tensor(0.9970, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 2.776247813773807 Accuracy : tensor(0.9970, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  1.2622307221754454 Accuracy:  tensor(0.9966, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  2.395354354288429 Accuracy:  tensor(0.9973, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 2.4735270154778846 Accuracy : tensor(0.9974, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  0.7193045411841013 Accuracy:  tensor(0.9987, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  1.4515662391786464 Accuracy:  tensor(0.9985, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 1.488726326613687 Accuracy : tensor(0.9986, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  0.4442236915929243 Accuracy:  tensor(0.9992, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  1.1106484264018945 Accuracy:  tensor(0.9990, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 1.2281552861095406 Accuracy : tensor(0.9990, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.10it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  67.40865382552147 Accuracy:  tensor(0.5559, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.46it/s]

Batch Loss:  136.46658340096474 Accuracy:  tensor(0.5483, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.49it/s]


Train Loss : 146.88415762782097 Accuracy : tensor(0.5458, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.50it/s]

Batch Loss:  69.26430350542068 Accuracy:  tensor(0.5280, device='cuda:0')


 93%|█████████▎| 200/215 [00:58<00:04,  3.46it/s]

Batch Loss:  138.39229601621628 Accuracy:  tensor(0.5277, device='cuda:0')


100%|██████████| 215/215 [01:02<00:00,  3.45it/s]


Train Loss : 148.79272037744522 Accuracy : tensor(0.5264, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.50it/s]

Batch Loss:  69.1099768280983 Accuracy:  tensor(0.5395, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.51it/s]

Batch Loss:  138.38515651226044 Accuracy:  tensor(0.5297, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.50it/s]


Train Loss : 148.75412434339523 Accuracy : tensor(0.5303, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  69.23446935415268 Accuracy:  tensor(0.5264, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  138.42568403482437 Accuracy:  tensor(0.5296, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 148.7521454691887 Accuracy : tensor(0.5311, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  69.12759065628052 Accuracy:  tensor(0.5355, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  138.22710579633713 Accuracy:  tensor(0.5334, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 148.59634137153625 Accuracy : tensor(0.5329, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  69.05109322071075 Accuracy:  tensor(0.5353, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  138.0417736172676 Accuracy:  tensor(0.5350, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 148.47334051132202 Accuracy : tensor(0.5334, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.55it/s]

Batch Loss:  68.95970290899277 Accuracy:  tensor(0.5337, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  136.1171628832817 Accuracy:  tensor(0.5449, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 146.55067497491837 Accuracy : tensor(0.5412, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  68.86162382364273 Accuracy:  tensor(0.5333, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  137.35627341270447 Accuracy:  tensor(0.5391, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 147.5697061419487 Accuracy : tensor(0.5389, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  68.36399787664413 Accuracy:  tensor(0.5484, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  134.05816394090652 Accuracy:  tensor(0.5578, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 143.88519167900085 Accuracy : tensor(0.5634, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  61.63839638233185 Accuracy:  tensor(0.6497, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  121.55303835868835 Accuracy:  tensor(0.6665, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 130.72609615325928 Accuracy : tensor(0.6663, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.55it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  11.196747434791178 Accuracy:  tensor(0.9758, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  19.367962005082518 Accuracy:  tensor(0.9801, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 20.535777980927378 Accuracy : tensor(0.9804, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  8.432736861519516 Accuracy:  tensor(0.9837, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  16.54228451102972 Accuracy:  tensor(0.9841, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 17.764419600367546 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  8.355577474460006 Accuracy:  tensor(0.9837, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  16.207139627076685 Accuracy:  tensor(0.9844, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.441075582057238 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  7.58441302459687 Accuracy:  tensor(0.9856, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  16.149737973697484 Accuracy:  tensor(0.9845, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 17.680459030903876 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  8.021815697662532 Accuracy:  tensor(0.9847, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  15.895370747894049 Accuracy:  tensor(0.9848, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.508100809529424 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  7.845048529095948 Accuracy:  tensor(0.9850, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  16.206845394335687 Accuracy:  tensor(0.9844, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 17.51593950483948 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  8.026616112329066 Accuracy:  tensor(0.9845, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.40it/s]

Batch Loss:  16.053840718232095 Accuracy:  tensor(0.9845, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.495554560795426 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  8.249515539966524 Accuracy:  tensor(0.9841, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  16.177296007052064 Accuracy:  tensor(0.9844, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.389624001458287 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:33,  3.48it/s]

Batch Loss:  8.346483597531915 Accuracy:  tensor(0.9837, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  16.183232481591403 Accuracy:  tensor(0.9844, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.389760157093406 Accuracy : tensor(0.9843, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  9.094253836199641 Accuracy:  tensor(0.9819, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  16.235189348459244 Accuracy:  tensor(0.9842, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.316950658336282 Accuracy : tensor(0.9843, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.40it/s]
Some weights of the model checkpoint at kykim/electra-kor-base were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at kykim/electra-kor-base and are new

Batch Loss:  11.375748574733734 Accuracy:  tensor(0.9723, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  19.86803522054106 Accuracy:  tensor(0.9780, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 21.4413458481431 Accuracy : tensor(0.9781, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:33,  3.48it/s]

Batch Loss:  8.650474117137492 Accuracy:  tensor(0.9833, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  16.926445461809635 Accuracy:  tensor(0.9837, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.28882807586342 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  9.291640089824796 Accuracy:  tensor(0.9816, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  16.426771674770862 Accuracy:  tensor(0.9841, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.074561886955053 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.50it/s]

Batch Loss:  8.48924916703254 Accuracy:  tensor(0.9836, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  16.948956962674856 Accuracy:  tensor(0.9836, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.269525412470102 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  7.613265208899975 Accuracy:  tensor(0.9855, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  17.226066193543375 Accuracy:  tensor(0.9831, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.048325183801353 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  8.661855267360806 Accuracy:  tensor(0.9830, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  16.89760028105229 Accuracy:  tensor(0.9835, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.05113382358104 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  8.790638709440827 Accuracy:  tensor(0.9827, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  16.7643469190225 Accuracy:  tensor(0.9837, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.049488897435367 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  8.285851253196597 Accuracy:  tensor(0.9839, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  17.05088130570948 Accuracy:  tensor(0.9834, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.065665747970343 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  7.918878165073693 Accuracy:  tensor(0.9847, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  16.504520430229604 Accuracy:  tensor(0.9839, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 17.964671134017408 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  8.25918399170041 Accuracy:  tensor(0.9839, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  16.317902948707342 Accuracy:  tensor(0.9841, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 17.985238322988153 Accuracy : tensor(0.9836, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.29it/s]


In [24]:

# origin_test = pd.read_csv("test_ml2.csv")
# origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
# origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']

# kykim/electra-kor-base
model_name = "electra-kor-base"

test_label = []
for idx, row in origin_test.iloc[:, [1,2,3,4,5,6,7,8]].iterrows() :
  test_label.append(list(row))

# 각 모델별 결과를 합친 것 비교해보기
count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, 비도덕아님, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("8개 Label 예측 accuracy :", accuracy)

result = {
    "범죄" : 범죄, 
    "혐오" : 혐오, 
    "선정" : 선정, 
    "비도덕아님" : 비도덕아님, 
    "폭력" : 폭력, 
    "비난" : 비난, 
    "욕설" : 욕설, 
    "차별" : 차별
}
result = pd.DataFrame(result)
result.to_csv("result/label8_{}.csv".format(model_name))


# 비도덕아님은 예측하지 않게 진행
# 비도덕아님은 다른 케이스가 모두 False 인 경우
t = []
for i, data in enumerate(zip(범죄, 혐오, 선정, 폭력, 비난, 욕설, 차별)) :
    if sum(list(data)) == 0  :
        t.append(1)
    else :
        t.append(0)


count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, t, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("7개 Label 예측 accuracy :", accuracy)      

result = pd.DataFrame(result)
result.to_csv("result/label7_{}.csv".format(model_name))


425
8개 Label 예측 accuracy : 0.4094412331406551
474
7개 Label 예측 accuracy : 0.45664739884393063


In [17]:
j=0
c=0
for i in test_label :
    if i[0] == 범죄[j] :
        c += 1
print(c/1038)

0.9903660886319846


In [None]:
# 12.11 에 와서 -> 이게 바뀌는지 봐야함.
# 535
# 8개 Label 예측 accuracy : 0.5154142581888247
# 666
# 7개 Label 예측 accuracy : 0.6416184971098265

In [8]:
# 데이터 저장 형식
result[:5]

Unnamed: 0,범죄,혐오,선정,비도덕아님,폭력,비난,욕설,차별
0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,1,0,0
2,0,0,0,0,0,1,0,0
3,0,0,0,0,0,1,0,0
4,0,0,0,0,0,1,0,0


In [23]:
# 모델 저장하는 방법 -> 용량이 너무 큼
# model_name = 'Electra_kor_base'
# torch.save(model, "model/{}_{}.pt".format(model_name,lbl))

In [24]:
# !pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

sh: 0: getcwd() failed: No such file or directory
The folder you are executing pip from can no longer be found.


In [36]:

class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return len(self.labels)
    
    

class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=2,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device),return_dict=False)
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc
import gc

In [37]:
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
from tqdm import tqdm, tqdm_notebook
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model


device = torch.device("cuda")
bertmodel, vocab = get_pytorch_kobert_model()



# Train data
origin_train = pd.read_csv('train_ml2.csv')
origin_train = origin_train.dropna()
origin_train = origin_train.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_train.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#origin_train['sentence'] = origin_train['sentence'].apply(preprocessing)

# Test data
origin_test = pd.read_csv("test_ml2.csv")
origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#test['sentence'] = test['sentence'].apply(preprocessing)

label = ['범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']

# label이 8개 이렇게 되어있고

max_len = 64 # 
batch_size = 64
warmup_ratio = 0.1
num_epochs = 10
max_grad_norm = 1
log_interval = 50
learning_rate =  1e-5


for lbl in label : 
        
    
    train = origin_train.copy()
    test = origin_test.copy()
    
    titles_t = train.sentence.to_numpy().reshape(-1,1)
    labels_t = train[lbl].to_numpy().reshape(-1,1)
    train = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})

    titles_t = test.sentence.to_numpy().reshape(-1,1)
    labels_t = test[lbl].to_numpy().reshape(-1,1)
    df_valid = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})
    
    dataset = []
    for title, label in zip(train.title.to_list(), train.topic_idx.to_list()):
      dataset.append([title,label])
    dataset_train = dataset[:].copy()
    dataset_test = random.sample(dataset,2000).copy() # random 으로 2000개 뽑게하기

    len(dataset_train), len(dataset_test)

    tokenizer = get_tokenizer()
    tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

## Setting parameters
    data_train = BERTDataset(dataset_train, 0, 1, tok, max_len, True, False)
    data_test = BERTDataset(dataset_test, 0, 1, tok, max_len, True, False)

    train_dataloader = torch.utils.data.DataLoader(data_train,  shuffle=True,batch_size=batch_size, num_workers=4)
    test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=4)

    gc.collect()
    torch.cuda.empty_cache()

    valid_dataset = []
    for title, label in zip(df_valid.title.to_list(), np.zeros(len(df_valid))):
      valid_dataset.append([title,label])
    real_test_data = BERTDataset(valid_dataset, 0, 1, tok, max_len, True, False)
    real_test_dataloader = torch.utils.data.DataLoader(real_test_data, batch_size=1, num_workers=4)



    NUM_OF_MODELS = 1
    import random
    for NUM in range(NUM_OF_MODELS):
      dr_r = 0.5
      print("epoch: ",NUM+1)
      print("drop out rate: ",dr_r)
      model = BERTClassifier(bertmodel,  dr_rate=dr_r ).to(device)
      # Prepare optimizer and schedule (linear warmup and decay)
      no_decay = ['bias', 'LayerNorm.weight']
      optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
      ]
      optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
      loss_fn = nn.CrossEntropyLoss()
      t_total = len(train_dataloader) * num_epochs
      warmup_step = int(t_total * warmup_ratio)
      scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)
      for e in range(num_epochs):
          train_acc = 0.0
          test_acc = 0.0
          model.train()
          for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):
              optimizer.zero_grad()
              token_ids = token_ids.long().to(device)
              segment_ids = segment_ids.long().to(device)
              valid_length= valid_length
              label = label.long().to(device)
              out = model(token_ids, valid_length, segment_ids)
              loss = loss_fn(out, label)
              loss.backward()
              torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
              optimizer.step()
              scheduler.step()  # Update learning rate schedule
              train_acc += calc_accuracy(out, label)
              if batch_id % log_interval == 0:
                  print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
          print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
          model.eval()
          for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):
              token_ids = token_ids.long().to(device)
              segment_ids = segment_ids.long().to(device)
              valid_length= valid_length
              label = label.long().to(device)
              out = model(token_ids, valid_length, segment_ids)
              test_acc += calc_accuracy(out, label)
          print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))
      answer=[]
      model.eval()
      for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(real_test_dataloader)):
              token_ids = token_ids.long().to(device)
              segment_ids = segment_ids.long().to(device)
              valid_length= valid_length
              label = label.long().to(device)
              out = model(token_ids, valid_length, segment_ids)
              max_vals, max_indices = torch.max(out, 1)
              answer.append(max_indices[0].item())
      globals()['{}'.format(lbl)] = answer
      gc.collect()
      torch.cuda.empty_cache()

using cached model
using cached model
using cached model
epoch:  1
drop out rate:  0.5


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):


  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):


  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(real_test_dataloader)):


  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/157 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/32 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

  0%|          | 0/1038 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [38]:

# origin_test = pd.read_csv("test_ml2.csv")
# origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
# origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']

# kykim/electra-kor-base
model_name = "kobert"

test_label = []
for idx, row in origin_test.iloc[:, [1,2,3,4,5,6,7,8]].iterrows() :
  test_label.append(list(row))

# 각 모델별 결과를 합친 것 비교해보기
count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, 비도덕아님, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("8개 Label 예측 accuracy :", accuracy)

result = {
    "범죄" : 범죄, 
    "혐오" : 혐오, 
    "선정" : 선정, 
    "비도덕아님" : 비도덕아님, 
    "폭력" : 폭력, 
    "비난" : 비난, 
    "욕설" : 욕설, 
    "차별" : 차별
}
result = pd.DataFrame(result)
result.to_csv("predict_result/max64_label8_{}.csv".format(model_name))


# 비도덕아님은 예측하지 않게 진행
# 비도덕아님은 다른 케이스가 모두 False 인 경우
t = []
for i, data in enumerate(zip(범죄, 혐오, 선정, 폭력, 비난, 욕설, 차별)) :
    if sum(list(data)) == 0  :
        t.append(1)
    else :
        t.append(0)


count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, t, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("7개 Label 예측 accuracy :", accuracy)      

result = pd.DataFrame(result)
result.to_csv("predict_result/max64_abel7_{}.csv".format(model_name))


591
8개 Label 예측 accuracy : 0.569364161849711
658
7개 Label 예측 accuracy : 0.6339113680154143


In [7]:
class KLUEDataset_train(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

    # print(self.dataset.describe())

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title', 'topic_idx']].values
    text = row[0]
    y = row[1]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )

    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    return input_ids, attention_mask, y
class KLUEDataset_test(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")
    # print(self.dataset.describe())

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title']].values
    text = row[0]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=32,
        pad_to_max_length=True,
        add_special_tokens=True
    )

    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    return input_ids, attention_mask

In [49]:
# koelectra


# Train data
origin_train = pd.read_csv('train_ml2.csv')
origin_train = origin_train.dropna()
origin_train = origin_train.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_train.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
origin_train = pd.concat([origin_train, df]) # df는 한국어욕설데이터

#origin_train['sentence'] = origin_train['sentence'].apply(preprocessing)

# Test data
origin_test = pd.read_csv("test_ml2.csv")
origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#test['sentence'] = test['sentence'].apply(preprocessing)

label = ['범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']

device = torch.device("cuda")
bertmodel, vocab = get_pytorch_kobert_model()

# 모델 학습 파라미터
epochs = 10
batch_size = 64
warmup_ratio=0.1

for lbl in label : 


    train = origin_train.copy()
    test = origin_test.copy()
    # 모델 조건에 맞추고, lbl 에 맞추어서 필터링
    titles_t = train.sentence.to_numpy().reshape(-1,1)
    labels_t = train[lbl].to_numpy().reshape(-1,1)
    train = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})

    titles_t = test.sentence.to_numpy().reshape(-1,1)
    labels_t = test[lbl].to_numpy().reshape(-1,1)
    df_valid = pd.DataFrame({'title':titles_t.reshape(-1), 'topic':labels_t.reshape(-1)})


    train_dataset = KLUEDataset_train(train)
    test_dataset = KLUEDataset_test(df_valid)


    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    t_total = len(train_loader) * epochs
    gc.collect()
    torch.cuda.empty_cache()

    losses = []
    accuracy = []
    model = ElectraForSequenceClassification.from_pretrained('monologg/koelectra-base-v3-discriminator', num_labels=2).to(device)
    optimizer = AdamW(model.parameters(), lr=1e-4)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1, num_training_steps=t_total)

    for i in range(epochs):
      total_loss = 0.0
      correct = 0
      total = 0
      batches = 0

      model.train()

      for input_ids_batch, attention_masks_batch, y_batch in tqdm(train_loader):
        optimizer.zero_grad()
        y_batch = y_batch.to(device)
        y_batch = y_batch.long()
        y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0]
        loss = F.cross_entropy(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

        _, predicted = torch.max(y_pred, 1)
        correct += (predicted == y_batch).sum()
        total += len(y_batch)

        batches += 1
        if batches % 100 == 0:
          print("Batch Loss: ", total_loss, "Accuracy: ", correct.float() / total)
          
      losses.append(total_loss)
      accuracy.append(correct.float() / total)
      print("Train Loss :", total_loss, "Accuracy :", correct.float() / total)

    answer = []
    model.eval()
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
      y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
      _, predicted = torch.max(y_pred, 1)
      answer.append(predicted.item())

    globals()['{}'.format(lbl)] = answer

using cached model
using cached model


Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

Batch Loss:  15.288895254954696 Accuracy:  tensor(0.9639, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 22.593319612555206 Accuracy : tensor(0.9668, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  12.712724027223885 Accuracy:  tensor(0.9725, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 20.097397065721452 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  12.942782822996378 Accuracy:  tensor(0.9716, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 19.958248691633344 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  12.910424133762717 Accuracy:  tensor(0.9720, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 20.257972663268447 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  13.327732952311635 Accuracy:  tensor(0.9705, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 19.965213684365153 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.56it/s]

Batch Loss:  12.703487074002624 Accuracy:  tensor(0.9725, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 20.10347017645836 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.62it/s]

Batch Loss:  11.864140184596181 Accuracy:  tensor(0.9745, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 20.039602670818567 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  13.5395314283669 Accuracy:  tensor(0.9698, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 19.93440292775631 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  13.06418752670288 Accuracy:  tensor(0.9712, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 19.93947683647275 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  12.814404709264636 Accuracy:  tensor(0.9720, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 19.99951527826488 Accuracy : tensor(0.9721, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 77.06it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  57.760930955410004 Accuracy:  tensor(0.7058, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 86.19218555092812 Accuracy : tensor(0.7297, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  41.84155675768852 Accuracy:  tensor(0.8130, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 65.05005413293839 Accuracy : tensor(0.8145, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  28.04600566625595 Accuracy:  tensor(0.8884, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 45.087244153022766 Accuracy : tensor(0.8827, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  16.98375542834401 Accuracy:  tensor(0.9384, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.62it/s]


Train Loss : 27.348981399089098 Accuracy : tensor(0.9341, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  12.178300315514207 Accuracy:  tensor(0.9548, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.62it/s]


Train Loss : 17.47797435708344 Accuracy : tensor(0.9588, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  6.724168768152595 Accuracy:  tensor(0.9747, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 11.040841035544872 Accuracy : tensor(0.9727, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  4.3312088653910905 Accuracy:  tensor(0.9787, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 6.543422510847449 Accuracy : tensor(0.9810, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  2.714802481350489 Accuracy:  tensor(0.9866, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 4.564054099377245 Accuracy : tensor(0.9866, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  2.5595095983007923 Accuracy:  tensor(0.9891, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 3.641851803055033 Accuracy : tensor(0.9896, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.56it/s]

Batch Loss:  2.5360665838234127 Accuracy:  tensor(0.9887, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 4.156946417875588 Accuracy : tensor(0.9883, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 75.72it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  29.61694336682558 Accuracy:  tensor(0.9166, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.57it/s]


Train Loss : 45.5176515057683 Accuracy : tensor(0.9178, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  28.472519859671593 Accuracy:  tensor(0.9191, device='cuda:0')


100%|██████████| 157/157 [00:44<00:00,  3.57it/s]


Train Loss : 44.436636567115784 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.55it/s]

Batch Loss:  28.073619604110718 Accuracy:  tensor(0.9200, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 44.18334402143955 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.41it/s]

Batch Loss:  27.469634041190147 Accuracy:  tensor(0.9222, device='cuda:0')


100%|██████████| 157/157 [00:44<00:00,  3.49it/s]


Train Loss : 44.10988922417164 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.39it/s]

Batch Loss:  29.099820353090763 Accuracy:  tensor(0.9153, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.45it/s]


Train Loss : 44.115248151123524 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.46it/s]

Batch Loss:  27.836469665169716 Accuracy:  tensor(0.9205, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.48it/s]


Train Loss : 43.80466619879007 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.41it/s]

Batch Loss:  27.57131016999483 Accuracy:  tensor(0.9214, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.46it/s]


Train Loss : 43.84715238958597 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.39it/s]

Batch Loss:  26.971751891076565 Accuracy:  tensor(0.9237, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.47it/s]


Train Loss : 43.96348003298044 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.52it/s]

Batch Loss:  27.997977398335934 Accuracy:  tensor(0.9198, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.44it/s]


Train Loss : 43.78875092417002 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:17,  3.28it/s]

Batch Loss:  27.8783957362175 Accuracy:  tensor(0.9200, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.47it/s]


Train Loss : 43.90380384773016 Accuracy : tensor(0.9198, device='cuda:0')


100%|██████████| 1038/1038 [00:15<00:00, 66.86it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  40.61841091513634 Accuracy:  tensor(0.8181, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.45it/s]


Train Loss : 60.1235159188509 Accuracy : tensor(0.8299, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:15,  3.60it/s]

Batch Loss:  23.205195389688015 Accuracy:  tensor(0.9050, device='cuda:0')


100%|██████████| 157/157 [00:44<00:00,  3.56it/s]


Train Loss : 37.07322799414396 Accuracy : tensor(0.9037, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  11.03757930546999 Accuracy:  tensor(0.9628, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 18.307035990059376 Accuracy : tensor(0.9592, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  5.337846607901156 Accuracy:  tensor(0.9822, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 8.17071466264315 Accuracy : tensor(0.9829, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  2.8227990829618648 Accuracy:  tensor(0.9916, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.62it/s]


Train Loss : 5.0146864500129595 Accuracy : tensor(0.9903, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  1.5623149495804682 Accuracy:  tensor(0.9966, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 2.2957825862104073 Accuracy : tensor(0.9966, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  1.2286376578267664 Accuracy:  tensor(0.9970, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 1.6668928664876148 Accuracy : tensor(0.9976, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  0.6899726871051826 Accuracy:  tensor(0.9981, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.62it/s]


Train Loss : 1.4236043368582614 Accuracy : tensor(0.9979, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  0.5529598881257698 Accuracy:  tensor(0.9991, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 0.745619865541812 Accuracy : tensor(0.9993, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  0.523677988210693 Accuracy:  tensor(0.9992, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 0.7382146347663365 Accuracy : tensor(0.9993, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 76.58it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  18.393063232302666 Accuracy:  tensor(0.9416, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.57it/s]


Train Loss : 25.66583321802318 Accuracy : tensor(0.9464, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  10.318311570212245 Accuracy:  tensor(0.9634, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 15.939173456281424 Accuracy : tensor(0.9641, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  6.754561989568174 Accuracy:  tensor(0.9777, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 10.193436105269939 Accuracy : tensor(0.9791, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  3.7418945456156507 Accuracy:  tensor(0.9898, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 6.914555635186844 Accuracy : tensor(0.9870, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  2.568342829006724 Accuracy:  tensor(0.9916, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 4.041350951418281 Accuracy : tensor(0.9918, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  1.5178962076024618 Accuracy:  tensor(0.9958, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 2.4613745429960545 Accuracy : tensor(0.9955, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  0.7784098747360986 Accuracy:  tensor(0.9981, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 1.3662483229709323 Accuracy : tensor(0.9980, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  0.5875144973979332 Accuracy:  tensor(0.9987, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.58it/s]


Train Loss : 0.8677902331983205 Accuracy : tensor(0.9989, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  0.27626513663562946 Accuracy:  tensor(0.9995, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 0.5162992714031134 Accuracy : tensor(0.9994, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:15,  3.60it/s]

Batch Loss:  0.25821368911419995 Accuracy:  tensor(0.9992, device='cuda:0')


100%|██████████| 157/157 [00:44<00:00,  3.55it/s]


Train Loss : 0.4756806503573898 Accuracy : tensor(0.9993, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 74.30it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  56.10804325342178 Accuracy:  tensor(0.7259, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 86.61087748408318 Accuracy : tensor(0.7310, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  41.6412316262722 Accuracy:  tensor(0.8269, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 65.4357185959816 Accuracy : tensor(0.8265, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  27.357877880334854 Accuracy:  tensor(0.9002, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 44.21025660634041 Accuracy : tensor(0.8964, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.54it/s]

Batch Loss:  18.774073775857687 Accuracy:  tensor(0.9369, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 28.620267940685153 Accuracy : tensor(0.9379, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  11.136368814855814 Accuracy:  tensor(0.9650, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 17.48600714467466 Accuracy : tensor(0.9653, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  6.332625784911215 Accuracy:  tensor(0.9830, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 10.598265287466347 Accuracy : tensor(0.9810, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  4.405359475873411 Accuracy:  tensor(0.9880, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 6.377321615815163 Accuracy : tensor(0.9893, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.50it/s]

Batch Loss:  2.3509975457563996 Accuracy:  tensor(0.9945, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 4.596474684774876 Accuracy : tensor(0.9931, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  2.353221276309341 Accuracy:  tensor(0.9953, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 3.588311219587922 Accuracy : tensor(0.9954, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  1.9441037664655596 Accuracy:  tensor(0.9959, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 2.9724361828994006 Accuracy : tensor(0.9959, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 77.71it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  12.79942156560719 Accuracy:  tensor(0.9758, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.58it/s]


Train Loss : 16.806584771256894 Accuracy : tensor(0.9782, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  10.410325517877936 Accuracy:  tensor(0.9772, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 16.335363627411425 Accuracy : tensor(0.9777, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:15,  3.58it/s]

Batch Loss:  9.831368526443839 Accuracy:  tensor(0.9802, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 16.523315500468016 Accuracy : tensor(0.9785, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  10.554940344765782 Accuracy:  tensor(0.9778, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 15.711633271537721 Accuracy : tensor(0.9785, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  9.270356346853077 Accuracy:  tensor(0.9742, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 12.8575752642937 Accuracy : tensor(0.9770, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  6.025044774170965 Accuracy:  tensor(0.9812, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 9.125474881613627 Accuracy : tensor(0.9816, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.53it/s]

Batch Loss:  5.144832787569612 Accuracy:  tensor(0.9820, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 7.0757166284602135 Accuracy : tensor(0.9843, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  2.884013335686177 Accuracy:  tensor(0.9917, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 4.417236692854203 Accuracy : tensor(0.9909, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  2.1047051191562787 Accuracy:  tensor(0.9936, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 3.339549877215177 Accuracy : tensor(0.9930, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.55it/s]

Batch Loss:  1.8588071737904102 Accuracy:  tensor(0.9930, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 2.976672148157377 Accuracy : tensor(0.9929, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 76.10it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  14.136485654860735 Accuracy:  tensor(0.9717, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 19.426707946695387 Accuracy : tensor(0.9752, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  11.113577902317047 Accuracy:  tensor(0.9767, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 17.00074844248593 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  10.931007764302194 Accuracy:  tensor(0.9772, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 17.046647581271827 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  11.299542313441634 Accuracy:  tensor(0.9762, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 17.11162727046758 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  10.71101238578558 Accuracy:  tensor(0.9778, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 17.11605992168188 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  10.613087646663189 Accuracy:  tensor(0.9781, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 16.946553617715836 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:15,  3.58it/s]

Batch Loss:  10.505693417042494 Accuracy:  tensor(0.9783, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.57it/s]


Train Loss : 16.904724905267358 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  11.01854045689106 Accuracy:  tensor(0.9769, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 17.041482841596007 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  11.084174055606127 Accuracy:  tensor(0.9767, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 16.977703448385 Accuracy : tensor(0.9775, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  10.530230436474085 Accuracy:  tensor(0.9781, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 16.96262983419001 Accuracy : tensor(0.9775, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 77.60it/s]


In [50]:

model_name = "ko-electra"

test_label = []
for idx, row in origin_test.iloc[:, [1,2,3,4,5,6,7,8]].iterrows() :
  test_label.append(list(row))

# 각 모델별 결과를 합친 것 비교해보기
count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, 비도덕아님, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("8개 Label 예측 accuracy :", accuracy)

result = {
    "범죄" : 범죄, 
    "혐오" : 혐오, 
    "선정" : 선정, 
    "비도덕아님" : 비도덕아님, 
    "폭력" : 폭력, 
    "비난" : 비난, 
    "욕설" : 욕설, 
    "차별" : 차별
}
result = pd.DataFrame(result)
result.to_csv("predict_result/label8_{}.csv".format(model_name))


# 비도덕아님은 예측하지 않게 진행
# 비도덕아님은 다른 케이스가 모두 False 인 경우
t = []
for i, data in enumerate(zip(범죄, 혐오, 선정, 폭력, 비난, 욕설, 차별)) :
    if sum(list(data)) == 0  :
        t.append(1)
    else :
        t.append(0)


count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, t, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("7개 Label 예측 accuracy :", accuracy)      

result = pd.DataFrame(result)
result.to_csv("predict_result/label7_{}.csv".format(model_name))


603
8개 Label 예측 accuracy : 0.5809248554913294
720
7개 Label 예측 accuracy : 0.6936416184971098


In [5]:
# koelectra v1

class KLUEDataset_train(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-discriminator")

    # print(self.dataset.describe())

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title', 'topic_idx']].values
    text = row[0]
    y = row[1]
    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]
    return input_ids, attention_mask, y
class KLUEDataset_test(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-discriminator")
    # print(self.dataset.describe())

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title']].values
    text = row[0]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]
    return input_ids, attention_mask

In [6]:
# koelectra v1
from transformers import ElectraTokenizerFast, ElectraModel, AdamW, ElectraForSequenceClassification


import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
from tqdm import tqdm, tqdm_notebook
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model


device = torch.device("cuda")
bertmodel, vocab = get_pytorch_kobert_model()

# Train data
origin_train = pd.read_csv('data/train_ml2.csv')
origin_train = origin_train.dropna()
origin_train = origin_train.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_train.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
origin_train = pd.concat([origin_train, df]) # df는 한국어욕설데이터

#origin_train['sentence'] = origin_train['sentence'].apply(preprocessing)

# Test data
origin_test = pd.read_csv("data/test_ml2.csv")
origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#test['sentence'] = test['sentence'].apply(preprocessing)

device = torch.device("cuda")
bertmodel, vocab = get_pytorch_kobert_model()

# 모델 학습 파라미터
epochs = 10
batch_size = 64
warmup_ratio=0.1

label = ['범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']

for lbl in label : 

    train = origin_train.copy()
    test = origin_test.copy()
    # 모델 조건에 맞추고, lbl 에 맞추어서 필터링
    titles_t = train.sentence.to_numpy().reshape(-1,1)
    labels_t = train[lbl].to_numpy().reshape(-1,1)
    train = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})

    titles_t = test.sentence.to_numpy().reshape(-1,1)
    labels_t = test[lbl].to_numpy().reshape(-1,1)
    df_valid = pd.DataFrame({'title':titles_t.reshape(-1), 'topic':labels_t.reshape(-1)})



    train_dataset = KLUEDataset_train(train)
    test_dataset = KLUEDataset_test(df_valid)
    # 모델 학습 파라미터

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    t_total = len(train_loader) * epochs
    losses = []
    accuracy = []
    model = ElectraForSequenceClassification.from_pretrained('monologg/koelectra-base-discriminator', num_labels=2).to(device)
    optimizer = AdamW(model.parameters(), lr=1e-4)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1, num_training_steps=t_total)
    for i in range(epochs):
      total_loss = 0.0
      correct = 0
      total = 0
      batches = 0

      model.train()
      for input_ids_batch, attention_masks_batch, y_batch in tqdm(train_loader):
        optimizer.zero_grad()
        y_batch = y_batch.to(device)
        y_batch = y_batch.long()
        y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0]
        loss = F.cross_entropy(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

        _, predicted = torch.max(y_pred, 1)
        correct += (predicted == y_batch).sum()
        total += len(y_batch)

        batches += 1
        if batches % 100 == 0:
          print("Batch Loss: ", total_loss, "Accuracy: ", correct.float() / total)
          
      losses.append(total_loss)
      accuracy.append(correct.float() / total)
      print("Train Loss :", total_loss, "Accuracy :", correct.float() / total)

    answer = []
    model.eval()
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
      y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
      _, predicted = torch.max(y_pred, 1)
      answer.append(predicted.item())
      globals()['{}'.format(lbl)] = answer
# 이거 테스트중이었음

using cached model
using cached model
using cached model
using cached model


Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-discriminator and are newly initialized: ['clas

Batch Loss:  12.777726512402296 Accuracy:  tensor(0.9600, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:08,  1.83it/s]

Batch Loss:  21.900274272076786 Accuracy:  tensor(0.9702, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.89it/s]


Train Loss : 23.421209746040404 Accuracy : tensor(0.9705, device='cuda:0')


 47%|████▋     | 100/215 [00:51<01:00,  1.90it/s]

Batch Loss:  9.401167838368565 Accuracy:  tensor(0.9803, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.90it/s]

Batch Loss:  20.111822140868753 Accuracy:  tensor(0.9792, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.95it/s]


Train Loss : 21.21963636437431 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:57,  2.00it/s]

Batch Loss:  9.966106531210244 Accuracy:  tensor(0.9798, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:08,  1.82it/s]

Batch Loss:  19.993431243114173 Accuracy:  tensor(0.9798, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.90it/s]


Train Loss : 21.550136405974627 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:57,  1.99it/s]

Batch Loss:  10.056166681461036 Accuracy:  tensor(0.9794, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:08,  1.81it/s]

Batch Loss:  20.12093628384173 Accuracy:  tensor(0.9795, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.90it/s]


Train Loss : 21.419107845053077 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:03,  1.82it/s]

Batch Loss:  10.517523690126836 Accuracy:  tensor(0.9783, device='cuda:0')


 93%|█████████▎| 200/215 [01:47<00:08,  1.81it/s]

Batch Loss:  19.679871649481356 Accuracy:  tensor(0.9802, device='cuda:0')


100%|██████████| 215/215 [01:54<00:00,  1.87it/s]


Train Loss : 21.606104259379208 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:53<00:58,  1.98it/s]

Batch Loss:  11.129998600110412 Accuracy:  tensor(0.9769, device='cuda:0')


 93%|█████████▎| 200/215 [01:48<00:08,  1.82it/s]

Batch Loss:  20.21841048821807 Accuracy:  tensor(0.9795, device='cuda:0')


100%|██████████| 215/215 [01:56<00:00,  1.85it/s]


Train Loss : 21.626021288335323 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:47<00:44,  2.56it/s]

Batch Loss:  9.768702745437622 Accuracy:  tensor(0.9803, device='cuda:0')


 93%|█████████▎| 200/215 [01:16<00:04,  3.54it/s]

Batch Loss:  20.151908487081528 Accuracy:  tensor(0.9795, device='cuda:0')


100%|██████████| 215/215 [01:24<00:00,  2.56it/s]


Train Loss : 21.57625900954008 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.82it/s]

Batch Loss:  10.636714775115252 Accuracy:  tensor(0.9780, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:08,  1.82it/s]

Batch Loss:  19.76974535919726 Accuracy:  tensor(0.9799, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.90it/s]


Train Loss : 21.507695818319917 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:50<00:59,  1.95it/s]

Batch Loss:  10.561986839398742 Accuracy:  tensor(0.9781, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:08,  1.82it/s]

Batch Loss:  20.09586744941771 Accuracy:  tensor(0.9795, device='cuda:0')


100%|██████████| 215/215 [01:49<00:00,  1.96it/s]


Train Loss : 21.369960295036435 Accuracy : tensor(0.9797, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:59,  1.95it/s]

Batch Loss:  10.237069953233004 Accuracy:  tensor(0.9789, device='cuda:0')


 93%|█████████▎| 200/215 [01:41<00:07,  1.90it/s]

Batch Loss:  19.522468907758594 Accuracy:  tensor(0.9802, device='cuda:0')


100%|██████████| 215/215 [01:48<00:00,  1.98it/s]


Train Loss : 21.53675044514239 Accuracy : tensor(0.9797, device='cuda:0')


100%|██████████| 1038/1038 [00:22<00:00, 45.55it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  48.331900864839554 Accuracy:  tensor(0.7805, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.98it/s]

Batch Loss:  89.74592047929764 Accuracy:  tensor(0.7979, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.95it/s]


Train Loss : 95.58185052871704 Accuracy : tensor(0.8006, device='cuda:0')


 47%|████▋     | 100/215 [00:53<00:58,  1.97it/s]

Batch Loss:  30.872314915060997 Accuracy:  tensor(0.8681, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:08,  1.80it/s]

Batch Loss:  63.50795492529869 Accuracy:  tensor(0.8634, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 68.41384023427963 Accuracy : tensor(0.8634, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.81it/s]

Batch Loss:  19.1328012868762 Accuracy:  tensor(0.9253, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:07,  1.93it/s]

Batch Loss:  38.75518920645118 Accuracy:  tensor(0.9243, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 41.99622553959489 Accuracy : tensor(0.9240, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:00,  1.90it/s]

Batch Loss:  10.53536593168974 Accuracy:  tensor(0.9633, device='cuda:0')


 93%|█████████▎| 200/215 [01:47<00:08,  1.81it/s]

Batch Loss:  21.776394926011562 Accuracy:  tensor(0.9602, device='cuda:0')


100%|██████████| 215/215 [01:55<00:00,  1.85it/s]


Train Loss : 23.709095515310764 Accuracy : tensor(0.9593, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:58,  1.98it/s]

Batch Loss:  5.657984076300636 Accuracy:  tensor(0.9814, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.93it/s]

Batch Loss:  11.940302682342008 Accuracy:  tensor(0.9802, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.94it/s]


Train Loss : 13.066967832157388 Accuracy : tensor(0.9800, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:03,  1.81it/s]

Batch Loss:  2.6520104807568714 Accuracy:  tensor(0.9908, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:07,  1.99it/s]

Batch Loss:  6.345915360201616 Accuracy:  tensor(0.9893, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 6.695523479313124 Accuracy : tensor(0.9896, device='cuda:0')


 47%|████▋     | 100/215 [00:39<00:32,  3.55it/s]

Batch Loss:  1.7622270808788016 Accuracy:  tensor(0.9948, device='cuda:0')


 93%|█████████▎| 200/215 [01:18<00:08,  1.83it/s]

Batch Loss:  3.3712469087040517 Accuracy:  tensor(0.9950, device='cuda:0')


100%|██████████| 215/215 [01:26<00:00,  2.50it/s]


Train Loss : 3.5645295594877098 Accuracy : tensor(0.9950, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:58,  1.98it/s]

Batch Loss:  0.8354720786446705 Accuracy:  tensor(0.9972, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:07,  1.90it/s]

Batch Loss:  1.6440777807147242 Accuracy:  tensor(0.9973, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 1.6622349558892893 Accuracy : tensor(0.9975, device='cuda:0')


 47%|████▋     | 100/215 [00:54<01:03,  1.81it/s]

Batch Loss:  0.6220033165300265 Accuracy:  tensor(0.9981, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:08,  1.81it/s]

Batch Loss:  1.1722856558626518 Accuracy:  tensor(0.9981, device='cuda:0')


100%|██████████| 215/215 [01:54<00:00,  1.87it/s]


Train Loss : 1.201913778524613 Accuracy : tensor(0.9983, device='cuda:0')


 47%|████▋     | 100/215 [00:50<00:58,  1.98it/s]

Batch Loss:  0.4276115884713363 Accuracy:  tensor(0.9987, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.91it/s]

Batch Loss:  0.6703037606494036 Accuracy:  tensor(0.9991, device='cuda:0')


100%|██████████| 215/215 [01:49<00:00,  1.96it/s]


Train Loss : 0.6839729103376158 Accuracy : tensor(0.9991, device='cuda:0')


100%|██████████| 1038/1038 [00:23<00:00, 44.87it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  22.46255343221128 Accuracy:  tensor(0.9403, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.98it/s]

Batch Loss:  43.520525926724076 Accuracy:  tensor(0.9387, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.95it/s]


Train Loss : 46.42304162122309 Accuracy : tensor(0.9391, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:04,  1.79it/s]

Batch Loss:  15.077887877821922 Accuracy:  tensor(0.9517, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.99it/s]

Batch Loss:  29.674049647524953 Accuracy:  tensor(0.9511, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.94it/s]


Train Loss : 31.067272095009685 Accuracy : tensor(0.9520, device='cuda:0')


 47%|████▋     | 100/215 [00:50<00:57,  2.01it/s]

Batch Loss:  8.117813757620752 Accuracy:  tensor(0.9719, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:08,  1.82it/s]

Batch Loss:  15.965253228321671 Accuracy:  tensor(0.9721, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.92it/s]


Train Loss : 17.122617546468973 Accuracy : tensor(0.9722, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:58,  1.98it/s]

Batch Loss:  3.6116903064539656 Accuracy:  tensor(0.9878, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.98it/s]

Batch Loss:  7.44939779129345 Accuracy:  tensor(0.9883, device='cuda:0')


100%|██████████| 215/215 [01:49<00:00,  1.97it/s]


Train Loss : 8.104621872422285 Accuracy : tensor(0.9880, device='cuda:0')


 47%|████▋     | 100/215 [00:51<01:02,  1.84it/s]

Batch Loss:  1.3696848677936941 Accuracy:  tensor(0.9958, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:07,  1.98it/s]

Batch Loss:  3.198961521324236 Accuracy:  tensor(0.9956, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 3.3405225327587686 Accuracy : tensor(0.9958, device='cuda:0')


 47%|████▋     | 100/215 [00:51<01:04,  1.78it/s]

Batch Loss:  1.2156445406435523 Accuracy:  tensor(0.9955, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.99it/s]

Batch Loss:  1.886500288383104 Accuracy:  tensor(0.9969, device='cuda:0')


100%|██████████| 215/215 [01:49<00:00,  1.96it/s]


Train Loss : 1.9483270759810694 Accuracy : tensor(0.9969, device='cuda:0')


 47%|████▋     | 100/215 [00:32<00:32,  3.54it/s]

Batch Loss:  0.4002561711677117 Accuracy:  tensor(0.9987, device='cuda:0')


 93%|█████████▎| 200/215 [01:19<00:07,  1.88it/s]

Batch Loss:  0.7627465455298079 Accuracy:  tensor(0.9991, device='cuda:0')


100%|██████████| 215/215 [01:26<00:00,  2.48it/s]


Train Loss : 0.7858792310580611 Accuracy : tensor(0.9991, device='cuda:0')


 47%|████▋     | 100/215 [00:54<01:04,  1.79it/s]

Batch Loss:  0.16655831399839371 Accuracy:  tensor(0.9997, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:08,  1.85it/s]

Batch Loss:  0.45838975613878574 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 215/215 [01:54<00:00,  1.88it/s]


Train Loss : 0.47540653371834196 Accuracy : tensor(0.9996, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:57,  1.99it/s]

Batch Loss:  0.059954324293357786 Accuracy:  tensor(0.9998, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:07,  1.98it/s]

Batch Loss:  0.281703787506558 Accuracy:  tensor(0.9997, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 0.2905827175709419 Accuracy : tensor(0.9997, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:04,  1.77it/s]

Batch Loss:  0.026463239177246578 Accuracy:  tensor(1., device='cuda:0')


 93%|█████████▎| 200/215 [01:47<00:08,  1.81it/s]

Batch Loss:  0.3270786075372598 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 215/215 [01:55<00:00,  1.86it/s]


Train Loss : 0.3332452446120442 Accuracy : tensor(0.9999, device='cuda:0')


100%|██████████| 1038/1038 [00:23<00:00, 43.97it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  47.22756616771221 Accuracy:  tensor(0.7759, device='cuda:0')


 93%|█████████▎| 200/215 [01:40<00:08,  1.84it/s]

Batch Loss:  85.08081030845642 Accuracy:  tensor(0.8039, device='cuda:0')


100%|██████████| 215/215 [01:48<00:00,  1.98it/s]


Train Loss : 90.48828753829002 Accuracy : tensor(0.8074, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.82it/s]

Batch Loss:  23.42756437510252 Accuracy:  tensor(0.9083, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:08,  1.82it/s]

Batch Loss:  46.87388812750578 Accuracy:  tensor(0.9108, device='cuda:0')


100%|██████████| 215/215 [01:54<00:00,  1.88it/s]


Train Loss : 50.02951394766569 Accuracy : tensor(0.9110, device='cuda:0')


 47%|████▋     | 100/215 [00:53<00:59,  1.94it/s]

Batch Loss:  11.434660909697413 Accuracy:  tensor(0.9566, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.99it/s]

Batch Loss:  23.534381160512567 Accuracy:  tensor(0.9569, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 25.364377869293094 Accuracy : tensor(0.9565, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:04,  1.79it/s]

Batch Loss:  5.864486259408295 Accuracy:  tensor(0.9819, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:07,  2.00it/s]

Batch Loss:  12.10451267240569 Accuracy:  tensor(0.9812, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 13.162150064017624 Accuracy : tensor(0.9808, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.82it/s]

Batch Loss:  3.9156666786875576 Accuracy:  tensor(0.9889, device='cuda:0')


 93%|█████████▎| 200/215 [01:47<00:07,  1.99it/s]

Batch Loss:  7.1652503605000675 Accuracy:  tensor(0.9896, device='cuda:0')


100%|██████████| 215/215 [01:55<00:00,  1.87it/s]


Train Loss : 7.673523718491197 Accuracy : tensor(0.9894, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:59,  1.94it/s]

Batch Loss:  1.66568942082813 Accuracy:  tensor(0.9959, device='cuda:0')


 93%|█████████▎| 200/215 [01:39<00:05,  2.62it/s]

Batch Loss:  2.9707821473130025 Accuracy:  tensor(0.9957, device='cuda:0')


100%|██████████| 215/215 [01:45<00:00,  2.04it/s]


Train Loss : 3.3696391137200408 Accuracy : tensor(0.9956, device='cuda:0')


 47%|████▋     | 100/215 [00:29<00:32,  3.54it/s]

Batch Loss:  0.9526056633912958 Accuracy:  tensor(0.9978, device='cuda:0')


 93%|█████████▎| 200/215 [01:24<00:08,  1.82it/s]

Batch Loss:  1.7158858315087855 Accuracy:  tensor(0.9978, device='cuda:0')


100%|██████████| 215/215 [01:32<00:00,  2.33it/s]


Train Loss : 1.8494453078019433 Accuracy : tensor(0.9979, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:00,  1.89it/s]

Batch Loss:  0.5558505724475253 Accuracy:  tensor(0.9987, device='cuda:0')


 93%|█████████▎| 200/215 [01:47<00:08,  1.82it/s]

Batch Loss:  1.049204900249606 Accuracy:  tensor(0.9988, device='cuda:0')


100%|██████████| 215/215 [01:55<00:00,  1.87it/s]


Train Loss : 1.1643182421394158 Accuracy : tensor(0.9988, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.81it/s]

Batch Loss:  0.31627696557552554 Accuracy:  tensor(0.9997, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:07,  2.00it/s]

Batch Loss:  0.6318650758767035 Accuracy:  tensor(0.9995, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.90it/s]


Train Loss : 0.6857861744356342 Accuracy : tensor(0.9995, device='cuda:0')


 47%|████▋     | 100/215 [00:54<01:01,  1.86it/s]

Batch Loss:  0.13311555245309137 Accuracy:  tensor(0.9997, device='cuda:0')


 93%|█████████▎| 200/215 [01:47<00:08,  1.82it/s]

Batch Loss:  0.5270802750601433 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 215/215 [01:55<00:00,  1.87it/s]


Train Loss : 0.537576308066491 Accuracy : tensor(0.9996, device='cuda:0')


100%|██████████| 1038/1038 [00:23<00:00, 43.63it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  18.30556029640138 Accuracy:  tensor(0.9423, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:08,  1.84it/s]

Batch Loss:  34.92463306337595 Accuracy:  tensor(0.9518, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.92it/s]


Train Loss : 36.845169082283974 Accuracy : tensor(0.9531, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:57,  2.00it/s]

Batch Loss:  16.23821857571602 Accuracy:  tensor(0.9617, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:08,  1.80it/s]

Batch Loss:  33.24108758196235 Accuracy:  tensor(0.9607, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 35.31617000699043 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:58,  1.98it/s]

Batch Loss:  15.257616680115461 Accuracy:  tensor(0.9652, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:08,  1.81it/s]

Batch Loss:  32.93057508394122 Accuracy:  tensor(0.9613, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.92it/s]


Train Loss : 35.62513496726751 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:51<01:03,  1.81it/s]

Batch Loss:  16.469515381380916 Accuracy:  tensor(0.9616, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:08,  1.81it/s]

Batch Loss:  33.32134300284088 Accuracy:  tensor(0.9610, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.90it/s]


Train Loss : 35.60976024903357 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:57,  1.99it/s]

Batch Loss:  17.136661948636174 Accuracy:  tensor(0.9592, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  2.00it/s]

Batch Loss:  32.85359385423362 Accuracy:  tensor(0.9614, device='cuda:0')


100%|██████████| 215/215 [01:49<00:00,  1.96it/s]


Train Loss : 35.34084049798548 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:57,  1.99it/s]

Batch Loss:  17.309708800166845 Accuracy:  tensor(0.9586, device='cuda:0')


 93%|█████████▎| 200/215 [01:35<00:04,  3.46it/s]

Batch Loss:  32.8697782214731 Accuracy:  tensor(0.9615, device='cuda:0')


100%|██████████| 215/215 [01:39<00:00,  2.15it/s]


Train Loss : 35.5366451125592 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:36<00:59,  1.94it/s]

Batch Loss:  16.661671232432127 Accuracy:  tensor(0.9608, device='cuda:0')


 93%|█████████▎| 200/215 [01:27<00:07,  2.00it/s]

Batch Loss:  33.00523229688406 Accuracy:  tensor(0.9612, device='cuda:0')


100%|██████████| 215/215 [01:34<00:00,  2.28it/s]


Train Loss : 35.316172663122416 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:50<00:57,  1.99it/s]

Batch Loss:  16.177169021219015 Accuracy:  tensor(0.9619, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.97it/s]

Batch Loss:  33.03803589195013 Accuracy:  tensor(0.9609, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.94it/s]


Train Loss : 35.36160176247358 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.80it/s]

Batch Loss:  15.573574807494879 Accuracy:  tensor(0.9638, device='cuda:0')


 93%|█████████▎| 200/215 [01:48<00:08,  1.80it/s]

Batch Loss:  32.799939051270485 Accuracy:  tensor(0.9613, device='cuda:0')


100%|██████████| 215/215 [01:56<00:00,  1.85it/s]


Train Loss : 35.46180013567209 Accuracy : tensor(0.9613, device='cuda:0')


 47%|████▋     | 100/215 [00:50<00:59,  1.95it/s]

Batch Loss:  16.427731156349182 Accuracy:  tensor(0.9613, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.99it/s]

Batch Loss:  32.83023238182068 Accuracy:  tensor(0.9613, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.95it/s]


Train Loss : 35.16975026205182 Accuracy : tensor(0.9613, device='cuda:0')


100%|██████████| 1038/1038 [00:23<00:00, 44.64it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  55.76719790697098 Accuracy:  tensor(0.7180, device='cuda:0')


 93%|█████████▎| 200/215 [01:40<00:07,  1.99it/s]

Batch Loss:  103.9152489900589 Accuracy:  tensor(0.7462, device='cuda:0')


100%|██████████| 215/215 [01:48<00:00,  1.99it/s]


Train Loss : 111.19267296791077 Accuracy : tensor(0.7473, device='cuda:0')


 47%|████▋     | 100/215 [00:53<00:58,  1.98it/s]

Batch Loss:  36.310831129550934 Accuracy:  tensor(0.8509, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:07,  1.90it/s]

Batch Loss:  72.12588213384151 Accuracy:  tensor(0.8501, device='cuda:0')


100%|██████████| 215/215 [01:54<00:00,  1.88it/s]


Train Loss : 78.1029659807682 Accuracy : tensor(0.8491, device='cuda:0')


 47%|████▋     | 100/215 [00:53<01:03,  1.81it/s]

Batch Loss:  22.85793460160494 Accuracy:  tensor(0.9166, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:08,  1.81it/s]

Batch Loss:  47.068468533456326 Accuracy:  tensor(0.9128, device='cuda:0')


100%|██████████| 215/215 [01:54<00:00,  1.88it/s]


Train Loss : 50.97918226569891 Accuracy : tensor(0.9118, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:57,  1.99it/s]

Batch Loss:  14.521763369441032 Accuracy:  tensor(0.9495, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.92it/s]

Batch Loss:  28.80623017437756 Accuracy:  tensor(0.9484, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 31.540777722373605 Accuracy : tensor(0.9475, device='cuda:0')


 47%|████▋     | 100/215 [00:53<00:59,  1.94it/s]

Batch Loss:  7.962052587419748 Accuracy:  tensor(0.9744, device='cuda:0')


 93%|█████████▎| 200/215 [01:46<00:07,  1.99it/s]

Batch Loss:  16.355655004736036 Accuracy:  tensor(0.9734, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.89it/s]


Train Loss : 17.436490256804973 Accuracy : tensor(0.9734, device='cuda:0')


 47%|████▋     | 100/215 [00:52<00:57,  1.99it/s]

Batch Loss:  4.681921312585473 Accuracy:  tensor(0.9855, device='cuda:0')


 93%|█████████▎| 200/215 [01:29<00:04,  3.54it/s]

Batch Loss:  8.817194905597717 Accuracy:  tensor(0.9866, device='cuda:0')


100%|██████████| 215/215 [01:33<00:00,  2.31it/s]


Train Loss : 9.649010455235839 Accuracy : tensor(0.9862, device='cuda:0')


 47%|████▋     | 100/215 [00:44<01:04,  1.77it/s]

Batch Loss:  2.5159228404518217 Accuracy:  tensor(0.9927, device='cuda:0')


 93%|█████████▎| 200/215 [01:34<00:07,  1.99it/s]

Batch Loss:  4.933894220972434 Accuracy:  tensor(0.9924, device='cuda:0')


100%|██████████| 215/215 [01:41<00:00,  2.11it/s]


Train Loss : 5.19711915589869 Accuracy : tensor(0.9925, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:03,  1.81it/s]

Batch Loss:  0.6949457508744672 Accuracy:  tensor(0.9984, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.98it/s]

Batch Loss:  1.9719031587592326 Accuracy:  tensor(0.9975, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.95it/s]


Train Loss : 2.1049155771615915 Accuracy : tensor(0.9974, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:03,  1.82it/s]

Batch Loss:  0.8413008989300579 Accuracy:  tensor(0.9973, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.97it/s]

Batch Loss:  1.362345947563881 Accuracy:  tensor(0.9981, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 1.646629146678606 Accuracy : tensor(0.9979, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:58,  1.98it/s]

Batch Loss:  0.6808555188181344 Accuracy:  tensor(0.9981, device='cuda:0')


 93%|█████████▎| 200/215 [01:42<00:07,  1.99it/s]

Batch Loss:  1.047902698337566 Accuracy:  tensor(0.9986, device='cuda:0')


100%|██████████| 215/215 [01:49<00:00,  1.97it/s]


Train Loss : 1.0796113209798932 Accuracy : tensor(0.9987, device='cuda:0')


100%|██████████| 1038/1038 [00:23<00:00, 43.53it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  9.858957876218483 Accuracy:  tensor(0.9802, device='cuda:0')


 93%|█████████▎| 200/215 [01:45<00:08,  1.81it/s]

Batch Loss:  15.991172659676522 Accuracy:  tensor(0.9830, device='cuda:0')


100%|██████████| 215/215 [01:53<00:00,  1.89it/s]


Train Loss : 16.839347826316953 Accuracy : tensor(0.9832, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:57,  1.99it/s]

Batch Loss:  6.008600966073573 Accuracy:  tensor(0.9844, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.97it/s]

Batch Loss:  11.732449802104384 Accuracy:  tensor(0.9839, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 12.401665941346437 Accuracy : tensor(0.9842, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:01,  1.86it/s]

Batch Loss:  4.6338148838840425 Accuracy:  tensor(0.9856, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:08,  1.81it/s]

Batch Loss:  9.61358656513039 Accuracy:  tensor(0.9855, device='cuda:0')


100%|██████████| 215/215 [01:52<00:00,  1.91it/s]


Train Loss : 10.443018348538317 Accuracy : tensor(0.9851, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:02,  1.85it/s]

Batch Loss:  3.659022996900603 Accuracy:  tensor(0.9878, device='cuda:0')


 93%|█████████▎| 200/215 [01:43<00:07,  1.98it/s]

Batch Loss:  7.436668572714552 Accuracy:  tensor(0.9870, device='cuda:0')


100%|██████████| 215/215 [01:50<00:00,  1.94it/s]


Train Loss : 7.879082808503881 Accuracy : tensor(0.9872, device='cuda:0')


 47%|████▋     | 100/215 [00:52<01:01,  1.87it/s]

Batch Loss:  2.4585170233331155 Accuracy:  tensor(0.9908, device='cuda:0')


 93%|█████████▎| 200/215 [01:44<00:07,  1.97it/s]

Batch Loss:  4.717686599644367 Accuracy:  tensor(0.9916, device='cuda:0')


100%|██████████| 215/215 [01:51<00:00,  1.93it/s]


Train Loss : 5.074955784511985 Accuracy : tensor(0.9916, device='cuda:0')


 47%|████▋     | 100/215 [00:51<00:44,  2.57it/s]

Batch Loss:  1.5115296217700234 Accuracy:  tensor(0.9944, device='cuda:0')


 93%|█████████▎| 200/215 [01:22<00:04,  3.54it/s]

Batch Loss:  2.9796218777628383 Accuracy:  tensor(0.9949, device='cuda:0')


100%|██████████| 215/215 [01:26<00:00,  2.47it/s]


Train Loss : 3.180943425031728 Accuracy : tensor(0.9950, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.56it/s]

Batch Loss:  0.8601032025180757 Accuracy:  tensor(0.9970, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.56it/s]

Batch Loss:  1.5200362858595327 Accuracy:  tensor(0.9976, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.56it/s]


Train Loss : 1.5852277501981007 Accuracy : tensor(0.9976, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  0.915450617481838 Accuracy:  tensor(0.9975, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  1.4072600325162057 Accuracy:  tensor(0.9980, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.55it/s]


Train Loss : 1.4205852806480834 Accuracy : tensor(0.9982, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  0.2959955183614511 Accuracy:  tensor(0.9992, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.52it/s]

Batch Loss:  0.7134506541478913 Accuracy:  tensor(0.9990, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.55it/s]


Train Loss : 0.7254246324591804 Accuracy : tensor(0.9991, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  0.3189363760611741 Accuracy:  tensor(0.9992, device='cuda:0')


 93%|█████████▎| 200/215 [00:57<00:04,  3.51it/s]

Batch Loss:  0.5903016484662658 Accuracy:  tensor(0.9994, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.50it/s]


Train Loss : 0.5976981253916165 Accuracy : tensor(0.9994, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 50.59it/s]
Some weights of the model checkpoint at monologg/koelectra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-

Batch Loss:  10.898288934491575 Accuracy:  tensor(0.9702, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  18.712604754138738 Accuracy:  tensor(0.9773, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.55it/s]


Train Loss : 20.03465054417029 Accuracy : tensor(0.9777, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.55it/s]

Batch Loss:  8.790229792241007 Accuracy:  tensor(0.9834, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  17.172366954851896 Accuracy:  tensor(0.9837, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.456515736412257 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.52it/s]

Batch Loss:  8.66492182109505 Accuracy:  tensor(0.9830, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.55it/s]

Batch Loss:  17.032687070779502 Accuracy:  tensor(0.9834, device='cuda:0')


100%|██████████| 215/215 [01:01<00:00,  3.52it/s]


Train Loss : 18.113370288163424 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  7.993951645679772 Accuracy:  tensor(0.9848, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.53it/s]

Batch Loss:  17.209298129193485 Accuracy:  tensor(0.9834, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.33841574843973 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  8.19875185098499 Accuracy:  tensor(0.9842, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  17.099743641912937 Accuracy:  tensor(0.9834, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.53it/s]


Train Loss : 18.179169076494873 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  9.376487129367888 Accuracy:  tensor(0.9812, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.57it/s]

Batch Loss:  17.211141604930162 Accuracy:  tensor(0.9831, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.12938462290913 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.54it/s]

Batch Loss:  8.146935250610113 Accuracy:  tensor(0.9842, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.51it/s]

Batch Loss:  16.734467754140496 Accuracy:  tensor(0.9837, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.069193502888083 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.53it/s]

Batch Loss:  8.167915150523186 Accuracy:  tensor(0.9842, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.43it/s]

Batch Loss:  16.477327403612435 Accuracy:  tensor(0.9840, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.104964594356716 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.55it/s]

Batch Loss:  7.737026574090123 Accuracy:  tensor(0.9852, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.54it/s]

Batch Loss:  17.017794058658183 Accuracy:  tensor(0.9833, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.55it/s]


Train Loss : 17.979604688473046 Accuracy : tensor(0.9836, device='cuda:0')


 47%|████▋     | 100/215 [00:28<00:32,  3.51it/s]

Batch Loss:  8.41386310197413 Accuracy:  tensor(0.9836, device='cuda:0')


 93%|█████████▎| 200/215 [00:56<00:04,  3.55it/s]

Batch Loss:  16.621037179604173 Accuracy:  tensor(0.9838, device='cuda:0')


100%|██████████| 215/215 [01:00<00:00,  3.54it/s]


Train Loss : 18.018944900482893 Accuracy : tensor(0.9836, device='cuda:0')


100%|██████████| 1038/1038 [00:20<00:00, 51.69it/s]


In [8]:

model_name = "ko-electra-v1"

test_label = []
for idx, row in origin_test.iloc[:, [1,2,3,4,5,6,7,8]].iterrows() :
  test_label.append(list(row))

# 각 모델별 결과를 합친 것 비교해보기
count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, 비도덕아님, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("8개 Label 예측 accuracy :", accuracy)

result = {
    "범죄" : 범죄, 
    "혐오" : 혐오, 
    "선정" : 선정, 
    "비도덕아님" : 비도덕아님, 
    "폭력" : 폭력, 
    "비난" : 비난, 
    "욕설" : 욕설, 
    "차별" : 차별
}
result = pd.DataFrame(result)
result.to_csv("result/test_label8_{}.csv".format(model_name))


# 비도덕아님은 예측하지 않게 진행
# 비도덕아님은 다른 케이스가 모두 False 인 경우
t = []
for i, data in enumerate(zip(범죄, 혐오, 선정, 폭력, 비난, 욕설, 차별)) :
    if sum(list(data)) == 0  :
        t.append(1)
    else :
        t.append(0)


count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, t, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("7개 Label 예측 accuracy :", accuracy)      

result = pd.DataFrame(result)
result.to_csv("result/test_label7_{}.csv".format(model_name))


615
8개 Label 예측 accuracy : 0.5924855491329479
705
7개 Label 예측 accuracy : 0.6791907514450867


In [11]:
pred = []
for i, data in enumerate(zip(범죄, 혐오, 선정, t, 폭력, 비난, 욕설, 차별)) :
    pred.append(data)

In [1]:
from sklearn.metrics import f1_score
f1_score(test_label, pred, average='micro')



NameError: name 'test_label' is not defined

In [26]:
data2 = pd.read_csv('unethical_ml.csv')
test_label = []
for idx, row in data2.iloc[:, [1,2,3,4,5,6,7,8]].iterrows() :
  test_label.append(list(row))

# 각 모델별 결과를 합친 것 비교해보기
count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, 비도덕아님, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(data)
print("accuracy :", accuracy)


373
accuracy : 46.625


In [57]:
# KoElectra_v2

class KLUEDataset_train(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-v2-discriminator")


  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title', 'topic_idx']].values
    text = row[0]
    y = row[1]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )

    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]

    return input_ids, attention_mask, y
class KLUEDataset_test(Dataset):
  def __init__(self, DataFrame):
    self.dataset = DataFrame
    self.tokenizer = AutoTokenizer.from_pretrained("monologg/koelectra-base-v2-discriminator")

  def __len__(self):
    return len(self.dataset)

  def __getitem__(self, idx):
    row = self.dataset.loc[idx, ['title']].values
    text = row[0]

    inputs = self.tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=64,
        pad_to_max_length=True,
        add_special_tokens=True
    )

    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]
    return input_ids, attention_mask

In [58]:
# KoElectra_v2

label = ['범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']


# Train data
origin_train = pd.read_csv('train_ml2.csv')
origin_train = origin_train.dropna()
origin_train = origin_train.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_train.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#origin_train['sentence'] = origin_train['sentence'].apply(preprocessing)

# Test data
origin_test = pd.read_csv("test_ml2.csv")
origin_test = origin_test.loc[:,['sentence', '범죄', '혐오', '선정', '비도덕 아님', '폭력', '비난', '욕설', '차별']]
origin_test.columns = ['sentence', '범죄', '혐오', '선정', '비도덕아님', '폭력', '비난', '욕설', '차별']
#test['sentence'] = test['sentence'].apply(preprocessing)

device = torch.device("cuda")
bertmodel, vocab = get_pytorch_kobert_model()

# 모델 학습 파라미터
epochs = 10
batch_size = 64
warmup_ratio=0.1


for lbl in label : 


    train = origin_train.copy()
    test = origin_test.copy()
    # 모델 조건에 맞추고, lbl 에 맞추어서 필터링
    titles_t = train.sentence.to_numpy().reshape(-1,1)
    labels_t = train[lbl].to_numpy().reshape(-1,1)
    train = pd.DataFrame({'title':titles_t.reshape(-1), 'topic_idx':labels_t.reshape(-1)})

    titles_t = test.sentence.to_numpy().reshape(-1,1)
    labels_t = test[lbl].to_numpy().reshape(-1,1)
    df_valid = pd.DataFrame({'title':titles_t.reshape(-1), 'topic':labels_t.reshape(-1)})



    train_dataset = KLUEDataset_train(train)
    test_dataset = KLUEDataset_test(df_valid)
    # 모델 학습 파라미터

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    t_total = len(train_loader) * epochs
    losses = []
    accuracy = []
    model = ElectraForSequenceClassification.from_pretrained('monologg/koelectra-base-v2-discriminator', num_labels=2).to(device)
    optimizer = AdamW(model.parameters(), lr=1e-4)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=1, num_training_steps=t_total)
    for i in range(epochs):
      total_loss = 0.0
      correct = 0
      total = 0
      batches = 0

      model.train()
      for input_ids_batch, attention_masks_batch, y_batch in tqdm(train_loader):
        optimizer.zero_grad()
        y_batch = y_batch.to(device)
        y_batch = y_batch.long()        
        y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0]
        loss = F.cross_entropy(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

        _, predicted = torch.max(y_pred, 1)
        correct += (predicted == y_batch).sum()
        total += len(y_batch)

        batches += 1
        if batches % 100 == 0:
          print("Batch Loss: ", total_loss, "Accuracy: ", correct.float() / total)
          
      losses.append(total_loss)
      accuracy.append(correct.float() / total)
      print("Train Loss :", total_loss, "Accuracy :", correct.float() / total)

    answer = []
    model.eval()
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
      y_pred = model(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0]
      _, predicted = torch.max(y_pred, 1)
      answer.append(predicted.item())
      globals()['{}'.format(lbl)] = answer


using cached model
using cached model


Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v2-discriminator and are newly initialized: 

Batch Loss:  14.530044578015804 Accuracy:  tensor(0.9702, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 21.95918634533882 Accuracy : tensor(0.9703, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  12.07103738002479 Accuracy:  tensor(0.9733, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 19.87998294364661 Accuracy : tensor(0.9719, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  13.076903010718524 Accuracy:  tensor(0.9714, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 20.042758614756167 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  13.17312951758504 Accuracy:  tensor(0.9709, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 19.947078458964825 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.56it/s]

Batch Loss:  13.483899045735598 Accuracy:  tensor(0.9703, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 20.079895071685314 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  12.610316216945648 Accuracy:  tensor(0.9728, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 20.136982621625066 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  11.974968457594514 Accuracy:  tensor(0.9744, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 20.099873926490545 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  12.150077298283577 Accuracy:  tensor(0.9739, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 20.160492196679115 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  11.70938959158957 Accuracy:  tensor(0.9750, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 19.95251581631601 Accuracy : tensor(0.9721, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  13.53576778061688 Accuracy:  tensor(0.9700, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 20.005981650203466 Accuracy : tensor(0.9721, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 75.18it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  56.581947058439255 Accuracy:  tensor(0.7172, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 86.32197353243828 Accuracy : tensor(0.7297, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  41.769131883978844 Accuracy:  tensor(0.8152, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 66.80132545530796 Accuracy : tensor(0.8093, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  30.73378424346447 Accuracy:  tensor(0.8787, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 49.36503028869629 Accuracy : tensor(0.8753, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  17.11698478832841 Accuracy:  tensor(0.9387, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 28.595229253172874 Accuracy : tensor(0.9337, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.41it/s]

Batch Loss:  10.144256833940744 Accuracy:  tensor(0.9664, device='cuda:0')


100%|██████████| 157/157 [00:44<00:00,  3.51it/s]


Train Loss : 15.586783142760396 Accuracy : tensor(0.9652, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.42it/s]

Batch Loss:  5.606469890568405 Accuracy:  tensor(0.9791, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.47it/s]


Train Loss : 9.269822402391583 Accuracy : tensor(0.9789, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:15,  3.58it/s]

Batch Loss:  3.1515844163950533 Accuracy:  tensor(0.9908, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.46it/s]


Train Loss : 4.812331788241863 Accuracy : tensor(0.9912, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.38it/s]

Batch Loss:  1.97071344521828 Accuracy:  tensor(0.9942, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.46it/s]


Train Loss : 2.7458730632206425 Accuracy : tensor(0.9951, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.39it/s]

Batch Loss:  1.3873600327060558 Accuracy:  tensor(0.9967, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.47it/s]


Train Loss : 1.9665887457085773 Accuracy : tensor(0.9968, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.44it/s]

Batch Loss:  1.0462292456650175 Accuracy:  tensor(0.9964, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.48it/s]


Train Loss : 1.7638789961347356 Accuracy : tensor(0.9965, device='cuda:0')


100%|██████████| 1038/1038 [00:14<00:00, 70.68it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  29.145152777433395 Accuracy:  tensor(0.9144, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.46it/s]


Train Loss : 41.72563200443983 Accuracy : tensor(0.9187, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.41it/s]

Batch Loss:  26.325902776792645 Accuracy:  tensor(0.9227, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.44it/s]


Train Loss : 42.841969864442945 Accuracy : tensor(0.9212, device='cuda:0')


 64%|██████▎   | 100/157 [00:29<00:16,  3.41it/s]

Batch Loss:  28.388098433613777 Accuracy:  tensor(0.9189, device='cuda:0')


100%|██████████| 157/157 [00:45<00:00,  3.47it/s]


Train Loss : 44.12547888606787 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  27.739390783011913 Accuracy:  tensor(0.9209, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 43.975321136415005 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  27.14086451381445 Accuracy:  tensor(0.9237, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 44.18264498561621 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  28.404995411634445 Accuracy:  tensor(0.9183, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 44.00905820727348 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:15,  3.60it/s]

Batch Loss:  27.66266455501318 Accuracy:  tensor(0.9211, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 43.990429274737835 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.56it/s]

Batch Loss:  28.083802357316017 Accuracy:  tensor(0.9194, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 44.15658935159445 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  28.079507745802402 Accuracy:  tensor(0.9194, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 43.854242995381355 Accuracy : tensor(0.9198, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.51it/s]

Batch Loss:  27.62753865122795 Accuracy:  tensor(0.9212, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 43.831753343343735 Accuracy : tensor(0.9198, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 75.31it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  43.25823110342026 Accuracy:  tensor(0.8020, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 63.287134513258934 Accuracy : tensor(0.8187, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  25.801068268716335 Accuracy:  tensor(0.8966, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 40.82146564871073 Accuracy : tensor(0.8943, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:16,  3.55it/s]

Batch Loss:  14.737176835536957 Accuracy:  tensor(0.9436, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 23.82711546495557 Accuracy : tensor(0.9423, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.56it/s]

Batch Loss:  6.480218029115349 Accuracy:  tensor(0.9789, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 9.448093842249364 Accuracy : tensor(0.9797, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  4.1361854516435415 Accuracy:  tensor(0.9862, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 6.147773798322305 Accuracy : tensor(0.9864, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  1.8080764156766236 Accuracy:  tensor(0.9945, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 2.985152239270974 Accuracy : tensor(0.9946, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  1.2905126787372865 Accuracy:  tensor(0.9962, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 1.9435520463157445 Accuracy : tensor(0.9967, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  0.7806070158549119 Accuracy:  tensor(0.9978, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 1.1362446896382608 Accuracy : tensor(0.9979, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  0.38940864786854945 Accuracy:  tensor(0.9987, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 0.5961499115801416 Accuracy : tensor(0.9988, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  0.3935961306269746 Accuracy:  tensor(0.9991, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 0.6851572050363757 Accuracy : tensor(0.9990, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 76.96it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  17.322016721591353 Accuracy:  tensor(0.9497, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.58it/s]


Train Loss : 25.466649862006307 Accuracy : tensor(0.9492, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  9.661444399505854 Accuracy:  tensor(0.9655, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 14.941874384880066 Accuracy : tensor(0.9657, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  6.176703690551221 Accuracy:  tensor(0.9803, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.57it/s]


Train Loss : 10.144570152740926 Accuracy : tensor(0.9797, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.56it/s]

Batch Loss:  4.480364377843216 Accuracy:  tensor(0.9881, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.58it/s]


Train Loss : 6.677043828414753 Accuracy : tensor(0.9877, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  3.333566113258712 Accuracy:  tensor(0.9906, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 5.014520716271363 Accuracy : tensor(0.9914, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.60it/s]

Batch Loss:  1.9082393595017493 Accuracy:  tensor(0.9950, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 3.3640540193300694 Accuracy : tensor(0.9946, device='cuda:0')


 64%|██████▎   | 100/157 [00:28<00:16,  3.54it/s]

Batch Loss:  1.3979874172946438 Accuracy:  tensor(0.9978, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 1.9358950440073386 Accuracy : tensor(0.9980, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.61it/s]

Batch Loss:  0.8840744962799363 Accuracy:  tensor(0.9983, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 1.8181827767984942 Accuracy : tensor(0.9982, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  0.9601940960274078 Accuracy:  tensor(0.9977, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 1.496824125526473 Accuracy : tensor(0.9979, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.57it/s]

Batch Loss:  0.4487311415723525 Accuracy:  tensor(0.9989, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 1.2189425473916344 Accuracy : tensor(0.9982, device='cuda:0')


100%|██████████| 1038/1038 [00:13<00:00, 74.85it/s]
Some weights of the model checkpoint at monologg/koelectra-base-v2-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelect

Batch Loss:  59.02898758649826 Accuracy:  tensor(0.6984, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 90.16484460234642 Accuracy : tensor(0.7099, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  48.64135843515396 Accuracy:  tensor(0.7808, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 77.76829022169113 Accuracy : tensor(0.7657, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.58it/s]

Batch Loss:  38.07223039865494 Accuracy:  tensor(0.8423, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.59it/s]


Train Loss : 66.93031620979309 Accuracy : tensor(0.8063, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  28.997509494423866 Accuracy:  tensor(0.8836, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.61it/s]


Train Loss : 46.465379029512405 Accuracy : tensor(0.8828, device='cuda:0')


 64%|██████▎   | 100/157 [00:27<00:15,  3.59it/s]

Batch Loss:  19.0667005777359 Accuracy:  tensor(0.9336, device='cuda:0')


100%|██████████| 157/157 [00:43<00:00,  3.60it/s]


Train Loss : 29.7562470510602 Accuracy : tensor(0.9352, device='cuda:0')


  8%|▊         | 12/157 [00:03<00:40,  3.56it/s]

In [29]:
model_name = "ko-electra-v2"

test_label = []
for idx, row in origin_test.iloc[:, [1,2,3,4,5,6,7,8]].iterrows() :
  test_label.append(list(row))

# 각 모델별 결과를 합친 것 비교해보기
count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, 비도덕아님, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("8개 Label 예측 accuracy :", accuracy)

result = {
    "범죄" : 범죄, 
    "혐오" : 혐오, 
    "선정" : 선정, 
    "비도덕아님" : 비도덕아님, 
    "폭력" : 폭력, 
    "비난" : 비난, 
    "욕설" : 욕설, 
    "차별" : 차별
}
result = pd.DataFrame(result)
result.to_csv("predict_result/label8_{}.csv".format(model_name))


# 비도덕아님은 예측하지 않게 진행
# 비도덕아님은 다른 케이스가 모두 False 인 경우
t = []
for i, data in enumerate(zip(범죄, 혐오, 선정, 폭력, 비난, 욕설, 차별)) :
    if sum(list(data)) == 0  :
        t.append(1)
    else :
        t.append(0)


count = 0
for i, data in enumerate(zip(범죄, 혐오, 선정, t, 폭력, 비난, 욕설, 차별)) :
    if list(data) == test_label[i] :
      count +=1
print(count)

accuracy = count / len(origin_test)
print("7개 Label 예측 accuracy :", accuracy)      

result = pd.DataFrame(result)
result.to_csv("predict_result/label7_{}.csv".format(model_name))


376
accuracy : 47.0


In [None]:
from sklearn.metrics import f1_score

y_true # 정답레이블 -> np.array(y_true)
result # 예측한 애
f1_score(y_true, result, average='micro')