In [1]:
import datasets 
from datasets import load_dataset

from transformers import AutoTokenizer
from transformers import BertForSequenceClassification, DataCollatorWithPadding
import torch
model = BertForSequenceClassification.from_pretrained("klue/bert-base",num_labels=2)
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")

dataset = load_dataset("nsmc")
print(dataset)

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized

  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'document', 'label'],
        num_rows: 150000
    })
    test: Dataset({
        features: ['id', 'document', 'label'],
        num_rows: 50000
    })
})


In [2]:
train = dataset['train']
cols = train.column_names
cols

for i in range(5):
    for col in cols:
        print(col, ":", train[col][i])
    print('\n')

id : 9976970
document : 아 더빙.. 진짜 짜증나네요 목소리
label : 0


id : 3819312
document : 흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나
label : 1


id : 10265843
document : 너무재밓었다그래서보는것을추천한다
label : 0


id : 9045019
document : 교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정
label : 0


id : 6483659
document : 사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 던스트가 너무나도 이뻐보였다
label : 1




In [3]:
def transform(data):
    return tokenizer(
        data['document'],
#         data['sentence2'],
        truncation = True,
#         padding = 'max_length',
        return_token_type_ids = False,
        )

In [4]:
hf_dataset = dataset.map(transform, batched=True)

# train & validation & test split
hf_train_dataset = hf_dataset['train']
# hf_val_dataset = hf_dataset['validation']
hf_test_dataset = hf_dataset['test']
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)



Loading cached processed dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-a288ec65e207023b.arrow
Loading cached processed dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-a447321391324fec.arrow


In [5]:
hf_train_dataset = hf_train_dataset.remove_columns(["document", "id"])
hf_test_dataset = hf_test_dataset.remove_columns(["document", "id"])


In [6]:
small_dataset = hf_train_dataset.select([0, 10, 20, 30, 40, 50])


In [7]:
tt = hf_train_dataset.train_test_split(test_size=0.05)
train_dataset = tt["train"]
validation_dataset = tt["test"]

Loading cached split indices for dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-ca1f1d1686438807.arrow and /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-ffcce9497da87073.arrow


In [8]:
import os
import numpy as np
from transformers import Trainer, TrainingArguments

output_dir = os.getenv('HOME')+'/aiffel/transformers'

training_arguments = TrainingArguments(
    output_dir,                                         # output이 저장될 경로
    evaluation_strategy="steps",           #evaluation하는 빈도
    learning_rate = 2e-5,                         #learning_rate
    per_device_train_batch_size = 64,   # 각 device 당 batch size
    per_device_eval_batch_size = 32,    # evaluation 시에 batch size
    num_train_epochs = 3,                     # train 시킬 총 epochs
#     weight_decay = 0.01,                        # weight decay
#     label_names=["label"],
    fp16=True,
    group_by_length =True,
    save_steps=500,
    save_total_limit=2,
    save_strategy="epoch",  # Save the best model checkpoint
)

In [9]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    predictions = pred.predictions.argmax(axis=1)
    
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, pos_label=1)
    recall = recall_score(labels, predictions, pos_label=1)
    f1 = f1_score(labels, predictions, pos_label=1)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }


In [10]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()


In [11]:
trainer = Trainer(
    model=model,           # 학습시킬 model
    args=training_arguments,           # TrainingArguments을 통해 설정한 arguments
    train_dataset=train_dataset,    # training dataset
    eval_dataset=validation_dataset,       # evaluation dataset
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)
trainer.train()

Using amp fp16 backend
***** Running training *****
  Num examples = 142500
  Num Epochs = 3
  Instantaneous batch size per device = 64
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 6681


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,0.3335,0.284925,0.876,0.855391,0.90194,0.878049
1000,0.2785,0.256574,0.8924,0.891193,0.891433,0.891313
1500,0.267,0.248743,0.898,0.904474,0.887662,0.895989
2000,0.2464,0.241884,0.8984,0.88653,0.911369,0.898778
2500,0.2138,0.241563,0.902667,0.889295,0.917565,0.903209
3000,0.189,0.25084,0.902667,0.908717,0.89305,0.900815
3500,0.1898,0.246981,0.902,0.888744,0.916756,0.902533
4000,0.1858,0.240441,0.9056,0.904416,0.904903,0.904659
4500,0.1747,0.263167,0.904267,0.894987,0.913793,0.904292
5000,0.1371,0.266477,0.904533,0.900535,0.907328,0.903918


  nn.utils.clip_grad_norm_(
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
Saving model checkpoint to /aiffel/aiffel/transformers/checkpoint-2227
Configuration saved in /aiffel/aiffel/transformers/checkpoint-2227/config.json
Model weights saved in /aiffel/aiffel/transformers/checkpoint-2227/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
***** Running Evaluation *****
  Num examples = 7500
  Batch size = 32
  nn.utils.clip_grad_norm_(
Saving model checkpoint to /aiffel/aiffel/transformers/checkpoint-4454
Configuration saved in /aiffel/aiffel/transformers/checkpoint-4454/con

TrainOutput(global_step=6681, training_loss=0.19905552794843748, metrics={'train_runtime': 1407.3977, 'train_samples_per_second': 303.752, 'train_steps_per_second': 4.747, 'total_flos': 5183265179485680.0, 'train_loss': 0.19905552794843748, 'epoch': 3.0})

In [12]:

trainer.evaluate(hf_test_dataset)


***** Running Evaluation *****
  Num examples = 50000
  Batch size = 32


{'eval_loss': 0.27031320333480835,
 'eval_accuracy': 0.90704,
 'eval_precision': 0.9028775566285872,
 'eval_recall': 0.9136376276168912,
 'eval_f1': 0.9082257236504364,
 'eval_runtime': 222.143,
 'eval_samples_per_second': 225.08,
 'eval_steps_per_second': 7.036,
 'epoch': 3.0}