# Set up environment

In [None]:
# Uncomment below section and run in case of re-connecting Colab

!pip install git+https://github.com/haven-jeon/PyKoSpacing.git
!pip install transformers
!pip install git+https://github.com/ssut/py-hanspell.git

from google.colab import drive
drive.mount('/content/drive')

# 영도야 여긴 너 경로에 맞게 바꿔야 할거야
%cd drive/MyDrive/MBTI
!pwd

In [2]:
import time
import datetime
import random
from tqdm import tqdm

from dataloader import MBTIDataset

import pandas as pd
pd.set_option('display.width', 180)
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold
from transformers import DataCollatorWithPadding, BertForSequenceClassification, BertConfig, AdamW

# Environment

In [3]:
# Setup
env_dict = {
    # ==== Arguments for dataset =====
    'train_path'        : './data/hackathon_train_v1.csv',
    'question_path'     : './data/question_filtered.csv',
    'pretrained_url'    : "klue/bert-base",
    'padding_per_batch' : True,
    # ==== Arguments for dataloader =====
    'shuffle'           : False,            # turn off 'shuffle' since we use sampler in Dataloader
    # ==== Arguments for training =====
    'target'            : 'I/E',
    'lm'                : 'bert',
    'classifier'        : 'mlp',
    'batch_size'        : 64,
    'epoch'             : 5,
    'lr'                : 3e-3,
    'decay_rate'        : 1e-7,
    'dropout'           : 0.1,
    'hidden_dim'        : [192, 48, 12]     # 일단 설정해둔 숫자들 (cls token의 dimension인 768 을 4로 나눈 값들)
}

# Random seed
seed_val = 1234
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [6]:
# 지워질 Cell!!!!!
import os
import re
import time
import datetime
import pickle
from tqdm import tqdm
from typing import Union

import torch
import pandas as pd
from torch.utils.data import Dataset
from pykospacing import Spacing
from hanspell import spell_checker
from transformers import AutoTokenizer

#TODO:
# 1. train / text 다른 로직이 필요 (데이터 형식이 조금 다름)
# 2. 다른 Text preprocess 방식 도입 검토
#       ref : https://ebbnflow.tistory.com/246

class MBTIIDataset(Dataset):
    def __init__(
        self,
        data_path     : Union[str, pd.DataFrame],
        question_path : Union[str, pd.DataFrame],
        txt_preprocess: bool            = True,
        normalize     : bool            = True,
        pretrained_url: str             = "klue/bert-base",
        padding_per_batch               = True,
        is_binary_classification: bool  = True,
        is_bert       : bool            = True,
        is_train      : bool            = True
        ):
        """DataLoader for MBTI dataset

        Args:
            data_path (str): Data file path. Both csv and parguet files are allowed.
            question_path (str): Question file path. Both csv and parguet files are allowed.
            txt_preprocess (bool, optional): Text preprocessing pipeline. (e.g. fixing grammar, removing punctuations). Defaults to True.
            normalize (bool, optional): Normalize numeric attribute. Defaults to True.
            is_binary_classification (bool, optional): Target of task. You can choose btw Multi-class classificaiton
                and 4 binary classification problem. Defaults to True.
            is_bert (bool, optional): Using BERT for language model or not. Defaults to True.
            is_train (bool, optional): Whether given data is for training or not. Defaults to True.
        """
        
        def resolve_path(path:str)->pd.DataFrame:
            if path.endswith('.csv'):
                try:
                    df = pd.read_csv(path)
                except:
                    df = pd.read_csv(path, encoding='cp949')
            else:
                df = pd.read_parquet(path)
            return df

        data = None
        question_data = None
        label_cols = ['I/E', 'S/N', 'T/F', 'J/P']
        # if given data_path is pd.Dataframe, we assume preprocessing is already applied to given Dataframe
        # so that it can skip all the processes below
        if not isinstance(data_path, pd.DataFrame):
            data = resolve_path(data_path)
            question_data = resolve_path(question_path)

            self.question_data = question_data

            # preprocess data
            if txt_preprocess:
                self.preprocess_txt(data)
            if normalize:
                data['Age'] = (data['Age'] - data['Age'].mean()) / data['Age'].std()

            # make dataset suitable for binary classification (only for training data - test data doesn't contain 'MBTI' field)
            if is_train and is_binary_classification:
                self.prepare_binary_classification(data)
                # if method right above works successfully, then data should contain same # 0 and 1.
                for col in label_cols:
                    value_counted = data[col].value_counts()
                    assert value_counted[0] == value_counted[1]

            # tokenize
            self.tokenizer = AutoTokenizer.from_pretrained(pretrained_url)
            self.padding_per_batch = padding_per_batch
            self.tokenize(data)

        else:
            data = data_path

        # set columns for both training and inference
        #TODO: 테스트 데이터를 고려해서 유저 정보를 학습에 활용하지 않는 상황. 필요시 고쳐야 함
        self.cat_col    = ['Gender']
        self.num_col    = ['Age']
        self.label_cols = label_cols
        self.is_train   = is_train
        self.data       = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        selected_data = self.data.iloc[idx]

        cat_input = torch.tensor(selected_data[self.cat_col])                               # [batch size   x   # categorical features]
        num_input = torch.tensor(selected_data[self.num_col])                               # [batch size   x   # numerical features]

        sample              = selected_data['QandA']                                        # [batch size   x   sequence length]
        sample['cat_input'] = cat_input
        sample['num_input'] = num_input

        # Include label only for training cases
        if self.is_train:
            for col in self.label_cols:
              label = torch.tensor(selected_data[col])                            # [batch size   x   1]
              sample[col] = label

        return sample

    # ======================
    #    Helper Functions
    # ======================

    def fix_grammar(self, answer: str) -> str:
        answer = spell_checker.check(answer)
        return answer.checked

    def fix_spacing(self, answer: str) -> str:
        answer  = answer.replace(" ", '')
        spacing = Spacing()
        return spacing(answer)

    def remove_punctuation(self, answer: str) -> str:
        answer = re.sub(r'[@%\\*=()/~#&\+á?\xc3\xa1\-\|\.\:\;\!\-\,\_\~\$\'\"]', '', answer)
        answer = re.sub(r'\s+', ' ', answer)        # remove extra space
        answer = re.sub(r"^\s+", '', answer)        # remove space from start
        answer = re.sub(r'\s+$', '', answer)        # remove space from the end
        return answer

    def preprocess_txt(self, data: pd.DataFrame):
        try:
            print('=============== fix_grammar ===============')
            data['Answer'] = data['Answer'].apply(self.fix_grammar)         #FIXME: 해당 패키지의 서버가 가끔 응답 오류가 남...
        except:
            pass
        tqdm.pandas()
        print('=============== fix_spacing ===============')
        data['Answer'] = data['Answer'].progress_apply(self.fix_spacing)
        print('=============== remove_punctuation ===============')
        data['Answer'] = data['Answer'].progress_apply(self.remove_punctuation)

    def prepare_binary_classification(self, data: pd.DataFrame):
        one_list = ['E', 'N', 'F', 'P']
        zero_list = ['I', 'S', 'T', 'J']

        print('=============== prepare_binary_classification ===============')
        for idx, mbti in tqdm(enumerate(one_list)):
            data[mbti] = data['MBTI'].str               \
                .contains(mbti)                         \
                .replace({True: 1, False: 0})

            new_name = zero_list[idx] + '/' + mbti
            data.rename(columns = {mbti:new_name}, inplace=True)

    def tokenize(self, data: pd.DataFrame):
        
        #TODO: tokenizing tqdm 적용해두기
        def tokenize_per_sentence(series: pd.Series) -> str:
            selected_question = self.question_data.iloc[series['Q_number'] - 1].Question
            selected_answer = series['Answer']

            padding = False if self.padding_per_batch else 'longest'
            #TODO: 필요시 max_length 조절 필요
            return self.tokenizer(selected_question,
                                  selected_answer,
                                  padding=padding)

        tqdm.pandas()
        print('=============== tokenize ===============')
        data['QandA'] =  data.progress_apply(tokenize_per_sentence, axis=1)


In [4]:
# Garbage collect
import torch, gc
gc.collect()
torch.cuda.empty_cache()

# Prepare dataset

In [18]:
# Uncomment & run this cell only if there is no preprocessed data

# Dataset
# train_dataset = MBTIIDataset(
#     data_path           = env_dict['train_path'],
#     question_path       = env_dict['question_path'],
#     pretrained_url      = env_dict['pretrained_url'],
#     padding_per_batch   = env_dict['padding_per_batch'],
#     txt_preprocess      = True,
#     is_train            = True
# )

# print(len(train_dataset))
# print(train_dataset.data.head())



100%|██████████| 11520/11520 [10:07<00:00, 18.97it/s]




100%|██████████| 11520/11520 [00:00<00:00, 31081.21it/s]




4it [00:00, 65.24it/s]




100%|██████████| 11520/11520 [00:06<00:00, 1737.02it/s]

11520
   Data_ID  User_ID  Gender       Age  MBTI  Q_number                                             Answer  I/E  S/N  T/F  J/P                                        QandA
0        1        1       1 -0.372581  INFP         1  <아니다> 어릴 때 왕따 당한 경험이 있고 외부 활동을 좋아하지 않기 때문에 소수의...    0    1    1    1  [input_ids, token_type_ids, attention_mask]
1        2        1       1 -0.372581  INFP         2  <중립> 다양한 관심사를 탐구하진 않지만 대체로 자연과 역사에 관련된 것을 좋아하며...    0    1    1    1  [input_ids, token_type_ids, attention_mask]
2        3        1       1 -0.372581  INFP         3  <그렇다> 감정이입이 잘 되어 코미디 영화에서 사람이 울고 있을 때도 울기 때문에 ...    0    1    1    1  [input_ids, token_type_ids, attention_mask]
3        4        1       1 -0.372581  INFP         4  <중립> 대비책을 세우긴 하는데 세우다가 마는 편입니다일의 변수가 생길 수 있고 변...    0    1    1    1  [input_ids, token_type_ids, attention_mask]
4        5        1       1 -0.372581  INFP         5  <아니다> 평정심을 유지 못하는 편입니다 머릿속은 백지화가 된 상태로 말도 제대로 ...    0    1    1    1  [input_ids, token_




In [59]:
# Uncomment & run this cell only if there is no preprocessed data

# # save preprocessed pd.Dataframe & tokenizer as pickle data format

data_path = './data/' + 'base_data_2.pickle'
# with open(data_path, 'wb') as handle:
#     pickle.dump(train_dataset.data, handle)

# with open('./data/base_tokenizer.pickle', 'wb') as handle:
#     pickle.dump(train_dataset.tokenizer, handle)

In [60]:
# Load data as pd.DataFrame & tokenizer

with open(data_path, 'rb') as handle:
    df = pickle.load(handle)

with open('./data/base_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

train_dataset = MBTIIDataset(
    data_path           = df,
    question_path       = env_dict['question_path'],
    pretrained_url      = env_dict['pretrained_url'],
    padding_per_batch   = env_dict['padding_per_batch'],
    is_train            = True
)

print(len(train_dataset))
print(train_dataset.data.head())

11520
   Data_ID  User_ID  Gender       Age  MBTI  Q_number                                             Answer  I/E  S/N  T/F  J/P                                              QandA
0        1        1       1 -0.372581  INFP         1  <아니다> 어릴 때 왕따 당한 경험이 있고 외부 활동을 좋아하지 않기 때문에 소수의...    0    1    1    1  [input_ids, token_type_ids, attention_mask, ca...
1        2        1       1 -0.372581  INFP         2  <중립> 다양한 관심사를 탐구하진 않지만 대체로 자연과 역사에 관련된 것을 좋아하며...    0    1    1    1  [input_ids, token_type_ids, attention_mask, ca...
2        3        1       1 -0.372581  INFP         3  <그렇다> 감정이입이 잘 되어 코미디 영화에서 사람이 울고 있을 때도 울기 때문에 ...    0    1    1    1  [input_ids, token_type_ids, attention_mask, ca...
3        4        1       1 -0.372581  INFP         4  <중립> 대비책을 세우긴 하는데 세우다가 마는 편입니다일의 변수가 생길 수 있고 변...    0    1    1    1  [input_ids, token_type_ids, attention_mask, ca...
4        5        1       1 -0.372581  INFP         5  <아니다> 평정심을 유지 못하는 편입니다 머릿속은 백지화가 된 상태로 말도 제대로 ...    0    1

In [62]:
# define collator function when padding per batch is needed
#TODO: data_collator가 아닌 torch의 Packing 을 이용하는 것과 성능 비교가 필요함
data_collator = DataCollatorWithPadding(tokenizer=tokenizer) if env_dict['padding_per_batch'] else None

# Dataloader
train_dataloader = DataLoader(
    train_dataset,
    batch_size  = env_dict['batch_size'],
    shuffle     = env_dict['shuffle'],
    collate_fn  = data_collator
)

In [63]:
# Example result from dataloader
next(iter(train_dataloader))

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'input_ids': tensor([[    2,  7267, 11187,  ...,     0,     0,     0],
        [    2,  3936,  3641,  ...,     0,     0,     0],
        [    2,  3656,  3611,  ...,     0,     0,     0],
        ...,
        [    2,  4051,  4362,  ...,     0,     0,     0],
        [    2,  3656,  3611,  ...,     0,     0,     0],
        [    2,  3971,  3746,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), 'cat_input': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

# Prepare language model

In [64]:
# GPU preparation
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print('No GPU available, using CPU instead.')

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [81]:
from mlp import MLPClassifier

class BertWithMlp(BertForSequenceClassification):
    def __init__(
        self,
        config,
        input_dim = None,
        hidden_dim = None,
        num_classes = 2,
        dropout = 0.1
        ):

        # ====================
        #      BERT Setup
        # ====================

        # resulting BERT model is stored in 'self.bert'.
        super().__init__(config)

        self.num_labels = config.num_labels

        # 나중에 config 내부에 해당 field 값을 넣어주면 됨 (영상 참고)
        combined_feat_dim = config.text_feat_dim + config.cat_feat_dim + config.num_feat_dim
        print("combined_feat_dim :", combined_feat_dim)

        self.mlp = MLPClassifier(
            combined_feat_dim,
            None,
            hidden_dim,
            num_classes=num_classes,
            dropout=dropout
        )
        self.dropout = nn.Dropout(p=dropout, inplace=False)
        self.bn = nn.BatchNorm1d(config.num_feat_dim)

    def forward(
        self,
        input_ids = None,
        attention_mask = None,
        token_type_ids = None,
        position_ids = None,
        head_mask = None,
        inputs_embeds = None,
        labels = None,
        output_attentions = None,
        cat_feats = None,
        num_feats = None
    ):
        # ====================
        #     BERT forward
        # ====================
        #TODO: 더 많은 인자 추가해주기
        logits = self.bert(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask)

        cls = logits[1]
        print("logits :", logits[0].shape, logits[1].shape)
        # Apply dropout to cls
        cls = self.dropout(cls)
        # Apply batch normalization to numerical features
        # num_feats = self.bn(num_feats)

        print("cls shape :", cls.shape)
        print("cat shape :", cat_feats.shape)
        print("num shape :", num_feats.shape)

        # ====================
        #      MLP forward
        # ====================
        all_feats = torch.cat((cls, cat_feats.view(-1, 1), num_feats.view(-1, 1)), dim=1)
        output = self.mlp(all_feats)

        return output


In [82]:
# Update config file
from transformers import BertConfig

#TODO: num_labels 인자가 필요한지 알아봐야 함
config = BertConfig.from_pretrained(
    env_dict['pretrained_url'],
    num_labels = 2
  )

config.num_feat_dim = 1
config.cat_feat_dim = 1
config.text_feat_dim = config.hidden_size

print(config)

BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "cat_feat_dim": 1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_feat_dim": 1,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "text_feat_dim": 768,
  "transformers_version": "4.27.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 32000
}



In [83]:
# Prepare model
model = BertWithMlp.from_pretrained(
    env_dict['pretrained_url'],
    config      = config,
    hidden_dim  = env_dict['hidden_dim'],
    dropout     = env_dict['dropout']
    )
model.cuda()

# Apply weight decaying except for bias & layer normalization term
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

# Prepare optimizer
optimizer = AdamW(optimizer_grouped_parameters, lr=env_dict['lr'])

combined_feat_dim : 770


Some weights of the model checkpoint at klue/bert-base were not used when initializing BertWithMlp: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertWithMlp from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertWithMlp from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertWithMlp were not initialized from the model checkpoint at klue/bert-base and are newly initialized: 

In [84]:
# Freeze Encoder, use head's parameters only
free_encoder = True

if free_encoder:
  for param in model.base_model.parameters():
    param.requires_grad = False

# Train

In [70]:
def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [71]:
# Set the target of training right before training loop
env_dict['target_of_training'] = 'I/E'

In [85]:
# Training Loop for 영도
model.train()

for epoch in range(env_dict['epoch']):
  t0 = time.time()
  epoch_loss = 0
  for step, batch in tqdm(enumerate(train_dataloader)):

    input_ids       = batch['input_ids'].to(device)
    token_type_ids  = batch['token_type_ids'].to(device)
    attention_mask  = batch['attention_mask'].to(device)
    cat_input       = batch['cat_input'].to(device)
    num_input       = batch['num_input'].to(device)
    labels  = batch[env_dict['target_of_training']].to(device)

    # Clear prior gradients
    model.zero_grad()

    # Forward
    output = model(input_ids,
                    token_type_ids=token_type_ids,
                    attention_mask=attention_mask,
                    cat_feats=cat_input,
                    num_feats=num_input)

    # Calculate loss - CELoss 함수 짜서 실험 좀 부탁해유~

    print(output)

    break


    print("Step loss: {0:.2f}".format(loss))

    loss.backward()
    optimizer.step()

  avg_train_loss = total_loss / len(train_dataloader)
  print("Epoch loss: {0:.2f}".format(loss))
  print("Training epoch took: {:}".format(format_time(time.time() - t0)))

0it [00:00, ?it/s]

logits : torch.Size([64, 104, 768]) torch.Size([64, 768])
cls shape : torch.Size([64, 768])
cat shape : torch.Size([64])
num shape : torch.Size([64])





RuntimeError: ignored

# Save trained model

In [None]:
# Save
#TODO: haperparams가 이름에 드러날 수 있는 저장경로 생각해보기
save_path = './models/' + env_dict['lm'] + 'with' + env_dict['classifier'] + '.pt'
torch.save(model.state_dict(), save_path)

In [None]:
# 저장된 모델을 다른 파일에 불러와서 Test 하길 권장! (test.py 만들어도 좋아)

# K-Fold Cross Validation (수정 중)

In [None]:
# Training with cross validation (ref : https://velog.io/@pppanghyun/6.-%EA%B5%90%EC%B0%A8-%EA%B2%80%EC%A6%9DCross-Validation)
#TODO: Scheduler, Gradient Clipping

kfold     = KFold(n_splits=5, shuffle=True)
criterion = torch.nn.MSELoss()

validation_loss = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_dataset)):

    #TODO: Fold 끼리 겹칠 수 있음. 겹치지 않는 방식 고려 필요
    # Make indices for both training and validation
    train_subsampler  = SubsetRandomSampler(train_idx)
    val_subsampler    = SubsetRandomSampler(val_idx)

    # Define dataloader using sampler
    train_dataloder = DataLoader(
        train_dataset,
        batch_size  = env_dict['batch_size'],
        shuffle     = env_dict['shuffle'],
        sampler     = train_subsampler,
        collate_fn  = data_collator
    )
    val_dataloder   = DataLoader(
        train_dataset,
        batch_size  = env_dict['batch_size'],
        shuffle     = env_dict['shuffle'],
        sampler     = val_subsampler,
        collate_fn  = data_collator
    )

    # ===================
    #    Training Loop
    # ===================

    optimizer = AdamW(model.parameters(), lr=env_dict['lr'], weight_decay=env_dict['decay_rate'])

    model.train()

    for epoch in range(env_dict['epoch']):
      t0 = time.time()
      epoch_loss = 0
      for step, batch in tqdm(enumerate(train_dataloader)):

        input_ids       = batch['input_ids'].to(device)
        token_type_ids  = batch['token_type_ids'].to(device)
        attention_mask  = batch['attention_mask'].to(device)
        labels  = batch[env_dict['target_of_training']].to(device)

        # Clear prior gradients
        model.zero_grad()

        # Forward
        output = model(input_ids,
                       token_type_ids=token_type_ids,
                       attention_mask=attention_mask,
                       cat_feats=batch['cat_input'],
                       num_feats=batch['num_input'])

        # Calculate loss

        # loss    = outputs.loss     # Default : CELoss

        print("Step loss: {0:.2f}".format(loss))

        loss.backward()
        optimizer.step()
      
      avg_train_loss = total_loss / len(train_dataloader)
      print("Epoch loss: {0:.2f}".format(loss))
      print("Training epoch took: {:}".format(format_time(time.time() - t0)))

#     train_rmse = evaluation(trainloader) # 학습 데이터의 RMSE
#     val_rmse = evaluation(valloader)
#     print("k-fold", fold," Train Loss: %.4f, Validation Loss: %.4f" %(train_rmse, val_rmse)) 
#     validation_loss.append(val_rmse)

## Calculate validation score

# validation_loss = np.array(validation_loss)
# mean = np.mean(validation_loss)
# std = np.std(validation_loss)
# print("Validation Score: %.4f, ± %.4f" %(mean, std))