In [None]:
!pip install mxnet
!pip install gluonnlp pandas tqdm
!pip install sentencepiece
!pip install transformers==3
!pip install torch

In [None]:
!pip install git+https://git@github.com/SKTBrain/KoBERT.git@master

In [3]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, tqdm_notebook

In [4]:
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model

In [5]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [6]:
# GPU
device = torch.device("cuda:0")

In [8]:
bertmodel, vocab = get_pytorch_kobert_model()

/content/.cache/kobert_v1.zip[██████████████████████████████████████████████████]
/content/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece[██████████████████████████████████████████████████]


In [9]:
# google drive mount

from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


In [24]:
# 데이터 불러오기

import pandas as pd

# dataset = pd.read_table('/gdrive/My Drive/KoBERT/data/data_final.txt')
dataset = pd.read_csv('/gdrive/My Drive/KoBERT/data/data_mecab.csv')

In [25]:
dataset

Unnamed: 0,sentence,emotion
0,제 감정 이상 해진 것 같 남편 보 화 치밀 오르 감정 조절 안 되,9
1,더 이상 내 감정 내 컨트롤 못 하,4
2,하루 종일 오르락내리락 롤러코스터 타 기분 이,4
3,꼭 롤러코스터 타 것 같,2
4,롤러코스터 타 것 기분 왔 갔 해요,5
...,...,...
51235,부동산 임대 소득 현재 여유 롭 살 수 있 좋,1
51236,폐결핵 이미 완치 된 것 같 약 한 달 더 먹 하 아직 안 나 게 아닌지 걱정 돼,7
51237,연애 하 싶 소개팅 나가 꽝 이 이러 난 결혼 못 하 늙 죽 거 야,4
51238,은행 대출 막혀서 생활비 구할 수 없 이제 어떻게 살 하나 막막 해,9


In [27]:
data_list = []
for q, label in zip(dataset['sentence'], dataset['emotion']):
  data = []
  data.append(q)
  data.append(str(label))

  data_list.append(data)

In [28]:
# train/test 분리

from sklearn.model_selection import train_test_split

train, test = train_test_split(data_list, test_size=0.2, random_state=42)
print("train:", len(train))
print("test:", len(test))

train: 40992
test: 10248


In [29]:
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

using cached model. /content/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [15]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len, pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair) 

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return (len(self.labels))

In [35]:
# Setting parameters
max_len = 128
batch_size = 64
warmup_ratio = 0.1
num_epochs = 20
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-5

In [30]:
data_train = BERTDataset(train, 0, 1, tok, max_len, True, False)
data_test = BERTDataset(test, 0, 1, tok, max_len, True, False)

In [31]:
train_dataloader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, num_workers=5)
test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=5)

  cpuset_checked))


In [32]:
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes = 10,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)
      
model = BERTClassifier(bertmodel, dr_rate=0.5).to(device)

In [33]:
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

In [21]:
# 옵티마이저 선언
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss() # softmax용 Loss Function 정하기 <- binary classification도 해당 loss function 사용 가능

t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

In [22]:
# 학습 평가 지표인 accuracy 계산 -> 얼마나 타겟값을 많이 맞추었는가
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

In [36]:
# 모델 학습 시작
for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm) # gradient clipping
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    
    model.eval() # 평가 모드로 변경
    
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm_notebook(test_dataloader)):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  import sys


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.8744150400161743 train acc 0.46875
epoch 1 batch id 201 loss 0.9101685285568237 train acc 0.7199937810945274
epoch 1 batch id 401 loss 0.8114732503890991 train acc 0.7495324189526185
epoch 1 batch id 601 loss 0.5110966563224792 train acc 0.77051268718802
epoch 1 train acc 0.7745709828393136


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 1 test acc 0.7224378881987578


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.0183757543563843 train acc 0.71875
epoch 2 batch id 201 loss 0.7480026483535767 train acc 0.7902674129353234
epoch 2 batch id 401 loss 0.720160186290741 train acc 0.8089541770573566
epoch 2 batch id 601 loss 0.39360663294792175 train acc 0.822067387687188
epoch 2 train acc 0.8243710998439937


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 2 test acc 0.7329192546583851


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 0.9627354145050049 train acc 0.734375
epoch 3 batch id 201 loss 0.7382872104644775 train acc 0.8233830845771144
epoch 3 batch id 401 loss 0.6733642220497131 train acc 0.8375545511221946
epoch 3 batch id 601 loss 0.44142845273017883 train acc 0.8461678452579035
epoch 3 train acc 0.8479426677067082


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 3 test acc 0.7407802795031055


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.9824383854866028 train acc 0.71875
epoch 4 batch id 201 loss 0.7487949728965759 train acc 0.8349657960199005
epoch 4 batch id 401 loss 0.6308459043502808 train acc 0.8453475685785536
epoch 4 batch id 601 loss 0.42691299319267273 train acc 0.8524334442595674
epoch 4 train acc 0.8539147815912637


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 4 test acc 0.7397127329192547


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.8400049209594727 train acc 0.75
epoch 5 batch id 201 loss 0.7038241624832153 train acc 0.8388526119402985
epoch 5 batch id 401 loss 0.6435211300849915 train acc 0.8472178927680798
epoch 5 batch id 601 loss 0.3752870559692383 train acc 0.8525634359400999
epoch 5 train acc 0.8541097893915757


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 5 test acc 0.7328222049689441


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 0.8977912664413452 train acc 0.796875
epoch 6 batch id 201 loss 0.6185299754142761 train acc 0.8325559701492538
epoch 6 batch id 401 loss 0.5891726613044739 train acc 0.8447241271820449
epoch 6 batch id 601 loss 0.331173300743103 train acc 0.8531094009983361
epoch 6 train acc 0.8544998049921997


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 6 test acc 0.7226319875776398


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 0.8266000151634216 train acc 0.75
epoch 7 batch id 201 loss 0.5569044947624207 train acc 0.8387748756218906
epoch 7 batch id 401 loss 0.5283476114273071 train acc 0.8500623441396509
epoch 7 batch id 601 loss 0.2707606256008148 train acc 0.8585170549084858
epoch 7 train acc 0.8602037831513261


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 7 test acc 0.7128299689440993


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 0.7678354382514954 train acc 0.828125
epoch 8 batch id 201 loss 0.5849301218986511 train acc 0.8442164179104478
epoch 8 batch id 401 loss 0.54410320520401 train acc 0.8544653990024937
epoch 8 batch id 601 loss 0.2999778389930725 train acc 0.861662853577371
epoch 8 train acc 0.8627632605304212


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 8 test acc 0.6959433229813664


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 0.9379571676254272 train acc 0.765625
epoch 9 batch id 201 loss 0.5622926950454712 train acc 0.8528451492537313
epoch 9 batch id 401 loss 0.47852274775505066 train acc 0.8624922069825436
epoch 9 batch id 601 loss 0.29042935371398926 train acc 0.8687603993344426
epoch 9 train acc 0.8701979329173167


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 9 test acc 0.6767274844720497


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 0.8443938493728638 train acc 0.765625
epoch 10 batch id 201 loss 0.49837109446525574 train acc 0.8566542288557214
epoch 10 batch id 401 loss 0.4627828598022461 train acc 0.8648690773067331
epoch 10 batch id 601 loss 0.1930885761976242 train acc 0.8712822379367721
epoch 10 train acc 0.8725867784711389


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 10 test acc 0.6805124223602484


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 0.7850670218467712 train acc 0.8125
epoch 11 batch id 201 loss 0.6016894578933716 train acc 0.8662157960199005
epoch 11 batch id 401 loss 0.3826838731765747 train acc 0.8746882793017456
epoch 11 batch id 601 loss 0.34353891015052795 train acc 0.8783277870216306
epoch 11 train acc 0.8799726989079563


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 11 test acc 0.6751746894409938


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 0.7763394117355347 train acc 0.8125
epoch 12 batch id 201 loss 0.5111217498779297 train acc 0.8811411691542289
epoch 12 batch id 401 loss 0.4715305268764496 train acc 0.8872350374064838
epoch 12 batch id 601 loss 0.2843920886516571 train acc 0.8915869384359401
epoch 12 train acc 0.891965678627145


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 12 test acc 0.6660520186335404


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 13 batch id 1 loss 0.5188632607460022 train acc 0.875
epoch 13 batch id 201 loss 0.3454277515411377 train acc 0.8966884328358209
epoch 13 batch id 401 loss 0.3774476647377014 train acc 0.9024703865336658
epoch 13 batch id 601 loss 0.29281631112098694 train acc 0.9049500831946755
epoch 13 train acc 0.905981864274571


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 13 test acc 0.641983695652174


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 14 batch id 1 loss 0.5834866762161255 train acc 0.859375
epoch 14 batch id 201 loss 0.3517748713493347 train acc 0.9100590796019901
epoch 14 batch id 401 loss 0.2857697010040283 train acc 0.9180174563591023
epoch 14 batch id 601 loss 0.15183749794960022 train acc 0.9200811148086523
epoch 14 train acc 0.920339313572543


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 14 test acc 0.6527562111801242


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 15 batch id 1 loss 0.4526015520095825 train acc 0.890625
epoch 15 batch id 201 loss 0.24956072866916656 train acc 0.9357898009950248
epoch 15 batch id 401 loss 0.27087146043777466 train acc 0.9410068578553616
epoch 15 batch id 601 loss 0.09993461519479752 train acc 0.9430116472545758
epoch 15 train acc 0.9433989859594384


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 15 test acc 0.6492624223602484


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 16 batch id 1 loss 0.5109366178512573 train acc 0.90625
epoch 16 batch id 201 loss 0.2302553653717041 train acc 0.9507929104477612
epoch 16 batch id 401 loss 0.16925856471061707 train acc 0.9556577306733167
epoch 16 batch id 601 loss 0.1244409829378128 train acc 0.9577527038269551
epoch 16 train acc 0.9579758190327613


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 16 test acc 0.6549883540372671


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 17 batch id 1 loss 0.4315446615219116 train acc 0.9375
epoch 17 batch id 201 loss 0.1716277152299881 train acc 0.9654073383084577
epoch 17 batch id 401 loss 0.08738712966442108 train acc 0.9684382793017456
epoch 17 batch id 601 loss 0.12268802523612976 train acc 0.9694519550748752
epoch 17 train acc 0.9696031591263651


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 17 test acc 0.6632375776397516


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 18 batch id 1 loss 0.32436925172805786 train acc 0.953125
epoch 18 batch id 201 loss 0.21510034799575806 train acc 0.974735696517413
epoch 18 batch id 401 loss 0.09640687704086304 train acc 0.9779847256857855
epoch 18 batch id 601 loss 0.0070385099388659 train acc 0.9785513727121464
epoch 18 train acc 0.9784028861154446


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 18 test acc 0.6710986024844721


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 19 batch id 1 loss 0.3044425845146179 train acc 0.953125
epoch 19 batch id 201 loss 0.26234567165374756 train acc 0.982353855721393
epoch 19 batch id 401 loss 0.05665682256221771 train acc 0.9836736284289277
epoch 19 batch id 601 loss 0.058916978538036346 train acc 0.9839070299500832
epoch 19 train acc 0.983765600624025


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 19 test acc 0.6738159937888198


  0%|          | 0/641 [00:00<?, ?it/s]

epoch 20 batch id 1 loss 0.31514042615890503 train acc 0.9375
epoch 20 batch id 201 loss 0.10832855850458145 train acc 0.9855410447761194
epoch 20 batch id 401 loss 0.014529179781675339 train acc 0.9865960099750624
epoch 20 batch id 601 loss 0.008914564736187458 train acc 0.98681884359401
epoch 20 train acc 0.9864957098283932


  0%|          | 0/161 [00:00<?, ?it/s]

epoch 20 test acc 0.6743982919254659


In [38]:
def predict(predict_sentence):

    data = [predict_sentence, '0']
    dataset_another = [data]

    another_test = BERTDataset(dataset_another, 0, 1, tok, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(another_test, batch_size=batch_size, num_workers=5)
    
    model.eval()

    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)

        valid_length= valid_length
        label = label.long().to(device)

        out = model(token_ids, valid_length, segment_ids)


        test_eval=[]
        for i in out:
            logits=i
            logits = logits.detach().cpu().numpy()

            if np.argmax(logits) == 0:
                test_eval.append("괴로움")
            elif np.argmax(logits) == 1:
                test_eval.append("기쁨")
            elif np.argmax(logits) == 2:
                test_eval.append("긴장됨")
            elif np.argmax(logits) == 3:
                test_eval.append("놀람")
            elif np.argmax(logits) == 4:
                test_eval.append("비참함")
            elif np.argmax(logits) == 5:
                test_eval.append("우울함")
            elif np.argmax(logits) == 6:
                test_eval.append("중립")
            elif np.argmax(logits) == 7:
                test_eval.append("지루함")
            elif np.argmax(logits) == 8:
                test_eval.append("피로함")
            elif np.argmax(logits) == 9:
                test_eval.append("화남")

        print(">> 입력하신 내용에서 " + test_eval[0] + "이 느껴집니다.\n")

In [None]:
!curl -s https://raw.githubusercontent.com/teddylee777/machine-learning/master/99-Misc/01-Colab/mecab-colab.sh | bash

In [55]:
from konlpy.tag import Mecab

m = Mecab()
remove_tags = ['JKS', 'JKC', 'JKG', 'JKO', 'JKB', 'JKV', 'JKQ', 'JX', 'JC', 'EP', 'EF', 'EC', 'ETN', 'ETM']

def remove_mecab(sentence):
    result = ''
    removed = [x[0] for x in m.pos(sentence) if x[1] not in remove_tags]
    result = ' '.join(removed)
    return result

In [59]:
m.pos('친구는 여행갔는데 나는 일만해')

[('친구', 'NNG'),
 ('는', 'JX'),
 ('여행', 'NNG'),
 ('갔', 'VV+EP'),
 ('는데', 'EC'),
 ('나', 'NP'),
 ('는', 'JX'),
 ('일', 'NNG'),
 ('만', 'JX'),
 ('해', 'VV+EC')]

In [58]:
while True:
    input_sentence = input("챗봇에게 하고싶은 말을 입력해주세요 : ")
    if input_sentence == "종료" :
        break
    # predict(input_sentence)
    sentence = input_sentence.split('.')
    for i in range(len(sentence)):
      if sentence[i] != '':
        removed_sentence = ''
        removed_sentence = remove_mecab(sentence[i])
        # print(removed_sentence)
        predict(removed_sentence)
        
    print("\n")

챗봇에게 하고싶은 말을 입력해주세요 : 아침부터 차도 막히고 일도 많고 힘들어
>> 입력하신 내용에서 우울함이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 어제도 오늘같고 오늘도 내일같고 매일매일이 똑같은 상황. 옆에 있는 친구는 여행도 가고 신나게 잘 사는데, 나는 매일 힘들고 지치네
>> 입력하신 내용에서 괴로움이 느껴집니다.

>> 입력하신 내용에서 기쁨이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 어제도 오늘같고 오늘도 내일같고 매일매일이 똑같은 상황이야 옆에 있는 친구는 여행도 가고 신나게 잘 사는데, 나는 매일 힘들고 지치네
>> 입력하신 내용에서 기쁨이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 친구는 여행갔는데 나는 일만해
>> 입력하신 내용에서 기쁨이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 옆에서 상사가 자꾸 머라고 잔소리 하네
>> 입력하신 내용에서 화남이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 누가 자꾸 내 발을 밞고 지나가
>> 입력하신 내용에서 화남이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 하루종일 회의가 연속이네
>> 입력하신 내용에서 지루함이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 멀 잘못 먹었는지 계속 속이 더부룩해
>> 입력하신 내용에서 중립이 느껴집니다.



챗봇에게 하고싶은 말을 입력해주세요 : 속이 더부룩하고 답답해
>> 입력하신 내용에서 괴로움이 느껴집니다.





KeyboardInterrupt: ignored

In [37]:
tok('오늘 정말 힘든 하루였어')

['▁오늘', '▁정말', '▁힘든', '▁하루', '였', '어']

In [40]:
tok('죽고싶다')

['▁죽', '고', '싶', '다']

In [41]:
tok('죽고싶어')

['▁죽', '고', '싶', '어']

In [48]:
tok.convert_tokens_to_ids(tok('죽고싶어'))

[4244, 5439, 6751, 6855]

In [49]:
tok.convert_tokens_to_ids(tok('오늘 정말 힘든 하루였어'))

[3419, 4102, 5212, 4937, 6944, 6855]