In [22]:
import pathlib

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
import pandas as pd

from models.kobert.utils import get_tokenizer
from models.kobert.pytorch_kobert import get_pytorch_kobert_model

##GPU 사용 시
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

bertmodel, vocab = get_pytorch_kobert_model()

d = {"Surprise" : 7, "Happiness": 6, "Disgust": 5, "Neutral": 4, "Fear": 3, "Angry": 2, "Sadness": 1}

tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return (len(self.labels))

## Setting parameters
max_len = 64
batch_size = 64
warmup_ratio = 0.1
num_epochs = 5
max_grad_norm = 1
log_interval = 200
learning_rate =  5e-5

class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=7,
                 dr_rate=None,
                 paras=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

model = torch.load('checkpoints/kobert_emotion_classification.pth')


# 테스트 문장 예측
# def test(test_sentence):
test_sentence = "기분 최고야"
test_label = 7 # 실제 질병

unseen_test = pd.DataFrame([[test_sentence, test_label]], columns = [['발화문', '상황']])
unseen_values = unseen_test.values
test_set = BERTDataset(unseen_values, 0, 1, tok, max_len, True, False)
test_input = DataLoader(test_set, batch_size=1, num_workers=5)

for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_input):
    token_ids = token_ids.long().to(device)
    segment_ids = segment_ids.long().to(device)
    valid_length= valid_length
    out = model(token_ids, valid_length, segment_ids)
    print(out)

ModuleNotFoundError: No module named 'gluonnlp'

In [20]:
!deactivate

In [21]:
!pip list

Package           Version
----------------- -----------
certifi           2020.12.5
chardet           3.0.4
click             7.1.2
Cython            0.29.23
filelock          3.0.12
gluonnlp          0.10.0
graphviz          0.8.4
idna              2.6
joblib            1.0.1
mxnet             1.7.0.post2
numpy             1.16.6
packaging         20.9
pandas            1.2.4
pip               21.1.1
pyparsing         2.4.7
python-dateutil   2.8.1
pytz              2021.1
regex             2021.4.4
requests          2.18.4
sacremoses        0.0.45
sentencepiece     0.1.95
setuptools        41.2.0
six               1.16.0
tokenizers        0.8.1rc1
torch             1.8.1+cu102
tqdm              4.60.0
transformers      3.0.2
typing-extensions 3.10.0.0
urllib3           1.22
