In [73]:
import sentencepiece as spm
import pandas as pd
import torch
import torch.utils.data as data
from torch.utils.data import DataLoader, random_split
from torch import nn
import time
import gc

# Load the pretrained SentencePiece model that later cells use to turn
# sentences into token ids.
vocab_file = "VocabModel/kowiki.model"
vocab = spm.SentencePieceProcessor()
vocab.load(vocab_file)
# Show the size reported by the loaded processor.
print(len(vocab))

8007


In [79]:
# Fixed number of token ids per sample; also passed to the model as its input size.
max_size = 128
# Number of samples per DataLoader batch.
batch_size = 512

class CustomDataSetPadding(data.Dataset):
    """Dataset of (token ids, one-hot emotion label) pairs read from a CSV file.

    The CSV must provide a ``Sentence`` column with the raw text and an
    ``Emotion`` column holding a bracketed, comma-separated list of scores
    such as ``"[0.1, 0.7, ...]"``. The position of the highest score is the
    label of the row.

    Args:
        file_name: Path of the CSV file, handed to ``pandas.read_csv``.
        tokenizer: Object exposing ``encode_as_ids(text) -> list[int]``. When
            ``None`` the module-level ``vocab`` is looked up at access time.
        max_len: Fixed length of every returned id sequence. When ``None``
            the module-level ``max_size`` is looked up at access time.
    """

    # Length of the one-hot label vector.
    num_classes = 7
    # Id written in front of short sequences (left zero padding).
    pad_id = 0

    def __init__(self, file_name, tokenizer=None, max_len=None):
        self.data = pd.read_csv(file_name)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(ids, label)`` for the row at position ``index``.

        Returns:
            ids: integer tensor of shape ``(1, max_len)``; the token ids of the
                sentence, truncated to ``max_len`` and left-padded with
                ``pad_id``.
            label: integer tensor of shape ``(num_classes,)`` with a single 1
                at the position of the highest emotion score.
        """
        # Fall back to the notebook globals only when nothing was injected.
        tokenizer = self.tokenizer if self.tokenizer is not None else vocab
        limit = self.max_len if self.max_len is not None else max_size

        # Positional lookup, consistent with __len__.
        row = self.data.iloc[index]

        # "[a, b, c]" -> [a, b, c]; float() tolerates surrounding whitespace.
        scores = [float(value) for value in row["Emotion"][1:-1].split(",")]

        # Convert the label to one-hot form.
        label = [0] * self.num_classes
        label[scores.index(max(scores))] = 1

        # Split the sentence into sub-word ids and keep at most `limit` of them
        # (the previous slice kept limit - 1 and silently dropped one token).
        ids = tokenizer.encode_as_ids(row["Sentence"])[:limit]
        # Left-pad in one step instead of repeated insert(0, ...) calls.
        ids = [self.pad_id] * (limit - len(ids)) + ids

        return torch.tensor([ids]), torch.tensor(label)

# Build the zero-padded dataset and split it 80/10/10 into train/validation/test.
dataset = CustomDataSetPadding("datasets/original data/음성대화모음집.csv")
total_size = len(dataset)
validation_size = int(total_size * 0.1)
test_size = int(total_size * 0.1)
# Train receives the remainder so the three sizes always add up to the dataset size.
train_size = total_size - validation_size - test_size

# A fixed seed keeps split membership identical across kernel restarts, so a
# model trained in an earlier run is never evaluated on samples it trained on.
split_seed = 42
train_dataset, validation_dataset, test_dataset = random_split(
    dataset,
    [train_size, validation_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not depend on sample order, so these loaders are not shuffled.
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("train dataset size: ", len(train_dataset))
print("validation dataset size: ", len(validation_dataset))
print("test dataset size: ", len(test_dataset))

train dataset size:  46498
test dataset size:  5812
train dataset size:  46498
validation dataset size:  5812
test dataset size:  5812


In [80]:
# Select the compute device and report what is available.
cuda_available = torch.cuda.is_available()
device = "cuda" if cuda_available else "cpu"
# get_device_name() raises on machines without CUDA, so only query it when a GPU exists.
device_name = torch.cuda.get_device_name(0) if cuda_available else "CPU"
device_count = torch.cuda.device_count()

print(f"device: {device}\ndevice name: {device_name}\ndevice count: {device_count}")

device: cuda
device name: NVIDIA GeForce RTX 3070 Laptop GPU
device count: 1


In [81]:
class LSTMCell(nn.Module):
    """Three chained ``nn.LSTM`` blocks followed by a linear output layer.

    Every block is built with the same ``nn.LSTM`` options; the final hidden
    and cell state of one block is handed to the next block as its initial
    state.

    Args:
        input_dim: Feature size of the input fed to the first block.
        emb_dim: Hidden size of every block.
        output_dim: Feature size produced by the linear layer.
        num_layers, bias, batch_first, dropout, bidirectional, proj_size:
            Forwarded unchanged to each ``nn.LSTM``.
    """

    def __init__(self, input_dim, emb_dim, output_dim, num_layers, bias, batch_first, dropout, bidirectional, proj_size):
        super().__init__()

        # Options shared by all three blocks.
        shared = dict(hidden_size=emb_dim,
                      num_layers=num_layers,
                      bias=bias,
                      batch_first=batch_first,
                      dropout=dropout,
                      bidirectional=bidirectional,
                      proj_size=proj_size)

        # Feature width an nn.LSTM emits: proj_size replaces hidden_size when
        # projections are enabled, and both directions are concatenated when
        # bidirectional. Hard-coding emb_dim here broke those configurations;
        # for bidirectional=False and proj_size=0 this is still emb_dim.
        directions = 2 if bidirectional else 1
        feature_dim = directions * (proj_size if proj_size > 0 else emb_dim)

        self.LSTM1 = nn.LSTM(input_size=input_dim, **shared)
        self.LSTM2 = nn.LSTM(input_size=feature_dim, **shared)
        self.LSTM3 = nn.LSTM(input_size=feature_dim, **shared)

        self.linear = nn.Linear(feature_dim, output_dim)

    def forward(self, input):
        """Run ``input`` through the three blocks and the linear layer.

        The linear layer is applied to the full output sequence of the last
        block (not only to its final hidden state), so the result keeps the
        sequence dimension of ``input``.
        """
        output, state = self.LSTM1(input)
        output, state = self.LSTM2(output, state)
        output, _ = self.LSTM3(output, state)
        return self.linear(output)

In [82]:
# Optimisation settings
lr = 0.001
epochs = 250

# Settings forwarded to every nn.LSTM inside the model
emb_dim = 512
out_dim = 7
num_layers = 1
bias = True
batch_first = True  # tensors are laid out as (N, L, H_in): batch size, sequence length, input size
dropout = 0
bidirectional = False
proj_size = 0

# Build the network with explicit keywords, then move it to the selected device.
model = LSTMCell(
    input_dim=max_size,
    emb_dim=emb_dim,
    output_dim=out_dim,
    num_layers=num_layers,
    bias=bias,
    batch_first=batch_first,
    dropout=dropout,
    bidirectional=bidirectional,
    proj_size=proj_size,
)
model = model.to(device)

# Pull the first training batch to inspect tensor shapes and contents.
batch, (text, emo) = next(enumerate(train_dataloader))

print("sample input size: ", text.size())
print(text)
print("sample label size: ", emo.size())
print(emo)

sample input size:  torch.Size([512, 1, 128])
tensor([[[   0,    0,    0,  ...,   32,  849, 4311]],

        [[   0,    0,    0,  ..., 4313, 3639, 4311]],

        [[   0,    0,    0,  ...,  513, 3663, 3629]],

        ...,

        [[   0,    0,    0,  ..., 3350, 3663, 3629]],

        [[   0,    0,    0,  ..., 3807,  302, 4311]],

        [[   0,    0,    0,  ..., 6051, 3659, 3629]]])
sample label size:  torch.Size([512, 7])
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 1,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 1, 0, 0],
        ...,
        [0, 0, 0,  ..., 1, 0, 0],
        [0, 1, 0,  ..., 0, 0, 0],
        [0, 1, 0,  ..., 0, 0, 0]])


In [83]:
# NOTE(review): model_dict is not referenced again in the visible cells — confirm before removing.
model_dict = 0
# Release cached GPU memory and run a garbage-collection pass before training starts.
torch.cuda.empty_cache()
gc.collect()
# NOTE(review): nothing in the visible cells appends to test_loss2 — confirm it is still needed.
test_loss2 = []

def train(train_dataloader, val_dataloader, model, epochs, lr, save_path=None):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    After each epoch one line is printed with the mean training loss, the
    validation accuracy (fraction of samples whose arg-max prediction matches
    the arg-max of the label) and the elapsed time.

    Args:
        train_dataloader: Sized iterable of ``(text, emo)`` training batches.
        val_dataloader: Iterable of ``(text, emo)`` validation batches.
        model: Network to optimise. Batches are moved to the device of its
            parameters, so it must already live on the target device.
        epochs: Number of passes over ``train_dataloader``.
        lr: Adam learning rate.
        save_path: Optional checkpoint path. When given, ``model.state_dict()``
            is written there every time the validation accuracy improves.
            Defaults to ``None`` (no checkpoint is written).

    Returns:
        None. The model is left in evaluation mode.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Follow the model's own device rather than relying on a notebook global.
    model_device = next(model.parameters()).device
    best_accuracy = 0
    total_batch = len(train_dataloader)

    for epoch in range(epochs):
        now = time.time()
        avg_loss = 0.0

        # Validation below switches to eval mode, so re-enable train mode each epoch.
        model.train()
        for text, emo in train_dataloader:
            text = text.to(device=model_device, dtype=torch.float32)
            emo = emo.to(device=model_device, dtype=torch.float32)
            optimizer.zero_grad()

            pred = torch.squeeze(model(text), 1)
            loss = criterion(pred, emo)

            loss.backward()
            optimizer.step()

            # .item() detaches the value; accumulating the tensor itself would
            # keep every batch's autograd graph alive until the epoch ends.
            avg_loss += loss.item() / total_batch

        correct = 0
        count = 0
        model.eval()
        with torch.no_grad():
            for text, emo in val_dataloader:
                text = text.to(device=model_device, dtype=torch.float32)
                emo = emo.to(device=model_device, dtype=torch.float32)
                pred = torch.squeeze(model(text), 1)
                # One vectorised comparison per batch instead of a Python loop
                # (and a device sync) per sample.
                hits = pred.flatten(1).argmax(dim=1) == emo.flatten(1).argmax(dim=1)
                correct += hits.sum().item()
                count += pred.size(0)

        # An empty validation loader reports 0 instead of dividing by zero.
        accuracy = correct / count if count else 0.0
        print("[Epoch: {:>4}] \t loss = {:.4f} \t correct {:.4f} \t time = {:.4f}"
              .format(epoch + 1, avg_loss, accuracy, time.time() - now))

        # Keep the best-scoring weights, only when the caller asked for it.
        if save_path is not None and accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), save_path)
# Run training; one line with loss, validation accuracy and time is printed per epoch.
train(train_dataloader, validation_dataloader, model, epochs, lr)

[Epoch:    1] 	 loss = 1.6263 	 correct 0.3828 	 time = 6.1800
[Epoch:    2] 	 loss = 1.5742 	 correct 0.3976 	 time = 5.9640
[Epoch:    3] 	 loss = 1.5642 	 correct 0.4028 	 time = 5.9701
[Epoch:    4] 	 loss = 1.5469 	 correct 0.4061 	 time = 5.8519
[Epoch:    5] 	 loss = 1.5320 	 correct 0.4085 	 time = 6.0040
[Epoch:    6] 	 loss = 1.5258 	 correct 0.3987 	 time = 5.9090
[Epoch:    7] 	 loss = 1.5133 	 correct 0.4246 	 time = 5.9090
[Epoch:    8] 	 loss = 1.4975 	 correct 0.4289 	 time = 6.0170
[Epoch:    9] 	 loss = 1.4805 	 correct 0.4355 	 time = 5.8917
[Epoch:   10] 	 loss = 1.4600 	 correct 0.4424 	 time = 5.9731
[Epoch:   11] 	 loss = 1.4449 	 correct 0.4491 	 time = 6.0270
[Epoch:   12] 	 loss = 1.4217 	 correct 0.4403 	 time = 5.9660
[Epoch:   13] 	 loss = 1.4037 	 correct 0.4639 	 time = 5.9700
[Epoch:   14] 	 loss = 1.3850 	 correct 0.4670 	 time = 5.8880
[Epoch:   15] 	 loss = 1.3542 	 correct 0.4683 	 time = 5.9800
[Epoch:   16] 	 loss = 1.3305 	 correct 0.4811 	 time =

KeyboardInterrupt: 

In [84]:
def test(dataloader, model):
    model.eval()
    now = time.time()
    with torch.no_grad():
        correct = 0
        count = 0
        for batch, (text, emo) in enumerate(dataloader):
            text = text.type(torch.FloatTensor).to(device)
            emo = emo.type(torch.FloatTensor).to(device)

            pred = model(text)
            pred = torch.squeeze(pred, 1)
            for i in range(pred.size()[0]):
               count += 1
               if torch.argmax(pred[i]) == torch.argmax(emo[i]):
                   correct += 1
        print("Test result: ", correct/count*100, "time = ", time.time() - now)

# Evaluate on the held-out test split.
# NOTE(review): the recorded test accuracy (~85.9%) is far above the best validation
# accuracy logged during training (~48.1%). Presumably `model` still held weights from
# an earlier kernel run whose split differed, so the test split may contain samples it
# was trained on — re-check after a fresh Restart & Run All.
test(test_dataloader, model)

Test result:  85.92567102546455 time =  1.3069953918457031
