In [43]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torch.nn.functional as F

dtype = torch.FloatTensor
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 3 words sentences (=sequence_length is 3)
sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

# TextCNN Parameter
embedding_size = 2
sequence_length = len(sentences[0])  # every sentences contains sequence_length(=3) words
num_classes = 2  # 0 or 1
batch_size = 3

word_list = " ".join(sentences).split()
vocab = list(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
vocab_size = len(vocab)
vocab
word2idx

{'baseball': 0,
 'love': 1,
 'this': 2,
 'for': 3,
 'that': 4,
 'loves': 5,
 'you': 6,
 'sorry': 7,
 'is': 8,
 'i': 9,
 'she': 10,
 'likes': 11,
 'he': 12,
 'me': 13,
 'hate': 14,
 'awful': 15}

In [44]:
def make_data(sentences, labels):
    inputs = []
    for sen in sentences:
        inputs.append([word2idx[n] for n in sen.split()])

    targets = []
    for out in labels:
        targets.append(out)  # To using Torch Softmax Loss function
    return inputs, targets


input_batch, target_batch = make_data(sentences, labels)
input_batch, target_batch = torch.LongTensor(input_batch), torch.LongTensor(target_batch)
print(input_batch)
print(target_batch)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size, True)

tensor([[ 9,  1,  6],
        [12,  5, 13],
        [10, 11,  0],
        [ 9, 14,  6],
        [ 7,  3,  4],
        [ 2,  8, 15]])
tensor([1, 1, 1, 0, 0, 0])


In [45]:
class TextCNN(nn.Module):
    def __init__(self,hidden_dim,n_layers,output_size):
        super(TextCNN, self).__init__()
        self.W = nn.Embedding(vocab_size, embedding_size)
        output_channel = 3
        self.lstm1=nn.LSTM(input_size=embedding_size, hidden_size=hidden_dim, num_layers=2,
                            dropout=0.2,batch_first=True,
                            bidirectional=True)
        # self.conv1=nn.Sequential(
        #     nn.Linear(hidden_dim*2, output_size),
        #     nn.ReLU(),
        # )
        self.linear=nn.Linear(hidden_dim*2, output_size)
        self.relu=    nn.ReLU()
        self.conv = nn.Sequential(
            # conv : [input_channel(=1), output_channel, (filter_height, filter_width), stride=1]
            nn.Conv2d(1, output_channel, (2, embedding_size)),
            nn.ReLU(),
            # pool : ((filter_height, filter_width))
            nn.MaxPool2d((2, 1)),
        )
        # fc
        self.fc = nn.Linear(output_channel, num_classes)

    def forward(self, X):
        '''
        X: [batch_size, sequence_length]
        '''
        batch_size = X.shape[0]
        embedding_X = self.W(X)  # [batch_size, sequence_length, embedding_size]
        embedding_X,(h,c)=self.lstm1(embedding_X)
        embedding_X=self.linear(embedding_X)
        embedding_X=self.relu(embedding_X)
        # embedding_X=self.conv1(embedding_X)
        embedding_X = embedding_X.unsqueeze(1)  # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]
        conved = self.conv(embedding_X)  # [batch_size, output_channel, 1, 1]
        flatten = conved.view(batch_size, -1)  # [batch_size, output_channel*1*1]
        output = self.fc(flatten)
        return output

hidden_dim=10
n_layers=2
output_size=embedding_size
model = TextCNN(hidden_dim,n_layers,output_size).to(device)
print(model)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)


TextCNN(
  (W): Embedding(16, 2)
  (lstm1): LSTM(2, 10, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (linear): Linear(in_features=20, out_features=2, bias=True)
  (relu): ReLU()
  (conv): Sequential(
    (0): Conv2d(1, 3, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3, out_features=2, bias=True)
)


In [46]:
# Training
for epoch in range(5000):
    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        pred = model(batch_x)
        loss = criterion(pred, batch_y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Test
test_text = 'i hate me'
tests = [[word2idx[n] for n in test_text.split()]]
test_batch = torch.LongTensor(tests).to(device)
# Predict
model = model.eval()
predict = model(test_batch).data.max(1, keepdim=True)[1]
if predict[0][0] == 0:
    print(test_text, "is Bad Mean...")
else:
    print(test_text, "is Good Mean!!")

Epoch: 1000 loss = 0.019376
Epoch: 1000 loss = 0.038564
Epoch: 2000 loss = 0.011283
Epoch: 2000 loss = 0.005643
Epoch: 3000 loss = 0.001998
Epoch: 3000 loss = 0.003990
Epoch: 4000 loss = 0.000755
Epoch: 4000 loss = 0.001508
Epoch: 5000 loss = 0.000584
Epoch: 5000 loss = 0.000292
i hate me is Good Mean!!
