<a href="https://colab.research.google.com/github/learnerhouse/BiLSTM/blob/master/Bilstm_%2B_multi_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bilstm + multi classification

In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary
torch.manual_seed(1)

<torch._C.Generator at 0x7f1072607b10>

In [0]:
def argmax(vec):
    # return the argmax as a python int
    _, idx = torch.max(vec,1)
    return idx.item()

def prepare_sequence(seq, to_ix,max_len=-1):
    idxs = [to_ix[w] for w in seq]
    if max_len != -1:
        idxs.extend([0]*(max_len-len(seq)))
    return torch.tensor(idxs, dtype=torch.long)

In [0]:
class BiLSTM_MCLS(nn.Module):

    # 初始化变量以及定义几个隐层
    def __init__(self, vocab_size, embedding_dim, hidden_dim,label_tree,seq_max_len=10):
        super(BiLSTM_MCLS, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.label_tree = label_tree
        self.seq_max_len = seq_max_len
        self.height = len(self.label_tree)
        self.loss = nn.CrossEntropyLoss()
        self.word_embeds = nn.Embedding(vocab_size,embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                            num_layers=1, bidirectional=True,batch_first=True)
        
        # Maps the output of the LSTM into tag space.
        self.hidden_cls = []
        self.label_cls  = []
        # for index,c_v in enumerate(label_tree):
        #     self.hidden_cls.append(nn.Linear(hidden_dim, hidden_dim))
        #     self.label_cls.append(nn.Linear(hidden_dim, len(c_v)))

        self.hidden_cls1 = nn.Linear(hidden_dim, hidden_dim)
        self.hidden_cls2 = nn.Linear(hidden_dim, hidden_dim)
        self.label_cls1  = nn.Linear(hidden_dim, len(label_tree[0]))
        self.label_cls2  = nn.Linear(hidden_dim, len(label_tree[1]))
        
        self.attention_layer = nn.Sequential(
            nn.Linear(self.hidden_dim//2, self.hidden_dim//2),
            nn.ReLU(inplace=True)
        )
        self.hidden = self.init_hidden()
    
    # 初始化隐层
    def init_hidden(self):
        return (torch.randn(2, 1, self.hidden_dim // 2),
                torch.randn(2, 1, self.hidden_dim // 2))

    # 获取特征向量
    def _get_lstm_feature(self,sentence):
        self.hidden = self.init_hidden()
        embeds = self.word_embeds(sentence).view(1,len(sentence), -1)
        lstm_out, self.hidden = self.lstm(embeds, self.hidden)
        lstm_out = lstm_out.view(len(sentence), self.hidden_dim)
        return lstm_out
    
    def attention_net_with_w(self, lstm_out):
        '''
        :param lstm_out: [batch_size, time_step, hidden_dims * num_directions(=2)]
        :return:
        '''
        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
        # h [batch_size, time_step, hidden_dims]
        h = lstm_tmp_out[0] + lstm_tmp_out[1]
        # atten_w [batch_size, time_step, hidden_dims]
        atten_w = self.attention_layer(h)
        # m [batch_size, time_step, hidden_dims]
        m = nn.Tanh()(h)
        # atten_context [batch_size, time_step, time_step]
        atten_context = torch.bmm(m, atten_w.transpose(1, 2))
        # softmax_w [batch_size, time_step, time_step]
        softmax_w = F.softmax(atten_context, dim=-1)
        # context [batch_size, hidden_dims, time_step]
        context = torch.bmm(h.transpose(1,2), softmax_w)
        context_with_attn = h.transpose(1, 2) + context
        # result [batch_size, hidden_dims]
        # result = torch.sum(context, dim=-1)
        result = torch.sum(context_with_attn, dim=-1)
        return result

    # 获取多级分类器的标签向量
    def _get_multi_label(self,feature):
        # for index,layer in enumerate(self.hidden_cls):
        #     if index >= 1:
        #         tmp_hidden = torch.cat((tmp_hidden,torch.sigmoid(layer(feature))))
        #         fixed_hidden = torch.sigmoid(self.label_cls[index](tmp_hidden)) 
        #         multi_label[index] = fixed_hidden[-1]
        #     else:
        #         tmp_hidden = torch.sigmoid(layer(feature)) 
        #         fixed_hidden = torch.sigmoid(self.label_cls[index](tmp_hidden))
        #         multi_label[index] = fixed_hidden[-1]
        
        # hidden_att = self.attention_net_with_w(feature.view(1,10,20).permute(1, 0, 2))
        # print (hidden_att.shape)  # 10*10
        
        tmp_hidden1 = torch.relu(self.hidden_cls1(feature))
        fixed_hidden1 = torch.sigmoid(self.label_cls1(tmp_hidden1))

        tmp_hidden2 = torch.cat((tmp_hidden1,torch.sigmoid(self.hidden_cls2(feature))))
        fixed_hidden2 = torch.sigmoid(self.label_cls2(tmp_hidden2))

        return fixed_hidden1,fixed_hidden2,[argmax(fixed_hidden1.view(1,-1)),argmax(fixed_hidden2.view(1,-1))]
    
    # 计算交叉熵
    def my_coss_entropy(self,sentence,labels):
        lstm_feature = self._get_lstm_feature(sentence)
        pre_label1,pre_label2,label_ids = self._get_multi_label(lstm_feature)
        score = self.loss(pre_label1.view(1,-1),labels[0].view(1)) + self.loss(pre_label2.view(1,-1),labels[1].view(1))
        return score
    
    # 计算前向网络，给出预测标签的结果
    def forward (self,sentence):
        # print ("句子编码:",sentence)
        lstm_feature = self._get_lstm_feature(sentence)
        # print ("句子嵌入:",lstm_feature)
        label1,label2,label_ids = self._get_multi_label(lstm_feature)
        return label_ids



测试代码

In [63]:
EMBEDDING_DIM = 10
HIDDEN_DIM = 10
max_len = -1

# 标签树
label_tree = [["nature","science"],["fruit","company"]]

# Make up some training data
training_data = [(
    "How much is the apple".split(),
    "nature_fruit".split("_")
), (
    "Apple is a great company in the world".split(),
    "science_company".split("_")
)]

# 词嵌入
word_to_ix = {}
for sentence, tags in training_data:
    for word in sentence:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

# 标签编码
def label_encode(label_names,label_tree):
    label_vectors = []
    label_ids = []
    for index,label in enumerate (label_names):
        vector = [0] * len(label_tree[index])
        vector[label_tree[index].index(label)] = 1
        label_vectors.append(vector)
        label_ids.append(label_tree[index].index(label))
    return label_vectors,label_ids

test_tag,_ = label_encode("science_company".split("_"),label_tree)

model = BiLSTM_MCLS(len(word_to_ix), EMBEDDING_DIM, HIDDEN_DIM,label_tree)
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-4)


# Check predictions before training
with torch.no_grad():
    precheck_sent = prepare_sequence(training_data[1][0], word_to_ix,max_len=max_len)
    print (precheck_sent)
    precheck_tags = torch.tensor(test_tag, dtype=torch.long)
    print(model(precheck_sent))

# Make sure prepare_sequence from earlier in the LSTM section is loaded
for epoch in range(
        800):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is,
        # turn them into Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix,max_len=max_len)
        _,tags_id = label_encode(tags,label_tree)
        targets = torch.tensor(tags_id, dtype=torch.long)
        # print (sentence, targets)

        # Step 3. Run our forward pass.
        loss = model.my_coss_entropy(sentence_in, targets)
        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step()
        if epoch % 10 == 0: print (loss)
        loss.backward()
        optimizer.step()

# Check predictions after training
with torch.no_grad():
    precheck_sent = prepare_sequence(training_data[0][0], word_to_ix,max_len=max_len)
    print(model(precheck_sent))


tensor([ 5,  2,  6,  7,  8,  9,  3, 10])
[9, 2]
tensor(5.2797, grad_fn=<AddBackward0>)
tensor(6.2059, grad_fn=<AddBackward0>)
tensor(5.2502, grad_fn=<AddBackward0>)
tensor(6.1544, grad_fn=<AddBackward0>)
tensor(5.2105, grad_fn=<AddBackward0>)
tensor(6.1239, grad_fn=<AddBackward0>)
tensor(5.1951, grad_fn=<AddBackward0>)
tensor(6.0954, grad_fn=<AddBackward0>)
tensor(5.1316, grad_fn=<AddBackward0>)
tensor(6.0417, grad_fn=<AddBackward0>)
tensor(5.0851, grad_fn=<AddBackward0>)
tensor(5.9818, grad_fn=<AddBackward0>)
tensor(5.0695, grad_fn=<AddBackward0>)
tensor(5.8686, grad_fn=<AddBackward0>)
tensor(5.0346, grad_fn=<AddBackward0>)
tensor(5.7419, grad_fn=<AddBackward0>)
tensor(4.8188, grad_fn=<AddBackward0>)
tensor(5.5339, grad_fn=<AddBackward0>)
tensor(4.9406, grad_fn=<AddBackward0>)
tensor(5.2692, grad_fn=<AddBackward0>)
tensor(4.5735, grad_fn=<AddBackward0>)
tensor(4.9892, grad_fn=<AddBackward0>)
tensor(4.4322, grad_fn=<AddBackward0>)
tensor(4.7848, grad_fn=<AddBackward0>)
tensor(4.3474, g

In [60]:
m = nn.MaxPool2d(3, stride=2)
input = torch.randn(20, 16, 50)
print (input.shape)
output = m(input)
print (output.shape)

torch.Size([20, 16, 50])
torch.Size([20, 7, 24])
