In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
import data_preprocess
import os

In [2]:
use_cuda = torch.cuda.is_available()

In [3]:
# 将数据划分为训练集和测试集
X_train, X_test, Y_train, Y_test = data_preprocess.tensorFromData()
trainDataSet = data_preprocess.TextDataSet(X_train, Y_train)
testDataSet = data_preprocess.TextDataSet(X_test, Y_test)
trainDataLoader = DataLoader(trainDataSet, batch_size=16, shuffle=True)
testDataLoader = DataLoader(testDataSet, batch_size=16, shuffle=False)

In [4]:
# 获取字典
word_to_inx, inx_to_word = data_preprocess.get_dic()
len_dic = len(word_to_inx)

# 定义超参数
MAXLEN = 64
input_dim = MAXLEN
emb_dim = 128
num_epoches = 20
batch_size = 16

In [5]:

# 定义模型
class CNN_BiLSTM_Concat_model(nn.Module):
    def __init__(self, len_dic, emb_dim, input_dim):
        super(CNN_BiLSTM_Concat_model, self).__init__()
        self.embed = nn.Embedding(len_dic, emb_dim)  # # batchsize,64（序列长）,128(词向量长度)
        self.conv = nn.Sequential(
            nn.Conv1d(input_dim, 256, 3, 1, 1),  # b,256,128
            nn.ReLU(True),
            nn.MaxPool1d(2, 2)  # b,256,64 ->b,256*64
        )
        self.linear_cnn=nn.Linear(256*64,256)#b,256
        #64,b,128
        self.bilstm = nn.LSTM(input_size=128, hidden_size=256, dropout=0.2,bidirectional=True) # 64,b,256*2 ->b,64,256*2 ->b,64*256*2
        self.linear_lstm=nn.Linear(64*256*2,256)  #b,256
        #b,256+256
        self.classify = nn.Linear(256+256, 3)  # b,3

    def forward(self, x):
        x= self.embed(x)
        out1= self.conv(x)
        b,c,d=out1.size()
        out1=out1.view(b,c*d)
        out1=self.linear_cnn(out1)

        x=x.permute(1,0,2)
        out2,_=self.bilstm(x)
        out2=out2.permute(1,0,2).contiguous()
        b,c,d=out2.size()
        out2=out2.view(b,c*d)
        out2=self.linear_lstm(out2)

        out = torch.cat((out1,out2),1)
        # print(out.size())
        out = self.classify(out)
        # print(out.size())
        return out

In [8]:
if use_cuda:
    model = CNN_BiLSTM_Concat_model(len_dic, emb_dim, input_dim).cuda()
else:
    model = CNN_BiLSTM_Concat_model(len_dic, emb_dim, input_dim)

criterion = nn.CrossEntropyLoss()
optimzier = optim.Adam(model.parameters(), lr=1e-3)
best_acc = 0
best_model = None
for epoch in range(num_epoches):# 数据集迭代 https://www.jianshu.com/p/043083d114d4
    #初始化loss和acc
    train_loss = 0
    train_acc = 0
#     通过 module.train() 和 module.eval() 来切换模型的 训练测试阶段。
    model.train()
    
    #steps print
    for i, data in enumerate(trainDataLoader):
        x, y = data
        if use_cuda:
            x, y = Variable(x).cuda(), Variable(y).cuda()
        else:
            x, y = Variable(x), Variable(y)
            
        # forward
        out = model(x)#预测
        loss = criterion(out, y)#计算loss
        
        train_loss += loss.item() * len(y)
        _, pre = torch.max(out, 1)
        num_acc = (pre == y).sum()
        train_acc += num_acc.item()
        # backward
        optimzier.zero_grad()
        loss.backward()
        optimzier.step()
        if (i + 1) % 100 == 0:#每100步计算精度
            print('[{}/{}],train loss is:{:.6f},train acc is:{:.6f}'.format(i, len(trainDataLoader),
                                                                            train_loss / (i * batch_size),
                                                                            train_acc / (i * batch_size)))
    print(
        'epoch:[{}],train loss is:{:.6f},train acc is:{:.6f}'.format(epoch,
                                                                     train_loss / (len(trainDataLoader) * batch_size),
                                                                     train_acc / (len(trainDataLoader) * batch_size)))
    #eval()时,框架会自动把BN和DropOut固定住,不会取平均,而是用训练好的值
    model.eval()
    eval_loss = 0
    eval_acc = 0
    for i, data in enumerate(testDataLoader):
        x, y = data
        if use_cuda:
            x = Variable(x, volatile=True).cuda()
            y = Variable(y, volatile=True).cuda()
        else:
            x = Variable(x, volatile=True)
            y = Variable(y, volatile=True)
        out = model(x)
        loss = criterion(out, y)
        eval_loss += loss.item() * len(y)
        _, pre = torch.max(out, 1)
        num_acc = (pre == y).sum()
        eval_acc += num_acc.item()
    print('test loss is:{:.6f},test acc is:{:.6f}'.format(
        eval_loss / (len(testDataLoader) * batch_size),
        eval_acc / (len(testDataLoader) * batch_size)))
    if best_acc<(eval_acc / (len(testDataLoader) * batch_size)):
        best_acc=eval_acc / (len(testDataLoader) * batch_size)
        best_model=model.state_dict()
        # print(best_model)
        print('best acc is {:.6f},best model is changed'.format(best_acc))

  "num_layers={}".format(dropout, num_layers))


[99/786],train loss is:1.894302,train acc is:0.484848
[199/786],train loss is:1.394250,train acc is:0.530151
[299/786],train loss is:1.206809,train acc is:0.557483
[399/786],train loss is:1.110612,train acc is:0.570645
[499/786],train loss is:1.044849,train acc is:0.587174
[599/786],train loss is:0.998292,train acc is:0.599228
[699/786],train loss is:0.961464,train acc is:0.607922
epoch:[0],train loss is:0.935291,train acc is:0.613391




test loss is:0.828618,test acc is:0.626586
best acc is 0.626586,best model is changed
[99/786],train loss is:0.594188,train acc is:0.761364
[199/786],train loss is:0.604486,train acc is:0.750628
[299/786],train loss is:0.600047,train acc is:0.747701
[399/786],train loss is:0.594671,train acc is:0.748434
[499/786],train loss is:0.599063,train acc is:0.742986
[599/786],train loss is:0.605668,train acc is:0.741235
[699/786],train loss is:0.610550,train acc is:0.737303
epoch:[1],train loss is:0.610173,train acc is:0.733063
test loss is:0.877052,test acc is:0.626904
best acc is 0.626904,best model is changed
[99/786],train loss is:0.420149,train acc is:0.842803
[199/786],train loss is:0.407925,train acc is:0.833543
[299/786],train loss is:0.409311,train acc is:0.832776
[399/786],train loss is:0.414454,train acc is:0.828634
[499/786],train loss is:0.418837,train acc is:0.825401
[599/786],train loss is:0.421121,train acc is:0.824082
[699/786],train loss is:0.418821,train acc is:0.825554
epoch

[599/786],train loss is:0.043677,train acc is:0.990818
[699/786],train loss is:0.046979,train acc is:0.990075
epoch:[17],train loss is:0.048095,train acc is:0.987198
test loss is:4.547723,test acc is:0.645622
[99/786],train loss is:0.026183,train acc is:0.999369
[199/786],train loss is:0.021360,train acc is:0.996545
[299/786],train loss is:0.028130,train acc is:0.994147
[399/786],train loss is:0.032862,train acc is:0.992481
[499/786],train loss is:0.037034,train acc is:0.991984
[599/786],train loss is:0.039036,train acc is:0.991131
[699/786],train loss is:0.042473,train acc is:0.989986
epoch:[18],train loss is:0.042539,train acc is:0.987675
test loss is:5.601687,test acc is:0.643084
[99/786],train loss is:0.043565,train acc is:0.999369
[199/786],train loss is:0.038253,train acc is:0.994661
[299/786],train loss is:0.038100,train acc is:0.992684
[399/786],train loss is:0.033750,train acc is:0.992638
[499/786],train loss is:0.044056,train acc is:0.991733
[599/786],train loss is:0.041106,t