In [36]:
import torch
from torch import nn,optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
import data_preprocess
import os

In [37]:
# Record whether PyTorch reports CUDA as available in this environment.
use_cuda=torch.cuda.is_available()

In [38]:
# Split the data into a training set and a test set, then wrap each split
# in a dataset object and a batching DataLoader.
# NOTE(review): the literal 16 below duplicates `batch_size` from the
# hyperparameter cell further down; keep the two values in sync.
X_train, X_test, Y_train, Y_test = data_preprocess.tensorFromData()
trainDataSet, testDataSet = (
    data_preprocess.TextDataSet(X_train, Y_train),
    data_preprocess.TextDataSet(X_test, Y_test),
)
# Only the training loader shuffles; the test loader keeps dataset order.
trainDataLoader = DataLoader(dataset=trainDataSet, shuffle=True, batch_size=16)
testDataLoader = DataLoader(dataset=testDataSet, shuffle=False, batch_size=16)

In [39]:
# Fetch the dictionaries from data_preprocess.
# NOTE(review): judging by the names these are presumably word->index and
# index->word mappings -- confirm against data_preprocess.get_dic().
word_to_inx, inx_to_word = data_preprocess.get_dic()
len_dic = len(word_to_inx)  # number of entries in word_to_inx

In [40]:
# Define hyperparameters.
MAXLEN = 64  # presumably the fixed token-sequence length (the model cell's comments call 64 the sequence length)
input_dim = MAXLEN  # alias of MAXLEN; same name as TextCNN_model's `input_dim` constructor parameter
emb_dim = 128  # same name as TextCNN_model's `emb_dim` parameter (embedding vector width)
num_epoches = 20  # not referenced in the visible cells; presumably the number of training epochs -- TODO confirm
batch_size = 16  # same value as the literal 16 given to the DataLoaders above, which do not reference this name

In [41]:
# Model definition.
#     A drawback of 1-D convolution is that filters with different kernel_size
#     values have to be designed to obtain receptive fields of different widths.

class TextCNN_model(nn.Module):
    """Text classifier with three parallel Conv1d branches (kernel sizes 1, 3, 5).

    Pipeline, with b = batch size:
        token indices (b, input_dim)
        -> embedding        (b, input_dim, emb_dim)
        -> each conv branch (b, 256, emb_dim // 2)
        -> concat on dim 2  (b, 256, 3 * (emb_dim // 2))
        -> flatten, dropout, linear -> (b, num_classes)

    NOTE(review): the embedding output is passed to Conv1d without a
    transpose, so the ``input_dim`` token positions act as input channels and
    the kernels slide along the embedding axis. A conventional TextCNN
    transposes to (b, emb_dim, seq_len) first. Confirm this layout is
    intended; changing it would alter the parameter shapes.

    Args:
        len_dic: number of rows in the embedding table (vocabulary size).
        emb_dim: width of each embedding vector; must be at least 2 so the
            pooling layers produce a non-empty output.
        input_dim: number of token positions per sample; used as the
            ``in_channels`` of every conv branch.
        num_classes: number of output logits. Defaults to 3.

    Raises:
        ValueError: if ``emb_dim`` is smaller than 2.
    """

    def __init__(self, len_dic, emb_dim, input_dim, num_classes=3):
        super(TextCNN_model, self).__init__()
        if emb_dim < 2:
            raise ValueError(
                "emb_dim must be >= 2 for MaxPool1d(2, 2), got %r" % (emb_dim,)
            )
        num_filters = 256
        # (b, input_dim) -> (b, input_dim, emb_dim)
        self.embed = nn.Embedding(len_dic, emb_dim)
        # Branches are created in the original order (conv1, conv2, conv3) so
        # that seeded parameter initialisation and state_dict keys are unchanged.
        self.conv1 = self._conv_branch(input_dim, num_filters, 1)
        self.conv2 = self._conv_branch(input_dim, num_filters, 3)
        self.conv3 = self._conv_branch(input_dim, num_filters, 5)
        self.drop = nn.Dropout(0.2)
        # Each branch keeps the length at emb_dim ("same" padding, stride 1)
        # and the pool halves it, so the flattened width depends on emb_dim.
        # The previous hard-coded 256*192 was only correct for emb_dim == 128.
        flat_features = num_filters * 3 * (emb_dim // 2)
        self.classify = nn.Linear(flat_features, num_classes)

    @staticmethod
    def _conv_branch(in_channels, out_channels, kernel_size):
        """Build one Conv1d -> ReLU -> MaxPool1d branch that halves the length."""
        return nn.Sequential(
            # stride 1 with padding k // 2 preserves the length for odd k
            nn.Conv1d(in_channels, out_channels, kernel_size, 1,
                      padding=kernel_size // 2),
            nn.ReLU(True),
            # pooling window 2, stride 2 -> length // 2
            nn.MaxPool1d(2, 2),
        )

    def forward(self, x):
        """Return class logits of shape (b, num_classes) for token indices ``x``.

        ``x`` must be an integer tensor of shape (b, input_dim).
        """
        embedded = self.embed(x)
        branches = (
            self.conv1(embedded),
            self.conv2(embedded),
            self.conv3(embedded),
        )
        features = torch.cat(branches, 2)
        # Anchor the flatten on the batch dimension so a feature-size mismatch
        # raises an error instead of silently producing a different row count.
        features = features.view(features.size(0), -1)
        features = self.drop(features)
        return self.classify(features)