In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import nltk
import random
import numpy as np
from collections import Counter, OrderedDict
import re
from copy import deepcopy
import pandas as pd
import numpy as np
from torch.nn.utils.rnn import PackedSequence,pack_padded_sequence
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

flatten = lambda l: [item for sublist in l for item in sublist]

In [None]:
USE_CUDA = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if USE_CUDA else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if USE_CUDA else torch.ByteTensor

In [None]:
MIN_LENGTH=3
MAX_LENGTH=120

In [None]:
def prepare_sequence(seq, to_index):
    idxs = list(map(lambda w: to_index[w] if w in to_index.keys() else to_index["<unk>"], seq))
    return LongTensor(idxs)

In [None]:
def getBatch(batch_size,train_data):
    random.shuffle(train_data)
    sindex=0
    eindex=batch_size
    while eindex < len(train_data):
        batch = train_data[sindex:eindex]
        temp = eindex
        eindex = eindex+batch_size
        sindex = temp
        yield batch
    
    if eindex >= len(train_data):
        batch = train_data[sindex:]
        yield batch

In [None]:
def normalize_string(s):
    s = re.sub(r"\s+", r" ", s).strip()
    return s

In [None]:
def pad_to_batch(batch,x_to_ix):
    
    sorted_batch =  sorted(batch, key=lambda b:b[0].size(1),reverse=True) # sort by len
    
    x1, x2, y = list(zip(*sorted_batch))
    max_x1 = max([s.size(1) for s in x1])
    max_x2 = max([s.size(1) for s in x2])
    
    x1_p, x2_p, y_p=[],[],[]
    for i in range(len(batch)):
        if x1[i].size(1)<max_x1:
            x1_p.append(torch.cat([x1[i],Variable(LongTensor([x_to_ix['<PAD>']]*(max_x1-x1[i].size(1)))).view(1,-1)],1))
        else:
            x1_p.append(x1[i])
        
        if x2[i].size(1)<max_x2:
            x2_p.append(torch.cat([x2[i],Variable(LongTensor([x_to_ix['<PAD>']]*(max_x2-x2[i].size(1)))).view(1,-1)],1))
        else:
            x2_p.append(x2[i])
            
        y_p.append(torch.tensor(y[i]))
        
        
    x1_var = torch.cat(x1_p)
    x2_var = torch.cat(x2_p)
    target_var = torch.tensor(y_p)
    x1_len = [list(map(lambda s: s ==0, t.data)).count(False) for t in x1_var]
    x2_len = [list(map(lambda s: s ==0, t.data)).count(False) for t in x2_var]
    
    return x1_var, x2_var, target_var, x1_len, x2_len

In [None]:
df = pd.read_csv('data/atec_nlp_sim_train.csv', sep='\t', names=['number', "sen1", "sen2", "label"])

In [None]:
len(df)

In [None]:
%%time

X1_r = list(map(normalize_string, df.sen1.tolist()))
X2_r = list(map(normalize_string, df.sen2.tolist()))
y_r = df.label.tolist()
print(len(X1_r), len(X2_r), len(y_r))
print(X1_r[0], "@@@@", X2_r[0], "@@@@",y_r[0])

In [None]:
vocab = list(set(flatten(X1_r + X2_r)))
len(vocab)

In [None]:
source2index = {'<PAD>':0,'<UNK>':1,'<s>':2,'</s>':3}
for vo in vocab:
    if vo not in source2index.keys():
        source2index[vo]=len(source2index)
index2source = {v:k for k,v in source2index.items()}

In [None]:
index2source[source2index[","]]

In [None]:
%%time
X1_p, X2_p = [],[]
ta_p = []

for s1, s2, ta in zip(X1_r, X2_r, y_r):
    X1_p.append(prepare_sequence(s1,source2index).view(1,-1))
    X2_p.append(prepare_sequence(s2,source2index).view(1,-1))
    ta_p.append(ta)


In [None]:
train_data = list(zip(X1_p, X2_p, ta_p))
print(train_data[0])

In [None]:
EPOCH=50
BATCH_SIZE = 64
EMBEDDING_SIZE = 128
HIDDEN_SIZE = 256
LR = 0.01
DECODER_LEARNING_RATIO=5.0
RESCHEDULED=False

In [None]:
batch = next(getBatch(BATCH_SIZE,train_data))

In [None]:
batch[0][0].size(1)

In [None]:
pad_to_batch(batch,source2index)

In [None]:
class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size,hidden_size, n_layers=1,bidirec=False):
        super(Encoder, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_size, embedding_size)
        
        if bidirec:
            self.n_direction = 2 
            self.gru = nn.GRU(embedding_size, hidden_size, n_layers, batch_first=True,bidirectional=True)
        else:
            self.n_direction = 1
            self.gru = nn.GRU(embedding_size, hidden_size, n_layers, batch_first=True)
    
    def init_hidden(self,inputs):  # input.size(0) = batch_size
        hidden = Variable(torch.zeros(self.n_layers*self.n_direction,inputs.size(0),self.hidden_size))
        return hidden.cuda() if USE_CUDA else hidden
    
    def init_weight(self):
        self.embedding.weight = nn.init.xavier_uniform_(self.embedding.weight)
        self.gru.weight_hh_l0 = nn.init.xavier_uniform_(self.gru.weight_hh_l0)
        self.gru.weight_ih_l0 = nn.init.xavier_uniform_(self.gru.weight_ih_l0)
    
    def forward(self, inputs, input_lengths):
        """
        inputs : B,T (LongTensor)
        input_lengths : real lengths of input batch (list)
        """
        hidden = self.init_hidden(inputs)
        #print('inputs: ', inputs.shape)
        #print('input_lengths: ', input_lengths)
        embedded = self.embedding(inputs)
        #print('embedded', embedded.shape)
        packed = pack_padded_sequence(embedded, input_lengths,batch_first=True)
        #print('packed', packed)
        outputs, hidden = self.gru(packed, hidden)
        outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs,batch_first=True) # unpack (back to padded)
        
        print("print(hidden.shape)", hidden.shape)
        if self.n_layers>1:
            if self.n_direction==2:
                hidden = hidden[-2:]
                print("print(hidden.shape)", hidden.shape)
                return outputs, torch.cat((hidden[0], hidden[1]),1).unsqueeze(1)
            else:
                hidden = hidden[-1]
                return outputs, hidden.unsqueeze(1)
        
        #print(hidden.shape)
        #return outputs, torch.cat((hidden,),1).unsqueeze(1)   # cat 并起来[hidden_size*2], treat dim 0 as sequence of cat

In [None]:
class EncoderV(nn.Module):
    def __init__(self, input_size, embedding_size,hidden_size, n_layers=1,bidirec=False):
        super(EncoderV, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_size, embedding_size)
        
        if bidirec:
            self.n_direction = 2 
            self.gru = nn.GRU(embedding_size, hidden_size, n_layers, batch_first=True,bidirectional=True)
        else:
            self.n_direction = 1
            self.gru = nn.GRU(embedding_size, hidden_size, n_layers, batch_first=True)
    
    def init_hidden(self,inputs):  # input.size(0) = batch_size
        hidden = Variable(torch.zeros(self.n_layers*self.n_direction,inputs.size(0),self.hidden_size))
        return hidden.cuda() if USE_CUDA else hidden
    
    def init_weight(self):
        self.embedding.weight = nn.init.xavier_uniform_(self.embedding.weight)
        self.gru.weight_hh_l0 = nn.init.xavier_uniform_(self.gru.weight_hh_l0)
        self.gru.weight_ih_l0 = nn.init.xavier_uniform_(self.gru.weight_ih_l0)
    
    def forward(self, x, x_len):
        """
        sequence -> sort -> pad and pack ->process using RNN -> unpack ->unsort

        :param x: Variable
        :param x_len: numpy list
        :return:
        """
        
        hidden = self.init_hidden(x)
        
        #print('x', x.size())
        #print('x_len', x_len)
        
        """sort"""
        x_sort_idx = np.argsort(x_len)[::-1]
        #print("x_sort_idx", x_sort_idx)
        x_unsort_idx = LongTensor(np.argsort(x_sort_idx))
        #print("x_unsort_idx", x_unsort_idx)
        x_len = np.array(x_len)[x_sort_idx]
        x = x[LongTensor(x_sort_idx.copy())]
        #print('x', x)
        embedded = self.embedding(x)
        #print("embedded", embedded.shape)
        """pack"""
        x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(embedded, x_len, batch_first=True)
    
        #print("x_emb_p", x_emb_p)
            
        """process using RNN"""
        out_pack, ht = self.gru(x_emb_p, None)
        
        """unsort: h"""
        ht = torch.transpose(ht, 0, 1)[
            x_unsort_idx]  # (num_layers * num_directions, batch, hidden_size) -> (batch, ...)
        ht = torch.transpose(ht, 0, 1)

        #print("ht", ht.shape)
        if self.n_layers>1:
            if self.n_direction==2:
                ht = ht[-2:]
                return out_pack, torch.cat((ht[0], ht[1]),1)
            else:
                hidden = hidden[-1]
                return out_pack, ht

     

In [43]:
class Model(nn.Module):
    
    def __init__(self, encoder1, encoder2, hidden_size):
        
        super(Model, self).__init__()
        self.encoder1 = encoder1
        #self.encoder2 = encoder2
        
        self.fc1 = nn.Linear(hidden_size*4, hidden_size)  
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 2)
        
    def init_weight(self):
        
        self.encoder1.init_weight()
        #self.encoder2.init_weight()
        self.fc1.weight = nn.init.xavier_uniform_(self.fc1.weight)
        self.fc2.weight = nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, sen1, sen2, sen1_lengths, sen2_lengths):
             
        outputs_1, hidden_c1 = encoder1(sen1,sen1_lengths)
        outputs_2, hidden_c2 = encoder1(sen2,sen2_lengths)
        
        #print("hidden_c2", hidden_c2.shape)
        
        hidden = torch.cat((hidden_c1, hidden_c2), 1).squeeze(1)  # batch * 2hidden
        #print("hidden", hidden.shape)
        out = self.fc1(hidden)
        out = self.sigmod(out)
        out = self.fc2(out)
        return out

In [44]:
encoder1 = EncoderV(len(source2index),EMBEDDING_SIZE,HIDDEN_SIZE,2,True)
#encoder2 = Encoder(len(source2index),EMBEDDING_SIZE,HIDDEN_SIZE,2,True)

model = Model(encoder1, encoder1, HIDDEN_SIZE)
model.init_weight()

if USE_CUDA:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
#criterion = F.binary_cross_entropy(F.sigmoid(input), target)
optimizer = optim.Adam(model.parameters(),lr=LR)

In [45]:
model.parameters

<bound method Module.parameters of Model(
  (encoder1): EncoderV(
    (embedding): Embedding(1720, 128)
    (gru): GRU(128, 256, num_layers=2, batch_first=True, bidirectional=True)
  )
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=256, out_features=2, bias=True)
)>

In [51]:
for i in list(model.parameters()):
    if i.requires_grad:
        print(i.shape)

torch.Size([1720, 128])
torch.Size([768, 128])
torch.Size([768, 256])
torch.Size([768])
torch.Size([768])
torch.Size([768, 128])
torch.Size([768, 256])
torch.Size([768])
torch.Size([768])
torch.Size([768, 512])
torch.Size([768, 256])
torch.Size([768])
torch.Size([768])
torch.Size([768, 512])
torch.Size([768, 256])
torch.Size([768])
torch.Size([768])
torch.Size([256, 1024])
torch.Size([256])
torch.Size([2, 256])
torch.Size([2])


In [49]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [50]:
count_parameters(model)

2258690

In [None]:
### test 
for i,batch in enumerate(getBatch(BATCH_SIZE,train_data)):
    
    # sen1 = batch * l
    sen1, sen2, targets,sen1_lengths, sen2_lengths = pad_to_batch(batch,source2index)
    print("sen1 size: ", sen1.shape)
    #input_masks = torch.cat([Variable(ByteTensor(tuple(map(lambda s: s ==0, t.data)))) for t in sen1]).view(sen1.size(0),-1)
    model.zero_grad()
    optimizer.zero_grad()
    
    # Forward + Backward + Optimize
    outputs = model(sen1, sen2, sen1_lengths, sen2_lengths)
    print('output', outputs.size())
    print('targets', outputs.size())
    loss = criterion(outputs, targets)
    print('loss', loss)
    loss.backward()
    optimizer.step()
    
    break

In [26]:
total_step = round(len(train_data) / BATCH_SIZE)

for epoch in range(EPOCH):
    
    for i,batch in enumerate(getBatch(BATCH_SIZE,train_data)):
        sen1, sen2, targets,sen1_lengths, sen2_lengths = pad_to_batch(batch,source2index)
        model.zero_grad()
        optimizer.zero_grad()

        # Forward + Backward + Optimize
        outputs = model(sen1, sen2, sen1_lengths, sen2_lengths)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        if (i+1) % 5 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch+1, EPOCH, i+1, total_step, loss.item()))

Epoch [1/50], Step [5/615], Loss: 0.4890
Epoch [1/50], Step [10/615], Loss: 0.6969
Epoch [1/50], Step [15/615], Loss: 0.5125
Epoch [1/50], Step [20/615], Loss: 0.4999
Epoch [1/50], Step [25/615], Loss: 0.4824
Epoch [1/50], Step [30/615], Loss: 0.5468
Epoch [1/50], Step [35/615], Loss: 0.4263
Epoch [1/50], Step [40/615], Loss: 0.6197
Epoch [1/50], Step [45/615], Loss: 0.6428
Epoch [1/50], Step [50/615], Loss: 0.4839
Epoch [1/50], Step [55/615], Loss: 0.4963
Epoch [1/50], Step [60/615], Loss: 0.5348
Epoch [1/50], Step [65/615], Loss: 0.4908
Epoch [1/50], Step [70/615], Loss: 0.4886
Epoch [1/50], Step [75/615], Loss: 0.5880
Epoch [1/50], Step [80/615], Loss: 0.4815
Epoch [1/50], Step [85/615], Loss: 0.5006
Epoch [1/50], Step [90/615], Loss: 0.5650
Epoch [1/50], Step [95/615], Loss: 0.4086
Epoch [1/50], Step [100/615], Loss: 0.5737
Epoch [1/50], Step [105/615], Loss: 0.4343
Epoch [1/50], Step [110/615], Loss: 0.5304
Epoch [1/50], Step [115/615], Loss: 0.5614
Epoch [1/50], Step [120/615], L

Epoch [2/50], Step [350/615], Loss: 0.3994
Epoch [2/50], Step [355/615], Loss: 0.5523
Epoch [2/50], Step [360/615], Loss: 0.5057
Epoch [2/50], Step [365/615], Loss: 0.5994
Epoch [2/50], Step [370/615], Loss: 0.4665
Epoch [2/50], Step [375/615], Loss: 0.4690
Epoch [2/50], Step [380/615], Loss: 0.5307
Epoch [2/50], Step [385/615], Loss: 0.6263
Epoch [2/50], Step [390/615], Loss: 0.4340
Epoch [2/50], Step [395/615], Loss: 0.5576
Epoch [2/50], Step [400/615], Loss: 0.4765
Epoch [2/50], Step [405/615], Loss: 0.5511
Epoch [2/50], Step [410/615], Loss: 0.7391
Epoch [2/50], Step [415/615], Loss: 0.4772
Epoch [2/50], Step [420/615], Loss: 0.4761
Epoch [2/50], Step [425/615], Loss: 0.5401
Epoch [2/50], Step [430/615], Loss: 0.4942
Epoch [2/50], Step [435/615], Loss: 0.4429
Epoch [2/50], Step [440/615], Loss: 0.5264
Epoch [2/50], Step [445/615], Loss: 0.5739
Epoch [2/50], Step [450/615], Loss: 0.4602
Epoch [2/50], Step [455/615], Loss: 0.5169
Epoch [2/50], Step [460/615], Loss: 0.5511
Epoch [2/50

Epoch [4/50], Step [80/615], Loss: 0.3619
Epoch [4/50], Step [85/615], Loss: 0.5548
Epoch [4/50], Step [90/615], Loss: 0.4731
Epoch [4/50], Step [95/615], Loss: 0.4455
Epoch [4/50], Step [100/615], Loss: 0.5330
Epoch [4/50], Step [105/615], Loss: 0.4735
Epoch [4/50], Step [110/615], Loss: 0.3972
Epoch [4/50], Step [115/615], Loss: 0.4515
Epoch [4/50], Step [120/615], Loss: 0.4024
Epoch [4/50], Step [125/615], Loss: 0.4747
Epoch [4/50], Step [130/615], Loss: 0.3863
Epoch [4/50], Step [135/615], Loss: 0.4979
Epoch [4/50], Step [140/615], Loss: 0.2950
Epoch [4/50], Step [145/615], Loss: 0.5399
Epoch [4/50], Step [150/615], Loss: 0.4514
Epoch [4/50], Step [155/615], Loss: 0.3652
Epoch [4/50], Step [160/615], Loss: 0.3876
Epoch [4/50], Step [165/615], Loss: 0.4510
Epoch [4/50], Step [170/615], Loss: 0.5662
Epoch [4/50], Step [175/615], Loss: 0.4613
Epoch [4/50], Step [180/615], Loss: 0.4912
Epoch [4/50], Step [185/615], Loss: 0.3678
Epoch [4/50], Step [190/615], Loss: 0.3969
Epoch [4/50], S

Epoch [5/50], Step [425/615], Loss: 0.4282
Epoch [5/50], Step [430/615], Loss: 0.4168
Epoch [5/50], Step [435/615], Loss: 0.4459
Epoch [5/50], Step [440/615], Loss: 0.5215
Epoch [5/50], Step [445/615], Loss: 0.4166
Epoch [5/50], Step [450/615], Loss: 0.5022
Epoch [5/50], Step [455/615], Loss: 0.3676
Epoch [5/50], Step [460/615], Loss: 0.3875
Epoch [5/50], Step [465/615], Loss: 0.4563
Epoch [5/50], Step [470/615], Loss: 0.4149
Epoch [5/50], Step [475/615], Loss: 0.4512
Epoch [5/50], Step [480/615], Loss: 0.4098
Epoch [5/50], Step [485/615], Loss: 0.5772
Epoch [5/50], Step [490/615], Loss: 0.4901
Epoch [5/50], Step [495/615], Loss: 0.4124
Epoch [5/50], Step [500/615], Loss: 0.4502
Epoch [5/50], Step [505/615], Loss: 0.4397
Epoch [5/50], Step [510/615], Loss: 0.4479
Epoch [5/50], Step [515/615], Loss: 0.4416
Epoch [5/50], Step [520/615], Loss: 0.5128
Epoch [5/50], Step [525/615], Loss: 0.4172
Epoch [5/50], Step [530/615], Loss: 0.4118
Epoch [5/50], Step [535/615], Loss: 0.4684
Epoch [5/50

Epoch [7/50], Step [155/615], Loss: 0.4344
Epoch [7/50], Step [160/615], Loss: 0.5011
Epoch [7/50], Step [165/615], Loss: 0.4694
Epoch [7/50], Step [170/615], Loss: 0.4189
Epoch [7/50], Step [175/615], Loss: 0.4948
Epoch [7/50], Step [180/615], Loss: 0.4956
Epoch [7/50], Step [185/615], Loss: 0.3978
Epoch [7/50], Step [190/615], Loss: 0.4016
Epoch [7/50], Step [195/615], Loss: 0.4751
Epoch [7/50], Step [200/615], Loss: 0.5033
Epoch [7/50], Step [205/615], Loss: 0.4944
Epoch [7/50], Step [210/615], Loss: 0.4053
Epoch [7/50], Step [215/615], Loss: 0.4941
Epoch [7/50], Step [220/615], Loss: 0.4988
Epoch [7/50], Step [225/615], Loss: 0.3789
Epoch [7/50], Step [230/615], Loss: 0.3835
Epoch [7/50], Step [235/615], Loss: 0.4873
Epoch [7/50], Step [240/615], Loss: 0.3550
Epoch [7/50], Step [245/615], Loss: 0.4011
Epoch [7/50], Step [250/615], Loss: 0.3527
Epoch [7/50], Step [255/615], Loss: 0.4803
Epoch [7/50], Step [260/615], Loss: 0.3349
Epoch [7/50], Step [265/615], Loss: 0.3143
Epoch [7/50

Epoch [8/50], Step [495/615], Loss: 0.3707
Epoch [8/50], Step [500/615], Loss: 0.4032
Epoch [8/50], Step [505/615], Loss: 0.4103
Epoch [8/50], Step [510/615], Loss: 0.5153
Epoch [8/50], Step [515/615], Loss: 0.3646
Epoch [8/50], Step [520/615], Loss: 0.4385
Epoch [8/50], Step [525/615], Loss: 0.3833
Epoch [8/50], Step [530/615], Loss: 0.4665
Epoch [8/50], Step [535/615], Loss: 0.3699
Epoch [8/50], Step [540/615], Loss: 0.3822
Epoch [8/50], Step [545/615], Loss: 0.4696
Epoch [8/50], Step [550/615], Loss: 0.3435
Epoch [8/50], Step [555/615], Loss: 0.3973
Epoch [8/50], Step [560/615], Loss: 0.4658
Epoch [8/50], Step [565/615], Loss: 0.4319
Epoch [8/50], Step [570/615], Loss: 0.3646
Epoch [8/50], Step [575/615], Loss: 0.4227
Epoch [8/50], Step [580/615], Loss: 0.4079
Epoch [8/50], Step [585/615], Loss: 0.3562
Epoch [8/50], Step [590/615], Loss: 0.5317
Epoch [8/50], Step [595/615], Loss: 0.5081
Epoch [8/50], Step [600/615], Loss: 0.4895
Epoch [8/50], Step [605/615], Loss: 0.4656
Epoch [8/50

Epoch [10/50], Step [220/615], Loss: 0.3821
Epoch [10/50], Step [225/615], Loss: 0.3465
Epoch [10/50], Step [230/615], Loss: 0.4021
Epoch [10/50], Step [235/615], Loss: 0.3364
Epoch [10/50], Step [240/615], Loss: 0.4534
Epoch [10/50], Step [245/615], Loss: 0.3468
Epoch [10/50], Step [250/615], Loss: 0.4842
Epoch [10/50], Step [255/615], Loss: 0.4971
Epoch [10/50], Step [260/615], Loss: 0.4020
Epoch [10/50], Step [265/615], Loss: 0.4342
Epoch [10/50], Step [270/615], Loss: 0.3593
Epoch [10/50], Step [275/615], Loss: 0.4674
Epoch [10/50], Step [280/615], Loss: 0.5480
Epoch [10/50], Step [285/615], Loss: 0.3997
Epoch [10/50], Step [290/615], Loss: 0.5339
Epoch [10/50], Step [295/615], Loss: 0.3898
Epoch [10/50], Step [300/615], Loss: 0.4080
Epoch [10/50], Step [305/615], Loss: 0.4071
Epoch [10/50], Step [310/615], Loss: 0.3346
Epoch [10/50], Step [315/615], Loss: 0.3789
Epoch [10/50], Step [320/615], Loss: 0.3280
Epoch [10/50], Step [325/615], Loss: 0.4978
Epoch [10/50], Step [330/615], L

Epoch [11/50], Step [540/615], Loss: 0.3943
Epoch [11/50], Step [545/615], Loss: 0.4397
Epoch [11/50], Step [550/615], Loss: 0.4249
Epoch [11/50], Step [555/615], Loss: 0.3438
Epoch [11/50], Step [560/615], Loss: 0.4207
Epoch [11/50], Step [565/615], Loss: 0.3977
Epoch [11/50], Step [570/615], Loss: 0.3204
Epoch [11/50], Step [575/615], Loss: 0.5689
Epoch [11/50], Step [580/615], Loss: 0.2723
Epoch [11/50], Step [585/615], Loss: 0.4096
Epoch [11/50], Step [590/615], Loss: 0.5509
Epoch [11/50], Step [595/615], Loss: 0.3021
Epoch [11/50], Step [600/615], Loss: 0.4552
Epoch [11/50], Step [605/615], Loss: 0.3744
Epoch [11/50], Step [610/615], Loss: 0.3818
Epoch [11/50], Step [615/615], Loss: 0.4816
Epoch [12/50], Step [5/615], Loss: 0.4977
Epoch [12/50], Step [10/615], Loss: 0.3782
Epoch [12/50], Step [15/615], Loss: 0.5208
Epoch [12/50], Step [20/615], Loss: 0.3905
Epoch [12/50], Step [25/615], Loss: 0.4323
Epoch [12/50], Step [30/615], Loss: 0.4808
Epoch [12/50], Step [35/615], Loss: 0.4

Epoch [13/50], Step [250/615], Loss: 0.2816
Epoch [13/50], Step [255/615], Loss: 0.3916
Epoch [13/50], Step [260/615], Loss: 0.4102
Epoch [13/50], Step [265/615], Loss: 0.3123
Epoch [13/50], Step [270/615], Loss: 0.3951
Epoch [13/50], Step [275/615], Loss: 0.3924
Epoch [13/50], Step [280/615], Loss: 0.3214
Epoch [13/50], Step [285/615], Loss: 0.2974
Epoch [13/50], Step [290/615], Loss: 0.4342
Epoch [13/50], Step [295/615], Loss: 0.3495
Epoch [13/50], Step [300/615], Loss: 0.4165
Epoch [13/50], Step [305/615], Loss: 0.3638
Epoch [13/50], Step [310/615], Loss: 0.4624
Epoch [13/50], Step [315/615], Loss: 0.3322
Epoch [13/50], Step [320/615], Loss: 0.4303
Epoch [13/50], Step [325/615], Loss: 0.3558
Epoch [13/50], Step [330/615], Loss: 0.4339
Epoch [13/50], Step [335/615], Loss: 0.3976
Epoch [13/50], Step [340/615], Loss: 0.3507
Epoch [13/50], Step [345/615], Loss: 0.2916
Epoch [13/50], Step [350/615], Loss: 0.4171
Epoch [13/50], Step [355/615], Loss: 0.4822
Epoch [13/50], Step [360/615], L

Epoch [14/50], Step [570/615], Loss: 0.3411
Epoch [14/50], Step [575/615], Loss: 0.4506
Epoch [14/50], Step [580/615], Loss: 0.3560
Epoch [14/50], Step [585/615], Loss: 0.4311
Epoch [14/50], Step [590/615], Loss: 0.5208
Epoch [14/50], Step [595/615], Loss: 0.4279
Epoch [14/50], Step [600/615], Loss: 0.3694
Epoch [14/50], Step [605/615], Loss: 0.4981
Epoch [14/50], Step [610/615], Loss: 0.3852
Epoch [14/50], Step [615/615], Loss: 0.3730
Epoch [15/50], Step [5/615], Loss: 0.3108
Epoch [15/50], Step [10/615], Loss: 0.2945
Epoch [15/50], Step [15/615], Loss: 0.3210
Epoch [15/50], Step [20/615], Loss: 0.3764
Epoch [15/50], Step [25/615], Loss: 0.3760
Epoch [15/50], Step [30/615], Loss: 0.3115
Epoch [15/50], Step [35/615], Loss: 0.3504
Epoch [15/50], Step [40/615], Loss: 0.3430
Epoch [15/50], Step [45/615], Loss: 0.2280
Epoch [15/50], Step [50/615], Loss: 0.4877
Epoch [15/50], Step [55/615], Loss: 0.4139
Epoch [15/50], Step [60/615], Loss: 0.3463
Epoch [15/50], Step [65/615], Loss: 0.3020
Ep

Epoch [16/50], Step [280/615], Loss: 0.4869
Epoch [16/50], Step [285/615], Loss: 0.4510
Epoch [16/50], Step [290/615], Loss: 0.3553
Epoch [16/50], Step [295/615], Loss: 0.3464
Epoch [16/50], Step [300/615], Loss: 0.4064
Epoch [16/50], Step [305/615], Loss: 0.3324
Epoch [16/50], Step [310/615], Loss: 0.4540
Epoch [16/50], Step [315/615], Loss: 0.3554
Epoch [16/50], Step [320/615], Loss: 0.3342
Epoch [16/50], Step [325/615], Loss: 0.2730
Epoch [16/50], Step [330/615], Loss: 0.3849
Epoch [16/50], Step [335/615], Loss: 0.4383
Epoch [16/50], Step [340/615], Loss: 0.3366
Epoch [16/50], Step [345/615], Loss: 0.3744
Epoch [16/50], Step [350/615], Loss: 0.3228
Epoch [16/50], Step [355/615], Loss: 0.3083
Epoch [16/50], Step [360/615], Loss: 0.3862
Epoch [16/50], Step [365/615], Loss: 0.3151
Epoch [16/50], Step [370/615], Loss: 0.3468
Epoch [16/50], Step [375/615], Loss: 0.3326
Epoch [16/50], Step [380/615], Loss: 0.4599
Epoch [16/50], Step [385/615], Loss: 0.3164
Epoch [16/50], Step [390/615], L

Epoch [17/50], Step [600/615], Loss: 0.3910
Epoch [17/50], Step [605/615], Loss: 0.3296
Epoch [17/50], Step [610/615], Loss: 0.4401
Epoch [17/50], Step [615/615], Loss: 0.4193
Epoch [18/50], Step [5/615], Loss: 0.3176
Epoch [18/50], Step [10/615], Loss: 0.2680
Epoch [18/50], Step [15/615], Loss: 0.2949
Epoch [18/50], Step [20/615], Loss: 0.3324
Epoch [18/50], Step [25/615], Loss: 0.3546
Epoch [18/50], Step [30/615], Loss: 0.3849
Epoch [18/50], Step [35/615], Loss: 0.3628
Epoch [18/50], Step [40/615], Loss: 0.3677
Epoch [18/50], Step [45/615], Loss: 0.3040
Epoch [18/50], Step [50/615], Loss: 0.4382
Epoch [18/50], Step [55/615], Loss: 0.3666
Epoch [18/50], Step [60/615], Loss: 0.3700
Epoch [18/50], Step [65/615], Loss: 0.3657
Epoch [18/50], Step [70/615], Loss: 0.3737
Epoch [18/50], Step [75/615], Loss: 0.3449
Epoch [18/50], Step [80/615], Loss: 0.3526
Epoch [18/50], Step [85/615], Loss: 0.3922
Epoch [18/50], Step [90/615], Loss: 0.3078
Epoch [18/50], Step [95/615], Loss: 0.3354
Epoch [1

Epoch [19/50], Step [310/615], Loss: 0.3372
Epoch [19/50], Step [315/615], Loss: 0.3961
Epoch [19/50], Step [320/615], Loss: 0.3181
Epoch [19/50], Step [325/615], Loss: 0.3462
Epoch [19/50], Step [330/615], Loss: 0.2578
Epoch [19/50], Step [335/615], Loss: 0.3710
Epoch [19/50], Step [340/615], Loss: 0.3539
Epoch [19/50], Step [345/615], Loss: 0.3980
Epoch [19/50], Step [350/615], Loss: 0.3628
Epoch [19/50], Step [355/615], Loss: 0.4363
Epoch [19/50], Step [360/615], Loss: 0.2965
Epoch [19/50], Step [365/615], Loss: 0.3449
Epoch [19/50], Step [370/615], Loss: 0.2878
Epoch [19/50], Step [375/615], Loss: 0.2772
Epoch [19/50], Step [380/615], Loss: 0.4324
Epoch [19/50], Step [385/615], Loss: 0.3781
Epoch [19/50], Step [390/615], Loss: 0.3532
Epoch [19/50], Step [395/615], Loss: 0.3439
Epoch [19/50], Step [400/615], Loss: 0.4275
Epoch [19/50], Step [405/615], Loss: 0.3785
Epoch [19/50], Step [410/615], Loss: 0.2904
Epoch [19/50], Step [415/615], Loss: 0.4996
Epoch [19/50], Step [420/615], L

Epoch [21/50], Step [15/615], Loss: 0.3127
Epoch [21/50], Step [20/615], Loss: 0.3560
Epoch [21/50], Step [25/615], Loss: 0.2747
Epoch [21/50], Step [30/615], Loss: 0.3442
Epoch [21/50], Step [35/615], Loss: 0.2899
Epoch [21/50], Step [40/615], Loss: 0.4656
Epoch [21/50], Step [45/615], Loss: 0.3030
Epoch [21/50], Step [50/615], Loss: 0.2700
Epoch [21/50], Step [55/615], Loss: 0.2724
Epoch [21/50], Step [60/615], Loss: 0.3106
Epoch [21/50], Step [65/615], Loss: 0.2923
Epoch [21/50], Step [70/615], Loss: 0.3924
Epoch [21/50], Step [75/615], Loss: 0.3115
Epoch [21/50], Step [80/615], Loss: 0.3551
Epoch [21/50], Step [85/615], Loss: 0.3091
Epoch [21/50], Step [90/615], Loss: 0.3442
Epoch [21/50], Step [95/615], Loss: 0.3199
Epoch [21/50], Step [100/615], Loss: 0.2381
Epoch [21/50], Step [105/615], Loss: 0.3152
Epoch [21/50], Step [110/615], Loss: 0.5112
Epoch [21/50], Step [115/615], Loss: 0.4848
Epoch [21/50], Step [120/615], Loss: 0.3742
Epoch [21/50], Step [125/615], Loss: 0.4621
Epoch

Epoch [22/50], Step [340/615], Loss: 0.3391
Epoch [22/50], Step [345/615], Loss: 0.3553
Epoch [22/50], Step [350/615], Loss: 0.4129
Epoch [22/50], Step [355/615], Loss: 0.2972
Epoch [22/50], Step [360/615], Loss: 0.4928
Epoch [22/50], Step [365/615], Loss: 0.4060
Epoch [22/50], Step [370/615], Loss: 0.2986
Epoch [22/50], Step [375/615], Loss: 0.3795
Epoch [22/50], Step [380/615], Loss: 0.3314
Epoch [22/50], Step [385/615], Loss: 0.3938
Epoch [22/50], Step [390/615], Loss: 0.3506
Epoch [22/50], Step [395/615], Loss: 0.3263
Epoch [22/50], Step [400/615], Loss: 0.3545
Epoch [22/50], Step [405/615], Loss: 0.3557
Epoch [22/50], Step [410/615], Loss: 0.3438
Epoch [22/50], Step [415/615], Loss: 0.2574
Epoch [22/50], Step [420/615], Loss: 0.4356
Epoch [22/50], Step [425/615], Loss: 0.3099
Epoch [22/50], Step [430/615], Loss: 0.3557
Epoch [22/50], Step [435/615], Loss: 0.3934
Epoch [22/50], Step [440/615], Loss: 0.4159
Epoch [22/50], Step [445/615], Loss: 0.3009
Epoch [22/50], Step [450/615], L

Epoch [24/50], Step [45/615], Loss: 0.3592
Epoch [24/50], Step [50/615], Loss: 0.2763
Epoch [24/50], Step [55/615], Loss: 0.3018
Epoch [24/50], Step [60/615], Loss: 0.3091
Epoch [24/50], Step [65/615], Loss: 0.3283
Epoch [24/50], Step [70/615], Loss: 0.2851
Epoch [24/50], Step [75/615], Loss: 0.3368
Epoch [24/50], Step [80/615], Loss: 0.2784
Epoch [24/50], Step [85/615], Loss: 0.2669
Epoch [24/50], Step [90/615], Loss: 0.2441
Epoch [24/50], Step [95/615], Loss: 0.2905
Epoch [24/50], Step [100/615], Loss: 0.1635
Epoch [24/50], Step [105/615], Loss: 0.3324
Epoch [24/50], Step [110/615], Loss: 0.2975
Epoch [24/50], Step [115/615], Loss: 0.3102
Epoch [24/50], Step [120/615], Loss: 0.3866
Epoch [24/50], Step [125/615], Loss: 0.2719
Epoch [24/50], Step [130/615], Loss: 0.4495
Epoch [24/50], Step [135/615], Loss: 0.3419
Epoch [24/50], Step [140/615], Loss: 0.3204
Epoch [24/50], Step [145/615], Loss: 0.3141
Epoch [24/50], Step [150/615], Loss: 0.2704
Epoch [24/50], Step [155/615], Loss: 0.3822

Epoch [25/50], Step [365/615], Loss: 0.3160
Epoch [25/50], Step [370/615], Loss: 0.4017
Epoch [25/50], Step [375/615], Loss: 0.3782
Epoch [25/50], Step [380/615], Loss: 0.3234
Epoch [25/50], Step [385/615], Loss: 0.2525
Epoch [25/50], Step [390/615], Loss: 0.3221
Epoch [25/50], Step [395/615], Loss: 0.3455
Epoch [25/50], Step [400/615], Loss: 0.3298
Epoch [25/50], Step [405/615], Loss: 0.2303
Epoch [25/50], Step [410/615], Loss: 0.3131
Epoch [25/50], Step [415/615], Loss: 0.1546
Epoch [25/50], Step [420/615], Loss: 0.3052
Epoch [25/50], Step [425/615], Loss: 0.4501
Epoch [25/50], Step [430/615], Loss: 0.3916
Epoch [25/50], Step [435/615], Loss: 0.2613
Epoch [25/50], Step [440/615], Loss: 0.3619
Epoch [25/50], Step [445/615], Loss: 0.2893
Epoch [25/50], Step [450/615], Loss: 0.3838
Epoch [25/50], Step [455/615], Loss: 0.3805
Epoch [25/50], Step [460/615], Loss: 0.2501
Epoch [25/50], Step [465/615], Loss: 0.3188
Epoch [25/50], Step [470/615], Loss: 0.2910
Epoch [25/50], Step [475/615], L

Epoch [27/50], Step [70/615], Loss: 0.3343
Epoch [27/50], Step [75/615], Loss: 0.4139
Epoch [27/50], Step [80/615], Loss: 0.2887
Epoch [27/50], Step [85/615], Loss: 0.3119
Epoch [27/50], Step [90/615], Loss: 0.2646
Epoch [27/50], Step [95/615], Loss: 0.2883
Epoch [27/50], Step [100/615], Loss: 0.2592
Epoch [27/50], Step [105/615], Loss: 0.3024
Epoch [27/50], Step [110/615], Loss: 0.3472
Epoch [27/50], Step [115/615], Loss: 0.3007
Epoch [27/50], Step [120/615], Loss: 0.2567
Epoch [27/50], Step [125/615], Loss: 0.2743
Epoch [27/50], Step [130/615], Loss: 0.1151
Epoch [27/50], Step [135/615], Loss: 0.2279
Epoch [27/50], Step [140/615], Loss: 0.2873
Epoch [27/50], Step [145/615], Loss: 0.3056
Epoch [27/50], Step [150/615], Loss: 0.2837
Epoch [27/50], Step [155/615], Loss: 0.2951
Epoch [27/50], Step [160/615], Loss: 0.3682
Epoch [27/50], Step [165/615], Loss: 0.2853
Epoch [27/50], Step [170/615], Loss: 0.1647
Epoch [27/50], Step [175/615], Loss: 0.2956
Epoch [27/50], Step [180/615], Loss: 0

Epoch [28/50], Step [390/615], Loss: 0.2833
Epoch [28/50], Step [395/615], Loss: 0.3543
Epoch [28/50], Step [400/615], Loss: 0.1740
Epoch [28/50], Step [405/615], Loss: 0.2986
Epoch [28/50], Step [410/615], Loss: 0.2674
Epoch [28/50], Step [415/615], Loss: 0.2035
Epoch [28/50], Step [420/615], Loss: 0.3020
Epoch [28/50], Step [425/615], Loss: 0.3007
Epoch [28/50], Step [430/615], Loss: 0.2575
Epoch [28/50], Step [435/615], Loss: 0.2212
Epoch [28/50], Step [440/615], Loss: 0.2851
Epoch [28/50], Step [445/615], Loss: 0.2477
Epoch [28/50], Step [450/615], Loss: 0.2980
Epoch [28/50], Step [455/615], Loss: 0.2785
Epoch [28/50], Step [460/615], Loss: 0.3391
Epoch [28/50], Step [465/615], Loss: 0.3754
Epoch [28/50], Step [470/615], Loss: 0.2742
Epoch [28/50], Step [475/615], Loss: 0.3571
Epoch [28/50], Step [480/615], Loss: 0.3152
Epoch [28/50], Step [485/615], Loss: 0.4161
Epoch [28/50], Step [490/615], Loss: 0.2670
Epoch [28/50], Step [495/615], Loss: 0.3159
Epoch [28/50], Step [500/615], L

Epoch [30/50], Step [100/615], Loss: 0.2206
Epoch [30/50], Step [105/615], Loss: 0.3248
Epoch [30/50], Step [110/615], Loss: 0.3525
Epoch [30/50], Step [115/615], Loss: 0.2930
Epoch [30/50], Step [120/615], Loss: 0.2390
Epoch [30/50], Step [125/615], Loss: 0.3083
Epoch [30/50], Step [130/615], Loss: 0.2702
Epoch [30/50], Step [135/615], Loss: 0.3078
Epoch [30/50], Step [140/615], Loss: 0.2628
Epoch [30/50], Step [145/615], Loss: 0.3992
Epoch [30/50], Step [150/615], Loss: 0.2715
Epoch [30/50], Step [155/615], Loss: 0.3421
Epoch [30/50], Step [160/615], Loss: 0.3164
Epoch [30/50], Step [165/615], Loss: 0.2514
Epoch [30/50], Step [170/615], Loss: 0.3780
Epoch [30/50], Step [175/615], Loss: 0.3443
Epoch [30/50], Step [180/615], Loss: 0.3363
Epoch [30/50], Step [185/615], Loss: 0.2439
Epoch [30/50], Step [190/615], Loss: 0.2191
Epoch [30/50], Step [195/615], Loss: 0.2322
Epoch [30/50], Step [200/615], Loss: 0.2662
Epoch [30/50], Step [205/615], Loss: 0.2776
Epoch [30/50], Step [210/615], L

Epoch [31/50], Step [420/615], Loss: 0.3370
Epoch [31/50], Step [425/615], Loss: 0.3748
Epoch [31/50], Step [430/615], Loss: 0.3146
Epoch [31/50], Step [435/615], Loss: 0.2525
Epoch [31/50], Step [440/615], Loss: 0.4048
Epoch [31/50], Step [445/615], Loss: 0.3294
Epoch [31/50], Step [450/615], Loss: 0.3005
Epoch [31/50], Step [455/615], Loss: 0.3263
Epoch [31/50], Step [460/615], Loss: 0.2212
Epoch [31/50], Step [465/615], Loss: 0.2621
Epoch [31/50], Step [470/615], Loss: 0.3283
Epoch [31/50], Step [475/615], Loss: 0.3029
Epoch [31/50], Step [480/615], Loss: 0.3370
Epoch [31/50], Step [485/615], Loss: 0.1780
Epoch [31/50], Step [490/615], Loss: 0.1560
Epoch [31/50], Step [495/615], Loss: 0.2831
Epoch [31/50], Step [500/615], Loss: 0.2356
Epoch [31/50], Step [505/615], Loss: 0.3461
Epoch [31/50], Step [510/615], Loss: 0.2896
Epoch [31/50], Step [515/615], Loss: 0.3095
Epoch [31/50], Step [520/615], Loss: 0.2236
Epoch [31/50], Step [525/615], Loss: 0.3311
Epoch [31/50], Step [530/615], L

KeyboardInterrupt: 

In [27]:
def infer(sens):
    
    outputs = model(sens[0], sens[1], [len(sens[0])], [len(sens[1])])
    
    return np.argmax(outputs)

In [31]:
sen1 = "花呗的安全没有验证成功"
sen2 = "花呗安全验证没通过怎么回事"
sp1, sp2 = [], []
sp1.append(prepare_sequence(sen1,source2index).view(1,-1))
sp2.append(prepare_sequence(sen2,source2index).view(1,-1))
sens = [sp1, sp2]
infer(sens)

AttributeError: 'list' object has no attribute 'size'

In [None]:
torch.save(model.state_dict(), 'model.ckpt')