# import package

In [1]:
import numpy as np
import re
import json
import torch.utils.data
from torch.utils.data import DataLoader
import io,os
from torch import nn
from gensim.models import word2vec
from torch.nn.utils.rnn import pad_sequence
from torch import optim
from torch.autograd import Variable
from torchtext import data

# Global Variables

In [2]:
BATCH_SIZE = 100
device = torch.device("cuda")

tag_to_ix = {'start_tag':0,'stop_tag':29,'pad_tag':30}

working_path = '/home/jongsu/jupyter/pytorch_dialogue_ie/'
WV_PATH = '/home/jongsu/jupyter/pytorch_dialogue_ie/parameter/dialogue_wv'


# basic functions

In [3]:
def argmax(vec):
    '''
    return the argmax as a python int
    '''
    
    _, idx = torch.max(vec, 1)
    return idx.item()


def sent_loader(sentence): 
    '''
    pre_process per sentence
    '''
    
    result = []
    for elem in sentence.split(' '):
        if elem != '':
            result = np.append(result, elem)
    return result,len(result)



def batchload(dataset, repeat, batchsize, data_seq):
    '''
    load data as much as batch
    
    Args:
    
        dataset:
            data to load
        repeat:
            True if repeat load data
        batchsize:
            batchsize
        data_seq:
            order of data to load
        
    Yields:
    
        Batch data
    
    '''
    
    while True:
        i = batchsize
        while(i <= len(data_seq)):
            batch = []
            batch_seq = 0
            batchnum = data_seq[i-batchsize:i]
            
            while(batch_seq < batchsize):
                batch.append(dataset[batchnum[batch_seq]])
                batch_seq = batch_seq + 1
            print("batchnum = ",i)
            yield batch
            i = i + batchsize
        
        if repeat == False:
            break

    
def pad_batch(minibatch):
    i = 0
    new_batch = []
    leng_set = []
    maxleng = 1
    sentnum_per_dialogue = []
    while(i < len(minibatch)): #almost equal to BATCH_SIZE
        j = 0
        temp = []
        sentnum_per_dialogue.append(len(minibatch[i].Text)) #'' = stoptag
        #save sentnum per dialogue
        while(j < len(minibatch[i].Text)-1):
            sent, leng = sent_loader(minibatch[i].Text[j])  
            
            #convert text to word_list, word_list_length
            leng_set.append(leng)
            if leng > maxleng:
                maxleng = leng

            temp.append(sent)
            j = j + 1
        
        temp.append(["<stop_tag>"])
        leng_set.append(1) #stoptag
        new_batch.append(temp)

        i = i + 1

    i = 0
    while (i < len(minibatch)): #almost equal to BATCH_SIZE
        j = 0
        while (j < len(new_batch[i])):

            while(len(new_batch[i][j]) < maxleng):
                new_batch[i][j] = np.append(new_batch[i][j],"<pad>")

            j = j + 1
        i = i + 1
    #batch * sentnum * (word_list+pad) -> new_batch
    #batch * sentnum * (word_list_length) -> leng_set
    #batch * (sentnum) -> sentnum_per_dialogue

    return new_batch, leng_set ,sentnum_per_dialogue


wv_model = word2vec.Word2Vec(size = 100, window = 5, min_count = 5, workers = 4)
wv_model = word2vec.Word2Vec.load(WV_PATH)

def numerize_sent(sent, len_sent):
    i = 0
    n_sent = []
    while(i < len_sent):
        if(sent[i] == '<pad>'):
            n_sent.append(np.zeros(100))
            
        elif(sent[i] == '<stop_tag>'):
            n_sent.append(np.ones(100))
        else:
            try:
                n_sent.append(wv_model.wv[sent[i]])
            except:
                n_sent.append(np.zeros(100))

        i = i + 1
    return n_sent

def batch_numerical(sent_set):
    numeric_batch = []#numerized batch
    i = 0
    while(i < len(sent_set) ): #BATCH_SIZE
        dial = []#numerized dialogue
        j = 0
        while(j < len(sent_set[i])): #per dialogue
            '''
            sent_set[i][j] ['Is' 'this' 'your' 'new' 'teacher' '?' '<pad>']
            sent_set[i][j] ['Yes' ',' 'it' 'is' '.' '<pad>' '<pad>']
            sent_set[i][j] ['Is' 'she' 'short' '?' '<pad>' '<pad>' '<pad>']
            sent_set[i][j] ['No' ',' 'she' '’' 's' 'average' '.']
            sent_set[i][j] ['What' 'color' 'are' 'her' 'eyes' '?' '<pad>']
            sent_set[i][j] ['They' '’' 're' 'dark' 'gray' '.' '<pad>']
            sent_set[i][j] ['What' 'color' 'is' 'her' 'hair' '?' '<pad>']
            sent_set[i][j] ['It' '’' 's' 'blond' '.' '<pad>' '<pad>']
            sent_set[i][j] ['And' 'how' 'old' 'is' 'she' '?' '<pad>']
            sent_set[i][j] ['I' 'don' '’' 't' 'know' '.' '<pad>']
            sent_set[i][j] ['<stop_tag>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
            '''
            dial.append(numerize_sent(sent_set[i][j], len(sent_set[i][j]))) #numerized_sentence
            j = j + 1
        numeric_batch.append(dial)
        i = i + 1
    #batch * sent_num * sent_leng * wv -> numeric_batch
    return numeric_batch

def make_batch2sent(new):
    for_sentmodel = []
    batchnum = 0
    while (batchnum < len(new)): #BATCH_SIZE
        for_sentmodel = for_sentmodel + new[batchnum]
        batchnum = batchnum + 1
    sentbatch_len = len(for_sentmodel)
    #batch * sent_num * sent_leng * wv -> all_sent_num * sent_leng * wv
    return sentbatch_len, for_sentmodel

def pad_dial(last_v):
    leng_set = []
    i = 0
    while(i < len(last_v)):#BATCH_SIZE
        leng_set.append(len(last_v[i]))#sentence num
        i = i + 1
    padded_dial = pad_sequence(last_v, batch_first = True)#append padtag vector
    #print('max_dialogue_length',len(padded_dial[0]))
    
    return padded_dial, leng_set

def sent_loader(sentence): #pre_process per sentence
    result = []
    for elem in sentence.split(' '):
        if elem != '':
            result = np.append(result, elem)
    return result,len(result)

def pad_cat_tag(emotion, act): 
    i = 0
    new_tag = []
    while(i < len(emotion)): #BATCH_SIZE
        emo, lenge = sent_loader(emotion[i][0])
        ac, lenga = sent_loader(act[i][0])
        j = 0
        inte = []
        inte.append(tag_to_ix['start_tag']) #append stop tag
        while(j < len(emo)): #sent length
            inte.append(int(emo[j]) * 4 + int(ac[j]))
            j = j + 1
        inte.append(tag_to_ix['stop_tag']) #append stop tag
        torch_inte = torch.tensor(inte)
        new_tag.append(torch_inte) #str to int
        i = i + 1

            
    padded_tag = pad_sequence(new_tag, batch_first = True, padding_value = tag_to_ix['pad_tag'])
    
    #emotion+action string -> emotion+action numb + padding
    #batch*tag
    return padded_tag

def make_mask(leng):
    '''
    make one-hot vector of mask from lengset
    '''

    var = np.zeros(shape = (len(leng), leng[0])) #len(leng) = BATCH_SIZE, leng[0]+1= largest dialogue + stop
    i = 0
    while(i < len(leng)):#BATCH_SIZE
        j = 0
        while(j < leng[0]): 
            if(j < leng[i]): # <= stop tag
                var[i][j] = 1
            j = j + 1
        
        i = i + 1

    return var

def log_sum_exp(x):
    max_score, _ = torch.max(x, -1)
    max_score_broadcast = max_score.unsqueeze(-1).expand_as(x)
    return max_score + torch.log(torch.sum(torch.exp(x - max_score_broadcast), -1))



# class for load data

In [4]:
class Example(object):
    @classmethod
    def fromdict(cls, data, fields):
        ex = cls()
        for key, vals in fields.items():
            if key not in data:
                raise ValueError("Specified key {} was not found in "
                                 "the input data".format(key))
            if vals is not None:
                if not isinstance(vals, list):
                    vals = [vals]
                for val in vals:
                    name, field = val
                    setattr(ex, name, field.preprocess(data[key]))
        return ex

In [5]:
class Dataset(torch.utils.data.Dataset):
    sort_key = None

    def __init__(self, examples, fields, filter_pred=None):
        self.examples = examples

        self.fields = dict(fields)

        # Unpack field tuples
        for n, f in list(self.fields.items()):
            if isinstance(n, tuple):
                self.fields.update(zip(n, f))
                del self.fields[n]
        self.pp = tuple(d for d in self.examples if d is not None)


    @classmethod
    def splits(cls, path=None, root='.data', train=None, **kwargs):

        train_data = cls(os.path.join(path, train), **kwargs)
        #print(train_data.examples) #여기엔 field example둘다 들어있음
        #print(tuple(d for d in train_data if d is not None)) #여기엔 example만 나열된 튜플이됨
        return tuple(d for d in train_data if d is not None)

    def split(self, split_ratio=0.7, stratified=False, strata_field='label',
              random_state=None):
        
        train_ratio, test_ratio, val_ratio = check_split_ratio(split_ratio)

        # For the permutations
        rnd = RandomShuffler(random_state)
        if not stratified:
            train_data, test_data, val_data = rationed_split(self.examples, train_ratio,
                                                             test_ratio, val_ratio, rnd)
        else:
            if strata_field not in self.fields:
                raise ValueError("Invalid field name for strata_field {}"
                                 .format(strata_field))
            strata = stratify(self.examples, strata_field)
            train_data, test_data, val_data = [], [], []
            for group in strata:
                # Stratify each group and add together the indices.
                group_train, group_test, group_val = rationed_split(group, train_ratio,
                                                                    test_ratio, val_ratio,
                                                                    rnd)
                train_data += group_train
                test_data += group_test
                val_data += group_val

        splits = tuple(Dataset(d, self.fields)
                       for d in (train_data, val_data, test_data) if d)

        # In case the parent sort key isn't none
        if self.sort_key:
            for subset in splits:
                subset.sort_key = self.sort_key
        return splits

    def __getitem__(self, i):
        return self.examples[i]

    def __len__(self):
        try:
            return len(self.examples)
        except TypeError:
            return 2 ** 32

    def __iter__(self):
        for x in self.examples:
            yield x

    def __getattr__(self, attr):
        if attr in self.fields:
            for x in self.examples:
                yield getattr(x, attr)

In [6]:
class my_TabularDataset(Dataset):

    def __init__(self, path,  fields,  **kwargs):


        with open(path, encoding="utf8") as f:
            for line in f:
                examples = [Example.fromdict(per_data, fields) for per_data in json.loads(line)]


        if isinstance(fields, dict):
            fields, field_dict = [], fields
            for field in field_dict.values():
                if isinstance(field, list):
                    fields.extend(field)
                else:
                    fields.append(field)


        super(my_TabularDataset, self).__init__(examples, fields, **kwargs)

# Model_1 (shared)

In [7]:
class makesent_gru(nn.Module):
    def __init__(self, hidden_size, bidirectional):
        super(makesent_gru, self).__init__()
        self.hidden_size = hidden_size
        if (bidirectional == True):
            self.bidirectional = 2
        else:
            self.bidirectional = 1
        self.gru = nn.GRU(100, 100, bidirectional=bidirectional)
        self.lastnet = nn.Linear(200, 100)
        
    def masking_f(self, new_sent, all_seq_len):
        remake = torch.transpose(new_sent, 0, 1)
        #remake [326, 7, 200]
        
        i = 0
        while(i < len(remake)):
            if i == 0:
                # all_seq_len[0]-1 -> 5
                tensor = remake[0][all_seq_len[0]-1].view(1,200)
            else:
                tensor = torch.cat((tensor, remake[i][all_seq_len[i]-1].view(1,200)), 0)
            i = i + 1
        return tensor
    
    def forward(self, char, h0, masking_v):
        #char 7,326,100 [6][0] = 0000....<pad>
        gru_out, h0 = self.gru(char, h0)
        #gru 7,326,200
        
        last_hidden_state = self.masking_f(gru_out, masking_v)
        #[326,200]
        
        last_w = self.lastnet(last_hidden_state)
        #[326,100]
        
        return last_w

    def initHidden(self):
        return torch.zeros(self.bidirectional, 1, self.hidden_size, device=device, requires_grad=False)

# Model_2 

## compare crf-gru with crf , bi-gru

In [8]:
class CRF_(nn.Module):
    def __init__(self, tag_to_ix, hidden_dim):
        super(BiGru_CRF, self).__init__()

        self.gru = nn.GRU(100, 100, bidirectional=True) # default requires_grad = true
        self.hidden2tag = nn.Linear(100, 31) # default requires_grad = true
        self.transitions = nn.Parameter(torch.randn(31, 31))# [a,b] trans from b to a,  requires_grad = true
        self.transitions.data[0, :] = -10000 #all to start
        self.transitions.data[:, 29] = -10000 #stop to all
        self.transitions.data[:, 30] = -10000 #pad to all
        self.transitions.data[30][30] = 0 #pad to pad
        self.transitions.data[29][30] = 0 #stop to pad
        
        self.hidden_dim = hidden_dim
        self.tag_to_ix = tag_to_ix
        

    def init_hidden(self,batch):
        return Variable(torch.zeros(2, batch, 100).cuda()) # default requires_grad = false 
    
    def _get_gru_features(self, batch, sentence_set):
        
        hidden = self.init_hidden(batch)
        #gru_out, hidden = self.gru(sentence_set, hidden)

        gru_feats = self.hidden2tag(sentence_set)

        return gru_feats

    def for_score(self, pre_mask, feats):
        score = Variable(torch.zeros((BATCH_SIZE, 31)).fill_(-10000.).cuda()) #default requires_grad = false 
        score[:, self.tag_to_ix['start_tag']] = 0. #start to all is 0
        
        mask = Variable(torch.Tensor(pre_mask).cuda()) # default requires_grad = false 
        
        for t in range(feats.size(0)):  # 안에서 연산하는데이터들은 batch*featuresize*featuresize
            
            mask_t = mask[:, t].unsqueeze(-1).expand_as(score) #batch_size -> batch_size*featuresize
            
            score_t = score.unsqueeze(1).expand(-1, *self.transitions.size()) #batch_size*f -> batch_size*f*f
            
            emit = feats[t].unsqueeze(-1).expand_as(score_t) #b*f-> b*f*f
            
            trans = self.transitions.unsqueeze(0).expand_as(score_t) #b*f*f
            
            score_t = log_sum_exp(score_t + emit + trans)
            
            score = score_t * mask_t + score * (1 - mask_t) #no updating in masked score,all b*f

        score = log_sum_exp(score)
        return score

    def cal_score(self, mask, feats, tag):
        score = Variable(torch.FloatTensor(BATCH_SIZE).fill_(0.).cuda()) # default requires_grad = false 
        
        temp_tag = Variable(tag.cuda()) # default requires_grad = false 
        mask_tensor = torch.transpose(torch.FloatTensor(mask), 0, 1) #seq*batch
        mask_tensor = Variable(mask_tensor.cuda()) # default requires_grad = false 
        
        
        for i, feat in enumerate(feats): #seq*batch*feat->batch*feat
            
            transit = torch.cat(
                [torch.tensor([self.transitions[temp_tag[batch][i + 1], temp_tag[batch][i]]]) for batch in range(BATCH_SIZE)])
            
            transit = transit.cuda()
            
            transit = transit * mask_tensor[i] #batch*batch->batch

            emit = torch.cat([feat[batch][temp_tag[batch][i + 1]].view(1, -1) for batch in range(BATCH_SIZE)]).squeeze(1)

            emit = emit * mask_tensor[i]#batch*batch->batch

            score = score + transit + emit

        return score

    def neg_log_likelihood(self, mask, sentence, tags, batch):

        feats = self._get_gru_features(batch, sentence)
        
        forward_score = self.for_score(mask, feats)

        gold_score = self.cal_score(mask, feats, tags)
        '''
        newt = self.transitions.data.cpu().numpy()
        newt[0,:] = 0
        newt[:,29] = 0
        newt[:,30] = 0
        x = np.tile(np.arange(1, 32), (31, 1))
        y = x.transpose()
        z = newt #for visdom
        print(z)
        
        
        x = np.tile(np.arange(1, 32), (31, 1))
        y = x.transpose()
        z = (x + y)/20
        
        # surface
        viz.surf(X=z, opts=dict(colormap='Hot'))
        '''
        return forward_score - gold_score

    def _viterbi_decode(self, mask, feats):
        backpointers = []

        # Initialize the viterbi variables in log space
        init_vvars = Variable(torch.full((1, 31), -10000.).cuda()) # default requires_grad = false 
        init_vvars[0][0] = 0

        # forward_var at step i holds the viterbi variables for step i-1
        forward_var = init_vvars

        for i, feat in enumerate(feats):
            if mask[i] == 0:
                print('breaked')
                break
            bptrs_t = []  # holds the backpointers for this step
            viterbivars_t = []  # holds the viterbi variables for this step

            for next_tag in range(31):
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))

            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)

        #terminal_var = forward_var  + self.transitions[self.tag_to_ix['stop_tag']]
        best_tag_id = argmax(forward_var) #not terminal_var
        path_score = forward_var[0][best_tag_id]

        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we dont want to return that to the caller)
        start = best_path.pop()
        assert start == 0  # Sanity check
        best_path.reverse()
        return path_score, best_path

    def forward(self,batch, dummy_input, seq):  # dont confuse this with _forward_alg above.
        # Get the emission scores from the BiLSTM
        lstm_feats = self._get_gru_features(batch,dummy_input[1])
        
        lstm_feats = torch.transpose(lstm_feats,0,1)
        mask = dummy_input[0]
        #print(self.transitions)
        # Find the best path, given thue features.
        score, tag_seq = self._viterbi_decode(mask[seq], lstm_feats[seq])
        return score, tag_seq
    

In [9]:
class BiGru_(nn.Module):
    def __init__(self, tag_to_ix, hidden_dim):
        super(BiGru_CRF, self).__init__()

        self.gru = nn.GRU(100, 100, bidirectional=True) # default requires_grad = true
        self.hidden2tag = nn.Linear(200, 31) # default requires_grad = true
 

        self.tag_to_ix = tag_to_ix
        
    def _get_gru_features(self, batch, sentence_set):
        
        hidden = self.init_hidden(batch)
        gru_out, hidden = self.gru(sentence_set, hidden)

        gru_feats = self.hidden2tag(gru_out)

        return gru_feats


    def forward(self,batch, dummy_input, seq):  # dont confuse this with _forward_alg above.
        feats = self._get_gru_features(batch, sentence)
        
      
        return feats

In [10]:
class BiGru_CRF(nn.Module):
    def __init__(self, tag_to_ix, hidden_dim):
        super(BiGru_CRF, self).__init__()

        self.gru = nn.GRU(100, 100, bidirectional=True) # default requires_grad = true
        self.hidden2tag = nn.Linear(200, 31) # default requires_grad = true
        self.transitions = nn.Parameter(torch.randn(31, 31))# [a,b] trans from b to a,  requires_grad = true
        self.transitions.data[0, :] = -10000 #all to start
        self.transitions.data[:, 29] = -10000 #stop to all
        self.transitions.data[:, 30] = -10000 #pad to all
        self.transitions.data[30][30] = 0 #pad to pad
        self.transitions.data[29][30] = 0 #stop to pad
        
        self.hidden_dim = hidden_dim
        self.tag_to_ix = tag_to_ix
        

    def init_hidden(self,batch):
        return Variable(torch.zeros(2, batch, 100).cuda()) # default requires_grad = false 
    
    def _get_gru_features(self, batch, sentence_set):
        
        hidden = self.init_hidden(batch)
        gru_out, hidden = self.gru(sentence_set, hidden)

        gru_feats = self.hidden2tag(gru_out)

        return gru_feats

    def for_score(self, pre_mask, feats):
        
        score = Variable(torch.zeros((BATCH_SIZE, 31)).fill_(-10000.).cuda()) #default requires_grad = false 
        score[:, self.tag_to_ix['start_tag']] = 0. #start to all is 0
        
        mask = Variable(torch.Tensor(pre_mask).cuda()) # default requires_grad = false 
        
        for t in range(feats.size(0)):  # 안에서 연산하는데이터들은 batch*featuresize*featuresize
            
            mask_t = mask[:, t].unsqueeze(-1).expand_as(score) #batch_size -> batch_size*featuresize
            
            score_t = score.unsqueeze(1).expand(-1, *self.transitions.size()) #batch_size*f -> batch_size*f*f
            
            emit = feats[t].unsqueeze(-1).expand_as(score_t) #b*f-> b*f*f
            
            trans = self.transitions.unsqueeze(0).expand_as(score_t) #b*f*f
            
            score_t = log_sum_exp(score_t + emit + trans)
            
            score = score_t * mask_t + score * (1 - mask_t) #no updating in masked score,all b*f

        score = log_sum_exp(score)
        return score

    def cal_score(self, mask, feats, tag):
        score = Variable(torch.FloatTensor(BATCH_SIZE).fill_(0.).cuda()) # default requires_grad = false 
        
        temp_tag = Variable(tag.cuda()) # default requires_grad = false 
        mask_tensor = torch.transpose(torch.FloatTensor(mask), 0, 1) #seq*batch
        mask_tensor = Variable(mask_tensor.cuda()) # default requires_grad = false 
        
        
        for i, feat in enumerate(feats): #seq*batch*feat->batch*feat
            
            transit = torch.cat(
                [torch.tensor([self.transitions[temp_tag[batch][i + 1], temp_tag[batch][i]]]) for batch in range(BATCH_SIZE)])
            
            transit = transit.cuda()
            
            transit = transit * mask_tensor[i] #batch*batch->batch

            emit = torch.cat([feat[batch][temp_tag[batch][i + 1]].view(1, -1) for batch in range(BATCH_SIZE)]).squeeze(1)

            emit = emit * mask_tensor[i]#batch*batch->batch

            score = score + transit + emit

        return score

    def neg_log_likelihood(self, mask, sentence, tags, batch):

        feats = self._get_gru_features(batch, sentence)
        
        forward_score = self.for_score(mask, feats)

        gold_score = self.cal_score(mask, feats, tags)
        '''
        newt = self.transitions.data.cpu().numpy()
        newt[0,:] = 0
        newt[:,29] = 0
        newt[:,30] = 0
        x = np.tile(np.arange(1, 32), (31, 1))
        y = x.transpose()
        z = newt #for visdom
        print(z)
        
        
        x = np.tile(np.arange(1, 32), (31, 1))
        y = x.transpose()
        z = (x + y)/20
        
        # surface
        viz.surf(X=z, opts=dict(colormap='Hot'))
        '''
        return forward_score - gold_score

    def _viterbi_decode(self, mask, feats):
        backpointers = []

        # Initialize the viterbi variables in log space
        init_vvars = Variable(torch.full((1, 31), -10000.).cuda()) # default requires_grad = false 
        init_vvars[0][0] = 0

        # forward_var at step i holds the viterbi variables for step i-1
        forward_var = init_vvars

        for i, feat in enumerate(feats):
            if mask[i] == 0:
                #print('breaked')
                break
            bptrs_t = []  # holds the backpointers for this step
            viterbivars_t = []  # holds the viterbi variables for this step
            for next_tag in range(31):
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))
 
            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)

        #terminal_var = forward_var  + self.transitions[self.tag_to_ix['stop_tag']]
        best_tag_id = argmax(forward_var) #not terminal_var
        path_score = forward_var[0][best_tag_id]

        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we dont want to return that to the caller)
        start = best_path.pop()
        assert start == 0  # Sanity check
        best_path.reverse()
        return path_score, best_path

    def forward(self,batch, dummy_input, seq):  # dont confuse this with _forward_alg above.
        # Get the emission scores from the BiLSTM
        lstm_feats = self._get_gru_features(batch,dummy_input[1])
        
        lstm_feats = torch.transpose(lstm_feats,0,1)
        mask = dummy_input[0]
        #print(self.transitions)
        # Find the best path, given thue features.
        score, tag_seq = self._viterbi_decode(mask[seq], lstm_feats[seq])
        return score, tag_seq
    

# preprocess function

In [11]:
def pad_cat_tag2(emotion, act): 
    i = 0
    new_tag = []
    decodesent = []
    while(i < len(emotion)): #BATCH_SIZE
        emo, lenge = sent_loader(emotion[i][0])
        ac, lenga = sent_loader(act[i][0])
        j = 0
        inte = []
        
        #inte.append(tag_to_ix['start_tag']) #append stop tag
        while(j < len(emo)-1): #sent length
            inte.append(int(emo[j]) * 4 + int(ac[j]))
            j = j + 1
        decodesent.append(int(emo[j]) * 4 + int(ac[j])) #<-for decoding sent
        #inte.append(tag_to_ix['stop_tag']) #append stop tag
        torch_inte = torch.tensor(inte)
        new_tag.append(torch_inte) #str to int
        i = i + 1

            
    padded_tag = pad_sequence(new_tag, batch_first = True, padding_value = tag_to_ix['pad_tag'])
    
    #emotion+action string -> emotion+action numb + padding
    #batch*tag
    return padded_tag, decodesent

def pad_batch2(minibatch):
    i = 0
    new_batch = []
    decode_newbatch = []
    leng_set = []
    decode_lengset = []
    maxleng = 1
    sentnum_per_dialogue = []
    while(i < len(minibatch)): #almost equal to BATCH_SIZE
        j = 0
        temp = []
        sentnum_per_dialogue.append(len(minibatch[i].Text)-2) #-stoptag-lastsent
        #save sentnum per dialogue
        while(j < len(minibatch[i].Text)-2):#-lastsent
            sent, leng = sent_loader(minibatch[i].Text[j])  
            
            #convert text to word_list, word_list_length
            leng_set.append(leng)
            if leng > maxleng:
                maxleng = leng

            temp.append(sent)
            j = j + 1
        sent, leng = sent_loader(minibatch[i].Text[j])
        decode_newbatch.append(sent)
        decode_lengset.append(leng)
        #temp.append(["<stop_tag>"])
        #leng_set.append(1) #stoptag
        new_batch.append(temp)

        i = i + 1

    i = 0
    while (i < len(minibatch)): #almost equal to BATCH_SIZE
        j = 0
        while (j < len(new_batch[i])):

            while(len(new_batch[i][j]) < maxleng):
                new_batch[i][j] = np.append(new_batch[i][j],"<pad>")

            j = j + 1
        i = i + 1
        
    i = 0
    while (i < len(minibatch)): #almost equal to BATCH_SIZE
        
        while(len(decode_newbatch[i]) < maxleng):
            decode_newbatch[i] = np.append(decode_newbatch[i],"<pad>")


        i = i + 1
    #batch * sentnum * (word_list+pad) -> new_batch
    #batch * sentnum * (word_list_length) -> leng_set
    #batch * (sentnum) -> sentnum_per_dialogue

    return new_batch, leng_set, sentnum_per_dialogue, decode_newbatch, decode_lengset


In [12]:
def new_preprocess(sent, batch_data):

    
    #sorted with dialogue length
    #print(batch_data[0].Text)
    #######################################################
    emotion_set = []
    action_set = []
    i = 0
    while(i < len(batch_data)): #equals to BATCH_SIZE except last dataset
        emotion_set.append(batch_data[i].labels_1)
        
        action_set.append(batch_data[i].labels_2)
        
        i = i + 1
    
    new_tag, decodetag = pad_cat_tag2(emotion_set, action_set) #in new preprocess, remake pad cat tag to split last sent
    '''
    new_tag representation 0 = start_tag, 29 = stop_tag, 30 = pad_tag
    
    tensor([   2,   1,   2,   1,   2,   1,   2,   1,   2,   1]) except1
    tensor([   2,   1,   2,   1,   2,   1,   2,   1,   2,   1]) except1
    tensor([  21,   1,   1,   1,  30,  30,  30,  30,  30,  30]) except1
    tensor([   2,   1,   2,   1,  30,  30,  30,  30,  30,  30]) except1
    tensor([   1,   1,   1,   1,  30,  30,  30,  30,  30,  30]) except1
    tensor([   2,   1,   2,   1,  30,  30,  30,  30,  30,  30]) except1
    tensor([  17,  17,  17,  30,  30,  30,  30,  30,  30,  30]) except17
    tensor([   1,   1,   1,  30,  30,  30,  30,  30,  30,  30]) ..
    tensor([   3,   2,  30,  30,  30,  30,  30,  30,  30,  30]) ..
    tensor([  17,  17,  30,  30,  30,  30,  30,  30,  30,  30]) ..
    tensor([   2,   2,  30,  30,  30,  30,  30,  30,  30,  30]) ..
    
    decodesent representation
    [1, 1, 1, 1, 1, 1, 17, 1, 2, 17, 2...] len = batchsize
    
    '''
   
    
    #batch*tag
    new_tag = Variable(new_tag.cuda())# default requires_grad = false
    
    #####################################################tag_preprocess
    
    
    new, all_seq_len, sentnum_per_batch, dn, dl = pad_batch2(batch_data) #in new preprocess, remake pad batch to split last sent
    #batch * sentnum * (word_list+pad) -> new
    #batch * sentnum * (word_list_length) -> all_seq_len
    #batch * (sentnum) -> sentnum_per_batch
    #batch * (word_list + pad) -> dn
    #batch * leng -> dl
    '''
    print(dn[0])
    print(dn[1])
    print(dn[2])
    print(dl)
    
    ['Thank' 'you' 'so' 'much' '.' 'You' 'guys' 'are' 'really' 'responsible'
     '.' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>']
    ['Alright' ',' 'please' 'show' 'me' 'what' 'you' 'have' '.' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>']
    ['Bye' '!' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>'
     '<pad>']
    [11, 9, 2, 3, 12, 15, 2, 5, 20, 24, 6, 12, 10, 4, 4, 11, 6, 12, 7, 6, 15, 
    16, 12, 2, 17, 4, 7, 2, 2, 12, 20, 9, 2, 6, 9, 4, 21, 3, 7, 5, 4, 18, 7, 
    7, 10, 19, 7, 31, 9, 7, 7, 4, 12, 4, 4, 15, 4, 12, 9, 6, 2, 2, 6, 10, 16, 
    4, 6, 7, 5, 22, 9, 11, 11, 10, 4, 4, 2, 2, 4, 17, 15, 5, 16, 14, 5, 3, 22, 
    2, 2, 20, 7, 2, 2, 3, 2, 3, 6, 6, 5, 5]
    '''
    
    
    '''
    new[0]
    [array(['Is', 'this', 'your', 'new', 'teacher', '?', '<pad>'], dtype='<U32'), 
      array(['Yes', ',', 'it', 'is', '.', '<pad>', '<pad>'], dtype='<U32'), 
      array(['Is', 'she', 'short', '?', '<pad>', '<pad>', '<pad>'], dtype='<U32'), 
      array(['No', ',', 'she', '’', 's', 'average', '.'], dtype='<U32'), 
      array(['What', 'color', 'are', 'her', 'eyes', '?', '<pad>'], dtype='<U32'), 
      array(['They', '’', 're', 'dark', 'gray', '.', '<pad>'], dtype='<U32'), 
      array(['What', 'color', 'is', 'her', 'hair', '?', '<pad>'], dtype='<U32'),
      array(['It', '’', 's', 'blond', '.', '<pad>', '<pad>'], dtype='<U32'), 
      array(['And', 'how', 'old', 'is', 'she', '?', '<pad>'], dtype='<U32'), 
      array(['I', 'don', '’', 't', 'know', '.', '<pad>'], dtype='<U32'), 
      array(['<stop_tag>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], dtype='<U32'), 
      
    all_seq_len[0]
    [6, 5, 4, 7, 6, 6, 6, 5, 6, 6, 1, 
    6, 5, 4, 7, 6, 6, 6, 5, 6, 6, 1, 
    6, 5, 7, 2, 1, 
    5, 6, 5, 6, 1, 
    4, 4, 6, 4, 1, 
    6, 5, 5, 5, 1, 
    4, 4, 3, 1, 
    5, 5, 4, 1, 
    3, 3, 1, 
    3, 2, 1, 
    3, 2, 1, 
    3, 3, 1, 
    4, 2, 1, 
    4, 2, 1, 
    3, 3, 1, 
    4, 3, 1, 
    4, 3, 1, 
    4, 3, 1, 
    4, 3, 1, 
    3, 3, 1, 
    4, 2, 1, 
    3, 3, 1, 
    5, 4, 1, 
    4, 3, 1, 
    .
    .
    .
    sentnum_per_batch
    [11, 11, 5, 5, 5, 5, 4, 4, 3, 3, 3, 3, .....]
    
    '''
    
    
    new2 = batch_numerical(new)
    #batch * sent_num * sent_leng * wv -> new2

    
    sentbatch_len, for_sentmodel = make_batch2sent(new2)
    #batch * sent_num * sent_leng * wv -> all_sent_num(new_batch) * sent_leng * wv
    #for_sentmodel2 -> torch.Size([326, 7, 100])
    #sentbatch_len -> 326
    
    hidden_state = torch.tensor(np.zeros((2, sentbatch_len, 100)), dtype=torch.float, device= device, requires_grad=False)
    for_sentmodel2 = torch.tensor(np.transpose(for_sentmodel, [1, 0, 2]), dtype=torch.float, device= device, requires_grad=False)
    
    #for_sentmodel2 -> torch.Size([7, 326, 100])
    
    pre_crf_gru = sent(for_sentmodel2, hidden_state, all_seq_len) 
    '''
    print(pre_crf_gru[0])
    print(pre_crf_gru[1])
    print(pre_crf_gru[2])
    print(pre_crf_gru[3])
    print(pre_crf_gru[4])
    print(pre_crf_gru[5])
    print(pre_crf_gru[6])
    print(pre_crf_gru[7])
    print(pre_crf_gru[8])
    print(pre_crf_gru[9])
    '''
    #pre_crf_gru -> torch.Size([326, 100])
    
    #################################################sent network
    #all_sent_num(new_batch) * sent_leng * wv -> all_sent_num(new_batch) * wv
    
    
    
    last_var = torch.split(pre_crf_gru, sentnum_per_batch)
    #all_sent_num(new_batch) * wv -> batch * sent_num * wv
    '''
    last_var
    
    [11,100]
    [11,100]
    [5,100]
    [5,100]
    [5,100]
    .
    .
    '''
    new_dial, dial_leng = pad_dial(last_var) 
    #batch * sent_num * wv -> batch * (sent_num + pad) * wv
    #save dial_leng for masking
    
    '''
    new_dial
    
    [11,100]
    [11,100]
    [11,100]
    [11,100]
    [11,100]
    .
    .
    '''
    
    
    
    #new_dial = torch.transpose(new_dial, 0, 1)
    
    

    return new_dial, new_tag, dial_leng, dn, dl, decodetag

In [13]:
def all_preprocess(sent, batch_data):

    
    #sorted with dialogue length
    #print(batch_data[0].Text)
    #######################################################
    emotion_set = []
    action_set = []
    i = 0
    while(i < len(batch_data)): #equals to BATCH_SIZE except last dataset
        emotion_set.append(batch_data[i].labels_1)
        
        action_set.append(batch_data[i].labels_2)
        
        i = i + 1
    
    new_tag = pad_cat_tag(emotion_set, action_set) #in new preprocess, remake pad cat tag to split last sent
    '''
    new_tag representation 0 = start_tag, 29 = stop_tag, 30 = pad_tag
    
    tensor([  0,   2,   1,   2,   1,   2,   1,   2,   1,   2,   1,  29])
    tensor([  0,   2,   1,   2,   1,   2,   1,   2,   1,   2,   1,  29])
    tensor([  0,  21,   1,   1,   1,  29,  30,  30,  30,  30,  30,  30])
    tensor([  0,   2,   1,   2,   1,  29,  30,  30,  30,  30,  30,  30])
    tensor([  0,   1,   1,   1,   1,  29,  30,  30,  30,  30,  30,  30])
    tensor([  0,   2,   1,   2,   1,  29,  30,  30,  30,  30,  30,  30])
    tensor([  0,  17,  17,  17,  29,  30,  30,  30,  30,  30,  30,  30])
    tensor([  0,   1,   1,   1,  29,  30,  30,  30,  30,  30,  30,  30])
    tensor([  0,   3,   2,  29,  30,  30,  30,  30,  30,  30,  30,  30])
    tensor([  0,  17,  17,  29,  30,  30,  30,  30,  30,  30,  30,  30])
    tensor([  0,   2,   2,  29,  30,  30,  30,  30,  30,  30,  30,  30])
    '''
    #batch*tag
    new_tag = Variable(new_tag.cuda())# default requires_grad = false
    
    #####################################################tag_preprocess
    
    
    new, all_seq_len, sentnum_per_batch = pad_batch(batch_data) #in new preprocess, remake pad batch to split last sent
    #batch * sentnum * (word_list+pad) -> new
    #batch * sentnum * (word_list_length) -> all_seq_len
    #batch * (sentnum) -> sentnum_per_batch
    '''
    new[0]
    [array(['Is', 'this', 'your', 'new', 'teacher', '?', '<pad>'], dtype='<U32'), 
      array(['Yes', ',', 'it', 'is', '.', '<pad>', '<pad>'], dtype='<U32'), 
      array(['Is', 'she', 'short', '?', '<pad>', '<pad>', '<pad>'], dtype='<U32'), 
      array(['No', ',', 'she', '’', 's', 'average', '.'], dtype='<U32'), 
      array(['What', 'color', 'are', 'her', 'eyes', '?', '<pad>'], dtype='<U32'), 
      array(['They', '’', 're', 'dark', 'gray', '.', '<pad>'], dtype='<U32'), 
      array(['What', 'color', 'is', 'her', 'hair', '?', '<pad>'], dtype='<U32'),
      array(['It', '’', 's', 'blond', '.', '<pad>', '<pad>'], dtype='<U32'), 
      array(['And', 'how', 'old', 'is', 'she', '?', '<pad>'], dtype='<U32'), 
      array(['I', 'don', '’', 't', 'know', '.', '<pad>'], dtype='<U32'), 
      array(['<stop_tag>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'], dtype='<U32'), 
      
    all_seq_len[0]
    [6, 5, 4, 7, 6, 6, 6, 5, 6, 6, 1, 
    6, 5, 4, 7, 6, 6, 6, 5, 6, 6, 1, 
    6, 5, 7, 2, 1, 
    5, 6, 5, 6, 1, 
    4, 4, 6, 4, 1, 
    6, 5, 5, 5, 1, 
    4, 4, 3, 1, 
    5, 5, 4, 1, 
    3, 3, 1, 
    3, 2, 1, 
    3, 2, 1, 
    3, 3, 1, 
    4, 2, 1, 
    4, 2, 1, 
    3, 3, 1, 
    4, 3, 1, 
    4, 3, 1, 
    4, 3, 1, 
    4, 3, 1, 
    3, 3, 1, 
    4, 2, 1, 
    3, 3, 1, 
    5, 4, 1, 
    4, 3, 1, 
    .
    .
    .
    sentnum_per_batch #contain stop tag
    [11, 11, 5, 5, 5, 5, 4, 4, 3, 3, 3, 3, .....]
    
    '''
    
    
    new2 = batch_numerical(new)
    #batch * sent_num * sent_leng * wv -> new2

    
    sentbatch_len, for_sentmodel = make_batch2sent(new2)
    #batch * sent_num * sent_leng * wv -> all_sent_num(new_batch) * sent_leng * wv
    #for_sentmodel2 -> torch.Size([326, 7, 100])
    #sentbatch_len -> 326
    
    hidden_state = torch.tensor(np.zeros((2, sentbatch_len, 100)), dtype=torch.float, device= device, requires_grad=False)
    for_sentmodel2 = torch.tensor(np.transpose(for_sentmodel, [1, 0, 2]), dtype=torch.float, device= device, requires_grad=False)
    
    #for_sentmodel2 -> torch.Size([7, 326, 100])
    
    pre_crf_gru = sent(for_sentmodel2, hidden_state, all_seq_len) 
    '''
    print(pre_crf_gru[0])
    print(pre_crf_gru[1])
    print(pre_crf_gru[2])
    print(pre_crf_gru[3])
    print(pre_crf_gru[4])
    print(pre_crf_gru[5])
    print(pre_crf_gru[6])
    print(pre_crf_gru[7])
    print(pre_crf_gru[8])
    print(pre_crf_gru[9])
    '''
    #pre_crf_gru -> torch.Size([326, 100])
    
    #################################################sent network
    #all_sent_num(new_batch) * sent_leng * wv -> all_sent_num(new_batch) * wv
    
    
    
    last_var = torch.split(pre_crf_gru, sentnum_per_batch)
    #all_sent_num(new_batch) * wv -> batch * sent_num * wv
    '''
    last_var
    
    [11,100]
    [11,100]
    [5,100]
    [5,100]
    [5,100]
    .
    .
    '''
    new_dial, dial_leng = pad_dial(last_var) 
    #batch * sent_num * wv -> batch * (sent_num + pad) * wv
    #save dial_leng for masking
    
    '''
    new_dial
    
    [11,100]
    [11,100]
    [11,100]
    [11,100]
    [11,100]
    .
    .
    '''
    
    
    
    new_dial = torch.transpose(new_dial, 0, 1)
    
    

    return new_dial, new_tag, dial_leng

# train function

In [14]:
def loss_filtering(loss_arr, filtering_value, newary, batchnum):
    """
    function for prevent overfitting
    
    Args:
        loss_arr:
            loss array for batch data
        
        filtering_value:
            allowed maximum loss
        
        newary:
            index for big loss data
            
        batchnum:
            current batch count
        
        
    Yields:
        loss_arr:
            filtered loss array for batch data
    
    """
    i = 0
    err_count = 0
    while i < len(loss_arr):
        if loss_arr[i] < filtering_value:
            loss_arr[i] = 0
        elif loss_arr[i] > (filtering_value*4):
            err_count = err_count + 1
            newary.append(i + batchnum*100)
        i = i + 1
    print ("###############################################errcount",err_count)
    return loss_arr, newary

In [15]:
def train_func(train_data, shared_model , comp_model, dataseq, filtering_value, iter_num, batch_size, learning_rate):
    """
    Args:
            train_data: 
                train data
                
            shared_model:
                shared model
                
            comp_model:
                comp model
                
            dataseq:
                data sort sequence
                
            filtering_value:
                allowed maximum loss
                
            iter_num:
                train iterate
                
            batch_size:
                batchsize
            
            learning_rate:
                learning_rate

    Yields:
            newary:
                not trained data
                which has loss bigger then filtering value
    
    """
    
    optimizer1 = optim.SGD(shared_model.parameters(), lr= learning_rate, weight_decay=1e-4)
    optimizer2 = optim.SGD(comp_model.parameters(), lr= learning_rate, weight_decay=1e-4)
    
    newary_ = []
    k = 0
    for batch_data in batchload(train_data, repeat=True, batchsize = batch_size, data_seq = dataseq ):
        #load txt data from jsonfile

        shared_model.zero_grad()
        comp_model.zero_grad()

        new_dial, new_tag, dial_leng = all_preprocess(shared_model, batch_data)
        #load batch*(dialogue_length*sent_vec(float)) -> new_dial
        #load batch*tag -> new_tag
        #load batch * dial_leng

        loss = comp_model.neg_log_likelihood(make_mask(dial_leng), new_dial, new_tag, BATCH_SIZE)
        loss,newary_ = loss_filtering(loss,filtering_value, newary_,k)
        batch_loss = torch.sum(loss)
        batch_loss.backward(retain_graph=False)
        optimizer1.step()
        optimizer2.step()

        unuselist = [new_dial, new_tag, dial_leng]
        del unuselist
        
        k = k + 1
        print(k)
        if k%10 != 0:
            torch.save(shared_model.state_dict(),working_path + 'parameter/shared.pth')
            torch.save(comp_model.state_dict(),working_path + 'parameter/crf_gru.pth') #3.53 save with dummy
            dummy_input = [make_mask(dial_leng),new_dial]
            
            print("tag = ",new_tag[7])
            print("expect = ",comp_model(BATCH_SIZE,dummy_input,seq=7)[1])
            print("accuracy = ", cal_accuracy(comp_model(BATCH_SIZE,dummy_input,seq=7)[1],new_tag[7]))
            
            print(loss)
        if k == int(len(train_data)/BATCH_SIZE)*iter_num:
            break
            
        if k % int(len(train_data)/BATCH_SIZE) == 0:
            newary = newary_
            newary_ = []
    print(newary)
    
    return newary

# Test function

In [16]:
def cal_accuracy(model_predict, real_tag):
    '''
    Args:
        model_predict
            model predicted tags
        real_tag
            real tags
        tag_len
            tag len
        
    Yields:
        accuracy
        
    Example:
        
    real = torch.tensor([  0,   2,   1,   2,   1,   1,   1,   1,   1,   2,   1,   1, 1,   1,  29], device='cuda:0')
    model = [2, 1, 2, 1, 17, 1, 17, 1, 3, 1, 17, 1, 17, 29]
    taglen = len(model)
    
    (npreal[tagseq+1]//4) real emotion
    (model_predict[tagseq]//4) model emotion
    
    (npreal[tagseq+1]%4) real action
    (model_predict[tagseq]%4) model action
    
    emotion err = 0.3076923076923077
    action err = 0.07692307692307693
    accuracy = 0.8076923076923077
    '''
    tag_len = len(model_predict)
    npreal = real_tag.cpu().numpy()
    tagseq = 0
    emotiontag = []
    actiontag = []
    emoerr = 0
    acterr = 0
    while(tagseq < tag_len-1):
      
        emotiontag = np.append(emotiontag, npreal[tagseq+1]//4)
        actiontag = np.append(actiontag, npreal[tagseq+1]%4)
        
        
        if((npreal[tagseq+1]//4) != (model_predict[tagseq]//4)):
            emoerr = emoerr + 1

        
        if((npreal[tagseq+1]%4) != (model_predict[tagseq]%4)):
            acterr = acterr + 1

        tagseq = tagseq + 1

    return(1-(emoerr/tagseq + acterr/tagseq)/2)
    
    

In [17]:
def test_func(test_data, shared_model , comp_model, dataseq, batch_size):
    """
    Args:
            test_data: 
                test data
                
            shared_model:
                shared model
                
            comp_model:
                comp model
                
            dataseq:
                data sort sequence
                
            batch_size:
                batchsize
            


    Yields:
            accuracy:
                accuracy of all data
    
    """
    

    k = 0
    accuracy = 0
    with torch.no_grad():
        for batch_data in batchload(test_data, repeat=True, batchsize = batch_size, data_seq = dataseq ):
            #load txt data from jsonfile


            new_dial, new_tag, dial_leng = all_preprocess(shared_model, batch_data)
            #load batch*(dialogue_length*sent_vec(float)) -> new_dial
            #load batch*tag -> new_tag
            #load batch * dial_leng

            unuselist = [new_dial, new_tag, dial_leng]
            del unuselist
            dummy_input = [make_mask(dial_leng),new_dial]

            print("tag = ",new_tag[7])
            print("expect = ",comp_model(batch_size,dummy_input,seq=7)[1])
            print("accuracy = ", cal_accuracy(comp_model(batch_size,dummy_input,seq=7)[1],new_tag[7]))

            tag_num = 0
            batch_acc = 0
            while(tag_num < batch_size):
                batch_acc = batch_acc + cal_accuracy(comp_model(batch_size,dummy_input,seq=tag_num)[1],new_tag[tag_num])
                tag_num = tag_num + 1
            batch_acc = batch_acc/batch_size
            accuracy = accuracy + batch_acc
            print("batchacc = ",batch_acc)
            if k == int(len(test_data)/batch_size):
                break

            k = k + 1
            print(k)

    
    return accuracy*batch_size/len(test_data)

In [18]:
def encoder_mask(leng):
    '''
    make one-hot vector of mask from lengset
    '''

    var = np.zeros(shape = (len(leng), leng[0])) #len(leng) = BATCH_SIZE, leng[0]+1= largest dialogue + stop
    i = 0
    while(i < len(leng)):#BATCH_SIZE
        j = 0
        while(j < leng[0]): 
            if(j == leng[i]-1): # <= stop tag
                var[i][j] = 1
            
            j = j + 1
        
        i = i + 1

    return var

def decoder_mask(leng, maxsize):
    '''
    make one-hot vector of mask from lengset
    '''

    var = np.zeros(shape = (len(leng), maxsize)) #len(leng) = BATCH_SIZE, leng[0]+1= largest dialogue + stop
    i = 0
    while(i < len(leng)):#BATCH_SIZE
        j = 0
        while(j < maxsize): 
            if(j < leng[i]): # <= stop tag
                var[i][j] = 1
            
            j = j + 1
        
        i = i + 1

    return var

In [19]:
hidden_state_size = 100
batch_size = 100

class encoder(nn.Module):
    def __init__(self, hidden_state_size, is_tag_, tag_size, bidir):
        super(encoder, self).__init__()
        self.h_size = hidden_state_size
        self.is_tag = is_tag_
        self.t_size = tag_size
        self.bid = bidir
        self.gru = nn.GRU(self.h_size, self.h_size, bidirectional = self.bid)
        self.embed_tag = nn.Embedding(self.t_size, self.h_size)
        self.combinput = nn.Linear(2*self.h_size, self.h_size)
        self.comblast_t = nn.Linear(3*self.h_size, 2*self.h_size)
        
    def forward(self, input_, input_tag, de_len, last_tag):
        hidden = self.init_hidden()
        de_len = Variable(torch.tensor(de_len).cuda())
        last_tag = Variable(torch.tensor(last_tag).cuda())
        if self.is_tag == False:
            output, hidden_state = self.gru(Variable(input_.cuda()), hidden)
        else:
            emb_tag = self.embed_tag(Variable(input_tag.cuda()))
            newinput = torch.cat((emb_tag, input_),2)
            input_ = self.combinput(newinput)
            input_ = torch.transpose(input_, 0, 1)
            
            output, hidden_state = self.gru(input_, hidden) #outputsize [19, 100, 200] hiddensize [2, 100, 100]
            len_info = torch.tensor(de_len)
            len_info = torch.unsqueeze(len_info,2).type(torch.cuda.FloatTensor)
            output = torch.transpose(output, 0, 1)
            new_output = torch.mul(output,len_info)
            new_output = torch.sum(new_output,1) #100,200
  
            new_last_tag = self.embed_tag(last_tag) #100,100
 
            cat = torch.cat((new_output, new_last_tag),1) #300,100

            lastoutput = self.comblast_t(cat)
            lastoutput = torch.reshape(lastoutput, (100, 2, 100))
            lastoutput = torch.transpose(lastoutput, 0, 1).contiguous()
            
            #not hidden state. output post processing need
        return lastoutput
    
    def init_hidden(self):
        return Variable(torch.zeros(2,100,100,device= device))
    
    
    
class decoder(nn.Module): #target padding sos
    def __init__(self, hidden_state_size, bidir):
        super(decoder, self).__init__()
        self.bid = bidir
        self.h_size = hidden_state_size
        self.comboutput = nn.Linear(2*self.h_size, self.h_size)
        self.gru = nn.GRU(self.h_size, self.h_size, bidirectional = self.bid)
        
    def forward(self, new_input, pre_hidden_state):
        input_ = new_input #1,100,100
        hidden_state = pre_hidden_state #2,100,100
        output, hidden_state = self.gru(input_, hidden_state)
        output = self.comboutput(output)
        return output, hidden_state
    
    def init_hidden(self):
        return Variable(torch.zeros(2,100,100,device= device))

    
    
encoder1 = encoder(hidden_state_size,
                   is_tag_ = False,
                   tag_size = 31, #???
                   bidir = True
                  ).cuda()

encoder2 = encoder(hidden_state_size,
                   is_tag_ = True,
                   tag_size = 31, 
                   bidir = True
                  ).cuda()

decoder1 = decoder(hidden_state_size,
                   bidir = True
                  ).cuda()

# generate

In [20]:
def dialogue_maxlen(data):
    '''
    used in lamda function
    
    return longest sentence len per dialogue
    '''
    i = 0
    maxleng = 0
    while(i < len(data.Text)): # len(data.Text) = dialogue length
        
        text, leng = sent_loader(data.Text[i])
        if leng > maxleng:
            maxleng = leng
        i = i + 1
    return maxleng

def dialogue_maxlen_per_batch(batch_len, batchdata):
    i = 0
    maxlen = 1
    while(i < batch_len):

        if dialogue_maxlen(batch_data[i]) > maxlen:
            maxlen = dialogue_maxlen(batch_data[i])
        i = i + 1
    return maxlen

my_fields={'dial': ('Text', data.Field(sequential=True)),
        'emo': ('labels_1', data.Field(sequential=False)),
        'act': ('labels_2', data.Field(sequential=False))}

train = my_TabularDataset.splits(path = working_path, train = 'data_jsonfile/full_data.json',
                          fields=my_fields) 
train = sorted(train, key = lambda  x: dialogue_maxlen(x))
train = train[:-1118] #exclude dialogue which has extremely long sentence (0~11117 => 0~9999)
train = sorted(train, key = lambda  x: -len(x.Text)) #reordering training dataset with number of sentences
# low index has much sentence because afterwards we use torch pad_sequence
dataseq = torch.arange(end = len(train),dtype=torch.int)


train_data = train
#shared_model = sent_to_vextor_bigru_net
#comp_model = my_grucrf_model
dataseq = dataseq
filtering_value = 3
iter_num = 1
batch_size = 100
learning_rate = 0.00003

sent_to_vector_bigru_net = makesent_gru(100, True).cuda()


In [21]:
def makewv(target):
        targetwv = []
        batchnum = 0
        while(batchnum < batch_size):
            wv = numerize_sent(target[batchnum],len(target[batchnum]))
            targetwv.append(wv)
            
            batchnum = batchnum + 1
        return targetwv

In [22]:
k = 1
for batch_data in batchload(train_data, repeat=True, batchsize = batch_size, data_seq = dataseq ):
    #load txt data from jsonfile
    print(k)

    '''
    batch_data[0].labels_1
    ['0 0 0 0 0 0 0 0 0 0 0 0 4 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 4']
    ['1 2 3 4 1 2 2 2 2 2 1 1 1 2 1 3 4 2 1 2 1 2 1 2 1 1 1 2 2 2 1 3 4 1 1']
    
    '''
    
    k = k + 1
    if k == 3:
        sent_to_vector_bigru_net.zero_grad()
    
    
        new_dial, new_tag, dial_leng, de_sent, de_len, de_tag = new_preprocess(sent_to_vector_bigru_net, 
                                                                                    batch_data) #mask??
        
        
        '''
        encoder_mask(dial_leng)
        [[0. 0. 0. ... 0. 0. 1.]
         [0. 0. 0. ... 0. 0. 1.]
         [0. 0. 0. ... 0. 0. 1.]
         ...
         [0. 0. 0. ... 1. 0. 0.]
         [0. 0. 0. ... 1. 0. 0.]
         [0. 0. 0. ... 1. 0. 0.]]
        torch.sum(torch.tensor(encoder_mask(dial_leng)),1) 
        1111111111...
        
        '''
        
        
        decoder_hidden = encoder2(new_dial, new_tag, encoder_mask(dial_leng), de_tag) #2,100,100
        
        decoder_sent = 0
        all_output = Variable(torch.zeros(1,100,100,device= device))
        out = Variable(torch.zeros(1,100,100,device= device))
        seq = 0
        while(decoder_sent < dialogue_maxlen_per_batch(batch_size, batch_data)): #all_output = seq*batch*hidden
            out, decoder_hidden = decoder1(out, decoder_hidden)
            decoder_sent = decoder_sent + 1
            if seq != 0:
                all_output = torch.cat((all_output,out),0)
            else:
                all_output = out
            seq = seq + 1
        de_mask = decoder_mask(de_len,dialogue_maxlen_per_batch(batch_size, batch_data)) #100,44
        demask = torch.tensor(de_mask)
        demask = torch.unsqueeze(demask,2).type(torch.cuda.FloatTensor)
        demask = torch.transpose(demask, 0, 1)
        
        all_output = torch.mul(demask,all_output)
        
        targetwv = makewv(de_sent)
        targetwv = torch.transpose(torch.tensor(targetwv), 0, 1).type(torch.cuda.FloatTensor)
        
        loss = nn.MSELoss()
        
        mseloss = loss(all_output, targetwv)
        mseloss.backward()
        print(np.shape(targetwv))
        print(all_output.size())
        
        
        #newout, newhidden = decoder(out) #newout, newdial loss
        
        break
    '''
    
    
    shared_model.zero_grad()
    comp_model.zero_grad()

    new_dial, new_tag, dial_leng = all_preprocess(shared_model, batch_data) #we need split batch_data
    #load batch* (dialogue_length*sent_vec(float)) -> new_dial
    #load batch* tag -> new_tag
    #load batch* dial_leng

    loss = comp_model.neg_log_likelihood(make_mask(dial_leng), new_dial, new_tag, BATCH_SIZE)
    loss,newary_ = loss_filtering(loss,filtering_value, newary_,k)
    batch_loss = torch.sum(loss)
    batch_loss.backward(retain_graph=False)
    optimizer1.step()
    optimizer2.step()

    unuselist = [new_dial, new_tag, dial_leng]
    del unuselist
    '''

batchnum =  100
1
batchnum =  200
2
torch.Size([44, 100, 100])
torch.Size([44, 100, 100])


In [23]:
x = torch.randn(2, 3, 4)
y = torch.randn(2, 3)
print(x)
print(y)
torch.mul(x,y)

tensor([[[-0.3534, -0.0571, -0.6270,  1.0470],
         [ 1.8247,  0.1644,  1.4346, -0.6674],
         [-0.4834,  0.6080, -0.2463, -1.0291]],

        [[ 1.6242, -0.0439, -1.2608,  0.0611],
         [-0.9631,  0.3465, -1.0810, -0.5787],
         [ 0.2873, -0.7777,  0.0336, -0.4647]]])
tensor([[-1.1274,  0.7311, -0.1448],
        [-1.1653, -0.0840, -1.0307]])


RuntimeError: The size of tensor a (4) must match the size of tensor b (3) at non-singleton dimension 2

In [None]:
np.shape(torch.cat((x, x, x), 0))

In [None]:
np.shape(torch.cat((x, x, x), 1))

In [None]:
np.shape(torch.cat((x, x, x), 2))

In [None]:
a = torch.randn(4, 3)
torch.sum(a,0)