# Import packages

In [1]:
import numpy as np
import re
import json
import torch.utils.data
from torch.utils.data import DataLoader
import io,os
from torch import nn
from gensim.models import word2vec
from torch.nn.utils.rnn import pad_sequence
from torch import optim
from torch.autograd import Variable
from torchtext import data

# Global Variables

In [2]:
# Number of dialogues per mini-batch.
BATCH_SIZE = 100

# Target device for the models and tensors built in this notebook.
device = torch.device("cuda")

# Reserved tag ids of the CRF layer; the ids in between are emotion/act tags.
tag_to_ix = {
    'start_tag': 0,
    'stop_tag': 29,
    'pad_tag': 30,
}

# Project root and the location of the saved word2vec model.
working_path = '/home/jongsu/jupyter/pytorch_dialogue_ie/'
WV_PATH = '/home/jongsu/jupyter/pytorch_dialogue_ie/parameter/dialogue_wv'


# basic functions

In [3]:
def argmax(vec):
    '''
    Position of the largest entry along dim 1 of ``vec`` as a Python int.

    ``.item()`` only accepts a single-element tensor, so ``vec`` must have
    exactly one row (the Viterbi decoders in this file pass a ``(1, n_tags)``
    tensor).
    '''
    best_index = vec.max(dim=1)[1]
    return best_index.item()


def sent_loader(sentence): 
    '''
    Split one sentence on single spaces and discard the empty pieces.

    Returns:
        (tokens, count). ``tokens`` is grown with ``np.append``, so it is a
        NumPy array as soon as one token was found and stays the initial empty
        Python list otherwise. ``count`` is ``len(tokens)``.
    '''
    tokens = []
    # filter(None, ...) drops exactly the '' pieces produced by repeated spaces.
    for piece in filter(None, sentence.split(' ')):
        tokens = np.append(tokens, piece)
    return tokens, len(tokens)

def cal_dialogue(data):
    '''
    Helper meant to be used from a lambda.

    Returns the largest ``len()`` found among the entries of ``data.Text``
    (one entry per sentence of the dialogue), or 0 when ``data.Text`` is empty.
    '''
    return max((len(sentence) for sentence in data.Text), default=0)

def batchload(dataset, repeat, batchsize, data_seq):
    '''
    Generate mini-batches of ``dataset`` items in the order given by ``data_seq``.

    Only complete batches are produced: trailing indices that do not fill a
    whole batch are skipped in every pass.

    Args:

        dataset:
            indexable collection to load from
        repeat:
            keep cycling over ``data_seq`` unless this compares equal to False
        batchsize:
            number of items per batch (must be positive)
        data_seq:
            sequence of indices into ``dataset``; it is re-read on every pass,
            so an in-place reshuffle between passes is picked up

    Yields:

        list of ``batchsize`` items taken from ``dataset``

    Raises:

        ValueError: if ``batchsize`` is not positive (raised when the
            generator is first advanced)
    '''
    if batchsize <= 0:
        raise ValueError(
            "batchsize must be a positive integer, got {}".format(batchsize))

    total = len(data_seq)
    if total < batchsize:
        # Not a single full batch fits. Without this guard repeat=True would
        # loop forever without ever yielding.
        return

    while True:
        for end in range(batchsize, total + 1, batchsize):
            batch = [dataset[index] for index in data_seq[end - batchsize:end]]
            print("batchnum = ", end)
            yield batch

        # Deliberately '== False' (not 'not repeat'): e.g. repeat=None keeps cycling.
        if repeat == False:
            break

    
def pad_batch(minibatch):
    '''
    Tokenise every dialogue of a mini-batch and pad all sentences to one width.

    For each example, every entry of ``example.Text`` except the last one is
    tokenised with ``sent_loader``; the last entry is not tokenised and a
    ``["<stop_tag>"]`` pseudo-sentence is appended instead. Afterwards each
    sentence is extended with ``"<pad>"`` until it is as long as the longest
    tokenised sentence of the batch (the width is never below 1).

    Returns:
        new_batch:            batch * sentnum * (word_list + pad)
        leng_set:             flat list of unpadded sentence lengths, dialogue
                              after dialogue (the stop sentence counts as 1)
        sentnum_per_dialogue: ``len(example.Text)`` for every example
    '''
    tokenised = []
    leng_set = []
    sentnum_per_dialogue = []
    widest = 1

    # Pass 1: tokenise and remember the longest sentence.
    for example in minibatch:
        sentnum_per_dialogue.append(len(example.Text))
        sentences = []
        for position in range(len(example.Text) - 1):
            words, count = sent_loader(example.Text[position])
            leng_set.append(count)
            widest = max(widest, count)
            sentences.append(words)

        sentences.append(["<stop_tag>"])
        leng_set.append(1)
        tokenised.append(sentences)

    # Pass 2: right-pad every sentence (stop sentence included) to that width.
    for sentences in tokenised:
        for position in range(len(sentences)):
            while len(sentences[position]) < widest:
                sentences[position] = np.append(sentences[position], "<pad>")

    return tokenised, leng_set, sentnum_per_dialogue


# Load the trained word2vec model stored at WV_PATH.
# A freshly constructed Word2Vec(size=100, ...) used to be assigned here first;
# it was overwritten immediately by load() and its `size` keyword is rejected by
# gensim >= 4 (renamed to `vector_size`), so only the load remains.
wv_model = word2vec.Word2Vec.load(WV_PATH)

def numerize_sent(sent, len_sent):
    '''
    Map the first ``len_sent`` tokens of ``sent`` to 100-dimensional vectors.

    * ``'<pad>'``      -> all zeros
    * ``'<stop_tag>'`` -> all ones
    * any other token  -> its vector in ``wv_model.wv``; tokens missing from
      the vocabulary (``KeyError``) fall back to all zeros

    Only the out-of-vocabulary ``KeyError`` is absorbed; any other failure of
    the lookup propagates to the caller instead of silently becoming a zero
    vector.

    Returns:
        list with ``len_sent`` vectors
    '''
    vectors = []
    for position in range(len_sent):
        token = sent[position]
        if token == '<pad>':
            vectors.append(np.zeros(100))
        elif token == '<stop_tag>':
            vectors.append(np.ones(100))
        else:
            try:
                vectors.append(wv_model.wv[token])
            except KeyError:
                # out-of-vocabulary word
                vectors.append(np.zeros(100))
    return vectors

def batch_numerical(sent_set):
    '''
    Replace every token of a padded batch by its vector via ``numerize_sent``.

    ``sent_set`` is batch * sent_num * sent_leng tokens, e.g. one dialogue:

        ['Is' 'this' 'your' 'new' 'teacher' '?' '<pad>']
        ['Yes' ',' 'it' 'is' '.' '<pad>' '<pad>']
        ...
        ['<stop_tag>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']

    Returns:
        nested lists: batch * sent_num * sent_leng * wv
    '''
    return [
        [numerize_sent(sentence, len(sentence)) for sentence in dialogue]
        for dialogue in sent_set
    ]

def make_batch2sent(new):
    '''
    Flatten a batch of dialogues into one list of sentences.

    batch * sent_num * sent_leng * wv -> all_sent_num * sent_leng * wv

    The sentences are collected in a single pass (no repeated list
    concatenation), and the dialogues may be any iterable, not only lists.

    Returns:
        (sentbatch_len, for_sentmodel): total number of sentences and the
        flattened list, in dialogue order.
    '''
    for_sentmodel = [sentence for dialogue in new for sentence in dialogue]
    return len(for_sentmodel), for_sentmodel

def pad_dial(last_v):
    '''
    Pad a sequence of per-dialogue tensors to a common number of sentences.

    Returns:
        (padded_dial, leng_set): the result of ``pad_sequence`` with the batch
        dimension first, and the original ``len()`` of every dialogue tensor.
    '''
    leng_set = [len(dialogue) for dialogue in last_v]
    return pad_sequence(last_v, batch_first=True), leng_set

def sent_loader(sentence): #pre_process per sentence
    '''
    Split one sentence on single spaces and discard the empty pieces.

    NOTE: this is a second, identical definition of ``sent_loader``; being the
    later one, it is the definition in effect after the cell has run.

    Returns:
        (tokens, count). ``tokens`` is grown with ``np.append``, so it is a
        NumPy array as soon as one token was found and stays the initial empty
        Python list otherwise. ``count`` is ``len(tokens)``.
    '''
    tokens = []
    for piece in filter(None, sentence.split(' ')):
        tokens = np.append(tokens, piece)
    return tokens, len(tokens)

def pad_cat_tag(emotion, act): 
    '''
    Merge emotion and act labels into one tag sequence per dialogue and pad.

    ``emotion[i][0]`` and ``act[i][0]`` are space-separated label strings. For
    each position the joint tag is ``int(emotion) * 4 + int(act)``. Every
    sequence is wrapped in ``start_tag`` ... ``stop_tag`` and the batch is
    padded on the right with ``pad_tag`` (ids taken from ``tag_to_ix``).

    Returns:
        integer tensor of shape batch * tag
    '''
    tag_rows = []
    for row in range(len(emotion)):
        emo, _ = sent_loader(emotion[row][0])
        ac, _ = sent_loader(act[row][0])

        joint = [int(emo[k]) * 4 + int(ac[k]) for k in range(len(emo))]
        # start tag first, stop tag last
        ids = [tag_to_ix['start_tag']] + joint + [tag_to_ix['stop_tag']]
        tag_rows.append(torch.tensor(ids))

    return pad_sequence(tag_rows, batch_first=True,
                        padding_value=tag_to_ix['pad_tag'])

def make_mask(leng):
    '''
    Build a 0/1 validity mask from a list of sequence lengths.

    The mask has one row per entry of ``leng`` and ``leng[0]`` columns, i.e.
    the first length is taken as the width (the caller is expected to put the
    longest sequence first). Row ``i`` holds ones in its first ``leng[i]``
    columns and zeros elsewhere.
    '''
    mask = np.zeros(shape=(len(leng), leng[0]))
    for row, valid in enumerate(leng):
        # Slicing clips lengths beyond the width; max() keeps non-positive
        # lengths from turning into a negative slice bound.
        mask[row, :max(valid, 0)] = 1
    return mask

def log_sum_exp(x):
    '''
    Numerically stable ``log(sum(exp(x)))`` over the last dimension of ``x``.

    The per-row maximum is subtracted before exponentiating and added back
    afterwards; the last dimension is reduced away.
    '''
    peak = x.max(dim=-1)[0]
    shifted = x - peak.unsqueeze(-1)
    return peak + shifted.exp().sum(dim=-1).log()



# Classes for loading data

In [4]:
class Example(object):
    """Plain attribute container for one data record."""

    @classmethod
    def fromdict(cls, data, fields):
        """Build an example from a dict record.

        Args:
            data: mapping holding the raw values of one record.
            fields: mapping ``key -> spec`` where ``spec`` is ``None`` (key is
                required but ignored), one ``(name, field)`` pair, or a list of
                such pairs.

        Returns:
            A new instance with ``field.preprocess(data[key])`` stored under
            every requested attribute ``name``.

        Raises:
            ValueError: if a key of ``fields`` is missing from ``data``.
        """
        ex = cls()
        for key, targets in fields.items():
            if key not in data:
                raise ValueError("Specified key {} was not found in "
                                 "the input data".format(key))
            if targets is None:
                continue
            if not isinstance(targets, list):
                targets = [targets]
            for name, field in targets:
                setattr(ex, name, field.preprocess(data[key]))
        return ex

In [5]:
class Dataset(torch.utils.data.Dataset):
    """Collection of examples plus the mapping of field names to fields.

    Attributes:
        sort_key: optional key function; copied onto the subsets made by
            ``split`` when it is set.
        examples: the examples handed to the constructor.
        fields:   dict of field name -> field.
        pp:       tuple of the examples that are not ``None``.
    """
    sort_key = None

    def __init__(self, examples, fields, filter_pred=None):
        """Store ``examples`` and normalise ``fields`` into a flat dict.

        ``filter_pred`` is accepted but not applied by this implementation.
        """
        self.examples = examples
        self.fields = dict(fields)

        # A key that is a tuple of names carries a matching group of fields:
        # spread it into one entry per name and drop the tuple key.
        for names, field_group in list(self.fields.items()):
            if not isinstance(names, tuple):
                continue
            self.fields.update(zip(names, field_group))
            del self.fields[names]

        self.pp = tuple(ex for ex in self.examples if ex is not None)

    @classmethod
    def splits(cls, path=None, root='.data', train=None, **kwargs):
        """Build the dataset stored at ``path``/``train`` and return its
        non-``None`` examples as a tuple (the examples only, without fields).

        ``root`` is not used.
        """
        dataset = cls(os.path.join(path, train), **kwargs)
        return tuple(example for example in dataset if example is not None)

    def split(self, split_ratio=0.7, stratified=False, strata_field='label',
              random_state=None):
        """Split the examples into train / validation / test datasets.

        NOTE(review): ``check_split_ratio``, ``RandomShuffler``,
        ``rationed_split`` and ``stratify`` are not defined or imported in the
        visible part of this notebook - confirm where they come from.

        Returns:
            Tuple of the non-empty datasets, ordered train, validation, test.

        Raises:
            ValueError: if ``stratified`` is set and ``strata_field`` is not a
                known field name.
        """
        train_ratio, test_ratio, val_ratio = check_split_ratio(split_ratio)

        # Source of the permutations.
        rnd = RandomShuffler(random_state)

        if stratified:
            if strata_field not in self.fields:
                raise ValueError("Invalid field name for strata_field {}"
                                 .format(strata_field))
            strata = stratify(self.examples, strata_field)
            train_data, test_data, val_data = [], [], []
            for group in strata:
                # Split every stratum on its own and pool the parts.
                part_train, part_test, part_val = rationed_split(
                    group, train_ratio, test_ratio, val_ratio, rnd)
                train_data += part_train
                test_data += part_test
                val_data += part_val
        else:
            train_data, test_data, val_data = rationed_split(
                self.examples, train_ratio, test_ratio, val_ratio, rnd)

        splits = tuple(Dataset(part, self.fields)
                       for part in (train_data, val_data, test_data) if part)

        # Hand a non-empty parent sort key down to the subsets.
        if self.sort_key:
            for subset in splits:
                subset.sort_key = self.sort_key
        return splits

    def __getitem__(self, i):
        return self.examples[i]

    def __len__(self):
        # Examples without a length (e.g. an iterator) report 2 ** 32.
        try:
            return len(self.examples)
        except TypeError:
            return 2 ** 32

    def __iter__(self):
        yield from self.examples

    def __getattr__(self, attr):
        # Generator function: looking up ANY missing attribute returns a
        # generator. For a field name it yields that attribute of every
        # example; for any other name it yields nothing.
        if attr not in self.fields:
            return
        for example in self.examples:
            yield getattr(example, attr)

In [6]:
class my_TabularDataset(Dataset):
    """Dataset read from a text file holding JSON arrays of records.

    Every non-blank line of the file must be a JSON array; each element of the
    array is turned into one ``Example`` via ``Example.fromdict``.
    """

    def __init__(self, path,  fields,  **kwargs):
        """
        Args:
            path: file to read (UTF-8).
            fields: dict ``json key -> (name, field)`` (or a list of such
                pairs, or ``None``), as expected by ``Example.fromdict``.
            **kwargs: forwarded to ``Dataset.__init__``.

        The examples are built from the ``fields`` argument itself (not from a
        notebook-level variable) and are accumulated over all lines of the
        file, so an empty file yields an empty dataset.
        """
        examples = []
        with open(path, encoding="utf8") as f:
            for line in f:
                if not line.strip():
                    # tolerate blank lines, e.g. a trailing newline
                    continue
                examples.extend(Example.fromdict(per_data, fields)
                                for per_data in json.loads(line))

        # Dataset expects (name, field) pairs: flatten the dict values.
        if isinstance(fields, dict):
            fields, field_dict = [], fields
            for field in field_dict.values():
                if isinstance(field, list):
                    fields.extend(field)
                else:
                    fields.append(field)

        super(my_TabularDataset, self).__init__(examples, fields, **kwargs)

# Model_1 (shared)

In [7]:
class makesent_gru(nn.Module):
    """Sentence encoder: a GRU over 100-d word vectors followed by a linear
    layer that maps the 200-d GRU output of each sentence's last real token to
    a 100-d sentence vector.

    The layer sizes are fixed (GRU 100 -> 100, linear 200 -> 100);
    ``hidden_size`` is only used by ``initHidden``.
    """

    def __init__(self, hidden_size, bidirectional):
        super(makesent_gru, self).__init__()
        self.hidden_size = hidden_size
        # number of GRU directions, used for the shape of the initial state
        self.bidirectional = 2 if bidirectional == True else 1
        self.gru = nn.GRU(100, 100, bidirectional=bidirectional)
        self.lastnet = nn.Linear(200, 100)

    def masking_f(self, new_sent, all_seq_len):
        """Pick, for every sentence, the GRU output at its last real token.

        Args:
            new_sent: GRU output, time-major, e.g. [7, 326, 200].
            all_seq_len: unpadded length of every sentence (one per column of
                ``new_sent``); entries beyond the number of sentences are
                ignored.

        Returns:
            Tensor with one row per sentence, e.g. [326, 200].
        """
        remake = torch.transpose(new_sent, 0, 1)
        # remake [326, 7, 200]
        n_sent = remake.size(0)
        last_step = torch.as_tensor(all_seq_len[:n_sent], dtype=torch.long,
                                    device=remake.device) - 1
        rows = torch.arange(n_sent, device=remake.device)
        # One gather instead of growing the result with torch.cat row by row;
        # this also returns an empty tensor for an empty batch.
        return remake[rows, last_step]

    def forward(self, char, h0, masking_v):
        """Encode a batch of padded sentences.

        Args:
            char: word vectors, time-major, e.g. [7, 326, 100].
            h0: initial GRU state.
            masking_v: unpadded sentence lengths, see ``masking_f``.

        Returns:
            Sentence vectors, e.g. [326, 100].
        """
        gru_out, h0 = self.gru(char, h0)
        # gru_out [7, 326, 200]

        last_hidden_state = self.masking_f(gru_out, masking_v)
        # [326, 200]

        return self.lastnet(last_hidden_state)

    def initHidden(self):
        """Zero initial state for a batch of one, created on the notebook-level
        ``device``."""
        return torch.zeros(self.bidirectional, 1, self.hidden_size, device=device, requires_grad=False)

# Model_2 

## Compare the CRF-GRU model with a plain CRF and a plain Bi-GRU

In [8]:
class CRF_(nn.Module):
    """CRF tagger without a recurrent layer (baseline for ``BiGru_CRF``).

    The 100-d sentence vectors are projected straight to 31 tag scores by
    ``hidden2tag``; ``self.gru`` is created but not used by any method.
    ``transitions[a, b]`` is the score of moving from tag ``b`` to tag ``a``.

    All tensors are created on the device of the inputs / parameters, and the
    batch size is read from the inputs.
    """

    def __init__(self, tag_to_ix, hidden_dim):
        """
        Args:
            tag_to_ix: dict with the ids of 'start_tag', 'stop_tag', 'pad_tag'.
            hidden_dim: stored as ``self.hidden_dim``; not used otherwise.
        """
        super(CRF_, self).__init__()

        self.gru = nn.GRU(100, 100, bidirectional=True)
        self.hidden2tag = nn.Linear(100, 31)
        self.transitions = nn.Parameter(torch.randn(31, 31))  # [a, b]: from b to a
        self.transitions.data[0, :] = -10000  # all to start
        self.transitions.data[:, 29] = -10000  # stop to all
        self.transitions.data[:, 30] = -10000  # pad to all
        self.transitions.data[30][30] = 0  # pad to pad
        # NOTE(review): originally commented "stop to pad", but with the
        # [to, from] convention this entry is pad -> stop - confirm intent.
        self.transitions.data[29][30] = 0

        self.hidden_dim = hidden_dim
        self.tag_to_ix = tag_to_ix

    def init_hidden(self, batch):
        """Zero GRU state of shape (2, batch, 100) on the model's device."""
        return torch.zeros(2, batch, 100, device=self.transitions.device)

    def _get_gru_features(self, batch, sentence_set):
        """Emission scores: ``hidden2tag`` applied directly to the sentence
        vectors (no GRU in this baseline). ``batch`` is unused."""
        return self.hidden2tag(sentence_set)

    def for_score(self, pre_mask, feats):
        """Log partition score of every sequence (forward algorithm).

        Args:
            pre_mask: batch * seq array of 0/1 flags marking real steps.
            feats: seq * batch * tags emission scores.

        Returns:
            Tensor of shape (batch,).
        """
        batch_size = feats.size(1)
        n_tags = self.transitions.size(0)
        score = feats.new_full((batch_size, n_tags), -10000.)
        score[:, self.tag_to_ix['start_tag']] = 0.  # every path begins at start

        mask = torch.as_tensor(pre_mask, dtype=feats.dtype, device=feats.device)

        for t in range(feats.size(0)):  # tensors inside the loop are batch * tags * tags
            mask_t = mask[:, t].unsqueeze(-1).expand_as(score)  # batch -> batch * tags
            score_t = score.unsqueeze(1).expand(-1, *self.transitions.size())  # b*f -> b*f*f
            emit = feats[t].unsqueeze(-1).expand_as(score_t)  # b*f -> b*f*f
            trans = self.transitions.unsqueeze(0).expand_as(score_t)  # b*f*f
            score_t = log_sum_exp(score_t + emit + trans)
            # masked (padded) steps keep the previous score
            score = score_t * mask_t + score * (1 - mask_t)

        return log_sum_exp(score)

    def cal_score(self, mask, feats, tag):
        """Score of the gold tag path of every sequence.

        The transition scores are indexed directly from ``self.transitions``,
        so the gold score stays connected to that parameter in autograd.

        Args:
            mask: batch * seq array of 0/1 flags marking real steps.
            feats: seq * batch * tags emission scores.
            tag: batch * (seq + 1) tag ids, beginning with the start tag.

        Returns:
            Tensor of shape (batch,).
        """
        tags = tag.to(feats.device)
        mask_tensor = torch.as_tensor(mask, dtype=feats.dtype,
                                      device=feats.device).transpose(0, 1)  # seq * batch
        score = feats.new_zeros(feats.size(1))

        for i, feat in enumerate(feats):  # feat: batch * tags
            prev_tag, next_tag = tags[:, i], tags[:, i + 1]
            transit = self.transitions[next_tag, prev_tag]
            emit = feat.gather(1, next_tag.unsqueeze(1)).squeeze(1)
            score = score + (transit + emit) * mask_tensor[i]

        return score

    def neg_log_likelihood(self, mask, sentence, tags, batch):
        """Per-sequence negative log likelihood: partition score minus gold
        path score. Returns a tensor of shape (batch,)."""
        feats = self._get_gru_features(batch, sentence)
        forward_score = self.for_score(mask, feats)
        gold_score = self.cal_score(mask, feats, tags)
        return forward_score - gold_score

    def _viterbi_decode(self, mask, feats):
        """Best tag path for ONE sequence.

        Args:
            mask: 0/1 flags of this sequence; decoding stops at the first 0.
            feats: seq * tags emission scores of this sequence.

        Returns:
            (path_score, best_path); the path excludes the start tag.
        """
        backpointers = []
        n_tags = self.transitions.size(0)

        # Viterbi variables in log space; only the start tag (id 0) is reachable.
        init_vvars = torch.full((1, n_tags), -10000., dtype=feats.dtype,
                                device=feats.device)
        init_vvars[0][0] = 0

        # forward_var at step i holds the viterbi variables for step i-1
        forward_var = init_vvars

        for i, feat in enumerate(feats):
            if mask[i] == 0:
                break
            bptrs_t = []  # backpointers for this step
            viterbivars_t = []  # viterbi variables for this step

            for next_tag in range(n_tags):
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))

            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)

        # No extra transition into stop_tag is added at the end.
        best_tag_id = argmax(forward_var)
        path_score = forward_var[0][best_tag_id]

        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we dont want to return that to the caller)
        start = best_path.pop()
        assert start == 0  # Sanity check
        best_path.reverse()
        return path_score, best_path

    def forward(self, batch, dummy_input, seq):
        """Decode sequence number ``seq`` of a batch.

        Args:
            batch: batch size (passed on to the feature extractor).
            dummy_input: pair ``(mask, sentence_set)``.
            seq: index of the sequence to decode.

        Returns:
            (score, tag_seq) from ``_viterbi_decode``.
        """
        feats = self._get_gru_features(batch, dummy_input[1])
        feats = torch.transpose(feats, 0, 1)  # -> batch * seq * tags
        mask = dummy_input[0]
        score, tag_seq = self._viterbi_decode(mask[seq], feats[seq])
        return score, tag_seq
    

In [9]:
class BiGru_(nn.Module):
    """Bi-GRU tagger without a CRF layer (baseline for ``BiGru_CRF``).

    A bidirectional GRU over 100-d sentence vectors followed by a linear
    projection of the 200-d GRU output to 31 tag scores.
    """

    def __init__(self, tag_to_ix, hidden_dim):
        """
        Args:
            tag_to_ix: tag-name -> id mapping, stored as ``self.tag_to_ix``.
            hidden_dim: accepted for signature parity with the CRF models;
                not used.
        """
        super(BiGru_, self).__init__()

        self.gru = nn.GRU(100, 100, bidirectional=True)
        self.hidden2tag = nn.Linear(200, 31)

        self.tag_to_ix = tag_to_ix

    def init_hidden(self, batch):
        """Zero GRU state of shape (2, batch, 100) on the model's device."""
        return torch.zeros(2, batch, 100, device=self.hidden2tag.weight.device)

    def _get_gru_features(self, batch, sentence_set):
        """Run the GRU over ``sentence_set`` (time-major) and project every
        step to tag scores."""
        hidden = self.init_hidden(batch)
        gru_out, hidden = self.gru(sentence_set, hidden)
        return self.hidden2tag(gru_out)

    def forward(self, batch, dummy_input, seq):
        """Tag scores for the whole batch.

        Args:
            batch: batch size.
            dummy_input: pair ``(mask, sentence_set)``, the same layout the
                CRF models in this file take; only ``dummy_input[1]`` is read.
            seq: unused; kept for signature parity with the CRF models.

        Returns:
            Tensor seq * batch * 31.
        """
        return self._get_gru_features(batch, dummy_input[1])

In [28]:
class BiGru_CRF(nn.Module):
    """Bi-GRU + CRF dialogue tagger.

    A bidirectional GRU over 100-d sentence vectors, a linear projection of the
    200-d GRU output to 31 tag scores, and a CRF transition matrix where
    ``transitions[a, b]`` is the score of moving from tag ``b`` to tag ``a``.

    All tensors are created on the device of the inputs / parameters, and the
    batch size is read from the inputs.
    """

    def __init__(self, tag_to_ix, hidden_dim):
        """
        Args:
            tag_to_ix: dict with the ids of 'start_tag', 'stop_tag', 'pad_tag'.
            hidden_dim: stored as ``self.hidden_dim``; not used otherwise.
        """
        super(BiGru_CRF, self).__init__()

        self.gru = nn.GRU(100, 100, bidirectional=True)
        self.hidden2tag = nn.Linear(200, 31)
        self.transitions = nn.Parameter(torch.randn(31, 31))  # [a, b]: from b to a
        self.transitions.data[0, :] = -10000  # all to start
        self.transitions.data[:, 29] = -10000  # stop to all
        self.transitions.data[:, 30] = -10000  # pad to all
        self.transitions.data[30][30] = 0  # pad to pad
        # NOTE(review): originally commented "stop to pad", but with the
        # [to, from] convention this entry is pad -> stop - confirm intent.
        self.transitions.data[29][30] = 0

        self.hidden_dim = hidden_dim
        self.tag_to_ix = tag_to_ix

    def init_hidden(self, batch):
        """Zero GRU state of shape (2, batch, 100) on the model's device."""
        return torch.zeros(2, batch, 100, device=self.transitions.device)

    def _get_gru_features(self, batch, sentence_set):
        """Emission scores: GRU over ``sentence_set`` (time-major), then the
        projection to tag scores. Returns seq * batch * 31."""
        hidden = self.init_hidden(batch)
        gru_out, hidden = self.gru(sentence_set, hidden)
        return self.hidden2tag(gru_out)

    def for_score(self, pre_mask, feats):
        """Log partition score of every sequence (forward algorithm).

        Args:
            pre_mask: batch * seq array of 0/1 flags marking real steps.
            feats: seq * batch * tags emission scores.

        Returns:
            Tensor of shape (batch,).
        """
        batch_size = feats.size(1)
        n_tags = self.transitions.size(0)
        score = feats.new_full((batch_size, n_tags), -10000.)
        score[:, self.tag_to_ix['start_tag']] = 0.  # every path begins at start

        mask = torch.as_tensor(pre_mask, dtype=feats.dtype, device=feats.device)

        for t in range(feats.size(0)):  # tensors inside the loop are batch * tags * tags
            mask_t = mask[:, t].unsqueeze(-1).expand_as(score)  # batch -> batch * tags
            score_t = score.unsqueeze(1).expand(-1, *self.transitions.size())  # b*f -> b*f*f
            emit = feats[t].unsqueeze(-1).expand_as(score_t)  # b*f -> b*f*f
            trans = self.transitions.unsqueeze(0).expand_as(score_t)  # b*f*f
            score_t = log_sum_exp(score_t + emit + trans)
            # masked (padded) steps keep the previous score
            score = score_t * mask_t + score * (1 - mask_t)

        return log_sum_exp(score)

    def cal_score(self, mask, feats, tag):
        """Score of the gold tag path of every sequence.

        The transition scores are indexed directly from ``self.transitions``,
        so the gold score stays connected to that parameter in autograd.

        Args:
            mask: batch * seq array of 0/1 flags marking real steps.
            feats: seq * batch * tags emission scores.
            tag: batch * (seq + 1) tag ids, beginning with the start tag.

        Returns:
            Tensor of shape (batch,).
        """
        tags = tag.to(feats.device)
        mask_tensor = torch.as_tensor(mask, dtype=feats.dtype,
                                      device=feats.device).transpose(0, 1)  # seq * batch
        score = feats.new_zeros(feats.size(1))

        for i, feat in enumerate(feats):  # feat: batch * tags
            prev_tag, next_tag = tags[:, i], tags[:, i + 1]
            transit = self.transitions[next_tag, prev_tag]
            emit = feat.gather(1, next_tag.unsqueeze(1)).squeeze(1)
            score = score + (transit + emit) * mask_tensor[i]

        return score

    def neg_log_likelihood(self, mask, sentence, tags, batch):
        """Per-sequence negative log likelihood: partition score minus gold
        path score. Returns a tensor of shape (batch,)."""
        feats = self._get_gru_features(batch, sentence)
        forward_score = self.for_score(mask, feats)
        gold_score = self.cal_score(mask, feats, tags)
        return forward_score - gold_score

    def _viterbi_decode(self, mask, feats):
        """Best tag path for ONE sequence.

        Args:
            mask: 0/1 flags of this sequence; decoding stops at the first 0.
            feats: seq * tags emission scores of this sequence.

        Returns:
            (path_score, best_path); the path excludes the start tag.
        """
        backpointers = []
        n_tags = self.transitions.size(0)

        # Viterbi variables in log space; only the start tag (id 0) is reachable.
        init_vvars = torch.full((1, n_tags), -10000., dtype=feats.dtype,
                                device=feats.device)
        init_vvars[0][0] = 0

        # forward_var at step i holds the viterbi variables for step i-1
        forward_var = init_vvars

        for i, feat in enumerate(feats):
            if mask[i] == 0:
                break
            bptrs_t = []  # backpointers for this step
            viterbivars_t = []  # viterbi variables for this step
            for next_tag in range(n_tags):
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))

            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)

        # No extra transition into stop_tag is added at the end.
        best_tag_id = argmax(forward_var)
        path_score = forward_var[0][best_tag_id]

        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we dont want to return that to the caller)
        start = best_path.pop()
        assert start == 0  # Sanity check
        best_path.reverse()
        return path_score, best_path

    def forward(self, batch, dummy_input, seq):
        """Decode sequence number ``seq`` of a batch.

        Args:
            batch: batch size (used for the initial GRU state).
            dummy_input: pair ``(mask, sentence_set)``.
            seq: index of the sequence to decode.

        Returns:
            (score, tag_seq) from ``_viterbi_decode``.
        """
        feats = self._get_gru_features(batch, dummy_input[1])
        feats = torch.transpose(feats, 0, 1)  # -> batch * seq * tags
        mask = dummy_input[0]
        score, tag_seq = self._viterbi_decode(mask[seq], feats[seq])
        return score, tag_seq
    

# Preprocessing function

In [11]:
def all_preprocess(sent, batch_data):
    """Turn a mini-batch of dialogue examples into inputs for the tagger.

    Steps:
      1. tags:      ``labels_1`` / ``labels_2`` of every example are merged by
                    ``pad_cat_tag`` and moved to the GPU.
                    (0 = start_tag, 29 = stop_tag, 30 = pad_tag, e.g.
                    ``[0, 21, 1, 1, 1, 29, 30, 30, ...]``)
      2. tokens:    ``pad_batch`` tokenises and pads the sentences,
                    ``batch_numerical`` maps them to word vectors and
                    ``make_batch2sent`` flattens the batch into one list of
                    sentences.
      3. sentences: the sentence network ``sent`` encodes every sentence into
                    one vector.
      4. dialogues: the sentence vectors are regrouped per dialogue, padded to
                    the longest dialogue and made time-major.

    Args:
        sent: callable ``sent(word_vectors, hidden_state, sentence_lengths)``.
        batch_data: list of examples with ``Text``, ``labels_1``, ``labels_2``.

    Returns:
        (new_dial, new_tag, dial_leng): padded sentence vectors with the
        dialogue position first, the padded tag tensor, and the number of
        sentences of every dialogue.
    """
    # ---- 1. tags -------------------------------------------------------------
    emotion_set = [example.labels_1 for example in batch_data]
    action_set = [example.labels_2 for example in batch_data]
    new_tag = Variable(pad_cat_tag(emotion_set, action_set).cuda())  # batch * tag

    # ---- 2. tokens -> word vectors -------------------------------------------
    # padded:            batch * sentnum * (word_list + pad)
    # all_seq_len:       flat list of unpadded sentence lengths, e.g. [6, 5, 4, 7, ..., 1, ...]
    # sentnum_per_batch: sentences per dialogue, e.g. [11, 11, 5, 5, ...]
    padded, all_seq_len, sentnum_per_batch = pad_batch(batch_data)
    vectorised = batch_numerical(padded)  # batch * sent_num * sent_leng * wv
    sentbatch_len, for_sentmodel = make_batch2sent(vectorised)
    # for_sentmodel: all_sent_num * sent_leng * wv, e.g. 326 * 7 * 100

    # ---- 3. sentence network ---------------------------------------------------
    hidden_state = torch.zeros((2, sentbatch_len, 100), dtype=torch.float, device=device)
    word_major = np.transpose(for_sentmodel, [1, 0, 2])  # e.g. 7 * 326 * 100
    for_sentmodel2 = torch.tensor(word_major, dtype=torch.float, device=device,
                                  requires_grad=False)
    pre_crf_gru = sent(for_sentmodel2, hidden_state, all_seq_len)  # e.g. [326, 100]

    # ---- 4. regroup per dialogue and pad ---------------------------------------
    # all_sent_num * wv -> batch * sent_num * wv, e.g. chunks [11,100], [11,100], [5,100], ...
    last_var = torch.split(pre_crf_gru, sentnum_per_batch)
    # batch * (sent_num + pad) * wv; dial_leng is kept for masking
    new_dial, dial_leng = pad_dial(last_var)

    return torch.transpose(new_dial, 0, 1), new_tag, dial_leng

# Build the model objects

In [29]:
# Sentence encoder: batch * dialogue -> batch * (dialogue_length * sent_vec(float)).
sent_to_vextor_bigru_net = makesent_gru(hidden_size=100, bidirectional=True).to(device)

# Dialogue tagger: batch * (dialogue_length * sent_vec(float)) -> batch * (dialogue_tag, score).
# NOTE(review): BATCH_SIZE is what gets passed as the model's hidden_dim here - confirm intent.
my_grucrf_model = BiGru_CRF(tag_to_ix=tag_to_ix, hidden_dim=BATCH_SIZE).to(device)
#this convert batch*(dialogue_length*sent_vec(float)) -> batch*(dialogue_tag,score)

In [30]:

def load_models(path1,path2):
    """Restore the saved weights of the two notebook-level models.

    Args:
        path1: checkpoint of ``sent_to_vextor_bigru_net`` (sentence encoder).
        path2: checkpoint of ``my_grucrf_model`` (dialogue tagger).
    """
    checkpoints = (
        (sent_to_vextor_bigru_net, path1),
        (my_grucrf_model, path2),
    )
    for module, checkpoint in checkpoints:
        module.load_state_dict(torch.load(checkpoint))

# Build the checkpoint paths with os.path.join: working_path already ends with
# a slash, so plain concatenation with '/parameter/...' doubled the separator.
load_models(os.path.join(working_path, 'parameter', 'shared.pth'),
            os.path.join(working_path, 'parameter', 'crf_gru.pth'))