In [1]:
import tensorflow as tf
from tensorflow.keras import Input, backend as K
from tensorflow.keras.preprocessing import image 
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.applications import InceptionV3,VGG16
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Bidirectional, Dot, Concatenate, Lambda, Attention, Conv2D, Embedding, BatchNormalization, MaxPool2D, GlobalMaxPool2D, Dropout, TimeDistributed, Dense, LSTM, GRU, Flatten, RepeatVector
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
from IPython.display import Video, HTML
import time
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from collections import deque
import copy
from PIL import Image
from scipy import spatial
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from collections import deque
import copy
import pickle
import unicodedata
import re
import numpy as np
import os
import io
import time
import pandas as pd
import cv2
from tqdm import tqdm,trange
import shutil
import csv
from nltk.translate.bleu_score import corpus_bleu
import json
import math
import random
tf.keras.backend.set_floatx('float64')

In [2]:
class Option():
    def __init__(self,model_version):
        self.model_version=model_version
        self.model_path=str(self.model_version).zfill(3)+'_model'
        self.checkpoints_path=os.path.join(self.model_path,'checkpoints/')
        self.history_path=os.path.join(self.model_path,'history.json')
        if not os.path.exists(self.model_path):os.mkdir(self.model_path)
        if not os.path.exists(self.checkpoints_path):os.mkdir(self.checkpoints_path)

        self.encoder_type='LSTM'    #EDIT
        self.encoder_units=256      #EDIT
        self.decoder_type='LSTM'    #EDIT
        self.decoder_units=2 * self.encoder_units
        self.layer_count=1          #EDIT
        self.beam_width=1
        self.batch_size=10          #EDIT
        self.attention=True
        self.max_len_target=15
        self.temporal_length=16
        self.embed_path='glove.6B.100d.txt'
        self.embed_out=100
        self.caption_path='MSVD_captions.csv'
        self.num_words=8000         #EDIT
        self.seq_join_out=15
        self.tokenizer=None
        self.embedding=None
        self.model=None
        self.name=None
        self.novel_model=True
        
        self.save_option()
        
    def get_tokenizer(self):
        if self.tokenizer == None:
            self.tokenizer = TokenizerWrap(self)
        return self.tokenizer
    def get_embedding(self):
        if self.embedding == None:
            self.embedding = M_Embedding(self)
        return self.embedding
    def get_model(self):
        if self.model == None:
            self.model = self.new_model()
        return self.model
    def new_model(self):
        if not self.novel_model: self.model = M_Model(self)
        else: self.model = M_Novel_Model(self)
        self.name = self.model.name
        self.save_update()
        return self.model
    def save_option(self):
        json.dump(self.__dict__,open(os.path.join(self.model_path,'info.json'),'w'))
    def save_update(self):
        info = json.load(open(os.path.join(self.model_path,'info.json'),'r'))
        info.update({'name':self.name})
        json.dump(info,open(os.path.join(self.model_path,'info.json'),'w'))

In [3]:
opt = Option(3)

In [4]:
class TokenizerWrap(Tokenizer):
    def __init__(self, options):
        Tokenizer.__init__(self, num_words=options.num_words)
        self.mark_start = 'ssss '
        self.mark_end = ' eeee'
        self.pad = ' pppp'
        self.temporal_length = options.temporal_length
        self.mode_dict = {0:'validation',1:'test',2:'train'}
        
#         self.caption_dictionary = self.get_caption_dict(options.caption_path)
        self.caption_dictionary = self.get_full_caption_dict(options.caption_path)
        self.texts = self.create_tokenizer(self.caption_dictionary)
        self.fit_on_texts(self.texts)
        
        self.index_to_word = dict(zip(self.word_index.values(), self.word_index.keys()))
        self.word_to_index = dict(zip(self.word_index.keys(), self.word_index.values()))
    
    def word_to_token(self, token):
        token = 0 if word not in word_to_index else self.word_to_index[word]
        return token
    
    def token_to_word(self, token):
        word = " " if token == 0 else self.index_to_word[token]
        return word

    def tokens_to_string(self, tokens):
        words = [self.index_to_word[token]
                 for token in tokens
                 if token != 0]
        text = " ".join(words)
        return text
    
    def captions_to_tokens(self, captions_list):
        tokens = self.texts_to_sequences(captions_list)
        tokens = pad_sequences(tokens, maxlen=self.temporal_length, padding='post', truncating='post')
        y_in = tokens[:, 0:-1]
        y_out = tokens[:, 1:]
        return y_in, y_out
        
    def unicode_to_ascii(self, s):
        return ''.join(c for c in unicodedata.normalize('NFD', s)
            if unicodedata.category(c) != 'Mn')

    def preprocess_sentence(self, w, start_end = True):
        w = self.unicode_to_ascii(w.lower().strip())
        w = re.sub(r"([?.!,Ã‚Â¿])", r" \1 ", w)
        w = re.sub(r'[" "]+', " ", w)
        w = re.sub(r"[^a-zA-Z?.!,Ã‚Â¿]+", " ", w)
        w = w.strip()
        if start_end:
            w = self.mark_start + w + self.mark_end
            for i in range(12):
                w = w+self.pad
        return w

    def mark_captions(self, captions_list):
        captions_marked = [self.preprocess_sentence(caption)
                            for caption in captions_list]
        return captions_marked
    
    def get_full_caption_dict(self, path):
        df = pd.read_csv(path,encoding='utf-8')
        df = df.iloc[:,1::]
        df = df.values.tolist()
        caption_dictionary = {}
        for data in df:
            count = 0
            for caption in data[1:]:
                if str(caption)=='nan':
                    break
                else:
                    count+=1
            caption_dictionary.update({data[0]:self.mark_captions(data[1:count])})
        return caption_dictionary
    
    def get_caption_dict(self,path):
        captions = pd.read_csv(path)
        parents = captions['FileName']
        caption_list = captions[['0','1','2','3','4']].values
        caption_dict = dict()
        for i in range(len(parents)):
            caption_dict.update({parents[i]:self.mark_captions(caption_list[i])})
        return caption_dict
    
    def create_tokenizer(self,caption_dictionary):
        cap_list = []
        for parent,rows in caption_dictionary.items():
            for row in rows:
                cap_list.append(row)
        return cap_list
    def clean_cap(self, captions):
        clean_captions = []
        for caption in captions:
            caption = caption.split(' ')
            clean_caption = []
            for word in caption:
                if word not in ['eeee','pppp','ssss','.']:
                    clean_caption.append(word)
            clean_captions.append(clean_caption)
        return clean_captions

    def get_data_list(self,path):
        return list(pd.read_csv(path)['0'])

    def get_parent(self,path):
        return '_'.join(path.split('_')[1:4])

    def data_generator(self,mode=0, data_size=1000000):
        assert mode in [0,1,2],"Invalid mode"
        mode = self.mode_dict[mode]
        data_dir = 'data_pickle'

        data_path = mode+'.csv'
        data_list = get_data_list(os.path.join(data_dir,data_path))
        data_len = min(data_size,len(data_list))
    #     print("Working with ",data_len," data items out of ",len(data_list))
        caption_dict = self.caption_dictionary

        for data in data_list[:data_len]:
            parent = get_parent(data)
            y = random.choice(caption_dict[parent])
            y_in, y_out = self.captions_to_tokens([y])
            with open(os.path.join(data_dir,mode,data),'rb') as f:
                X = pickle.load(f)
            yield tf.convert_to_tensor(X,dtype=tf.float64), tf.convert_to_tensor(y_in[0],dtype=tf.int64), tf.convert_to_tensor(y_out[0],dtype=tf.int64), tf.convert_to_tensor(parent,dtype=tf.string)

tokenizer = opt.get_tokenizer()


In [5]:

class M_Encoder(tf.keras.Model):
    def __init__(self,option):
        name = '_'.join(['encoder',option.encoder_type,str(option.encoder_units)])
        super(M_Encoder, self).__init__(name=name)
        self.option=option
        assert option.encoder_units>0 and type(option.encoder_units)==int,"Encoder type must be positive integer"
        
        self.timeDistDense = TimeDistributed(Dense(option.encoder_units, activation = 'relu',name='dense'), name='time_distributed_1')
        
        self.encoder = []
        for i in range(option.layer_count):
            if option.encoder_type=='GRU':
                self.encoder.append(Bidirectional(GRU(option.encoder_units, return_state=False, return_sequences = True, dropout = .5, name='en_gru'+str(i).zfill(2)),
                                        name='encoder'+str(i).zfill(2)))
            elif option.encoder_type=='LSTM':
                self.encoder.append(Bidirectional(LSTM(option.encoder_units, return_state=False, return_sequences = True, dropout = .5, name='en_lstm'+str(i).zfill(2)),
                                         name='encoder'+str(i).zfill(2)))
            else:
                assert False,"invalid encoder type"
            
        if option.seq_join_out>0:
            self.timeDistDense2 = TimeDistributed(Dense(option.seq_join_out, activation = 'relu',name='dense'), name='time_distributed_2')            
            
        self.build((None, option.max_len_target, 4096))
    
    def join_seq(self, x):
        x = tf.reshape(x, (-1,self.option.seq_join_out*self.option.max_len_target))
        return x
    
    def call(self,inputs):
        x_time = self.timeDistDense(inputs)
        x_en = x_time
        x_en_list = []
        for i in range(self.option.layer_count):
            x_en = self.encoder[i](x_en)
            x_en_list.append(x_en)
        if self.option.seq_join_out>0:
            x_time = self.timeDistDense2(x_time)
            x_time = self.join_seq(x_time)
            x_time = tf.expand_dims(x_time,axis=-2)
        return x_en, x_time, x_en_list

class M_JoinSeq(tf.keras.Model):
    def __init__(self,option):
        name = '_'.join(['joinseq',str(option.decoder_units)])
        super(M_JoinSeq, self).__init__(name=name)
        self.join_concat = Concatenate(axis=2, name='join_concat')
        self.join_dense = Dense(2*option.encoder_units+option.embed_out, activation='relu', name='join_dense')

    def call(self,inputs):
        x = self.join_concat(inputs)
        x = self.join_dense(x)
        return x
    
class M_Embedding(tf.keras.Model):
    def __init__(self,option):
        name = '_'.join(['embedding',str(option.num_words),str(option.embed_out)])
        super(M_Embedding, self).__init__(name=name)
        self.word2idx = option.get_tokenizer().word_index
        embeddings_index = self.embeddings_index_creator(option.embed_path)
        self.embed_in = len(self.word2idx) + 1
        self.embedding_matrix = self.embedding_matrix_creator(embeddings_index, word_index=self.word2idx,embedding_out=option.embed_out)
        self.embedding = Embedding(self.embed_in, option.embed_out, name='embedding', trainable = False)
        self.build((None,))
        self.set_weights([self.embedding_matrix])
        print('Embedding Layer Created')
        
    def embeddings_index_creator(self, embed_path):
        embeddings_index = {}
        with open(embed_path, encoding='utf-8') as f:
            for line in tqdm(f,file=None):
                values = line.split()
                word = values[0]
                try:
                    coefs = np.asarray(values[1:], dtype='float32')
                    embeddings_index[word] = coefs
                except:
                    pass
            f.close()
        return embeddings_index
    
    def embedding_matrix_creator(self, embeddings_index, word_index, embedding_out):
        embedding_matrix = np.zeros((len(word_index) + 1, embedding_out))
        for word, i in word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
        return embedding_matrix
    
    def call(self,inputs):
        x = self.embedding(inputs)
        return x

class M_Decoder(tf.keras.Model):
    def __init__(self,option):
        name = '_'.join(['decoder',option.decoder_type,str(option.decoder_units)])
        super(M_Decoder, self).__init__(name=name)
        
        assert option.decoder_units>0 and type(option.decoder_units)==int,"Encoder type must be positive integer"
        
        if option.decoder_type=='GRU':
            self.decoder = GRU(option.decoder_units, return_state = True, return_sequences = True, dropout = .5,name='decoder')
        elif option.decoder_type=='LSTM':
            self.decoder = LSTM(option.decoder_units, return_state = True, return_sequences = True, dropout = .5,name='decoder')
        else:
            assert False,"invalid encoder type"
        
        self.decoder_dense = Dense(option.num_words, activation = 'sigmoid', name='decoder_dense')
        
        self.build((None, 1, 2*option.encoder_units+option.embed_out))
    
    def call(self,inputs,initial_state=None):
        x = self.decoder(inputs,initial_state=initial_state)
        if type(x)==list:
            out = self.decoder_dense(x[0])
            return out, x[1:]
        else:
            out = self.decoder_dense(x)
            return x
    
class M_Attention(tf.keras.Model):
    def __init__(self,option):
        name = '_'.join(['attention'])
        super(M_Attention,self).__init__(name=name)
        self.attn_dense0 = Dense(2*option.encoder_units,activation='tanh',name='attn_dense0')
        self.attn_repeat_layer = RepeatVector(option.max_len_target,name='repeat_vector')
        self.attn_concat_layer = Concatenate(axis=-1,name='attn_concat')
        self.attn_dense1 = Dense(10, activation='tanh', name='attn_dense1')
        self.attn_dense2 = Dense(1, activation=self.softmax_over_time, name='attn_dense2')
        self.attn_dot = Dot(axes=1,name='attn_dot')
        
        self.context_last_word_concat_layer = Concatenate(axis=2,name='concat_last_word')
    
        self.build([(None,option.max_len_target,2*option.encoder_units),(None,1,option.embed_out),(1,option.decoder_units,)])

    def softmax_over_time(self,x):
        assert (K.ndim(x)>2),"x dims too small"
        e = K.exp(x - K.max(x,axis = 1, keepdims = True))
        s = K.sum(e, axis = 1, keepdims = True)
        return e/s

    def one_step_attention(self, h,st_1):
        st_1 = self.attn_repeat_layer(st_1)
        x = self.attn_concat_layer([h,st_1])
        x = self.attn_dense1(x)
        x = self.attn_dense2(x)
        context = self.attn_dot([x,h])
        return context
    
    def call(self, inputs):
        en_output, xt, s = inputs
        s = self.attn_dense0(s)
        context = self.one_step_attention(en_output,s)
        print(context)
        decoder_input = self.context_last_word_concat_layer([context,xt])
        return decoder_input

In [15]:
# encoder = M_Encoder(opt)
# out = encoder(np.random.rand(10,15,4096))
# print(out[0].shape)
# # encoder.summary()

# attention = M_Attention(opt)
# out = attention([np.random.rand(10,15,512),np.random.rand(10,1,100),np.random.rand(10,512)])
# print(out.shape)

decoder = M_Decoder(opt)
# decoder.summary()
out = decoder.call(np.random.rand(10,1,612),initial_state=[])
# print(out[0].shape)

In [20]:
decoder.layers[0]._layers 

[<tensorflow.python.keras.layers.recurrent.LSTMCell at 0x7f4d8d1626a0>,
 ListWrapper([])]

In [8]:
class M_Model(tf.keras.Model):
    def __init__(self,option):
        name = self.get_model_name(option)
        super(M_Model, self).__init__(name=name)
        assert option.layer_count > 0 and type(option.layer_count)==int,'Layer_count must be positive'
        assert option.attention==True,'Code for no attention is not yet available'

        self.option = option
    
        self.word2idx = option.get_tokenizer().word_index
        self.idx2word = { v: k for k, v in self.word2idx.items()}

        self.eos = self.word2idx['eeee']

        self.encoder = M_Encoder(option)

        self.embedding = option.get_embedding()

        self.decoder = M_Decoder(option)

        self.attention = [M_Attention(option)] if option.attention else None
        
        self.join_seq = M_JoinSeq(option) if option.seq_join_out else None 

        self.stacker = Lambda(self.stack_and_transpose,name='stacker')

        self.argmax = Lambda(self.arg_max_func, name='argmax')
        self.flatten = Flatten(name='flatten')
        
        self.hist = None
        
    def build_model(self):
        build_input_shape = []
        build_input_shape.append((None, self.option.max_len_target,4096))
        build_input_shape.append((None, 1))
        if self.attention:
            build_input_shape.append((None, self.option.decoder_units))
            build_input_shape.append((None, self.option.decoder_units))
        self.build(input_shape=build_input_shape)
    
    def arg_max_func(self, x):
        x = tf.math.argmax(x,axis=-1)
        x = tf.expand_dims(x,-1)
        # x = tf.expand_dims(x,-1)
        return x

    def stack_and_transpose(self,x):
        x = K.stack(x)
        x = K.permute_dimensions(x, pattern=(1,0,3,2))
        x = tf.squeeze(x,axis=-1)
        return x

    def get_model_name(self,option):
        model_name='_'.join(['en',option.encoder_type,'de',option.decoder_type,'layers',str(option.layer_count)])
        if option.attention:
            model_name=model_name+'_withAttention'
        return model_name
    
    def decode_sequence(self, dataset, start=0, length=1, log=False, return_parents=False, save=True):
        out_paragraph = []
        BLEU_1 = 0
        BLEU_2 = 0
        BLEU_3 = 0
        BLEU_4 = 0
        original = []
        parents = []
        for i in range(0, length):
            input_seq,y_in,y_out,parent = dataset[start+i]
            parent = parent.decode()
            parents.append(parent)
            input_data = []
            input_data.append(tf.expand_dims(input_seq,axis = 0))
            target_seq = np.zeros((1,1))
            target_seq[0,0] = self.word2idx['ssss']
            input_data.append(target_seq)
            if self.attention:
                s = np.zeros((1,self.option.decoder_units))
                input_data.append(s)
                c = np.zeros((1,self.option.decoder_units))
                input_data.append(c)

            outputs = self.predict(input_data)
            output_seq = []
            for out in outputs[0]:
                idx = self.argmax(out).numpy()[0]
                if self.eos == idx or self.word2idx['pppp']==idx:
                    break
                word = ' '
                if idx>0:
                    word = self.idx2word[idx]
                    output_seq.append(word)
            sentence = ' '.join(output_seq)
            out_paragraph.append(sentence)
            references = self.option.get_tokenizer().clean_cap(self.option.get_tokenizer().caption_dictionary[parent])
            BLEU_1+=corpus_bleu([references], [output_seq], weights=(1  ,  0,  0,  0))*100.00
            BLEU_2+=corpus_bleu([references], [output_seq], weights=(1/2,1/2,  0,  0))*100.00
            BLEU_3+=corpus_bleu([references], [output_seq], weights=(1/3,1/3,1/3,  0))*100.00
            BLEU_4+=corpus_bleu([references], [output_seq], weights=(1/4,1/4,1/4,1/4))*100.00
            if log:
                print([references], [output_seq])
            sentence = ' '.join(references[0])
            original.append(sentence)
        scores = {'BLEU_1':BLEU_1/max(length,1),'BLEU_2':BLEU_2/max(length,1),'BLEU_3':BLEU_3/max(length,1),'BLEU_4':BLEU_4/max(length,1)}
        if save:
            decoded_dict = {}
            for i in range(len(out_paragraph)):
                decoded_dict.update({i:{'pred':out_paragraph[i],'real':original[i],'parent':parents[i]}})
            try:
                json.dump(decoded_dict,open(os.path.join(self.option.model_path, str(self.hist.epoch[-1]+1).zfill(2)+'_sample.json'),'w'))
            except:
                json.dump(decoded_dict,open(os.path.join(self.option.model_path, '00_sample.json'),'w'))
        if return_parents:
            return out_paragraph, scores, original, parents
        return out_paragraph, scores, original
    
    def custom_fit(self, dataset, val_data=None, epochs=1, reset=False):
        save_path=self.option.checkpoints_path
        if epochs:
            total_batches = sum(1 for _ in dataset.padded_batch(self.option.batch_size).as_numpy_iterator())
            print('Total Batches:',total_batches)
        if os.path.exists(self.option.history_path) and not reset:
            self.hist = tf.keras.callbacks.History()
            self.load_weights(save_path)
            self.hist.set_model(self)
            self.hist.on_train_begin()
            self.hist.history = json.load(open(self.option.history_path, 'r'))
            self.hist.epoch = self.hist.history['epoch']
            curr_epoch = self.hist.epoch[-1]+1
            print("Starting training from ",curr_epoch," epochs")
        else:
            self.hist = tf.keras.callbacks.History()
            self.hist.set_model(self)
            self.hist.on_train_begin()
            curr_epoch = 0
            self.save_weights(save_path)
            print("Checkpoint Initialized")
        for epoch in range(curr_epoch,epochs+curr_epoch):
            score=0
            loss = 0
            accuracy = 0
            batch_count = 0
            data = dataset.shuffle(900, reshuffle_each_iteration=True)
            data = data.padded_batch(self.option.batch_size).as_numpy_iterator()
            for i in trange(total_batches):
                element = next(data)
                X, y_in, y_out,parent = element
                BATCH_SIZE = X.shape[0]
                input_list = []
                input_list.append(X)
                input_list.append(y_in)
                if self.option.attention:
                    z1 = np.zeros((BATCH_SIZE,self.option.decoder_units))
                    input_list.append(z1)
                    z2 = np.zeros((BATCH_SIZE,self.option.decoder_units))
                    input_list.append(z2)
                loss_t, accuracy_t = self.train_on_batch(
                    input_list, 
                    tf.keras.utils.to_categorical(y_out, num_classes = self.option.num_words)
                )
                if not np.any(np.isnan(loss_t)): 
                    loss+=loss_t
                    accuracy+=accuracy_t
                    batch_count+=1
            if not val_data==None:
                _,score,_ = self.decode_sequence(val_data,start=0,length=5+epoch*10)
            if batch_count:
                loss=loss/batch_count
                accuracy=accuracy/batch_count
                print("Batches in epoch ", batch_count)
                self.hist.on_epoch_end(epoch=epoch,logs={'loss':loss,'accuracy':accuracy,'bleu':score,'epoch':epoch})
                print('Epoch:',epoch,' loss:',loss,' acc:',accuracy,' bleu:',score)
                self.save_weights(save_path)
                json.dump(self.hist.history, open(self.option.history_path, 'w'))
        return self.hist
        
    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs, init_s, init_c = inputs
        state = [init_s]
        if self.option.decoder_type=='LSTM': state.append(init_c)

        en_output, en_time, en_output_list = self.encoder(encoder_inputs)
        outputs = []
        
        if not training: target_seq = decoder_inputs

        for t in range(self.option.max_len_target):
            if training:
                selector = Lambda(lambda x,t: x[:, t:t+1],arguments={'t':t},name='lambda'+str(t))
                target_seq = selector(decoder_inputs,t)   

            xt = self.embedding(target_seq)
            decoder_input = self.attention[0]([en_output,xt,state[0]])
            if self.join_seq: decoder_input = self.join_seq([decoder_input,en_time])
            out, state = self.decoder(decoder_input, initial_state=state)
            outputs.append(out)

            if not training:
                flat = self.flatten(out)
                idx = self.argmax(flat)
                target_seq = idx
        outputs = self.stacker(outputs)
        return outputs

In [9]:
class M_Novel_Model(M_Model):
    def __init__(self,option):
        super(M_Novel_Model, self).__init__(option=option)
        self.attention = []
        for layer in range(option.layer_count):
            self.attention.append(M_Attention(option))
        if option.layer_count>1: 
            self.enc_stack = Concatenate(-1,name='enc_stacker')
            self.stacked_dense = Dense(2*self.option.encoder_units, activation='relu', name='stacked_dense')

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs, init_s, init_c = inputs
        state = [init_s]
        if self.option.decoder_type=='LSTM': state.append(init_c)

        en_output, en_time, en_output_list = self.encoder(encoder_inputs)
        outputs = []
        
        if not training: target_seq = decoder_inputs

        for t in range(self.option.max_len_target):
            if training:
                selector = Lambda(lambda x,t: x[:, t:t+1],arguments={'t':t},name='lambda'+str(t))
                target_seq = selector(decoder_inputs,t)   

            xt = self.embedding(target_seq)
            encoder_out = []
            for layer in range(len(en_output_list)):
                encoder_out.append(self.attention[layer]([en_output_list[layer],xt,state[0]]))
            if len(encoder_out)>1: 
                decoder_input = self.enc_stack(encoder_out)
                decoder_input = self.stacked_dense(decoder_input)
                
            else: decoder_input = encoder_out[0]
            
            if self.join_seq: decoder_input = self.join_seq([decoder_input,en_time])
            out, state = self.decoder(decoder_input, initial_state=state)
            outputs.append(out)

            if not training:
                flat = self.flatten(out)
                idx = self.argmax(flat)
                target_seq = idx
        outputs = self.stacker(outputs)
        return outputs

In [10]:
model = M_Novel_Model(opt)
model.build_model()
# input_list = [np.random.rand(1,opt.max_len_target,4096),np.zeros((1,1)),np.random.rand(1,opt.decoder_units),np.random.rand(1,opt.decoder_units)]
# out = model(input_list)
# print(out.shape)
# input_list = [np.random.rand(opt.batch_size,opt.max_len_target,4096),np.zeros((opt.batch_size,opt.max_len_target)),np.random.rand(opt.batch_size,opt.decoder_units),np.random.rand(opt.batch_size,opt.decoder_units)]
# out = model(input_list,training=True)
# print(out.shape)
model.summary()

400000it [00:14, 28372.71it/s]


Embedding Layer Created
Tensor("attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_1/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_2/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_3/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_4/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_5/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_6/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_7/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_8/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_9/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)
Tensor("attention_10/attn_dot/MatMul:0", shape=(None, 1, 512), dtype=float64)


In [11]:
train_dataset = tf.data.Dataset.from_generator(opt.get_tokenizer().data_generator,(tf.float64,tf.int64,tf.int64,tf.string),((15,4096),(15),(15),()),args=[2,50])
train_dataset = train_dataset.shuffle(900, reshuffle_each_iteration=True)

In [117]:
# train_dataset = list(train_dataset.padded_batch(opt.batch_size).as_numpy_iterator())

In [13]:
validation_dataset = tf.data.Dataset.from_generator(opt.get_tokenizer().data_generator,(tf.float64,tf.int64,tf.int64,tf.string),((15,4096),(15),(15),()),args=[0])
validation_dataset = validation_dataset.shuffle(900, reshuffle_each_iteration=True)
validation_dataset = list(validation_dataset.as_numpy_iterator())

UnknownError: NameError: name 'get_data_list' is not defined
Traceback (most recent call last):

  File "/home/mushfiqur11/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 243, in __call__
    ret = func(*args)

  File "/home/mushfiqur11/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 309, in wrapper
    return func(*args, **kwargs)

  File "/home/mushfiqur11/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 785, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "<ipython-input-5-17baf499fe40>", line 114, in data_generator
    data_list = get_data_list(os.path.join(data_dir,data_path))

NameError: name 'get_data_list' is not defined


	 [[{{node PyFunc}}]]

In [119]:
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001),
             loss = tf.keras.losses.CategoricalCrossentropy(),
             metrics = ['accuracy']
             )

In [120]:
history = model.custom_fit(train_dataset, reset=True, val_data=validation_dataset, epochs=2)

  0%|          | 0/5 [00:00<?, ?it/s]

Total Batches: 5
Checkpoint Initialized


100%|██████████| 5/5 [00:42<00:00,  8.43s/it]


Batches in epoch  5
Epoch: 0  loss: 8.634870338439942  acc: 0.2533333333333333  bleu: {'BLEU_1': 0.0, 'BLEU_2': 0.0, 'BLEU_3': 0.0, 'BLEU_4': 0.0}


100%|██████████| 5/5 [00:03<00:00,  1.27it/s]


Batches in epoch  5
Epoch: 1  loss: 8.430844497680663  acc: 0.47733333333333333  bleu: {'BLEU_1': 0.0, 'BLEU_2': 0.0, 'BLEU_3': 0.0, 'BLEU_4': 0.0}


In [20]:
decoded_outputs = model.decode_sequence(validation_dataset,length=20,return_parents=True)

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [86]:
a,b,c,d = decoded_outputs
decoded_dict = {}
for i in range(len(a)):
    decoded_dict.update({i:{'pred':a[i],'real':c[i],'parent':d[i]}})
json.dump(decoded_dict,open('output_sample.json','w'))

In [43]:
opt.get_tokenizer().data_generator

<bound method TokenizerWrap.data_generator of <__main__.TokenizerWrap object at 0x7eff56263690>>

In [44]:
data_generator

<function __main__.data_generator(mode=0, data_size=1000000)>

In [103]:
model.hist.epoch

[]

In [152]:
model.name

'en_LSTM_de_LSTM_layers_2_withAttention'

In [9]:
ref = opt.get_tokenizer().caption_dictionary

In [48]:
references = opt.get_tokenizer().clean_cap(ref["3opDcpPxllE_50_66"])
output_seq = ["a man is driving a car".split(" ")]

In [49]:
print('b1',corpus_bleu([references], output_seq, weights=(1,0,0,0))*100.00)
print('b2',corpus_bleu([references], output_seq, weights=(1/2,1/2,0,0))*100.00)
print('b3',corpus_bleu([references], output_seq, weights=(1/3,1/3,1/3,0))*100.00)
print('b4',corpus_bleu([references], output_seq, weights=(1/4,1/4,1/4,1/4))*100.00)

b1 100.0
b2 100.0
b3 100.0
b4 90.36020036098448


In [39]:
references[0]

['a', 'powdered', 'substance', 'is', 'being', 'shifted', 'into', 'a', 'pan']