In [28]:
import tensorflow as tf
import numpy as np
import os
import json
from time import time
# Make float64 the default Keras float dtype; the dataset cells in this
# notebook declare their feature tensors as tf.float64.
tf.keras.backend.set_floatx('float64')

In [2]:
from models_and_utils.utils import TokenizerWrap
from models_and_utils.models import M_Model, M_Embedding, M_Novel_Model

In [22]:
class Option():
    """Settings, output paths and lazily created components for one model version.

    Constructing an instance creates ``<NNN>_model/`` and
    ``<NNN>_model/checkpoints/`` (``NNN`` is the zero-padded version number)
    and writes every setting to ``<NNN>_model/info.json``.

    The tokenizer, embedding and model are built on first request through the
    ``get_*`` methods and cached on the instance afterwards.
    """

    # Attributes that hold live runtime objects once created. They are not
    # JSON-serialisable, so they are always stored as ``null`` in info.json
    # (which is also what a freshly constructed instance writes).
    _RUNTIME_ATTRS = ('tokenizer', 'embedding', 'model')

    def __init__(self,model_version):
        """Set up paths and defaults for ``model_version`` and persist them.

        Args:
            model_version: identifier of the run; it is stringified and
                zero-padded to three characters to name the output directory.
        """
        self.model_version=model_version
        self.model_path=str(self.model_version).zfill(3)+'_model'
        self.checkpoints_path=os.path.join(self.model_path,'checkpoints/')
        self.history_path=os.path.join(self.model_path,'history.json')
        # exist_ok avoids the check-then-create race and makes re-running the
        # cell for an existing version a no-op.
        os.makedirs(self.model_path,exist_ok=True)
        os.makedirs(self.checkpoints_path,exist_ok=True)

        # Lines tagged #EDIT carry the original author's marker (presumably
        # the values meant to be tuned per experiment).
        self.encoder_type='LSTM'    #EDIT
        self.encoder_units=256      #EDIT
        self.decoder_type='LSTM'    #EDIT
        self.decoder_units=2 * self.encoder_units
        self.layer_count=2          #EDIT
        self.beam_width=1
        self.batch_size=10          #EDIT
        self.attention=True
        self.max_len_target=15
        self.temporal_length=16
        self.embed_path='glove.6B.100d.txt'
        self.embed_out=100
        self.caption_path='MSVD_captions.csv'
        self.num_words=8000         #EDIT
        self.seq_join_out=15
        self.tokenizer=None
        self.embedding=None
        self.model=None
        self.name=None
        self.novel_model=False

        self.save_option()

    def _info_path(self):
        """Return the path of this version's ``info.json`` file."""
        return os.path.join(self.model_path,'info.json')

    def get_tokenizer(self):
        """Return the cached ``TokenizerWrap``, creating it on first use."""
        if self.tokenizer is None:
            self.tokenizer = TokenizerWrap(self)
        return self.tokenizer

    def get_embedding(self):
        """Return the cached ``M_Embedding``, creating it on first use."""
        if self.embedding is None:
            self.embedding = M_Embedding(self)
        return self.embedding

    def get_model(self):
        """Return the cached model, creating it via ``new_model`` on first use."""
        if self.model is None:
            self.model = self.new_model()
        return self.model

    def new_model(self):
        """Create a fresh model, replacing any cached one, and record its name.

        ``M_Novel_Model`` is used when ``novel_model`` is truthy, ``M_Model``
        otherwise. The model's ``name`` is copied to ``self.name`` and written
        to ``info.json``.

        Returns:
            The newly created model (also stored in ``self.model``).
        """
        if not self.novel_model: self.model = M_Model(self)
        else: self.model = M_Novel_Model(self)
        self.name = self.model.name
        self.save_update()
        return self.model

    def save_option(self):
        """Write all instance attributes to ``info.json``.

        Runtime objects listed in ``_RUNTIME_ATTRS`` are stored as ``null`` so
        the call also succeeds after the tokenizer/embedding/model exist.
        """
        snapshot = {
            key: (None if key in self._RUNTIME_ATTRS else value)
            for key, value in self.__dict__.items()
        }
        # Context manager guarantees the file is flushed and closed.
        with open(self._info_path(),'w') as info_file:
            json.dump(snapshot,info_file)

    def save_update(self):
        """Update only the ``name`` entry of the existing ``info.json``.

        Raises:
            FileNotFoundError: if ``info.json`` has not been written yet.
        """
        with open(self._info_path(),'r') as info_file:
            info = json.load(info_file)
        info.update({'name':self.name})
        with open(self._info_path(),'w') as info_file:
            json.dump(info,info_file)

In [23]:
# Options for model version 3: the constructor creates '003_model/' (with a
# 'checkpoints/' subfolder) and writes the settings to '003_model/info.json'.
opt = Option(3)

In [24]:
# Create the model and run two forward passes on random data as a shape check.
model = opt.get_model()
model.build_model()

# Pass 1: a single sample with a one-step second input, default call mode.
# NOTE(review): the four inputs are presumably (features, decoder tokens,
# two initial decoder states) -- confirm against the model's call signature.
input_list = [
    np.random.rand(1, opt.max_len_target, 4096),
    np.zeros((1, 1)),
    np.random.rand(1, opt.decoder_units),
    np.random.rand(1, opt.decoder_units),
]
out = model(input_list)
print(out.shape)

# Pass 2: a full batch with a max_len_target-step second input, training mode.
input_list = [
    np.random.rand(opt.batch_size, opt.max_len_target, 4096),
    np.zeros((opt.batch_size, opt.max_len_target)),
    np.random.rand(opt.batch_size, opt.decoder_units),
    np.random.rand(opt.batch_size, opt.decoder_units),
]
out = model(input_list, training=True)
print(out.shape)

model.summary()

400000it [00:12, 31027.44it/s]


Embedding Layer Created
(1, 15, 8000)
(10, 15, 8000)
Model: "en_LSTM_de_LSTM_layers_2_withAttention"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_LSTM_256 (M_Encoder) multiple                  3678223   
_________________________________________________________________
embedding_8000_100 (M_Embedd multiple                  1229800   
_________________________________________________________________
decoder_LSTM_512 (M_Decoder) multiple                  6408000   
_________________________________________________________________
attention (M_Attention)      multiple                  272917    
_________________________________________________________________
joinseq_512 (M_JoinSeq)      multiple                  512856    
_________________________________________________________________
stacker (Lambda)             multiple                  0         
_________________________________________

In [31]:
# Stream training samples from the tokenizer's generator. Each element is a
# 4-tuple: float64 [15, 4096], int64 [15], int64 [15] and a scalar string.
# NOTE(review): presumably (features, decoder input ids, target ids, sample id)
# and args=[2, 1000] selects the split/size -- confirm in TokenizerWrap.data_generator.
train_dataset = tf.data.Dataset.from_generator(
    opt.get_tokenizer().data_generator,
    (tf.float64, tf.int64, tf.int64, tf.string),
    # 1-D shapes need a trailing comma: `(15)` is just the int 15.
    ((15, 4096), (15,), (15,), ()),
    args=[2, 1000],
)
# Shuffle with a 900-element buffer, drawing a new order on every pass.
train_dataset = train_dataset.shuffle(900, reshuffle_each_iteration=True)

In [26]:
# Build the validation set from the same generator (called with args=[0]).
# Each element is a 4-tuple: float64 [15, 4096], int64 [15], int64 [15] and a
# scalar string.
# NOTE(review): the meaning of args=[0] is defined by
# TokenizerWrap.data_generator -- confirm there.
validation_dataset = tf.data.Dataset.from_generator(
    opt.get_tokenizer().data_generator,
    (tf.float64, tf.int64, tf.int64, tf.string),
    # 1-D shapes need a trailing comma: `(15)` is just the int 15.
    ((15, 4096), (15,), (15,), ()),
    args=[0],
)
validation_dataset = validation_dataset.shuffle(900, reshuffle_each_iteration=True)
# Materialise once into a list of NumPy tuples so the same samples (in the
# same order) are reused for every evaluation.
validation_dataset = list(validation_dataset.as_numpy_iterator())

In [27]:
# Configure training: Adam (learning rate 1e-4), categorical cross-entropy
# loss, and accuracy as the tracked metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [32]:
# Train for two epochs with the model's custom training loop and print the
# elapsed wall-clock time in seconds.
start = time()
history = model.custom_fit(train_dataset, reset=True, val_data=validation_dataset, epochs=2)
end = time()
# A stray trailing `3` after the call made this line a syntax error.
print(end-start)

Total Batches: 100
Checkpoint Initialized


100%|██████████| 100/100 [03:07<00:00,  1.87s/it]


Batches in epoch  100
Epoch: 0  loss: 7.348166389465332  acc: 0.46106666666666685  bleu: {'BLEU_1': 0.0, 'BLEU_2': 0.0, 'BLEU_3': 0.0, 'BLEU_4': 0.0}


100%|██████████| 100/100 [02:38<00:00,  1.58s/it]


Batches in epoch  100
Epoch: 1  loss: 5.123807325363159  acc: 0.4648666666666666  bleu: {'BLEU_1': 0.0, 'BLEU_2': 0.0, 'BLEU_3': 0.0, 'BLEU_4': 0.0}
385.9173333644867
