In [None]:
pwd

In [1]:
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, CSVLogger, ModelCheckpoint
from lipnet.lipreading.generators import RandomSplitGenerator
from lipnet.lipreading.callbacks import Statistics, Visualize
from lipnet.lipreading.curriculums import Curriculum
from lipnet.core.decoders import Decoder
from lipnet.lipreading.helpers import labels_to_text
from lipnet.utils.spell import Spell
from lipnet.new_model import LipNet
import numpy as np
import datetime
import os

Using TensorFlow backend.


In [None]:
# Seed NumPy's global random generator with a fixed value.
np.random.seed(55)

# os.path.abspath('') resolves to the current working directory; every
# location below is expressed relative to it.
CURRENT_PATH = os.path.abspath('')

# Folders that live directly inside the working directory.
DATASET_DIR, OUTPUT_DIR, LOG_DIR = (
    os.path.join(CURRENT_PATH, leaf) for leaf in ('datasets', 'results', 'logs')
)
print(DATASET_DIR)

# Resources kept in the 'common' tree two levels above the working directory.
FACE_PREDICTORS, PREDICT_DICTIONARY = (
    os.path.join(CURRENT_PATH, '..', '..', 'common', folder, filename)
    for folder, filename in (
        ('predictors', 'shape_predictor_68_face_landmarks.dat'),
        ('dictionaries', 'grid.txt'),
    )
)

# Settings handed to the Decoder built inside train().
PREDICT_GREEDY = False
PREDICT_BEAM_WIDTH = 200

def curriculum_rules(epoch):
    """Return the curriculum settings to use for ``epoch``.

    The ``epoch`` argument is accepted but not consulted: the same
    settings are returned no matter which epoch is asked for.

    Returns:
        dict: a fresh dictionary with the keys ``sentence_length``,
        ``flip_probability`` and ``jitter_probability``.
    """
    rules = dict(
        sentence_length=-1,
        flip_probability=0.5,
        jitter_probability=0.05,
    )
    return rules

def train(run_name, start_epoch, stop_epoch, img_c, img_w, img_h, frames_n, absolute_max_string_len, minibatch_size):
    """Train a LipNet model on the videos found under ``DATASET_DIR``.

    Builds a ``RandomSplitGenerator`` with a 20% validation split, compiles a
    ``LipNet`` model with Adam, optionally restores the weights saved for the
    previous epoch, and runs ``fit_generator`` with checkpointing, statistics,
    visualisation, TensorBoard and CSV logging callbacks.

    Args:
        run_name: Name of the run. Weights and callback output are written
            to ``OUTPUT_DIR/run_name``; TensorBoard logs to
            ``LOG_DIR/run_name`` and the CSV log to
            ``LOG_DIR/training-<run_name>.csv``.
        start_epoch: Epoch index to start from (passed as ``initial_epoch``).
            When greater than 0, ``weights<start_epoch - 1>.h5`` is loaded
            from the run's output directory first.
        stop_epoch: Value passed to ``fit_generator`` as ``epochs``.
        img_c, img_w, img_h, frames_n: Input dimensions forwarded to both the
            generator and the model.
        absolute_max_string_len: Forwarded to both the generator and the model.
        minibatch_size: Batch size for the generator; also used as the number
            of sentences the ``Visualize`` callback displays.

    Returns:
        None.
    """
    # Every per-run artefact (weights, statistics, visualisations) shares this folder.
    run_output_dir = os.path.join(OUTPUT_DIR, run_name)

    curriculum = Curriculum(curriculum_rules)
    lip_gen = RandomSplitGenerator(dataset_path=DATASET_DIR,
                                minibatch_size=minibatch_size,
                                img_c=img_c, img_w=img_w, img_h=img_h, frames_n=frames_n,
                                absolute_max_string_len=absolute_max_string_len,
                                curriculum=curriculum, start_epoch=start_epoch).build(val_split=0.2)

    lipnet = LipNet(img_c=img_c, img_w=img_w, img_h=img_h, frames_n=frames_n,
                            absolute_max_string_len=absolute_max_string_len, output_size=lip_gen.get_output_size())
    lipnet.summary()

    adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    lipnet.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam)

    # load weight if necessary
    if start_epoch > 0:
        # NOTE(review): assumes the checkpoint callback below numbers its files
        # by 0-based epoch index, so the previous epoch is start_epoch - 1 --
        # confirm against the installed Keras version.
        weight_file = os.path.join(run_output_dir, 'weights%02d.h5' % (start_epoch - 1))
        lipnet.model.load_weights(weight_file)

    # Create the destination folders up front instead of relying on a
    # callback to do it; the isdir guard keeps this valid on Python 2 and 3.
    for directory in (run_output_dir, LOG_DIR):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    spell = Spell(path=PREDICT_DICTIONARY)
    decoder = Decoder(greedy=PREDICT_GREEDY, beam_width=PREDICT_BEAM_WIDTH,
                      postprocessors=[labels_to_text, spell.sentence])

    # define callbacks
    statistics  = Statistics(lipnet, lip_gen.next_val(), decoder, 256, output_dir=run_output_dir)
    visualize   = Visualize(run_output_dir, lipnet, lip_gen.next_val(), decoder, num_display_sentences=minibatch_size)
    tensorboard = TensorBoard(log_dir=os.path.join(LOG_DIR, run_name))
    csv_logger  = CSVLogger(os.path.join(LOG_DIR, "{}-{}.csv".format('training',run_name)), separator=',', append=True)
    checkpoint  = ModelCheckpoint(os.path.join(run_output_dir, "weights{epoch:02d}.h5"), monitor='val_loss', save_weights_only=True, mode='auto', period=1)

    # lip_gen is itself passed as a callback, alongside the logging ones.
    lipnet.model.fit_generator(generator=lip_gen.next_train(),
                        steps_per_epoch=lip_gen.default_training_steps, epochs=stop_epoch,
                        validation_data=lip_gen.next_val(), validation_steps=lip_gen.default_validation_steps,
                        callbacks=[checkpoint, statistics, visualize, lip_gen, tensorboard, csv_logger],
                        initial_epoch=start_epoch,
                        verbose=1,
                        max_q_size=5,
                        workers=1,
                        pickle_safe=True)

if __name__ == '__main__':
    # Start a fresh run (no weights restored) and train up to epoch 100.
    run_name = "attention_layer"
    train(
        run_name,
        start_epoch=0,
        stop_epoch=100,
        img_c=3,
        img_w=100,
        img_h=50,
        frames_n=75,
        absolute_max_string_len=32,
        minibatch_size=25,
    )

/datasets/home/home-02/56/656/kkashilk/LipNet/training/random_split/datasets

Loading dataset list from cache...
Found 739 videos for training.
Found 184 videos for validation.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       (None, 75, 100, 50, 3)    0         
_________________________________________________________________
zero1 (ZeroPadding3D)        (None, 77, 104, 54, 3)    0         
_________________________________________________________________
conv1 (Conv3D)               (None, 75, 50, 25, 32)    7232      
_________________________________________________________________
batc1 (BatchNormalization)   (None, 75, 50, 25, 32)    128       
_________

Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().




[Epoch 0] Out of 256 samples: [CER: 20.523 - 0.824] [WER: 5.973 - 0.995] [BLEU: 0.389 - 0.389]

Epoch 2/100
Epoch 1: Curriculum(train: False, sentence_length: -1, flip_probability: 0.5, jitter_probability: 0.05)


[Epoch 1] Out of 256 samples: [CER: 24.941 - 1.000] [WER: 6.000 - 1.000] [BLEU: 0.000 - 0.000]

Epoch 3/100
Epoch 2: Curriculum(train: False, sentence_length: -1, flip_probability: 0.5, jitter_probability: 0.05)


[Epoch 2] Out of 256 samples: [CER: 24.926 - 1.000] [WER: 6.000 - 1.000] [BLEU: 0.000 - 0.000]

Epoch 4/100
Epoch 3: Curriculum(train: False, sentence_length: -1, flip_probability: 0.5, jitter_probability: 0.05)


[Epoch 3] Out of 256 samples: [CER: 24.125 - 0.967] [WER: 5.977 - 0.996] [BLEU: 0.373 - 0.373]

Epoch 5/100
Epoch 4: Curriculum(train: False, sentence_length: -1, flip_probability: 0.5, jitter_probability: 0.05)


[Epoch 4] Out of 256 samples: [CER: 23.891 - 0.963] [WER: 5.961 - 0.993] [BLEU: 0.409 - 0.409]

Epoch 6/100