In [1]:
from speech_classes import *
from helpers import *

X_vector_len = 16000

# Get feature and label matrices

* sklearn label encoder: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
* sklearn one hot encoder: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html
* sklearn split: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html

In [2]:
train = SpeechList.get_train('train/audio')
Xy_train_valid = train.get_X_and_y_matrices(X_vector_len=X_vector_len, split=0.75)



SKIPPED train/audio/_background_noise_/README.md File format b'# Ba'... not understood.


# Make graph

In [11]:
import tensorflow as tf

from sklearn.utils import shuffle
from datetime import datetime, timedelta

import os
import shutil

class TestLSTM(tf.Graph):
    def __init__(self, g_cnfg):
        super().__init__()
        self.name = 'TestLSTM'
        self.build(g_cnfg)

    def weight_variable(self, shape, weights_stddev, name):
        initial = tf.truncated_normal(shape, stddev=weights_stddev)
        return tf.Variable(initial, name=name)
    
    def bias_variable(self, biases_initial, shape, name):
        initial = tf.constant(biases_initial, shape=shape)
        return tf.Variable(initial, name=name)

    def build(self, cnfg):
        self.g_cnfg = cnfg
        with self.as_default():
            global_step = tf.Variable(0, trainable=False)
            self.keep_prob = tf.placeholder(tf.float32)
            self.is_training = tf.placeholder(tf.bool)

            self.X = tf.placeholder(tf.float32, [None, cnfg.X_vector_len])
            self.y = tf.placeholder(tf.float32, [None, cnfg.y_vector_len])

            W1 = self.weight_variable([cnfg.X_vector_len, cnfg.n_hidden], 0.015, 'W1')
            b1 = self.bias_variable(0.1, [cnfg.n_hidden], 'b1')
            Xw1 = tf.matmul(self.X, W1) + b1
            X2 = tf.nn.relu(Xw1)

            W2 = self.weight_variable([cnfg.n_hidden, cnfg.y_vector_len], 0.015, 'W2')
            b2 = self.bias_variable(0.1, [cnfg.y_vector_len], 'b2')
            self.logits = tf.matmul(X2, W2) + b2

            self.logloss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=self.logits))

            learning_rate = tf.train.exponential_decay(cnfg.lr_initial, global_step, 
                                                       cnfg.lr_decay_steps, cnfg.lr_decay_rate, 
                                                       staircase=True)

            self.optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(self.logloss, global_step=global_step)

            tf.summary.scalar('logloss', self.logloss)
            tf.summary.scalar('learning_rate', learning_rate)

            for var in tf.trainable_variables():
                tf.summary.histogram(var.op.name, var)

            self.summarizer = tf.summary.merge_all()
            
    def make_tb_ckp_dir(self, cnfg_name):
        # Make tensorboard and checkpoint directories
        self.tb_dir = '_'.join(['tb', self.name, cnfg_name])
        self.ckp_dir = '_'.join(['ckp', self.name, cnfg_name])
        
        for dir_ in [self.tb_dir, self.ckp_dir]:
            if os.path.isdir(dir_):
                shutil.rmtree(dir_)
            os.makedirs(dir_)

        # Sub directories for checkpoint
        os.makedirs(self.ckp_dir + '/hourly')
        os.makedirs(self.ckp_dir + '/best')
        
    def get_next_batch(self):
        if self.offset == 0:  # Shuffle every epoch
            self.X_train, self.y_train = shuffle(self.X_train, self.y_train)

        if self.offset <= (self.len_X_train - self.batch_size):  # Enough for next batch
            X_batch = self.X_train[self.offset: self.offset+self.batch_size, :]
            y_batch = self.y_train[self.offset: self.offset+self.batch_size, :]
            self.offset += self.batch_size
        else:
            X_batch = self.X_train[self.offset: self.len_X_train, :]  # All to the end
            y_batch = self.y_train[self.offset: self.len_X_train, :]
            self.offset = 0
        
        return X_batch, y_batch

    def save_basics(self, step, ll_train, ll_valid, summary):
        self.log.record(step, ll_train, ll_valid)  # Log file
        self.log.save()
        self.writer.add_summary(summary, step)  # Tensorboard
        
    def make_ckp(self, saver, sub_dir, step):
        path_ckp = saver.save(self.sess, '/'.join([self.ckp_dir, sub_dir, 'model']), 
                              global_step=step, latest_filename=sub_dir+'_checkpoint') 
        return path_ckp
        
    def make_ckp_if_hour_passed(self, step):
        if datetime.now() <= self.last_hr_model_time + timedelta(hours=1):
            return
        
        path_ckp = self.make_ckp(self.saver_hourly, 'hourly', step)
        self.last_hr_model_time = datetime.now()
        
        if self.annotate:
            print('step {:,} Hourly model saved @ {:%m/%d/%Y %H:%M:%S}'.format(step, datetime.now()))

    def make_ckp_if_best(self, step):
        if self.ave_ll_valid > self.log.best_model_ll:
            return
        
        path_ckp = self.make_ckp(self.saver_best, 'best', step)
        self.patient_till = self.ave_ll_valid + self.early_stopping_patience
        self.log.update_best_model(self.patient_till)
        
        if self.annotate:
            print('Step {:,} Best model saved @ {:%m/%d/%Y %H:%M:%S} [Ave valid ll] {:.3f}'.format(step, datetime.now(), self.ave_ll_valid))
        
    def train_model(self, cnfg, Xy_train_valid, annotate=True):
        # Prep
        self.batch_size = cnfg.batch_size
        self.early_stopping_patience = cnfg.early_stopping_patience

        self.X_train, self.y_train, X_valid, y_valid = Xy_train_valid
        self.len_X_train = len(self.X_train)
        
        self.annotate = annotate

        self.make_tb_ckp_dir(cnfg.name)
        self.log = Log(self.name, self.g_cnfg, cnfg)  # Create log
        
        with tf.Session(graph=self) as self.sess: 
            # Initializations
            tf.global_variables_initializer().run()  # Graph variables
            self.writer = tf.summary.FileWriter(self.tb_dir, self.sess.graph)  # Tensorboard
            self.saver_hourly = tf.train.Saver(max_to_keep=None)  # Model saver for hourly models
            self.saver_best = tf.train.Saver()  # Model saver for best models

            # Training loop
            for step in range(0, cnfg.max_step):
                if step == 0:  # Only first time
                    self.log.train_start = datetime.now()
                    print('Training starts @ {:%m/%d/%Y %H:%M:%S}'.format(self.log.train_start))
                    
                    self.offset = 0
                    self.last_hr_model_time = datetime.now()
                    self.patient_till = float('inf')

                X_batch, y_batch = self.get_next_batch()
                
                _, summary = self.sess.run([self.optimizer, self.summarizer], 
                                           feed_dict={self.X: X_batch, self.y: y_batch, 
                                                      self.keep_prob: cnfg.dropout_keep_prob, 
                                                      self.is_training: True})
                
                if step % cnfg.log_every == 0:  # Keep track of training progress                    
                    ll_train = self.logloss.eval(feed_dict={self.X: X_batch, self.y: y_batch, 
                                                            self.keep_prob: 1.0, self.is_training: False})
                    ll_valid = self.logloss.eval(feed_dict={self.X: X_valid, self.y: y_valid, 
                                                            self.keep_prob: 1.0, self.is_training: False})
                    
                    self.save_basics(step, ll_train, ll_valid, summary)
                    self.ave_ll_valid = self.log.ave_ll_valid[-1]
                    
                    self.make_ckp_if_hour_passed(step)
                    self.make_ckp_if_best(step)
                    
                    # Done if patience is over
                    if (step > cnfg.start_step_early_stopping) & (self.ave_ll_valid > self.patient_till):
                        print('Early stopping now')
                        break

                if (annotate) & (step % cnfg.print_every == 0):
                    print('Step {:,} ends @ {:%m/%d/%Y %H:%M:%S} [Train ll] {:.3f} [Ave valid ll] {:.3f}'.format(step, datetime.now(), ll_train, self.ave_ll_valid))

            # The End
            log.train_end = datetime.now()
            print('Training ends @ {:%m/%d/%Y %H:%M:%S}'.format(log.train_end))
            
            self.log.save()
            self.make_ckp(self.saver_hourly, 'hourly', step)
            
    def load_and_predict(self, path2ckp, X_test):
        # User specifies checkpoint
        with tf.Session(graph=self) as sess:
            tf.global_variables_initializer().run()
            
            saver = tf.train.Saver()
            saver.restore(sess, path2ckp)  # Load model
            
            y_test = self.logits.eval(feed_dict={self.X: X_test,  
                                                 self.keep_prob: 1.0,
                                                 self.is_training: False})
        return y_test
            
    def predict(self, X_test, ckp_dir=None):
        # Use best model i.e. model with best ave ll valid
        if ckp_dir is None:
            ckp_dir = self.ckp_dir
        
        ckp = max(os.listdir(ckp_dir + '/best'), key=os.path.getctime)
        path2ckp = '/'.join([ckp_dir, 'best', ckp])
        
        return self.load_and_predict(path2ckp, X_test)


# Train model
* Hmm, sparse matrix doesnt work

In [12]:
g_cnfg = Config('trial_graph')
g_cnfg.X_vector_len = X_vector_len
g_cnfg.y_vector_len = Xy_train_valid[1].shape[1]
g_cnfg.n_hidden = 200
g_cnfg.lr_initial = 0.1
g_cnfg.lr_decay_steps = 1000
g_cnfg.lr_decay_rate = 0.7  # 1.0 means no decay
print(g_cnfg)

graph = TestLSTM(g_cnfg)

t_cnfg = Config('trial_run_01')
t_cnfg.max_step = 100000000
t_cnfg.batch_size = 1
t_cnfg.dropout_keep_prob = 0.5
t_cnfg.log_every = 1
t_cnfg.print_every = 5
t_cnfg.n_ave_ll_valid = 8
t_cnfg.start_step_early_stopping = 20000
t_cnfg.early_stopping_patience =0.1
print(t_cnfg)

graph.train_model(t_cnfg, Xy_train_valid)

name: trial_graph
X_vector_len: 16,000
lr_decay_rate: 0.7
lr_decay_steps: 1,000
lr_initial: 0.1
n_hidden: 200
y_vector_len: 31

name: trial_run_01
batch_size: 1
dropout_keep_prob: 0.5
early_stopping_patience: 0.1
log_every: 1
max_step: 100,000,000
n_ave_ll_valid: 8
print_every: 5
start_step_early_stopping: 20,000

Training starts @ 11/27/2017 05:06:36
Step 0 ends @ 11/27/2017 05:08:58 [Train ll] 168538.500 [Ave valid ll] 3993866.500
Step 5 ends @ 11/27/2017 05:09:22 [Train ll] 27377696505856.000 [Ave valid ll] 13370494890463022.000
Step 10 ends @ 11/27/2017 05:09:43 [Train ll] 30713604734976.000 [Ave valid ll] 2119144037852232789524480.000
Step 13 Best model saved @ 11/27/2017 05:09:57 [Ave valid ll] nan
Step 14 Best model saved @ 11/27/2017 05:10:02 [Ave valid ll] nan
Step 15 Best model saved @ 11/27/2017 05:10:07 [Ave valid ll] nan
Step 15 ends @ 11/27/2017 05:10:07 [Train ll] 0.739 [Ave valid ll] nan
Step 16 Best model saved @ 11/27/2017 05:10:13 [Ave valid ll] nan
Step 17 Best mode

InvalidArgumentError: Nan in summary histogram for: W1_1
	 [[Node: W1_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](W1_1/tag, W1/read)]]

Caused by op 'W1_1', defined at:
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-db053a293750>", line 10, in <module>
    graph = TestLSTM(g_cnfg)
  File "<ipython-input-11-d0bf9501b173>", line 13, in __init__
    self.build(g_cnfg)
  File "<ipython-input-11-d0bf9501b173>", line 54, in build
    tf.summary.histogram(var.op.name, var)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/summary/summary.py", line 192, in histogram
    tag=tag, values=values, name=scope)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 188, in _histogram_summary
    "HistogramSummary", tag=tag, values=values, name=name)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/Users/kumiko/anaconda/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Nan in summary histogram for: W1_1
	 [[Node: W1_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](W1_1/tag, W1/read)]]


# Predict test labels

In [None]:
test = SpeechList.get_test('test/audio')
X_test = test.get_feature_matrix(vector_len=X_vector_len)
y_test = graph.predict(X_test)  # If right after training

# Predict by specifying a checkpoint

In [None]:
test = SpeechList.get_test('test/audio')
X_test = test.get_feature_matrix(vector_len=X_vector_len)

path2log = 'who am i?'
log = pickle.load(open(path2log, 'rb'))

graph = TestLSTM(log.g_cnfg)

y_test = graph.predict(X_test, ckp_dir='hohoho')  # Best model in a checkpoint directory
y_test = graph.load_and_predict(path2ckp, X_test)  # Specific model