In [1]:
import os
import time
import numpy as np
import tensorflow as tf

import core.plot_saver as ps
import core.data_gen as dg

from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

import cPickle as pickle

# File name of the pickled training set; create_train_data() appends it to
# '../data/training/TIMIT_extracted/'.
training_data = "train_data_100.pickle"

# Size of the second axis of the x_input placeholder.
frequencies = 128
# Size of the third axis of x_input; also passed to dg.batch_generator as segment_size.
segment_size = 100
# Number of units in both dense layers, i.e. the width of the network output.
total_speakers = 100
# Disabled: train_loops is assigned in the training cell instead.
#train_loops = 30

In [2]:
def tf_log_dir():
    """Build the TensorBoard log directory path for the current run.

    The path is ``../data/tf_logs/sess_<unix timestamp>`` taken relative to the
    current working directory and then resolved with ``os.path.realpath``.
    Nothing is created on disk here; only the path string is returned.
    """
    run_dir = os.path.join(os.getcwd(), "../data/tf_logs/sess_%s" % int(time.time()))
    return os.path.realpath(run_dir)

# Parse training data to matrices

def create_train_data():
    """Load the pickled training set and split it into two partitions.

    The pickle is expected to hold a 3-tuple ``(X, y, speaker_names)``; only
    ``X`` and ``y`` are used. They are handed to ``dg.splitter`` and the four
    resulting arrays are returned regrouped per partition.

    Returns:
        (X_t, y_t, X_v, y_v) -- presumably training data/labels followed by
        validation data/labels (they feed train_gen and val_gen); confirm
        against dg.splitter.
    """
    pickle_path = '../data/training/TIMIT_extracted/' + training_data
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # from a trusted source.
    with open(pickle_path, 'rb') as f:
        X, y, _speaker_names = pickle.load(f)

    # NOTE(review): 0.125 and 8 look like a hold-out fraction and a per-speaker
    # count -- confirm against dg.splitter's signature.
    X_train, X_val, y_train, y_val = dg.splitter(X, y, 0.125, 8)
    return X_train, y_train, X_val, y_val

# Create data

X_t, y_t, X_v, y_v = create_train_data()

# One generator per partition, both yielding batches of 128 segments.
train_gen = dg.batch_generator(X_t, y_t, segment_size=segment_size, batch_size=128)
val_gen = dg.batch_generator(X_v, y_v, segment_size=segment_size, batch_size=128)

# Number of rows in each partition rounded up to the next multiple of the
# batch size (128). Despite the names, these are sample totals, not batch counts.
batches_t = (X_t.shape[0] + 127) // 128 * 128
batches_v = (X_v.shape[0] + 127) // 128 * 128

In [3]:
# Create basic net infrastructure

# Target labels, one column per output unit of the network. Sized by
# total_speakers (not segment_size): the two constants are independent even
# though both are currently 100, and the loss requires labels and logits to
# have the same shape.
# NOTE(review): presumably one-hot speaker labels -- confirm against dg.batch_generator.
out_labels = tf.placeholder(tf.float32, shape=(None, total_speakers))

# Input batch. Dimensions (-1, 128, 100, 1) = (batch, frequencies, segment_size, channel)
x_input = tf.placeholder(tf.float32, shape=(None, frequencies, segment_size, 1))

with tf.name_scope('Convolution_1'):
    # Output Dimensions (-1, 128, 100, 16); "same" padding keeps height/width
    conv1 = tf.layers.conv2d(inputs=x_input, filters=16, kernel_size=[8, 8], padding="same", activation=tf.nn.relu)

with tf.name_scope('MaxPooling_1'):
    # Output Dimensions (-1, 63, 49, 16)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[4, 4], strides=2)

with tf.name_scope('Convolution_2'):
    # Output Dimensions (-1, 63, 49, 32)
    conv2 = tf.layers.conv2d(inputs=pool1, filters=32, kernel_size=[6, 6], padding="same", activation=tf.nn.relu)

with tf.name_scope('MaxPooling_2'):
    # Output Dimensions (-1, 30, 23, 32)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[4, 4], strides=2)

with tf.name_scope('Reshape'):
    # Collapse every non-batch axis into one: 30 * 23 * 32 = 22080 features
    dim_flatted = int(np.prod(pool2.shape[1:]))
    flat = tf.reshape(pool2, [-1, dim_flatted])

with tf.name_scope('Dense_1'):
    # Output Dimensions (-1, total_speakers)
    # NOTE(review): an earlier comment described this layer as "speakers * 10"
    # units, but the code uses total_speakers -- confirm the intended width.
    dense1 = tf.layers.dense(inputs=flat, units=total_speakers, activation=tf.nn.relu)

with tf.name_scope('Dense_2'):
    # Output Dimensions (-1, total_speakers), raw (unnormalised) class scores.
    # No softmax here: the loss op tf.nn.softmax_cross_entropy_with_logits
    # applies softmax internally, so feeding it probabilities would apply
    # softmax twice and flatten the gradients.
    network = tf.layers.dense(inputs=dense1, units=total_speakers, activation=None)

with tf.name_scope('Softmax'):
    # Class probabilities for inference/evaluation; not used by the loss.
    predictions = tf.nn.softmax(network)

In [4]:
# Run the network

with tf.name_scope('Optimizer'):
    # Per-example cross entropy between the network output and the labels.
    # NOTE: softmax_cross_entropy_with_logits applies softmax itself, so its
    # `logits` argument is expected to hold unnormalised scores.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=out_labels, logits=network)

    # Batch mean is the training objective and is exported to TensorBoard as 'loss'.
    cross_entropy = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('loss', cross_entropy)

    # Adam with its default hyper-parameters; running `optimizer` performs one update step.
    adam = tf.train.AdamOptimizer()
    optimizer = adam.minimize(cross_entropy)

In [None]:
# Training

# Number of training steps (one batch per step).
train_loops = 50

# The session is left open on purpose so that later cells can keep using it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Tensorboard
tb_merged = tf.summary.merge_all()
tb_saver = tf.train.Saver()
tb_train_writer = tf.summary.FileWriter(tf_log_dir(), sess.graph)
# FULL_TRACE records run metadata on every step so it can be attached to the
# TensorBoard log below.
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()

for train in range(train_loops):
    start_time = time.time()

    # Get next batch; the next() builtin works for Python 2 and Python 3 generators
    x_b_t, y_b = next(train_gen)
    # Add the channel as last dimension. -1 lets the batch axis follow whatever
    # the generator yields instead of hard-coding the batch size.
    x_b = np.reshape(x_b_t, [-1, frequencies, segment_size, 1])

    # Execute training and fetch the merged summaries in the same run, so the
    # forward pass is not repeated just for logging. The logged loss is
    # therefore the one this update step was computed from.
    _, loss_value, tb_summary_str = sess.run(
        [optimizer, cross_entropy, tb_merged],
        feed_dict={x_input: x_b, out_labels: y_b},
        options=run_options,
        run_metadata=run_metadata)

    round_time = time.time() - start_time
    print("Round %d time: %s" % (train, round_time))

    tb_train_writer.add_run_metadata(run_metadata, 'step_{:04d}'.format(train))
    tb_train_writer.add_summary(tb_summary_str, train)
    tb_train_writer.flush()

Round 0 time: 2.32994294167
Round 1 time: 2.33097505569
Round 2 time: 2.33398795128
Round 3 time: 2.29593110085
Round 4 time: 2.3004720211
Round 5 time: 2.30020785332
Round 6 time: 2.25950789452
Round 7 time: 2.29637098312
Round 8 time: 2.36751413345
Round 9 time: 2.30102992058
Round 10 time: 2.27247095108
Round 11 time: 2.2541539669
Round 12 time: 2.3006310463
Round 13 time: 2.26172494888
Round 14 time: 2.32049393654
Round 15 time: 2.28292417526
Round 16 time: 2.26547694206
Round 17 time: 2.27108502388
Round 18 time: 2.31908106804
Round 19 time: 2.31048202515
Round 20 time: 2.3701581955
