In [1]:
import tensorflow as tf
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import time
import random

## load all npz files from npz_files directory.

In [2]:
import os

# NOTE(review): absolute, machine-specific locations of the train / test archives.
npz_files_directory_train = '/home/rohan/Desktop/label-generator/raw_files/npz-files/train'
npz_files_directory_test = '/home/rohan/Desktop/label-generator/raw_files/npz-files/test'

# Snapshot every entry name found in each directory (no filtering happens here).
train_files = [entry for entry in os.listdir(npz_files_directory_train)]
test_files = [entry for entry in os.listdir(npz_files_directory_test)]

In [3]:
# Keep only NumPy archives. Matching the full '.npz' suffix (dot included)
# avoids accepting names that merely end in the letters "npz".
train_files = [name for name in train_files if name.endswith('.npz')]
test_files = [name for name in test_files if name.endswith('.npz')]

In [4]:
# Report how many archive names survived the extension filter, per split.
for count_label, split_files in (('number of train files', train_files),
                                 ('number of test files', test_files)):
    print(count_label, len(split_files))

number of train files 10
number of test files 7


## load all numpy arrays into training data x and y.
Each npz file has the keys `['c1', 'c2', 'c3', 'Z', 'labels']`:
- `c1`, `c2`, `c3` => NumPy arrays containing all class 1, 2 and 3 bounding boxes, respectively.
- `Z` => the raw PSD data.
- `labels` => pixel-wise labels for the PSD data, with the same dimensions as `Z`.

In [5]:
def load_data(directory, files_list):
    """Load the 'Z' arrays and their 'labels' arrays from a set of .npz files.

    Parameters
    ----------
    directory : str
        Directory that contains the archives.
    files_list : iterable of str
        File names, relative to ``directory``, loaded in the given order.

    Returns
    -------
    tuple(list, list)
        ``(training_data_x, training_data_y)``: the ``'Z'`` array and the
        ``'labels'`` array of every file, in the order of ``files_list``.

    Side effects
    ------------
    Prints the number of files that were loaded.
    """
    training_data_x = list()
    training_data_y = list()

    for file_name in files_list:
        # np.load keeps the archive's file handle open; the context manager
        # closes it as soon as both arrays have been read into memory.
        with np.load(os.path.join(directory, file_name)) as archive:
            training_data_x.append(archive['Z'])
            training_data_y.append(archive['labels'])
    print('files loaded', len(training_data_x))
    return training_data_x, training_data_y

In [6]:
# Read the 'Z' / 'labels' arrays of every training archive.
training_data_x, training_data_y = load_data(directory=npz_files_directory_train,
                                             files_list=train_files)

files loaded 10


In [7]:
# Read the 'Z' / 'labels' arrays of every test archive.
test_data_x, test_data_y = load_data(directory=npz_files_directory_test,
                                     files_list=test_files)

files loaded 7


In [8]:
# One list entry per loaded training file, so these are file counts.
for size_label, loaded in (('shape of training_data_x', training_data_x),
                           ('shape of training_data_y', training_data_y)):
    print(size_label, len(loaded))

shape of training_data_x 10
shape of training_data_y 10


In [9]:
# These lines report on the test split, so the labels name test_data_*
# (the previous text said training_data_* while printing the test lists).
print('shape of test_data_x', len(test_data_x))
print('shape of test_data_y', len(test_data_y))

shape of training_data_x 7
shape of training_data_y 7


In [10]:
# Inspect the array shape of the first training file and of its label array.
for shape_label, first_array in (('shape of training_data_x samples', training_data_x[0]),
                                 ('shape of training_data_y samples', training_data_y[0])):
    print(shape_label, first_array.shape)

shape of training_data_x samples (1953, 512)
shape of training_data_y samples (1953, 512)


## Variable structures
1. `training_data_x` contains the `Z` array of every file.
2. `training_data_y` contains the `labels` array of every file.

## convert training_data_x to shape (size of all timesteps, 512)

In [11]:
def processFrequencies(freq, num_steps):
    """Turn one row of 512 values into 512 forward-looking windows.

    Window ``i`` holds ``freq[i : i + num_steps]``. Windows that would run past
    the end of the row are completed by repeating the last value ('edge'
    padding).

    Parameters
    ----------
    freq : array-like
        Array with exactly 512 elements whose first dimension is 512.
    num_steps : int
        Window length (>= 1). Any positive value is supported; previously only
        16 worked because the buffer, pad width and reshape were hard-coded.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(512, num_steps, 1)``.

    Raises
    ------
    AssertionError
        If ``freq.shape[0]`` is not 512.
    ValueError
        If ``num_steps`` is smaller than 1.
    """
    assert freq.shape[0] == 512
    if num_steps < 1:
        raise ValueError('num_steps must be at least 1')

    flat = np.reshape(freq, (512,))
    # Extend the tail once so every window can be read with plain indexing.
    padded = np.pad(flat, (0, num_steps - 1), 'edge')
    # window_index[i, k] == i + k, i.e. the k-th element of window i.
    window_index = np.arange(512)[:, None] + np.arange(num_steps)[None, :]
    frequencies = padded[window_index].astype(np.float32)
    return np.reshape(frequencies, (512, num_steps, 1))

In [12]:
def processLabels(labels):
    """One-hot encode a row of 512 integer class ids into a (512, 4) array.

    Row ``r`` of the result is all zeros except for a 1 in the column given by
    the ``r``-th label. The result uses NumPy's default float dtype.
    """
    class_ids = np.reshape(labels, (512,))
    one_hot = np.zeros((512, 4))
    # Set one cell per row: (row r, column class_ids[r]).
    one_hot[np.arange(512), class_ids] = 1
    return one_hot

## Convert all x_train data to serial x_train data.

In [13]:
# global parameters.
# Four independent, initially empty containers for the serialised samples.
x_train = []
y_train = []
x_test = []
y_test = []
# Window length handed to processFrequencies.
num_steps = 16

In [14]:
def convertToSerialList(x, y, x_copy, y_copy):
    """Flatten per-file arrays into per-row samples, appended in place.

    For every row of every array in ``x`` the windowed representation from
    ``processFrequencies`` is appended to ``x_copy``; the one-hot labels of the
    matching row in ``y`` (via ``processLabels``) are appended to ``y_copy``.
    Nothing is returned. The window length is read from the global ``num_steps``.
    """
    assert len(x) == len(y)
    for file_index, file_rows in enumerate(x):
        file_labels = y[file_index]
        for row in range(file_rows.shape[0]):
            x_copy.append(processFrequencies(file_rows[row, :], num_steps))
            y_copy.append(processLabels(file_labels[row, :]))

In [15]:
# Fill x_train / y_train in place with one sample per row of the training files.
convertToSerialList(x=training_data_x, y=training_data_y,
                    x_copy=x_train, y_copy=y_train)

In [16]:
# Number of serialised training samples and of their label arrays.
for length_label, samples in (('length of x is ', x_train),
                              ('length of y is ', y_train)):
    print(length_label, len(samples))

length of x is  19531
length of y is  19531


In [17]:
# Shape of one serialised training sample and of its label array.
# The second label now says "y" (it previously repeated "x" for the labels).
print('shape of x is ', x_train[0].shape)
print('shape of y is ', y_train[0].shape)

shape of x is  (512, 16, 1)
shape of x is  (512, 4)


In [18]:
# Fill x_test / y_test in place with one sample per row of the test files.
convertToSerialList(x=test_data_x, y=test_data_y,
                    x_copy=x_test, y_copy=y_test)

In [19]:
# Number of serialised test samples and of their label arrays.
for length_label, samples in (('length of x is ', x_test),
                              ('length of y is ', y_test)):
    print(length_label, len(samples))

length of x is  13672
length of y is  13672


In [20]:
# Shape of one serialised test sample and of its label array.
# The second label now says "y" (it previously repeated "x" for the labels).
print('shape of x is ', x_test[0].shape)
print('shape of y is ', y_test[0].shape)

shape of x is  (512, 16, 1)
shape of x is  (512, 4)


## Creating the RNN model.

In [21]:
# RNN Global Parameter models.

# --- input geometry: one sample is 512 windows of num_steps values ---
num_steps = 16
frequency_shape = [512, num_steps, 1]
labels_shape = [512, 4]

# --- network ---
hidden_layer_dimension = 32
# Both names below are read by Model; they must stay equal.
number_layers = 2
num_layers = 2
use_dropout = False
dropout = 0.0

# --- optimisation ---
batch_size = 1
learning_rate = 0.001
num_epochs = 10
# Number of batches drawn from the iterator for one 512-row sample.
step_size = 512 // batch_size

# --- dataset bookkeeping ---
num_samples_train = len(x_train)
num_samples_test = len(x_test)
# Index lists (0 .. n-1) used to choose the visiting order of the samples.
shuffled_indexed_train = list(range(num_samples_train))
shuffled_indexed_test = list(range(num_samples_test))

In [22]:
class Input:
    """Feedable tf.data pipeline that serves one sample as a series of batches."""

    def __init__(self):
        """
        Build the placeholders, dataset and iterator.

        Exposes ``freq_placeholder`` / ``labels_placeholder`` for feeding one
        sample, ``iterator`` (to be initialised with those feeds) and the
        ``inputs`` / ``output_labels`` tensors produced by ``get_next()``.
        """
        self.freq_placeholder = tf.placeholder(dtype=tf.float32, shape=frequency_shape)
        self.labels_placeholder = tf.placeholder(dtype=tf.float32, shape=labels_shape)

        # Slice both placeholders along their first axis in lock-step, group the
        # slices into batches and prefetch up to 16 batches.
        paired_slices = tf.data.Dataset.from_tensor_slices(
            (self.freq_placeholder, self.labels_placeholder))
        self.dataset = paired_slices.batch(batch_size).prefetch(16)

        self.iterator = self.dataset.make_initializable_iterator()
        next_inputs, next_labels = self.iterator.get_next()
        self.inputs = next_inputs
        self.output_labels = next_labels

In [23]:
class Model:
    """Stacked-LSTM classifier with a 4-way softmax head (TF1 graph API).

    Attributes
    ----------
    init_state : placeholder of shape
        ``[num_layers, 2, batch_size, hidden_layer_dimension]`` used to feed
        the initial (c, h) state of every layer.
    state : final LSTM state returned by ``tf.nn.dynamic_rnn``.
    output : RNN output at the last time step.
    logits : softmax probabilities of the 4 classes (the attribute name is
        kept for backward compatibility even though these are not raw logits).
    loss : summed cross-entropy between ``output_labels`` and the prediction.
    optimizer : RMSProp op that minimises ``loss``.
    acc, accuracy_ops : value tensor and update op of ``tf.metrics.accuracy``.
    """

    def __init__(self, inputs, output_labels):
        """Build the graph on top of the ``inputs`` / ``output_labels`` tensors.

        Raises
        ------
        ValueError
            If the global ``num_layers`` is smaller than 1.
        """
        if num_layers < 1:
            raise ValueError('num_layers must be at least 1')

        # One layer count (num_layers) is used for both the state placeholder
        # and the cell stack so the two can never disagree.
        self.init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, hidden_layer_dimension])
        state_per_layer_list = tf.unstack(self.init_state, axis=0)
        rnn_state_tuples = tuple(tf.nn.rnn_cell.LSTMStateTuple(state[0], state[1])
                                 for state in state_per_layer_list)

        cell_list = [self.createLSTMCells() for _ in range(num_layers)]
        if use_dropout:
            # Wrap each layer after it exists (the wrapper was previously
            # applied to a cell that had not been created yet).
            # NOTE(review): `dropout` is passed as the *keep* probability;
            # confirm that this is the intended meaning of the config value.
            cell_list = [tf.nn.rnn_cell.DropoutWrapper(layer_cell, output_keep_prob=dropout)
                         for layer_cell in cell_list]
        # Always stack, even for one layer, so the state structure matches the
        # tuple-of-LSTMStateTuple built from init_state above.
        cell = tf.nn.rnn_cell.MultiRNNCell(cell_list, state_is_tuple=True)

        rnn_output, self.state = tf.nn.dynamic_rnn(cell,
                                                   inputs,
                                                   dtype=tf.float32,
                                                   initial_state=rnn_state_tuples)

        # extract the last output for time=num_seq from the output.
        self.output = rnn_output[:, -1, :]

        # softmax layer
        weight = tf.Variable(tf.truncated_normal([hidden_layer_dimension, 4], stddev=0.01))
        bias = tf.Variable(tf.constant(0.1, shape=[4]))
        raw_logits = tf.matmul(self.output, weight) + bias
        self.logits = tf.nn.softmax(raw_logits)
        # log_softmax on the raw scores avoids log(0) -> NaN, which
        # tf.log(tf.nn.softmax(...)) produces once a probability underflows.
        self.loss = -tf.reduce_sum(output_labels * tf.nn.log_softmax(raw_logits))

        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        self.optimizer = optimizer.minimize(self.loss)

        # Streaming accuracy: running `accuracy_ops` updates the counters and
        # returns the accuracy accumulated since the local variables were last
        # initialised.
        self.acc, self.accuracy_ops = tf.metrics.accuracy(labels=tf.argmax(output_labels, 1),
                                                          predictions=tf.argmax(self.logits, 1))

    def createLSTMCells(self):
        """Return a new LSTM cell with ``hidden_layer_dimension`` units."""
        cell = tf.contrib.rnn.LSTMCell(hidden_layer_dimension, forget_bias=1.0)
        return cell

In [None]:
with tf.Session() as sess:
    i = Input()
    m = Model(i.inputs, i.output_labels)
    # Built once so that resetting the streaming-accuracy counters does not add
    # a new initializer op to the graph every time it is needed.
    reset_metrics = tf.local_variables_initializer()
    sess.run(reset_metrics)
    sess.run(tf.global_variables_initializer())
    state_shape = (num_layers, 2, batch_size, hidden_layer_dimension)
    current_state = np.zeros(state_shape)
    # NOTE(review): a single epoch is run here; `num_epochs` from the
    # parameter cell is not used by this loop.
    for epoch in range(1):
        start_time = time.time()
        print('epoch #', epoch, 'started')
        # Start every epoch with fresh accuracy counters.
        sess.run(reset_metrics)
        train_accuracy = 0
        random.shuffle(shuffled_indexed_train)
        for sample_index in shuffled_indexed_train:
            # Point the iterator at one sample; it then yields step_size batches.
            sess.run(i.iterator.initializer, feed_dict={i.freq_placeholder: x_train[sample_index],
                                                        i.labels_placeholder: y_train[sample_index]})
            for step in range(step_size):
                # The LSTM state returned by one step is fed into the next one.
                train_accuracy, _, current_state = sess.run([m.accuracy_ops, m.optimizer, m.state],
                                                            feed_dict={m.init_state: current_state})
        print('train accuracy at epoch #', epoch, train_accuracy)
        if epoch % 50 == 0:
            # Reset the counters so the reported value covers test data only,
            # and use a separate state so evaluation never alters the state
            # carried by training.
            sess.run(reset_metrics)
            accuracy = 0
            eval_state = np.zeros(state_shape)
            for sample_index in shuffled_indexed_test:
                sess.run(i.iterator.initializer, feed_dict={i.freq_placeholder: x_test[sample_index],
                                                            i.labels_placeholder: y_test[sample_index]})
                for step in range(step_size):
                    accuracy, eval_state = sess.run([m.accuracy_ops, m.state],
                                                    feed_dict={m.init_state: eval_state})
            # The accuracy value itself was previously never printed.
            print('test accuracy at epoch #', epoch, accuracy)
        end_time = time.time()
        print('epoch #', epoch, 'ended - ',  end_time - start_time)

epoch # 0 started
