In [69]:
import os
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import utils
from sklearn.model_selection import train_test_split
from keras.models import Model, Sequential
from keras.layers import *

In [2]:
# --- Dataset configuration ---------------------------------------------------
fileloc = '/scratch/sk7898/pedbike'
filestrs = ['Human/human_radial_cuts_stft', 'Bike/bike_radial_cuts_stft']
window = 256   # every row of a loaded file holds window*2 float64 values
split = 0.1    # fraction of the files held out for validation

# --- Discover the files once -------------------------------------------------
# A single directory listing drives both `filenames` and `data`, so index i of
# one always refers to the same file as index i of the other.
filenames = [os.path.join(fileloc, filestr, filename)
             for filestr in filestrs
             for filename in os.listdir(os.path.join(fileloc, filestr))]

# --- Load the raw float64 data -----------------------------------------------
# Passing the path straight to np.fromfile lets NumPy open and close the file
# itself, so no file handle is left dangling.
arrays = [np.fromfile(fname, dtype=np.float64).reshape(-1, window * 2)
          for fname in filenames]

# The per-file arrays may differ in their number of rows, so they are stored in
# a 1-D object array (one 2-D array per slot) instead of letting np.array()
# guess at a ragged layout.
data = np.empty(len(arrays), dtype=object)
for i, arr in enumerate(arrays):
    data[i] = arr

# --- Parse per-file metadata out of the file path ----------------------------
# Label: the integer in front of the first 'p' in the last '_'-separated field.
labels = np.array([int((fname.split('_')[-1]).split('p')[0]) for fname in filenames])

# Third-from-last '_'-separated field, as an integer. It is used further down
# as the per-file sequence length (presumably the row count of the file --
# TODO confirm against the file naming scheme).
seqs = np.array([int(fname.split('_')[-3]) for fname in filenames])

# --- Train / validation split ------------------------------------------------
# NOTE(review): no random_state is passed, so the split differs between runs.
indices = np.arange(len(filenames))
indices_train, indices_val = train_test_split(indices, test_size=split)

x_train = data[indices_train]
x_val = data[indices_val]

y_train = labels[indices_train]
y_val = labels[indices_val]

seqs_train = seqs[indices_train]
seqs_val = seqs[indices_val]

In [82]:
def histedges_equalN(seq_lengths, n_bins):
    """Return ``n_bins + 1`` bin edges holding roughly equal numbers of items.

    The sorted lengths are treated as samples at positions ``0 .. N-1`` and
    linearly interpolated at ``n_bins + 1`` evenly spaced positions between
    ``0`` and ``N``.

    Args:
        seq_lengths: 1-D sequence of lengths.
        n_bins: number of bins wanted.

    Returns:
        1-D float array of ``n_bins + 1`` non-decreasing edges.
    """
    count = len(seq_lengths)
    ordered = np.sort(seq_lengths)
    sample_positions = np.arange(count)
    edge_positions = np.linspace(0, count, n_bins + 1)
    return np.interp(edge_positions, sample_positions, ordered)

def element_to_bucket_id(x, buckets_min, buckets_max):
    """Return the index of the first bucket whose range contains ``x``'s length.

    A bucket ``i`` matches when ``buckets_min[i] <= x.shape[0] < buckets_max[i]``.

    Args:
        x: array whose first dimension is the sequence length.
        buckets_min: inclusive lower edge of every bucket.
        buckets_max: exclusive upper edge of every bucket.

    Returns:
        The smallest matching bucket index.

    Raises:
        ValueError: if no bucket matches (minimum of an empty selection).
    """
    n_steps = x.shape[0]
    above_lower = np.less_equal(buckets_min, n_steps)
    below_upper = np.less(n_steps, buckets_max)
    in_bucket = np.logical_and(above_lower, below_upper)
    hits = np.where(in_bucket)[0]
    return np.min(hits)

def pad_sequence(x, max_len=None, padding_value=0):
    """Pad ``x`` along its first axis up to ``max_len`` entries.

    Args:
        x: array of shape ``(length, ...)``; trailing dimensions are preserved.
        max_len: target length of the first axis. ``None`` means "no extra
            padding": the result has the same length as ``x``.
        padding_value: value written into the padded region. ``None`` is
            treated as ``0``.

    Returns:
        A new ``float64`` array of shape ``(max_len, ...)`` whose leading
        ``length`` entries are a copy of ``x`` and whose remainder is filled
        with ``padding_value``.

    Raises:
        ValueError: if ``x`` is longer than ``max_len``.
    """
    orig_length = x.shape[0]
    if max_len is None:
        max_len = orig_length
    if orig_length > max_len:
        raise ValueError(
            "sequence of length {} does not fit into max_len={}".format(orig_length, max_len))
    if padding_value is None:
        padding_value = 0

    # Take the trailing dimensions from the input instead of assuming a fixed
    # feature width, and actually honour the requested fill value.
    new_x = np.full((max_len,) + tuple(x.shape[1:]), padding_value, dtype=np.float64)
    new_x[:orig_length] = x
    return new_x
    
class train_generator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves one batch per sequence-length bucket.

    Samples are grouped into buckets of similar length (edges come from
    ``histedges_equalN``). Each non-empty bucket is one batch; bucket order and
    the order of samples inside each bucket are reshuffled on construction and
    after every epoch.

    Attributes:
        n_bins: number of buckets that was requested.
        buckets: dict mapping bucket id -> ``{'x': ..., 'y': ...}``.
        b_ids: current (shuffled) order of the non-empty bucket ids.
    """

    def __init__(self, n_bins, data, labels, seq_lengths, padding=None, padding_value=None):
        """Assign every sample to a bucket and optionally pad it.

        Args:
            n_bins: number of buckets to split the length range into (>= 1).
            data: iterable of per-sample arrays; ``shape[0]`` is the length
                that decides the bucket.
            labels: iterable of targets, aligned with ``data``.
            seq_lengths: lengths used to compute the bucket edges.
            padding: if truthy, every sample is padded to the largest integer
                length its bucket can contain.
            padding_value: fill value for padding; ``None`` means ``0``.

        Raises:
            ValueError: if ``n_bins`` is smaller than 1, or (from
                ``element_to_bucket_id``) if a sample fits no bucket.
        """
        super(train_generator, self).__init__()
        if n_bins < 1:
            raise ValueError("n_bins must be at least 1, got {}".format(n_bins))

        boundaries = list(histedges_equalN(seq_lengths, n_bins))
        buckets_min = boundaries[:-1]
        buckets_max = boundaries[1:]
        # Buckets are half-open [min, max); widen the last one so the longest
        # sequence still falls inside it.
        buckets_max[n_bins - 1] += 1

        fill_value = 0 if padding_value is None else padding_value

        data_buckets = dict()
        for x, y in zip(data, labels):
            b_id = element_to_bucket_id(x, buckets_min, buckets_max)
            if padding:
                # Edges are interpolated and may be fractional. The largest
                # integer length strictly below the upper edge is
                # ceil(edge) - 1; truncating (edge - 1) instead can give a
                # target shorter than a sequence that legitimately belongs
                # to this bucket.
                max_len = int(np.ceil(buckets_max[b_id])) - 1
                x = pad_sequence(x, max_len=max_len, padding_value=fill_value)

            bucket = data_buckets.setdefault(b_id, {'x': [], 'y': []})
            bucket['x'].append(x)
            bucket['y'].append(y)

        self.n_bins = n_bins
        self.buckets = data_buckets
        self._permute()

    def _permute(self):
        """Shuffle the bucket order and the samples inside every bucket."""
        # Only buckets that actually received samples exist in self.buckets,
        # so shuffle those ids rather than range(n_bins).
        bucket_ids = list(self.buckets.keys())
        self.b_ids = np.random.permutation(bucket_ids)

        for key in bucket_ids:
            # NOTE(review): without padding the samples of one bucket can have
            # different lengths, in which case this cannot form a regular array.
            xbin = np.array(self.buckets[key]['x'])
            ybin = np.array(self.buckets[key]['y'])
            index_array = np.random.permutation(len(xbin))
            self.buckets[key]['x'] = xbin[index_array]
            self.buckets[key]['y'] = ybin[index_array]

    def on_epoch_end(self):
        """Reshuffle buckets and their contents after each epoch."""
        self._permute()

    def __len__(self):
        """Denotes the number of batches per epoch (one per non-empty bucket)."""
        return len(self.b_ids)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` batch of the ``idx``-th bucket in the current order."""
        key = self.b_ids[idx]
        return self.buckets[key]['x'], self.buckets[key]['y']

In [14]:
def val_generator(x_val, y_val):
    """Yield single-sample validation batches forever.

    Cycles through ``zip(x_val, y_val)`` endlessly, so every sample (including
    the last one) is produced once per pass.

    Args:
        x_val: iterable of 2-D arrays.
        y_val: iterable of scalar targets, aligned with ``x_val``.

    Yields:
        ``(x, label)`` where ``x`` has shape ``(1, x.shape[0], x.shape[1])``
        and ``label`` is an ``int16`` array of shape ``(1, 1)``.

    Raises:
        ValueError: on first iteration if there is nothing to yield (an empty
            input would otherwise spin forever without producing a batch).
    """
    if len(x_val) == 0 or len(y_val) == 0:
        raise ValueError("val_generator needs at least one validation sample")

    while True:
        for x, y in zip(x_val, y_val):
            # Add a leading batch dimension of size 1.
            batch_x = x.reshape(1, x.shape[0], x.shape[1])
            label = np.zeros((1, 1), dtype=np.int16)
            label[0] = y
            yield batch_x, label

In [124]:
def cnn_embedding_model(input_shape):
    """Build the per-frame CNN: conv -> batch norm -> ReLU -> max-pool -> flatten.

    Args:
        input_shape: shape of one input frame, passed to the first layer.

    Returns:
        An uncompiled ``Sequential`` model whose output is the flattened
        feature map of one frame.
    """
    stack = [
        Conv2D(filters=20,
               kernel_size=5,
               strides=1,
               padding='same',
               name='conv_1',
               input_shape=input_shape),
        BatchNormalization(),
        Activation('relu', name='relu_1'),
        # Pool only along the second spatial axis.
        MaxPooling2D(pool_size=(1, 2)),
        Flatten(),
    ]
    return Sequential(stack)

In [102]:
# Training configuration.
batch_size = 64
# One bucket per ~batch_size training samples. Clamp to at least one bucket so
# a training set smaller than batch_size does not end up with zero buckets.
n_bins = max(1, len(indices_train) // batch_size)
epochs = 1

# Length-bucketed, zero-padded training batches and the validation input that
# is handed to fit_generator.
train_gen = train_generator(n_bins, x_train, y_train, seq_lengths=seqs_train, padding=True, padding_value=0.0)
val_gen = val_generator(x_val, y_val)

In [125]:
# The network input is (batch, time_steps, 512). Each 512-value time step is
# reshaped into one (windows, window_dim, 1) frame, so TimeDistributed sees a
# 5D tensor (batch, time_steps, windows, window_dim, 1).
# Per-frame shape handed to the CNN: (rows, cols, channels); 32 * 16 * 1 = 512.
windows = 32
window_dim = 16
input_shape = (windows, window_dim, 1)
base_model = cnn_embedding_model(input_shape)

#Added for BatchNormalization. See https://github.com/keras-team/keras/issues/2310
# NOTE(review): this sets the backend learning-phase flag to 1 globally;
# presumably it then also applies while validating -- confirm that is intended.
from keras import backend as K
K.set_learning_phase(1)

# Variable-length sequences of 512 features per time step.
inputs = Input(shape=(None, 512), name='input_1')
# -1 lets the time dimension stay variable while each step becomes one frame.
y = Reshape((-1,) + input_shape, input_shape=(None, 512))(inputs)

# Apply the CNN to every frame, summarise the sequence with an LSTM that
# returns only its last output, then regress a single value.
y = TimeDistributed(base_model)(y)
y = LSTM(5, return_sequences=False)(y)
outputs = Dense(1, kernel_initializer='normal')(y)
model = Model(inputs=inputs, outputs=outputs)

# Regression setup: MSE loss, MAE reported as a metric.
model.compile(optimizer="adam", loss="mean_squared_error", metrics=['mae'])
# One training step per bucket; validation_steps=1 requests a single validation step per epoch.
H = model.fit_generator(train_gen, validation_data=val_gen, validation_steps=1, steps_per_epoch=n_bins, epochs=epochs)

Epoch 1/1
