In [1]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import os
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import utils
from sklearn.model_selection import train_test_split
from keras.models import Model, Sequential
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization
from keras.layers import *

Using TensorFlow backend.


In [2]:
# Route np.load through a wrapper that enables pickle loading by default, so
# callers that do not pass allow_pickle themselves can read object arrays.
# The wrapper remembers the function it wraps: re-running this cell re-wraps
# that same function instead of stacking a second wrapper on top of the first.
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load trusted files.
np_load_old = getattr(np.load, "_unpatched_load", np.load)

def _load_allow_pickle(*a, **k):
    """Call the unpatched np.load with allow_pickle defaulting to True."""
    k.setdefault("allow_pickle", True)
    return np_load_old(*a, **k)

_load_allow_pickle._unpatched_load = np_load_old
np.load = _load_allow_pickle

def get_data(fileloc, window=256):
    """Load the train/val/test arrays stored as .npy files in `fileloc`.

    Args:
        fileloc: directory containing the nine .npy files listed below.
        window: accepted for interface compatibility; not used by this function.

    Returns:
        A 9-tuple in the order
        (x_train, x_val, x_test, y_train, y_val, y_test,
         seqs_train, seqs_val, seqs_test).
    """
    # Listed in return order: features, then labels, then sequence lengths.
    filenames = (
        "train.npy", "val.npy", "test.npy",
        "train_lbls.npy", "val_lbls.npy", "test_lbls.npy",
        "train_seqs.npy", "val_seqs.npy", "test_seqs.npy",
    )
    return tuple(np.load(os.path.join(fileloc, fname)) for fname in filenames)

In [3]:
def histedges_equalN(seq_lengths, n_bins):
    """Return `n_bins + 1` bin edges interpolated from the sorted lengths.

    The sorted values of `seq_lengths` are treated as a function of their
    rank (0 .. len-1) and sampled at `n_bins + 1` evenly spaced positions
    between 0 and len(seq_lengths).
    """
    count = len(seq_lengths)
    ranks = np.arange(count)
    ordered = np.sort(seq_lengths)
    sample_points = np.linspace(0, count, n_bins + 1)
    return np.interp(sample_points, ranks, ordered)

def element_to_bucket_id(x, buckets_min, buckets_max):
    """Return the index of the first bucket whose range contains x's length.

    Bucket i covers the half-open interval [buckets_min[i], buckets_max[i]).

    Args:
        x: array whose first dimension (x.shape[0]) is the sequence length.
        buckets_min: sequence of inclusive lower edges, one per bucket.
        buckets_max: sequence of exclusive upper edges, one per bucket.

    Returns:
        The lowest matching bucket index (a numpy integer).

    Raises:
        ValueError: if the sequence length falls in none of the buckets.
    """
    seq_length = x.shape[0]
    in_bucket = np.logical_and(np.less_equal(buckets_min, seq_length),
                               np.less(seq_length, buckets_max))
    matches = np.flatnonzero(in_bucket)
    if matches.size == 0:
        # Fail with the offending length instead of numpy's generic
        # "zero-size array to reduction operation" message.
        raise ValueError(
            "sequence length {} does not fall into any bucket".format(seq_length))
    return matches[0]

def pad_sequence(x, max_len=None, padding_value=0):
    """Return a float64 copy of `x` padded along axis 0 up to `max_len` rows.

    Args:
        x: array whose first axis is the sequence axis; the remaining axes
            are copied through unchanged.
        max_len: number of rows in the result. None means "no padding"
            (the result has as many rows as `x`).
        padding_value: value written into the appended rows. None is treated
            as 0.

    Returns:
        A new float64 array of shape (max_len,) + x.shape[1:].

    Raises:
        ValueError: if `max_len` is smaller than the length of `x`.
    """
    orig_length = x.shape[0]
    if max_len is None:
        max_len = orig_length
    if max_len < orig_length:
        raise ValueError(
            "max_len ({}) is shorter than the sequence ({})".format(max_len, orig_length))

    fill = 0 if padding_value is None else padding_value
    # Take the trailing dimensions from x rather than assuming a fixed width.
    new_x = np.full((max_len,) + tuple(x.shape[1:]), fill, dtype=np.float64)
    new_x[:orig_length] = x
    return new_x
    
class train_generator(keras.utils.Sequence):
    """Keras Sequence that serves one length-bucket of training data per batch.

    Samples are assigned to buckets by sequence length (x.shape[0]) using the
    edges returned by `histedges_equalN`. With `padding` enabled every sample
    is padded with `pad_sequence` to the longest length its bucket can hold.
    The bucket order and the sample order inside each bucket are reshuffled
    on construction and again at the end of every epoch.

    Attributes:
        n_bins: the number of buckets requested by the caller.
        buckets: dict mapping bucket id -> {'x': samples, 'y': labels}.
        bucket_ids: ids of the buckets that actually received samples.
        b_ids: the current shuffled serving order of `bucket_ids`.
    """

    def __init__(self, n_bins, data, labels, seq_lengths, padding=None, padding_value=None):
        """Bucket `data`/`labels` by sequence length and shuffle once.

        Args:
            n_bins: number of length buckets to create.
            data: iterable of arrays; x.shape[0] is used as the sequence length.
            labels: iterable of labels, aligned with `data`.
            seq_lengths: sequence lengths used to derive the bucket edges.
            padding: if truthy, pad each sample to its bucket's maximum length.
            padding_value: forwarded to `pad_sequence`.
        """
        boundaries = list(histedges_equalN(seq_lengths, n_bins))
        buckets_min = boundaries[:-1]
        buckets_max = boundaries[1:]
        # Buckets are half-open [min, max); widen the last one so that the
        # longest sequence still falls inside it.
        buckets_max[n_bins-1] += 1

        data_buckets = dict()
        for x, y in zip(data, labels):
            b_id = element_to_bucket_id(x, buckets_min, buckets_max)
            if padding:
                # Largest integer length strictly below the (possibly
                # fractional) upper edge. Truncating `edge - 1` instead would
                # undershoot for non-integer edges and make the pad target
                # shorter than the longest sequence in the bucket.
                max_len = int(np.ceil(buckets_max[b_id])) - 1
                x = pad_sequence(x, max_len=max_len, padding_value=padding_value)

            bucket = data_buckets.setdefault(b_id, {'x': [], 'y': []})
            bucket['x'].append(x)
            bucket['y'].append(y)

        self.n_bins = n_bins
        self.buckets = data_buckets
        # Only populated buckets are served: repeated edges can leave a bucket
        # empty, and indexing a missing bucket id would raise KeyError.
        self.bucket_ids = np.array(sorted(data_buckets), dtype=np.int64)
        self._permute()

    def _permute(self):
        """Shuffle the bucket order and the samples inside each bucket."""
        # Shuffle the buckets
        self.b_ids = np.random.permutation(self.bucket_ids)

        # Shuffle bucket contents, applying one permutation to x and y so the
        # pairs stay aligned.
        for key in self.b_ids:
            xbin = np.array(self.buckets[key]['x'])
            ybin = np.array(self.buckets[key]['y'])
            index_array = np.random.permutation(len(xbin))
            self.buckets[key]['x'] = xbin[index_array]
            self.buckets[key]['y'] = ybin[index_array]

    def on_epoch_end(self):
        """Reshuffle buckets and their contents."""
        self._permute()

    def __len__(self):
        """Denotes the number of batches per epoch (one per populated bucket)"""
        return len(self.bucket_ids)

    def __getitem__(self, idx):
        """Return the (x, y) arrays of the idx-th bucket in the current order."""
        key = self.b_ids[idx]
        return self.buckets[key]['x'], self.buckets[key]['y']

In [4]:
class val_generator(keras.utils.Sequence):
    """Keras Sequence that serves evaluation samples one at a time.

    Each batch holds a single sample reshaped to (1, rows, cols) together
    with its label in a (1, 1) int16 array.
    """

    def __init__(self, data, labels):
        """Keep references to the samples and labels; one batch per sample."""
        self.x = data
        self.y = labels
        self.n_bins = data.shape[0]

    def __len__(self):
        """Number of batches per epoch."""
        return self.n_bins

    def __getitem__(self, idx):
        """Return sample `idx` as a batch of size one plus its label."""
        sample = self.x[idx]
        n_rows, n_cols = sample.shape[0], sample.shape[1]
        batch_x = sample.reshape(1, n_rows, n_cols)

        batch_y = np.zeros((1, 1), dtype=np.int16)
        batch_y[0] = self.y[idx]
        return batch_x, batch_y

In [5]:
def conv1d_embedding_model(input_shape, reshape=None, padding='valid'):
    """Build the Conv1D embedding front-end as a Sequential model.

    The model is two Conv1D layers (32 filters each, kernel sizes 8 and 4,
    relu) followed by MaxPooling1D(pool_size=2). When `reshape` is given, a
    Reshape layer is placed in front and receives `input_shape`.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        reshape: optional target shape for a leading Reshape layer.
        padding: padding mode passed to both Conv1D layers and the pooling
            layer ('valid' or 'same'). The default 'valid' is what each of
            these layers uses when no padding is given.

    Returns:
        The (uncompiled) Sequential model.
    """
    # NOTE(review): the recorded run of this notebook failed in the downstream
    # LSTM with "TensorArray has size zero". Presumably sequences shorter than
    # the convolution/pooling windows shrink to zero timesteps under 'valid'
    # padding; padding='same' is offered to avoid that -- confirm on the data.
    model = Sequential()
    if reshape:
        model.add(Reshape(reshape, input_shape=input_shape))
        model.add(Conv1D(filters=32, kernel_size=8, activation='relu', padding=padding))
    else:
        model.add(Conv1D(filters=32, kernel_size=8, activation='relu', padding=padding,
                         input_shape=input_shape))
    model.add(Conv1D(filters=32, kernel_size=4, activation='relu', padding=padding))
    model.add(MaxPooling1D(pool_size=2, padding=padding))
    return model

In [6]:
def lstm_embedding_model(input_shape, reshape=None):
    """Build the LSTM embedding front-end as a Sequential model.

    The model is a single 32-unit LSTM named 'embedding_lstm_1' that returns
    the full output sequence. When `reshape` is given, a Reshape layer is
    placed in front and receives `input_shape`; otherwise the LSTM does.

    Returns:
        The (uncompiled) Sequential model.
    """
    model = Sequential()
    lstm_kwargs = {'return_sequences': True, 'name': 'embedding_lstm_1'}
    if reshape:
        model.add(Reshape(reshape, input_shape=input_shape))
    else:
        # Without a Reshape in front, the LSTM is the input layer.
        lstm_kwargs['input_shape'] = input_shape
    model.add(LSTM(32, **lstm_kwargs))
    return model

In [7]:
def lstm_counting_model(model, train_gen, val_gen, test_gen, n_bins, epochs=1, learning_rate=0.001):
    """Append the counting head to `model`, then train and evaluate it.

    `model` is modified in place: a Masking layer, a 32-unit LSTM, a 30-unit
    relu Dense layer and a 1-unit Dense output are added to it. The model is
    compiled with mean-squared-error loss, Adam and an 'mae' metric, and its
    summary is printed before training.

    Args:
        model: Sequential embedding model to extend.
        train_gen: generator passed to fit_generator for training.
        val_gen: generator used as validation data (all of its batches).
        test_gen: generator passed to evaluate_generator (all of its batches).
        n_bins: used as steps_per_epoch.
        epochs: number of training epochs.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        (H, H_test): the value returned by fit_generator and the value
        returned by evaluate_generator on `test_gen`.
    """
    optimizer = keras.optimizers.Adam(lr=learning_rate)

    # NOTE(review): Masking is added after the embedding layers, so it sees
    # the embedding's outputs rather than the zero-padded inputs -- confirm
    # that padded timesteps really come out as 0.0 there.
    counting_head = [
        Masking(mask_value=0.0),
        LSTM(32, return_sequences=False, name='counting_lstm_1'),
        Dense(30, activation='relu', name='counting_dense_1'),
        Dense(1, name='counting_dense_2'),
    ]
    for layer in counting_head:
        model.add(layer)

    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
    model.summary()

    history = model.fit_generator(train_gen,
                                  steps_per_epoch=n_bins,
                                  epochs=epochs,
                                  validation_data=val_gen,
                                  validation_steps=len(val_gen))
    test_result = model.evaluate_generator(test_gen, steps=len(test_gen))
    return history, test_result

In [8]:
# Choose which precomputed feature set to load: 'stft' or anything else for
# the time-domain data.
data_type = 'stft'

fileloc = ('/scratch/sk7898/pedbike/window_256/downstream_stft'
           if data_type == 'stft'
           else '/scratch/sk7898/pedbike/window_256/downstream_time')

(x_train, x_val, x_test,
 y_train, y_val, y_test,
 seqs_train, seqs_val, seqs_test) = get_data(fileloc)

# Training samples, labels and sequence lengths must line up one-to-one.
assert x_train.shape[0] == y_train.shape[0] == seqs_train.shape[0]

In [9]:
# Training configuration; one bucket per `batch_size` training sequences.
window = 256
batch_size = 64
epochs = 10
n_bins = len(seqs_train) // batch_size

train_gen = train_generator(n_bins, x_train, y_train,
                            seq_lengths=seqs_train,
                            padding=True, padding_value=0.0)
# Validation and test data are served one sample per batch.
val_gen, test_gen = val_generator(x_val, y_val), val_generator(x_test, y_test)

**Conv1D Model with STFT**

In [10]:
# Variable-length sequences (None timesteps) of 512 features each.
n_timesteps, n_features = None, 512
input_shape=(n_timesteps, n_features)

#Embedding model
model = conv1d_embedding_model(input_shape)
#LSTM for counting
# Pass the configured epoch count explicitly; without it the function's
# default of a single epoch is used.
H, H_test = lstm_counting_model(model, train_gen, val_gen, test_gen, n_bins, epochs=epochs)


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, None, 32)          131104    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, None, 32)          4128      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, None, 32)          0         
_________________________________________________________________
masking_1 (Masking)          (None, None, 32)          0         
_________________________________________________________________
counting_lstm_1 (LSTM)       (None, 32)                8320      
_________________________________________________________________
counting_dense_1 (Dense)     (None, 30)                990       
_________________________

UnimplementedError: TensorArray has size zero, but element shape [?,32] is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
	 [[{{node counting_lstm_1/TensorArrayStack/TensorArrayGatherV3}}]]

**LSTM-LSTM Model with STFT**

In [None]:
# Variable-length sequences (None timesteps) of 512 features each.
n_timesteps, n_features = None, 512
input_shape = (n_timesteps, n_features)

model = lstm_embedding_model(input_shape)
# Pass the configured epoch count explicitly; without it the function's
# default of a single epoch is used.
H, H_test = lstm_counting_model(model, train_gen, val_gen, test_gen, n_bins, epochs=epochs)

In [None]:
# Switch to the time-domain data and rebuild the generators from it.
data_type = 'time'
window = 256
batch_size = 64
epochs = 10

fileloc = ('/scratch/sk7898/pedbike/window_256/downstream_stft'
           if data_type == 'stft'
           else '/scratch/sk7898/pedbike/window_256/downstream_time')

(x_train, x_val, x_test,
 y_train, y_val, y_test,
 seqs_train, seqs_val, seqs_test) = get_data(fileloc)

# Training samples, labels and sequence lengths must line up one-to-one.
assert x_train.shape[0] == y_train.shape[0] == seqs_train.shape[0]

# One bucket per `batch_size` training sequences.
n_bins = len(seqs_train) // batch_size

train_gen = train_generator(n_bins, x_train, y_train,
                            seq_lengths=seqs_train,
                            padding=True, padding_value=0.0)
# Validation and test data are served one sample per batch.
val_gen, test_gen = val_generator(x_val, y_val), val_generator(x_test, y_test)

**Conv1D Model with Time-Series**

In [None]:
# Variable-length sequences (None timesteps) of 512 values each; the model
# reshapes its input to (-1, 2) before the first convolution.
n_timesteps, n_features = None, 512
input_shape=(n_timesteps, n_features)

model = conv1d_embedding_model(input_shape, reshape=(-1, 2))
# Pass the configured epoch count explicitly; without it the function's
# default of a single epoch is used.
H, H_test = lstm_counting_model(model, train_gen, val_gen, test_gen, n_bins, epochs=epochs)

**LSTM-LSTM Model with Time-Series**

In [None]:
# Variable-length sequences (None timesteps) of 512 values each; the model
# reshapes its input to (-1, 2) before the embedding LSTM.
n_timesteps, n_features = None, 512
input_shape = (n_timesteps, n_features)

model = lstm_embedding_model(input_shape, reshape=(-1, 2))
# Pass the configured epoch count explicitly; without it the function's
# default of a single epoch is used.
H, H_test = lstm_counting_model(model, train_gen, val_gen, test_gen, n_bins, epochs=epochs)