In [None]:
import tensorflow as tf
import numpy as np
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Reshape, Permute
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import ELU
from keras.layers.recurrent import GRU
from keras.utils.data_utils import get_file

In [None]:
# Architecture of CRNN model

# Pre-setting: every layer below is laid out channels-first ('th'), so switch
# the backend over when it is currently configured differently.
if K.image_dim_ordering() != 'th':
    K.set_image_dim_ordering('th')
    print('K.image_dim_ordering has been changed to {}'.format(K.image_dim_ordering()))
else:
    print('K.image_dim_ordering is th')

# Per-sample input shape (batch axis excluded) and the axis indices of a full
# batch tensor; the same values apply whichever branch ran above.
input_shape = (1, 96, 1366)
channel_axis, freq_axis, time_axis = 1, 2, 3


def _conv_stage(tensor, n_filters, pool, conv_name, bn_name, pool_name, dropout_name):
    """One CNN stage: 3x3 'same' convolution -> batch norm over channels -> ELU
    -> max pooling (stride equal to the pool size) -> dropout of 0.1."""
    tensor = Convolution2D(n_filters, 3, 3, border_mode='same', name=conv_name)(tensor)
    tensor = BatchNormalization(axis=channel_axis, mode=0, name=bn_name)(tensor)
    tensor = ELU()(tensor)
    tensor = MaxPooling2D(pool_size=pool, strides=pool, name=pool_name)(tensor)
    return Dropout(0.1, name=dropout_name)(tensor)


# Input: zero-pad the last axis (1366 -> 1440), then normalise along the frequency axis
melgram_inputs = Input(shape=input_shape)
x = ZeroPadding2D(padding=(0, 37))(melgram_inputs)
print('x.shape={}'.format(x.shape)) # (?, 1, 96, 1440)
x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)
print('x.shape={}'.format(x.shape)) # (?, 1, 96, 1440)

# Convolution layer 1
x = _conv_stage(x, 64, (2, 2), 'conv1', 'bn1', 'pool1', 'dropout1')
print('x.shape={}'.format(x.shape)) # (?, 64, 48, 720)

# Convolution layer 2
x = _conv_stage(x, 128, (3, 3), 'conv2', 'bn2', 'pool2', 'dropout2')
print('x.shape={}'.format(x.shape)) # (?, 128, 16, 240)

# Convolution layer 3
x = _conv_stage(x, 128, (4, 4), 'conv3', 'bn3', 'pool3', 'dropout3')
print('x.shape={}'.format(x.shape)) # (?, 128, 4, 60)

# Convolution layer 4
x = _conv_stage(x, 128, (4, 4), 'conv4', 'bn4', 'pool4', 'dropout4')
print('x.shape={}'.format(x.shape)) # (?, 128, 1, 15)

# Reshaping the output of CNN for the input of RNN:
# move the last axis to the front so it becomes the sequence axis,
# then drop the singleton axis left over from pooling.
x = Permute((3, 1, 2))(x) # CNN output per sample is 128x1x15
print('x.shape={}'.format(x.shape)) # (?, 15, 128, 1)
x = Reshape((15, 128))(x)
print('x.shape={}'.format(x.shape)) # (?, 15, 128)

# 2-layers RNN: the first GRU emits the whole sequence, the second only its final state
x = GRU(output_dim=32, return_sequences=True, name='gru1')(x)
print('x.shape={}'.format(x.shape)) # =(?, 15, 32)
x = GRU(32, return_sequences=False, name='gru2')(x)
x = Dropout(0.3)(x)
print('x.shape={}'.format(x.shape)) # =(?, 32)

# Fully-connected layer (output layer): 50 independent sigmoid outputs
predictions = Dense(50, activation='sigmoid', name='output')(x)
print('predictions.shape={}'.format(predictions.shape)) # =(?, 50)

In [None]:
# AUC-ROC as the evaluation metric

def AUC_ROC(y_true, y_pred):
    """Approximate the area under the ROC curve using backend tensor ops.

    TPR and FPR are evaluated at 100 evenly spaced thresholds in [0, 1].
    Each TPR is weighted by the drop in FPR since the previous threshold
    (an FPR of 1 is assumed before the first threshold), and the weighted
    values are summed into a single scalar tensor.
    """
    cutoffs = np.linspace(0, 1, 100)
    tpr_curve = tf.stack([binary_TPR(y_true, y_pred, c) for c in cutoffs], axis=0)
    fpr_curve = tf.stack([binary_FPR(y_true, y_pred, c) for c in cutoffs], axis=0)
    # Prepend FPR = 1 so every threshold has a predecessor to difference against.
    fpr_curve = tf.concat([tf.ones((1,)), fpr_curve], axis=0)
    # Width of each ROC bin along the FPR axis (previous FPR minus current FPR).
    fpr_drops = fpr_curve[:-1] - fpr_curve[1:]
    return K.sum(tpr_curve * fpr_drops, axis=0)

def binary_FPR(y_true, y_pred, threshold=K.variable(value=0.5)):
    """False positive rate of `y_pred` after binarising it at `threshold`.

    Args:
        y_true: ground-truth tensor; the arithmetic below treats 1 as positive
            and 0 as negative.
        y_pred: tensor of predicted scores; elements >= `threshold` count as
            predicted positive.
        threshold: cut-off applied to `y_pred`. The default is a backend
            variable created once, when this `def` statement executes.

    Returns:
        Scalar tensor FP / N, with both counts summed over every element of
        the inputs. The result is 0 (instead of NaN) when there are no
        negatives at all.
    """
    predicted_positive = K.cast(y_pred >= threshold, 'float32')
    N = K.sum(1 - y_true) # N = Condition negative = FP+TN
    # Predicted positives minus true positives leaves the false positives.
    FP = K.sum(predicted_positive - predicted_positive * y_true)
    # K.epsilon() keeps the ratio finite when N is 0; a NaN from one batch
    # would otherwise propagate into the averaged metric.
    return FP / (N + K.epsilon())

def binary_TPR(y_true, y_pred, threshold=K.variable(value=0.5)):
    """True positive rate (recall) of `y_pred` after binarising it at `threshold`.

    Args:
        y_true: ground-truth tensor; the arithmetic below treats 1 as positive
            and 0 as negative.
        y_pred: tensor of predicted scores; elements >= `threshold` count as
            predicted positive.
        threshold: cut-off applied to `y_pred`. The default is a backend
            variable created once, when this `def` statement executes.

    Returns:
        Scalar tensor TP / P, with both counts summed over every element of
        the inputs. The result is 0 (instead of NaN) when there are no
        positives at all.
    """
    predicted_positive = K.cast(y_pred >= threshold, 'float32')
    P = K.sum(y_true) # P = Condition positive = TP+FN
    TP = K.sum(predicted_positive * y_true)
    # K.epsilon() keeps the ratio finite when P is 0; a NaN from one batch
    # would otherwise propagate into the averaged metric.
    return TP / (P + K.epsilon())

In [None]:
# Build the model

# Tie the input tensor to the output tensor, then compile with Adam and
# binary cross-entropy, tracking accuracy and the custom AUC_ROC metric.
model = Model(input=melgram_inputs, output=predictions)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', AUC_ROC])

In [None]:
# Load the dataset

DATA_DIR = 'D:/sets'  # directory holding the <split>_x.npy / <split>_y.npy files


def load_split(split, data_dir=DATA_DIR):
    """Load one dataset split and print the shapes of its arrays.

    Args:
        split: split name used as the file prefix ('train', 'valid' or 'test').
        data_dir: directory containing `<split>_x.npy` and `<split>_y.npy`.

    Returns:
        Tuple `(features, labels)` of the two loaded arrays.
    """
    features = np.load('{}/{}_x.npy'.format(data_dir, split))
    labels = np.load('{}/{}_y.npy'.format(data_dir, split))
    # The label is derived from the split name so it cannot drift out of sync
    # with the arrays being reported (the test split used to be printed as "valid").
    print('X_{0}.shape={1}, y_{0}.shape={2}'.format(split, features.shape, labels.shape))
    return features, labels


X_train, y_train = load_split('train')
X_valid, y_valid = load_split('valid')
X_test, y_test = load_split('test')

In [None]:
# Initialise all TensorFlow variables inside the session Keras itself uses.
# Calling `.run()` directly on the initializer op requires a default session
# to be registered, and nothing visible in this notebook registers one.
# K.get_session() returns the default session if one exists, otherwise Keras' own.
K.get_session().run(tf.global_variables_initializer())

In [None]:
# Train the model

# 40 epochs in mini-batches of 16, with the validation split passed for monitoring.
model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    nb_epoch=40,
    batch_size=16,
    verbose=1)

In [None]:
# Test the model

# evaluate() yields the loss first, then the metrics in the order given to
# compile(), so the labels below are paired with the results positionally.
test_result = model.evaluate(X_test, y_test, batch_size=16)
report_labels = ('Loss in test set', 'Accuracy in test set', 'AUC-ROC in test set')
for report_label, report_value in zip(report_labels, test_result):
    print(report_label, report_value)