In [2]:
import os
import timeit
import tensorflow as tf
from tensorflow.python.lib.io import file_io
import numpy as np
from collections import defaultdict
from random import shuffle
from scipy import signal
from scipy.io import wavfile
from scipy import interpolate

In [3]:
# Only emit TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

In [4]:
# Show the label folders in the first training root, then the channel folders of one label.
!ls train
!ls train/one

one  two
ch1  ch2  ch3  ch4  ch5  ch6  ch7  ch8


### Set path variables

In [5]:
# Root folders scanned for training recordings.
train_paths = ['train', 'train_2','train_3']
# Root folder holding one sub-folder per test sample.
test_path = 'test_data'
# Sub-folder names; each one is also used as the class label of the files below it.
label_paths = ['one', 'two']
# Channel sub-folders read for every sample (the listing above shows ch1-ch8; only these four are used).
channel_paths = ['ch1', 'ch2', 'ch3', 'ch4']

### Set global variables

In [6]:
# TFRecord file names; each is appended to BUCKET wherever a record file is read or written.
TFR_TRAIN = 'train.tfrecord'
TFR_VALID = 'valid.tfrecord'
TFR_TEST = 'test.tfrecord'
# Storage prefix. Paths are built with plain string concatenation, so keep the trailing '/'.
BUCKET = 'gs://robolab/time_test/'

# Number of units in the final (logits) layer of the model.
NUM_CLASSES = 2

# Image size used to reshape the flat decoded records inside the model.
IMG_HEIGHT = 80 # four concatenated cropped specs = 4 * 20
IMG_WIDTH = 71

### Get dictionary of filepaths with corresponding labels

In [6]:
# train_list[label] = [ { file_name: [ path_in_ch1, path_in_ch2, ... ] }, ... ]
# One dictionary is appended per (training root, label) pair; inside it, every
# file name maps to the paths of that file in each channel folder.

train_list = defaultdict(list)

for root in train_paths:
    for label in label_paths:

        paths_by_name = defaultdict(list)

        for channel in channel_paths:
            channel_dir = os.path.join(root, label, channel)
            for file_name in os.listdir(channel_dir):
                paths_by_name[file_name].append(os.path.join(channel_dir, file_name))

        train_list[label].append(paths_by_name)

### Log spectrogram function

In [24]:
def log_spectogram(wav,
                   sample_rate=8000,
                   crop=None,
                   nfft=None,
                   nperseg=128,
                   noverlap=None,
                   eps=1e-10,
                   spec_only=True):
    """Compute the natural-log spectrogram of a signal with a Hann window.

    Args:
        wav: samples passed straight to ``scipy.signal.spectrogram``.
        sample_rate: sampling frequency, forwarded as ``fs``.
        crop: accepted for backward compatibility; this function does not use it
            (callers crop the returned array themselves).
        nfft: FFT length forwarded to SciPy. ``None`` keeps SciPy's default.
        nperseg: length of each segment.
        noverlap: number of overlapping samples between segments
            (``None`` keeps SciPy's default).
        eps: small constant added before the logarithm so that zero power
            does not produce ``-inf``.
        spec_only: if True return only the log-spectrogram, otherwise also
            return the frequency and time axes.

    Returns:
        The float32 log-spectrogram, or ``(freqs, times, log_spectrogram)``
        when ``spec_only`` is False.
    """
    freqs, times, spectro = signal.spectrogram(wav,
                                               fs=sample_rate,
                                               window='hann',
                                               noverlap=noverlap,
                                               nperseg=nperseg,
                                               # forward the caller's value; it used to be
                                               # hard-coded to None, silently ignoring `nfft`
                                               nfft=nfft,
                                               detrend=False)

    log_spec = np.log(spectro.astype(np.float32) + eps)

    if spec_only:
        return log_spec
    return freqs, times, log_spec

### Get 4-channel file lists and corresponding labels

In [8]:
# files = [ [ ch1, ch2, ch3, ch4 ], ... ]
# labels = [ label_1, label_2, ... ]

files = []
labels = []
corrupt_files = 0

# A sample is complete only when every configured channel contributed a recording.
expected_channels = len(channel_paths)

for label in train_list:

    for dictionary in train_list[label]:

        for file_name in dictionary:

            # get list of filepaths and filesizes for all channels of this sample
            sizes = []
            paths = []

            for file_path in dictionary[file_name]:
                # Match on the file extension; a plain substring test would also
                # accept any path that merely contains 'wav' somewhere.
                if file_path.lower().endswith('.wav'):
                    # reading the file also confirms that it can be decoded
                    fs, wav = wavfile.read(file_path)
                    sizes.append(len(wav))
                    paths.append(file_path)

            # skip sample list if a channel is missing
            if len(sizes) == expected_channels:
                files.append(paths)
                labels.append(label)
            else:
                corrupt_files += 1

print('Missind data:', corrupt_files)

Missind data: 2


### Shuffle and split data

In [9]:
RATIO = 0.9       # fraction of the samples that goes into the training set
SPLIT_SEED = 42   # fixed seed: the same input order always yields the same split

size = round(RATIO * len(files))

# Draw one seeded permutation of the sample indices and apply it to both lists,
# so every sample keeps its own label after shuffling.
index_shuf = np.random.RandomState(SPLIT_SEED).permutation(len(files)).tolist()

shuff_files = [files[i] for i in index_shuf]
shuff_labels = [labels[i] for i in index_shuf]

train_files = shuff_files[:size]
valid_files = shuff_files[size:]

train_labels = shuff_labels[:size]
valid_labels = shuff_labels[size:]

print('{:<25}{}'.format('Train samples', len(train_files)))
print('{:<25}{}'.format('Train labels', len(train_labels)))

print('{:<25}{}'.format('Validation samples', len(valid_files)))
print('{:<25}{}'.format('Validation labels', len(valid_labels)))

Train samples            4608
Train labels             4608
Validation samples       512
Validation labels        512


### Create train tfrecord

In [7]:
def bytes_feature(value):
    """Wrap one bytes value in a ``tf.train.Feature`` holding a single-element ``BytesList``."""
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)

In [11]:
# One by one, convert each sample (one wav per channel) into a stacked log-spectrogram
# plus an enumerated label, turn both into bytes and write them to the train TFRecord file.

enum_labels = {'one': 0, 'two': 1}

# The context manager closes the writer even when a file fails to load.
with tf.python_io.TFRecordWriter(BUCKET + TFR_TRAIN) as writer:

    # The loop names are deliberately different from the global `files` / `labels`
    # lists so that this cell does not overwrite them.
    for sample_paths, sample_label in zip(train_files, train_labels):

        cropped_specs = []

        for wav_path in sample_paths:

            fs, wav = wavfile.read(wav_path)

            # force every clip to exactly `fs` samples, i.e. one second of audio
            if len(wav) < fs:

                # centre the clip and fill both sides with uniform noise in [0, 10)
                pad_front = (fs - len(wav)) // 2
                pad_end = (fs - len(wav)) - pad_front

                wav = np.concatenate(
                    (np.random.rand(pad_front,) * 10, wav, np.random.rand(pad_end,) * 10), axis=0)

            elif len(wav) > fs:
                wav = wav[:fs]

            # keep the first 20 rows of the log-spectrogram and scale them by 1/255
            cropped_specs.append(log_spectogram(wav)[:20,:] / 255)

        # stack the per-channel crops along the first axis
        log_spec = np.concatenate(cropped_specs, axis=0)
        np_label = np.asarray([enum_labels[sample_label]], dtype=np.int32)

        # tobytes() replaces the deprecated tostring() alias and yields the same bytes
        train_arr_raw = log_spec.tobytes()
        label_label_raw = np_label.tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            'image_raw': bytes_feature(train_arr_raw),
            'label': bytes_feature(label_label_raw)}))

        writer.write(example.SerializeToString())

### Create validation tfrecord

In [12]:
# Convert each validation sample (one wav per channel) into a stacked log-spectrogram
# plus an enumerated label and write both, as bytes, to the validation TFRecord file.

enum_labels = {'one': 0, 'two': 1}

# The context manager closes the writer even when a file fails to load.
with tf.python_io.TFRecordWriter(BUCKET + TFR_VALID) as writer:

    # The loop names are deliberately different from the global `files` / `labels`
    # lists so that this cell does not overwrite them.
    for sample_paths, sample_label in zip(valid_files, valid_labels):

        cropped_specs = []

        for wav_path in sample_paths:

            fs, wav = wavfile.read(wav_path)

            # force every clip to exactly `fs` samples, i.e. one second of audio
            if len(wav) < fs:

                # centre the clip and fill both sides with uniform noise in [0, 10)
                pad_front = (fs - len(wav)) // 2
                pad_end = (fs - len(wav)) - pad_front

                wav = np.concatenate(
                    (np.random.rand(pad_front,) * 10, wav, np.random.rand(pad_end,) * 10), axis=0)

            elif len(wav) > fs:
                wav = wav[:fs]

            # keep the first 20 rows of the log-spectrogram and scale them by 1/255
            spec = log_spectogram(wav)
            cropped_specs.append(spec[:20,:] / 255)

        # stack the per-channel crops along the first axis
        log_spec = np.concatenate(cropped_specs, axis=0)
        np_label = np.asarray([enum_labels[sample_label]], dtype=np.int32)

        # tobytes() replaces the deprecated tostring() alias and yields the same bytes
        train_arr_raw = log_spec.tobytes()
        label_label_raw = np_label.tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            'image_raw': bytes_feature(train_arr_raw),
            'label': bytes_feature(label_label_raw)}))

        writer.write(example.SerializeToString())

### TF-Parsers

In [8]:
def parser(serialized_example):
    """Decode one serialized train/validation example into ``(image, label)`` tensors.

    Both features are stored as raw byte strings: the image is decoded as
    float32 values and the label as int32 values.

    Returns:
        image: flat tensor with static shape ``[IMG_HEIGHT * IMG_WIDTH]``.
        label: tensor with static shape ``[1]``.
    """
    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    # raw bytes -> float32 vector; the static shape is declared for downstream ops
    image = tf.decode_raw(parsed['image_raw'], tf.float32)
    image.set_shape([IMG_HEIGHT * IMG_WIDTH])

    # raw bytes -> single int32 class index
    label = tf.decode_raw(parsed['label'], tf.int32)
    label.set_shape([1])

    return image, label

In [9]:
def test_parser(serialized_example):
    """Decode one serialized test example into its flat float32 image tensor.

    The ``image_id`` feature is parsed as well but is not returned.

    Returns:
        image: flat tensor with static shape ``[IMG_HEIGHT * IMG_WIDTH]``.
    """
    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'image_id': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    # raw bytes -> float32 vector; the static shape is declared for downstream ops
    image = tf.decode_raw(parsed['image_raw'], tf.float32)
    image.set_shape([IMG_HEIGHT * IMG_WIDTH])

    return image

### Input functions

In [10]:
def train_input_fn():
    """Input function for training: one shuffled, batched pass over the train records.

    Returns:
        ``(features, labels)`` tensors for the next batch of ``BATCH_SIZE`` examples.
    """
    dataset = (
        tf.data.TFRecordDataset(BUCKET + TFR_TRAIN)
        .map(parser)          # serialized example -> (image, label)
        .shuffle(1000)        # shuffle buffer of 1000 examples
        .batch(BATCH_SIZE)
        .repeat(1)            # a single pass; epochs are driven by the caller
    )

    features, labels = dataset.make_one_shot_iterator().get_next()

    return features, labels

In [11]:
def eval_input_fn():
    """Input function for evaluating on the training records (no shuffling).

    It reads the same file as ``train_input_fn``.

    Returns:
        ``(features, labels)`` tensors for the next batch of ``BATCH_SIZE`` examples.
    """
    dataset = (
        tf.data.TFRecordDataset(BUCKET + TFR_TRAIN)
        .map(parser)          # serialized example -> (image, label)
        .batch(BATCH_SIZE)
        .repeat(1)            # a single pass over the data
    )

    features, labels = dataset.make_one_shot_iterator().get_next()

    return features, labels

In [12]:
def valid_input_fn():
    """Input function for evaluating on the validation records (no shuffling).

    Returns:
        ``(features, labels)`` tensors for the next batch of ``BATCH_SIZE`` examples.
    """
    dataset = (
        tf.data.TFRecordDataset(BUCKET + TFR_VALID)
        .map(parser)          # serialized example -> (image, label)
        .batch(BATCH_SIZE)
        .repeat(1)            # a single pass over the data
    )

    features, labels = dataset.make_one_shot_iterator().get_next()

    return features, labels

In [13]:
def predict_input_fn():
    """Input function for prediction: one pass over the test records, one example per batch.

    Returns:
        The features tensor for the next example (the test records carry no label).
    """
    dataset = (
        tf.data.TFRecordDataset(BUCKET + TFR_TEST)
        .map(test_parser)     # serialized example -> image
        .batch(1)
        .repeat(1)            # a single pass over the data
    )

    return dataset.make_one_shot_iterator().get_next()

### CNN model

In [14]:
def cnn_model_fn(features, labels, mode):
    """Estimator model function: two conv/max-pool stages followed by a dense head.

    Args:
        features: flat image tensor; reshaped here to ``[-1, IMG_HEIGHT, IMG_WIDTH]``.
        labels: class-index tensor; only read outside PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` built for ``mode``:
        predictions and export outputs for PREDICT, loss and train op for
        TRAIN, loss and accuracy metric for EVAL.
    """
    # dropout is only active while training
    training = mode == tf.estimator.ModeKeys.TRAIN

    # restore the 2-D layout, then append a single channel axis
    images = tf.reshape(features, [-1, IMG_HEIGHT, IMG_WIDTH], name='inputs')
    images = tf.expand_dims(images, axis=3)

    # NOTE: layers are created in the same order as before so that the
    # automatically generated variable names stay unchanged.
    conv_1 = tf.layers.conv2d(
        inputs=images, filters=8, kernel_size=[2, 2], padding='same', activation=tf.nn.relu)
    pool_1 = tf.layers.max_pooling2d(
        inputs=conv_1, pool_size=[2, 2], strides=2, padding='same')

    conv_2 = tf.layers.conv2d(
        inputs=pool_1, filters=32, kernel_size=[2, 2], padding='same', activation=tf.nn.relu)
    pool_2 = tf.layers.max_pooling2d(
        inputs=conv_2, pool_size=[2, 2], strides=2, padding='same')

    flat = tf.layers.flatten(pool_2)

    hidden = tf.layers.dense(inputs=flat, units=256, activation=tf.nn.relu)
    hidden = tf.layers.dropout(inputs=hidden, rate=0.2, training=training)

    logits = tf.layers.dense(inputs=hidden, units=NUM_CLASSES)

    predictions = {
        'classes': tf.argmax(logits, axis=1),
        'probabilities': tf.nn.softmax(logits, axis=1),
    }

    serving_output = tf.estimator.export.ClassificationOutput(
        scores=predictions['probabilities'])

    # PREDICT: no labels are available, so return before building the loss
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={'x': serving_output})

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    accuracy = tf.metrics.accuracy(
        labels=labels,
        predictions=tf.argmax(logits, axis=1),
        name='accu_op')

    # TRAIN: minimise the loss with Adam using the global learning rate LR
    if training:
        train_op = tf.train.AdamOptimizer(learning_rate=LR).minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # EVAL: report the loss together with the accuracy metric
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metric_ops={'accuracy': accuracy})

### Train function

In [15]:
def train_and_evaluate(estimator, epochs=1):
    """Train ``estimator`` epoch by epoch, evaluating after every epoch.

    Each epoch runs one training pass (``train_input_fn``), then evaluates on
    the training records (``eval_input_fn``) and on the validation records
    (``valid_input_fn``), printing accuracy and loss for both.

    Args:
        estimator: object providing ``train(input_fn=...)`` and ``evaluate(input_fn=...)``.
        epochs: number of train/evaluate rounds.

    Returns:
        ``(train_results, valid_results)``: two lists with one evaluation
        result per epoch.
    """
    train_history, valid_history = [], []

    for epoch_number in range(1, epochs + 1):

        estimator.train(input_fn=train_input_fn)

        train_metrics = estimator.evaluate(input_fn=eval_input_fn)
        print('epoch: {} of {}'.format(epoch_number, epochs))
        print('train: acc={:.3f}\tloss={:.3f}'.format(
            train_metrics['accuracy'], train_metrics['loss']))

        valid_metrics = estimator.evaluate(input_fn=valid_input_fn)
        print('valid: acc={:.3f}\tloss={:.3f}'.format(
            valid_metrics['accuracy'], valid_metrics['loss']))

        train_history.append(train_metrics)
        valid_history.append(valid_metrics)

    return train_history, valid_history

### Configure estimator

In [16]:
# Directory (under BUCKET) that the estimator uses as its model_dir.
OUTDIR = BUCKET + 'output'

# Estimator wrapping cnn_model_fn; used below for training, evaluation and prediction.
cnn_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir=OUTDIR)

### Hypers

In [17]:
# Batch size used by the train/eval/valid input functions.
BATCH_SIZE = 10
# Number of train/evaluate rounds passed to train_and_evaluate.
EPOCHS = 30
# Learning rate of the Adam optimizer in cnn_model_fn.
LR = 0.001

### Train

In [19]:
# Wall-clock timer around the whole training run; `start` and `end` are read by the next cell.
start = timeit.default_timer()

# Runs EPOCHS train/evaluate rounds and keeps one evaluation result per round
# for the training data and for the validation data.
train_log, valid_log = train_and_evaluate(cnn_classifier, epochs=EPOCHS)

end = timeit.default_timer()

epoch: 1 of 30
train: acc=0.835	loss=0.405
valid: acc=0.861	loss=0.384
epoch: 2 of 30
train: acc=0.874	loss=0.296
valid: acc=0.883	loss=0.276
epoch: 3 of 30
train: acc=0.877	loss=0.291
valid: acc=0.887	loss=0.279
epoch: 4 of 30
train: acc=0.887	loss=0.270
valid: acc=0.893	loss=0.254
epoch: 5 of 30
train: acc=0.886	loss=0.260
valid: acc=0.887	loss=0.247
epoch: 6 of 30
train: acc=0.889	loss=0.254
valid: acc=0.898	loss=0.239
epoch: 7 of 30
train: acc=0.892	loss=0.258
valid: acc=0.895	loss=0.250
epoch: 8 of 30
train: acc=0.878	loss=0.269
valid: acc=0.875	loss=0.263
epoch: 9 of 30
train: acc=0.891	loss=0.248
valid: acc=0.893	loss=0.245
epoch: 10 of 30
train: acc=0.896	loss=0.244
valid: acc=0.893	loss=0.237
epoch: 11 of 30
train: acc=0.892	loss=0.240
valid: acc=0.889	loss=0.244
epoch: 12 of 30
train: acc=0.898	loss=0.238
valid: acc=0.891	loss=0.242
epoch: 13 of 30
train: acc=0.860	loss=0.302
valid: acc=0.861	loss=0.301
epoch: 14 of 30
train: acc=0.905	loss=0.222
valid: acc=0.902	loss=0.232
e

In [23]:
# Elapsed time between `start` and `end`, rounded to whole minutes.
print('Time spent:', round((end - start) / 60) , 'min')

Current machine: 8 CPUs, 30 Gb memory, no GPUs
Time spent: 14 min


### Get test data

In [21]:
# test_files[folder] = sorted list of the file paths found inside test_path/folder
test_files = defaultdict(list)

for folder in os.listdir(test_path):
    folder_dir = os.path.join(test_path, folder)
    # every path shares the same folder prefix, so sorting the full paths
    # orders them by file name
    test_files[folder].extend(
        sorted(os.path.join(folder_dir, file_name) for file_name in os.listdir(folder_dir)))

In [22]:
# Inspect the sorted paths collected for one test folder.
test_files['6WH2MEKA']

['test_data/6WH2MEKA/ch1.wav',
 'test_data/6WH2MEKA/ch2.wav',
 'test_data/6WH2MEKA/ch3.wav',
 'test_data/6WH2MEKA/ch4.wav']

### Create test tfrecord file

In [25]:
# Folder names in the order in which their examples are written; this order is
# later used to pair each prediction with its folder.
secret_labels = []

# The context manager closes the writer even when a file fails to load.
with tf.python_io.TFRecordWriter(BUCKET + TFR_TEST) as writer:

    for folder in test_files:

        cropped_specs = []

        for file in test_files[folder]:

            fs, wav = wavfile.read(file)

            # force every clip to exactly `fs` samples, i.e. one second of audio
            if len(wav) < fs:

                # centre the clip and fill both sides with uniform noise in [0, 10)
                pad_front = (fs - len(wav)) // 2
                pad_end = (fs - len(wav)) - pad_front

                wav = np.concatenate(
                    (np.random.rand(pad_front,) * 10, wav, np.random.rand(pad_end,) * 10), axis=0)

            elif len(wav) > fs:
                wav = wav[:fs]

            # keep the first 20 rows of the log-spectrogram and scale them by 1/255
            cropped_specs.append(log_spectogram(wav)[:20,:] / 255)

        # stack the per-file crops along the first axis
        log_spec = np.concatenate(cropped_specs, axis=0)
        secret_labels.append(folder)

        secret_label_raw = bytes(folder, 'utf-8')
        # tobytes() replaces the deprecated tostring() alias and yields the same bytes
        test_arr_raw = log_spec.tobytes()

        example = tf.train.Example(features=tf.train.Features(
            feature={'image_raw': bytes_feature(test_arr_raw),
                     'image_id': bytes_feature(secret_label_raw)}))

        writer.write(example.SerializeToString())

### Predict

In [26]:
start = timeit.default_timer()

# predict() returns a generator of per-example prediction dictionaries;
# list() drains it without a manual `next(...) == None` sentinel loop.
predict_generator = cnn_classifier.predict(input_fn=predict_input_fn)
predict_dictlist = list(predict_generator)

# predicted class index of every test example, in record order
preds = [prediction['classes'] for prediction in predict_dictlist]

end = timeit.default_timer()

print('Time spent:', round((end - start) / 60) , 'min')

Time spent: 0 min


### Write submission file to gs bucket

In [27]:
sub_filename = 'submission.csv'
submission_filepath = os.path.join(BUCKET, 'subs', sub_filename)

# Predicted class indices are 0-based (enum_labels maps 'one' -> 0, 'two' -> 1);
# the submission stores them shifted by one. The shifted values get their own
# name so that re-running this cell does not shift `preds` a second time.
submission_labels = [pred + 1 for pred in preds]

with file_io.FileIO(submission_filepath, mode='w') as fout:
    fout.write('filename,label\n')
    # secret_labels and the predictions share the order of the test records
    for secret_label, submission_label in zip(secret_labels, submission_labels):
        fout.write('{},{}\n'.format(secret_label, submission_label))

#### Copy submission folder from gs bucket to local dir if needed

In [28]:
!gsutil cp -r gs://robolab/subs .

Copying gs://robolab/subs/submission.csv...
/ [1 files][ 10.8 KiB/ 10.8 KiB]                                                
Operation completed over 1 objects/10.8 KiB.                                     
