In [1]:
import tensorflow as tf
import os
import numpy as np
import scipy.io.wavfile as wavfile
import scipy.signal as sps
import IPython.display as ipd
import utility
from IPython.display import clear_output

In [2]:
print(tf.__version__)

2.1.0


In [4]:
datapath_root = 'FSDKaggle2018'

test_path = os.path.join(datapath_root, 'audio_test')
test_files = os.listdir(test_path)
N_test = len(test_files)

train_path = os.path.join(datapath_root, 'audio_train')
train_files = os.listdir(train_path)
N_train = len(train_files)

In [5]:
ipd.Audio(os.path.join(test_path, test_files[0])) # load a local WAV file

In [6]:
ipd.Audio(os.path.join(train_path, train_files[0])) # load a local WAV file

In [11]:
# Based on: http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

fs = 16e3
tfrecords_placement = 'fsd-tfrecs'
filelists = [test_files, train_files, ]
setnames =  ['test', 'train']
datapaths =  [test_path, train_path]
for filelist, setname, datapath in zip(filelists, setnames, datapaths):
    tfrecords_filename = f'{setname}.tfrec'
    tfrecords_path = os.path.join(tfrecords_placement, tfrecords_filename)

    writer = tf.io.TFRecordWriter(tfrecords_path)
    for i, f in enumerate(filelist):
        fs_raw, x = wavfile.read(os.path.join(datapath, f))
        print(f'Processing: \t {f} \t {i}/{len(filelist)}')
        id_string = f.split('.')[0]
        if len(x.shape) > 1:
            x = np.mean(x, axis=1, dtype=np.int16)
         
        n_samples = len(x)
        T = n_samples/fs_raw
        
        x = utility.pcm2float(x)
        x = sps.resample(x, int(fs*T))
        x = utility.float2pcm(x)
        
        example = tf.train.Example(features=tf.train.Features(feature={
            'sound_raw': _bytes_feature(x.tostring()),
            'id': _bytes_feature(id_string.encode()),
            }))
        writer.write(example.SerializeToString())
        #break
    writer.close()

Processing: 	 dc16466b.wav 	 0/1600
Processing: 	 32393924.wav 	 1/1600
Processing: 	 38ae005d.wav 	 2/1600
Processing: 	 18bc6c72.wav 	 3/1600
Processing: 	 aeb8716a.wav 	 4/1600
Processing: 	 65ec2612.wav 	 5/1600
Processing: 	 84f1e21c.wav 	 6/1600
Processing: 	 872be6c6.wav 	 7/1600
Processing: 	 191087dd.wav 	 8/1600
Processing: 	 75c43c78.wav 	 9/1600
Processing: 	 85f729aa.wav 	 10/1600
Processing: 	 e9b6db58.wav 	 11/1600
Processing: 	 99c9915c.wav 	 12/1600
Processing: 	 7b2f865c.wav 	 13/1600
Processing: 	 d1ee15b3.wav 	 14/1600
Processing: 	 f3dc4e70.wav 	 15/1600
Processing: 	 52028c0d.wav 	 16/1600
Processing: 	 c7b3c09e.wav 	 17/1600
Processing: 	 c9e0106a.wav 	 18/1600
Processing: 	 023eab1f.wav 	 19/1600
Processing: 	 a0c9407e.wav 	 20/1600
Processing: 	 9e4016e8.wav 	 21/1600
Processing: 	 42557556.wav 	 22/1600
Processing: 	 0cb0d029.wav 	 23/1600
Processing: 	 058e63ea.wav 	 24/1600
Processing: 	 870d8912.wav 	 25/1600
Processing: 	 5924a0f4.wav 	 26/1600
Processing:

In [8]:
fs_raw, x = wavfile.read(os.path.join(datapath, f))
ipd.Audio(x, rate=fs_raw)

In [12]:
feature_description = {
    'sound_raw': tf.io.FixedLenFeature([], tf.string, default_value=''),
    'id': tf.io.FixedLenFeature([], tf.string, default_value=''),
}

def _parse_function(example_proto):
  # Parse the input tf.Example proto using the dictionary above.
  return tf.io.parse_single_example(example_proto, feature_description)

filenames = os.path.join(tfrecords_placement, "train.tfrec")
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.shuffle(buffer_size=100)
#dataset = dataset.repeat()  # Repeat the input indefinitely.
dataset = dataset.map(_parse_function)
dataset = dataset.batch(1)

for i, b in enumerate(dataset):
    print('***')
    print(b['id'])
    for sr in b['sound_raw']:
        sound = np.frombuffer(sr.numpy(),  dtype=np.int16)
        display(ipd.Audio(sound, rate=fs))
    print(i)
    if i > 5:
        break



***
tf.Tensor([b'ed3bb289'], shape=(1,), dtype=string)


0
***
tf.Tensor([b'd433497c'], shape=(1,), dtype=string)


1
***
tf.Tensor([b'26f2aaee'], shape=(1,), dtype=string)


2
***
tf.Tensor([b'c3980cfc'], shape=(1,), dtype=string)


3
***
tf.Tensor([b'2005b905'], shape=(1,), dtype=string)


4
***
tf.Tensor([b'df75d2e7'], shape=(1,), dtype=string)


5
***
tf.Tensor([b'c8ada51c'], shape=(1,), dtype=string)


6
