In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import librosa
import numpy as np
import tensorflow as tf

In [3]:
from phaunos_ml.utils.feature_utils import AudioSegmentExtractor
from phaunos_ml.utils.audio_utils import audiofile2tfrecord
from phaunos_ml.utils.tf_serialization_utils import serialized2example, serialized2data

In [4]:
%matplotlib inline
import matplotlib.pyplot as plt

In [5]:
# Root directory of the dataset; the audio path and the TFRecord folder used in
# this notebook are both resolved against it.
ROOT_PATH = './data/birdid_dataset/'

# Length of one example and hop between consecutive examples, both handed to
# AudioSegmentExtractor.
# NOTE(review): units look like seconds (an example holds 44100 samples at
# 22050 Hz in the outputs of this notebook) -- confirm against AudioSegmentExtractor.
EXAMPLE_DURATION = 2
EXAMPLE_HOP_DURATION = 1

In [6]:
# Path of the recording relative to ROOT_PATH; the relative form is kept because
# it is also passed as-is to audiofile2tfrecord.
audio_relpath = 'birds/audio/XC370891.M.wav'
# Full path of the recording, used to load it directly with librosa.
audio_filename = os.path.join(ROOT_PATH, audio_relpath)

In [7]:
# Load the reference signal straight from the audio file. No `sr` argument is
# passed, so librosa decides the sample rate of the returned signal; the returned
# `sr` is then reused to configure the feature extractor.
audio, sr = librosa.load(audio_filename)
# Quick sanity check of what was loaded.
print(f'Audio shape: {audio.shape}')
print(f'Sample rate: {sr}')

Audio shape: (5359104,)
Sample rate: 22050


In [8]:
# Build the segment extractor from the sample rate returned by librosa and the
# example duration / hop configured at the top of the notebook.
featex = AudioSegmentExtractor(
    sr,
    example_duration=EXAMPLE_DURATION,
    example_hop_duration=EXAMPLE_HOP_DURATION,
)
# Echo the extractor so its configuration is visible in the cell output.
featex

phaunos_ml.utils.feature_utils.AudioSegmentExtractor. Config: {'sr': 22050, 'example_duration': 2, 'example_hop_duration': 1, 'dtype': <class 'numpy.float32'>}

In [9]:
# Write TFRecord
# Run audiofile2tfrecord on the recording with `featex` as the extractor.
# NOTE(review): the third argument is presumably the output directory -- confirm
# against audiofile2tfrecord.
tfrecord_out_dir = os.path.join(ROOT_PATH, 'features')
audiofile2tfrecord(ROOT_PATH, audio_relpath, tfrecord_out_dir, featex)

In [16]:
# Inspect TFRecord
# The record is read back from the 'features/positive' folder under ROOT_PATH,
# at the audio file's relative path with its extension swapped for '.tf'.
# NOTE(review): confirm that 'positive' is the subfolder audiofile2tfrecord
# writes to for this file.
# splitext swaps only the trailing extension; str.replace would also rewrite a
# '.wav' occurring elsewhere in the path.
tfrecord_relpath = os.path.splitext(audio_relpath)[0] + '.tf'
tfrecord_path = os.path.join(ROOT_PATH, 'features/positive', tfrecord_relpath)

# Reading and parsing are built in a single expression so that `dataset` is never
# rebound from itself: the cell can be re-run without mapping already-parsed
# elements a second time.
# NOTE(review): `[0]` is presumably the list of class ids used to build the
# label vector -- confirm against serialized2data.
dataset = tf.data.TFRecordDataset([tfrecord_path]).map(
    lambda serialized: serialized2data(serialized, [0])
)

In [18]:
# Display the dataset's repr to check the shapes and dtypes of its elements.
dataset

<MapDataset shapes: ((None, None), (1,), (), (2,)), types: (tf.float32, tf.float32, tf.string, tf.float32)>

In [19]:
# Materialise every element of the dataset in a list so examples can be indexed.
data = list(dataset)

In [20]:
# Look at the first element read from the dataset.
data[0]

(<tf.Tensor: id=1281, shape=(1, 44100), dtype=float32, numpy=
 array([[ 0.        ,  0.        ,  0.        , ..., -0.00286865,
          0.00073242,  0.0039978 ]], dtype=float32)>,
 <tf.Tensor: id=1282, shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>,
 <tf.Tensor: id=1283, shape=(), dtype=string, numpy=b'birds/audio/XC370891.M.tf'>,
 <tf.Tensor: id=1284, shape=(2,), dtype=float32, numpy=array([0.       , 1.9999547], dtype=float32)>)

In [21]:
# Round-trip check: the samples stored in each TFRecord example must equal the
# matching window of the audio loaded directly with librosa.
# The window geometry is derived from the notebook configuration instead of
# repeating literal sample counts, so the check follows any change to `sr` or to
# the duration constants.
# NOTE(review): assumes AudioSegmentExtractor uses int(sr * duration) samples
# per example / hop -- confirm if non-integer durations are ever used.
hop_length = int(sr * EXAMPLE_HOP_DURATION)
segment_length = int(sr * EXAMPLE_DURATION)

# Without this guard an empty record would make the loop below pass vacuously.
assert len(data) > 0, 'No examples were read from the TFRecord'

for i, example in enumerate(data):
    start = i * hop_length
    expected = audio[start:start + segment_length]
    # example[0] is indexed as (row, sample). Only as many samples as the source
    # audio provides for this window are compared, since the slice of `audio`
    # gets shorter than a full segment at the end of the file.
    stored = example[0].numpy()[0, :len(expected)]
    # Raises AssertionError with a report of the mismatching values, unlike a
    # bare `assert np.array_equal(...)`.
    np.testing.assert_array_equal(
        stored,
        expected,
        err_msg=f'Example {i} differs from audio[{start}:{start + segment_length}]',
    )