# Introduction

I wanted to take some of the exploratory data analysis (EDA) done in the TensorFlow **[Simple audio recognition: Recognizing keywords](https://www.tensorflow.org/tutorials/audio/simple_audio#top_of_page)** tutorial and apply it to the **[TFRecords](https://www.kaggle.com/ryanholbrook/tfrecords-basics)** that are available in this competition.

Please feel free to copy and edit this notebook and use it in your own analyses. And be sure to tag me if you extend this notebook and build your model using **[Tensor Processing Units (TPUs)](https://www.kaggle.com/docs/tpu)** - I'd love to see what you create!

# Set up environment

In [None]:
# set up environment
import math, re, os

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt  

from IPython import display
from kaggle_datasets import KaggleDatasets

In [None]:
# Sentinel handed to the tf.data calls below (parallel reads/maps, prefetch)
# so TensorFlow picks those values itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# GCS location reported by kaggle_datasets for the attached dataset.
GCS_PATH = KaggleDatasets().get_gcs_path()
BATCH_SIZE = 16

# Shard lists: every *.tfrec file under the train/ and test/ folders.
TRAINING_FILENAMES = tf.io.gfile.glob(f'{GCS_PATH}/tfrecords/train/*.tfrec')
TEST_FILENAMES = tf.io.gfile.glob(f'{GCS_PATH}/tfrecords/test/*.tfrec')

# Dataset functions

In [None]:
def decode_audio(audio_binary):
    """Decode WAV-encoded bytes into a waveform tensor.

    The sample rate returned by ``tf.audio.decode_wav`` is discarded. The
    last axis of the decoded audio is squeezed away, which only succeeds
    when that axis has size 1.
    """
    samples, _sample_rate = tf.audio.decode_wav(audio_binary)
    return tf.squeeze(samples, axis=-1)

def string_split_semicolon(column):
    """Split ``column`` on ``';'`` using ``tf.strings.split`` and return the result."""
    return tf.strings.split(column, sep=';')

def string_split_comma(column):
    """Split ``column`` on ``','`` using ``tf.strings.split`` and return the result."""
    return tf.strings.split(column, sep=',')

def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into ``(audio, species_id)``.

    The record must carry two scalar string features, ``audio_wav`` and
    ``label_info``. The audio is decoded with ``decode_audio``. The label
    string is split on ``';'``, stripped of double quotes, split again on
    ``','``, and the element at index ``[0, 0]`` is returned as the species
    id (still a string tensor).
    """
    feature_spec = {
        "audio_wav": tf.io.FixedLenFeature([], tf.string),
        "label_info": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)

    # ';' separates groups; ',' separates the fields inside each group.
    groups = string_split_semicolon(parsed['label_info'])
    unquoted_groups = tf.strings.regex_replace(groups, '"', "")
    fields = string_split_comma(unquoted_groups)

    # First field of the first group.
    species_id = tf.gather_nd(fields, [0, 0])
    return decode_audio(parsed['audio_wav']), species_id

def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled example into ``(audio, recording_id)``.

    The record must carry two scalar string features, ``recording_id`` and
    ``audio_wav``. The audio is decoded with ``decode_audio``; the recording
    id is returned as stored.
    """
    feature_spec = {
        "recording_id": tf.io.FixedLenFeature([], tf.string),
        "audio_wav": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_audio(parsed['audio_wav']), parsed['recording_id']

def load_dataset(filenames, labeled=True, ordered=False):
    """Build a parsed ``tf.data`` dataset from TFRecord files.

    Args:
        filenames: TFRecord paths passed to ``tf.data.TFRecordDataset``.
        labeled: when true each record is parsed with
            ``read_labeled_tfrecord``, otherwise with
            ``read_unlabeled_tfrecord``.
        ordered: when false, ``experimental_deterministic`` is switched off
            on the dataset options; when true the options are left at their
            defaults.

    Returns:
        The mapped (unbatched) dataset.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    parse_record = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    return records.with_options(options).map(parse_record, num_parallel_calls=AUTOTUNE)

def get_training_dataset():
    """Return the labeled training pipeline: repeat, shuffle, batch, prefetch.

    The dataset repeats indefinitely, is shuffled with a 128-element buffer,
    batched into groups of ``BATCH_SIZE`` and prefetched with ``AUTOTUNE``.
    """
    return (
        load_dataset(TRAINING_FILENAMES, labeled=True)
        .repeat()
        .shuffle(128)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

def count_data_items(filenames):
    """Sum the item counts embedded in TFRecord shard filenames.

    Each filename is searched for the first ``-<digits>.`` occurrence and the
    digits are read as that shard's item count.

    Args:
        filenames: iterable of filename/path strings.

    Returns:
        The integer total as ``np.int64``; ``0`` when ``filenames`` is empty.

    Raises:
        ValueError: if a filename has no ``-<digits>.`` count to read.
    """
    # Compile once instead of once per filename.
    count_pattern = re.compile(r"-([0-9]*)\.")

    counts = []
    for filename in filenames:
        match = count_pattern.search(filename)
        # Fail with a message that names the offending file, rather than an
        # AttributeError on None or int('') on an empty digit run.
        if match is None or not match.group(1):
            raise ValueError(
                "no '-<count>.' item count found in filename: {!r}".format(filename)
            )
        counts.append(int(match.group(1)))

    # An explicit integer dtype keeps the empty-list total at 0, not float 0.0.
    return np.sum(counts, dtype=np.int64)

In [None]:
# Totals are the sums of the counts embedded in the shard filenames.
NUM_TRAINING_FILES = count_data_items(TRAINING_FILENAMES)
NUM_TEST_FILES = count_data_items(TEST_FILENAMES)

dataset_summary = 'Dataset: {} training files, {} unlabeled test files'.format(
    NUM_TRAINING_FILES,
    NUM_TEST_FILES,
)
print(dataset_summary)

# Data structures

In [None]:
# Shapes of the first three training batches.
for audio_batch, species_batch in get_training_dataset().take(3):
    print(audio_batch.numpy().shape, species_batch.numpy().shape)

# Raw contents of a single batch from a freshly built pipeline.
for audio_batch, species_batch in get_training_dataset().take(1):
    print("\naudio examples:", audio_batch.numpy())
    print("species_id examples:", species_batch.numpy())

# Graphing waveforms

In [None]:
# Unbatched datasets used by the plotting cells that follow.
training_ds = load_dataset(filenames=TRAINING_FILENAMES, labeled=True)
test_ds = load_dataset(filenames=TEST_FILENAMES, labeled=False)

In [None]:
# training dataset: a 3x3 grid of waveforms titled with their species id

rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))

# axes.flat walks the grid row by row, so sample i lands at
# row i // cols, column i % cols.
for ax, (audio, species_id) in zip(axes.flat, training_ds.take(n)):
    ax.plot(audio.numpy())
    ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
    ax.set_title(species_id.numpy().decode('utf-8'))

plt.show()

In [None]:
# test dataset: a 3x3 grid of waveforms titled with their recording id

rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))

# axes.flat walks the grid row by row, so sample i lands at
# row i // cols, column i % cols.
for ax, (audio, recording_id) in zip(axes.flat, test_ds.take(n)):
    ax.plot(audio.numpy())
    ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
    ax.set_title(recording_id.numpy().decode('utf-8'))

plt.show()

# Waveform shape, spectrogram shape, and audio playback

In [None]:
def get_spectrogram(audio):
    """Return the magnitude of the short-time Fourier transform of ``audio``.

    The input is cast to float32 and transformed with ``tf.signal.stft``
    using ``frame_length=255`` and ``frame_step=128``; the absolute value of
    the complex result is returned.
    """
    samples = tf.cast(audio, tf.float32)
    stft = tf.signal.stft(samples, frame_length=255, frame_step=128)
    return tf.abs(stft)

In [None]:
# training data

# Pull one example; `waveform`, `spectrogram` and `label` stay bound after
# the loop and are used by the prints below.
for waveform, label_tensor in training_ds.take(1):
    spectrogram = get_spectrogram(waveform)
    label = label_tensor.numpy().decode('utf-8')

print('Label:', label)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
# NOTE(review): the playback rate is hard-coded to 16000, while decode_audio
# discards the WAV file's own sample rate -- confirm the two agree.
display.display(display.Audio(waveform, rate=16000))

In [None]:
# test data

# Pull one example; `waveform`, `spectrogram` and `recording_id` stay bound
# after the loop and are used by the prints below.
for waveform, id_tensor in test_ds.take(1):
    spectrogram = get_spectrogram(waveform)
    recording_id = id_tensor.numpy().decode('utf-8')

print('Recording ID:', recording_id)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
# NOTE(review): the playback rate is hard-coded to 16000, while decode_audio
# discards the WAV file's own sample rate -- confirm the two agree.
display.display(display.Audio(waveform, rate=16000))

# Visualizing spectrograms

In [None]:
def plot_spectrogram(spectrogram, ax, frame_step=128):
    """Draw a log-magnitude spectrogram on ``ax`` with ``pcolormesh``.

    Args:
        spectrogram: 2-D NumPy array of magnitudes, as produced by
            ``get_spectrogram(...).numpy()``. It is transposed before
            plotting, so its first axis runs along x.
        ax: object exposing ``pcolormesh(X, Y, C)`` (a Matplotlib ``Axes``).
        frame_step: spacing, in samples, between successive x positions.
            The default of 128 matches the ``frame_step`` used by
            ``get_spectrogram``.
    """
    # The epsilon keeps all-zero bins from producing log(0) = -inf.
    log_spec = np.log(spectrogram.T + np.finfo(float).eps)
    height, width = log_spec.shape
    # Take the x coordinates from the spectrogram itself instead of assuming
    # a 2,880,000-sample clip, so X always has one entry per plotted column.
    X = np.arange(width) * frame_step
    Y = range(height)
    ax.pcolormesh(X, Y, log_spec)

In [None]:
# training data

# Fetch a training example here instead of reading the notebook-global
# `waveform` / `spectrogram`: the "test data" cell rebinds both to a test
# clip, so a top-to-bottom run would otherwise plot test data in this cell.
for train_waveform, _train_label in training_ds.take(1):
    train_spectrogram = get_spectrogram(train_waveform)

fig, axes = plt.subplots(2, figsize=(12, 8))
timescale = np.arange(train_waveform.shape[0])
axes[0].plot(timescale, train_waveform.numpy())
axes[0].set_title('Waveform')
# Span the clip's own length rather than a hard-coded 2,880,000 samples.
axes[0].set_xlim([0, train_waveform.shape[0]])
plot_spectrogram(train_spectrogram.numpy(), axes[1])
axes[1].set_title('Spectrogram')
plt.show()

In [None]:
def get_spectrogram_and_label_id(audio, label):
    """Map an ``(audio, label)`` pair to ``(spectrogram, label)``.

    The spectrogram comes from ``get_spectrogram``; the label is passed
    through unchanged.
    """
    return get_spectrogram(audio), label

# Training examples with the audio replaced by its spectrogram.
spectrogram_ds = training_ds.map(get_spectrogram_and_label_id)

In [None]:
# 3x3 grid of training spectrograms titled with their label
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 10))
# axes.flat walks the grid row by row, so sample i lands at
# row i // cols, column i % cols.
for ax, (spectrogram, label_id) in zip(axes.flat, spectrogram_ds.take(n)):
    plot_spectrogram(np.squeeze(spectrogram.numpy()), ax)
    ax.set_title(label_id.numpy().decode('utf-8'))

plt.show()