## What are TPUs?
The Tensor Processing Unit (TPU) is a custom integrated circuit (an ASIC developed by Google), designed specifically to accelerate the process of training machine learning models.

## TPUs for free at Kaggle
**You can use up to 30 hours per week of TPUs and up to 9h at a time in a single session.**
**For more info you can visit [here](https://www.kaggle.com/docs/tpu).**

## Why do we need TFRecord format?
The TFRecord format is TensorFlow's own data format for storing a sequence of binary records. The advantages of using TFRecords include more efficient storage, fast I/O, and self-contained files. The main advantage of using TFRecords with TPUs is the faster I/O, which results in faster model training.

To understand the basics of TFRecords, please visit Ryan Holbrook's notebook: [TFRecords Basics](https://www.kaggle.com/ryanholbrook/tfrecords-basics).

**In this notebook you will learn how to convert an audio dataset into the TFRecord format.**

# Useful resources which helped me:
- https://www.kaggle.com/servietsky/fast-import-audio-and-save-spectrograms/notebook
- https://www.tensorflow.org/tutorials/load_data/tfrecord
- https://www.kaggle.com/mgornergoogle/five-flowers-with-keras-and-xception-on-tpu
- https://towardsdatascience.com/a-practical-guide-to-tfrecords-584536bc786c
- https://keras.io/examples/keras_recipes/creating_tfrecords/
- https://www.kaggle.com/lqdisme/dog-breed-identification
- https://cloud.google.com/blog/products/ai-machine-learning/what-makes-tpus-fine-tuned-for-deep-learning
- https://pub.towardsai.net/writing-tfrecord-files-the-right-way-7c3cee3d7b12
- https://www.kaggle.com/daisukelab/creating-fat2019-preprocessed-data

# Imports

In [None]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
import numpy as np 
import pandas as pd
import os, random
import librosa
import matplotlib.pyplot as plt
import gc
import time
from tqdm import tqdm, tqdm_notebook; tqdm.pandas() # Progress bar
import math

from tensorflow.keras.utils import to_categorical
# Seed NumPy's global RNG. seed_everything(SEED) in the parameters cell later
# re-seeds it with 42.
seed = 1234
np.random.seed(seed)

# Wall-clock start time of the notebook run.
t_start = time.time()

import warnings

def fxn():
    '''Emit a DeprecationWarning (used below to exercise the warning filter).'''
    warnings.warn("deprecated", DeprecationWarning)

# Inside this context every warning is ignored, so the warning raised by
# fxn() is swallowed; the previous filters are restored when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fxn()
# Ignore all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")
# NOTE(review): tensorflow is already imported above; this variable is
# presumably read when TensorFlow's native runtime loads, so setting it here
# may have no effect on log verbosity -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

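# Parameters
To save time, only a subset of the data is used (note: the data-processing cell below randomly samples 15 bird classes rather than taking 10 samples from each folder).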

In [None]:
# Preprocessing parameters (adjust as needed)
SEED = 42      # seed handed to seed_everything() below
FRAC = 0.2     # validation fraction
SR = 44100     # sampling rate
MAXLEN = 60    # seconds
N_MELS = 128   # value passed as n_mels when building mel spectrograms


def seed_everything(seed):
    '''
    Seed Python's `random` module and NumPy's global RNG with `seed`,
    and store `seed` in the PYTHONHASHSEED environment variable.
    '''
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(SEED)

# Data Processing and encoding labels

In [None]:
ROOT = "/kaggle/input/birdsong-recognition/"

# Load the training metadata, keeping only the columns used below.
df = pd.read_csv(os.path.join(ROOT, 'train.csv'))[['ebird_code', 'filename', 'duration']]
# Full path of every recording: <ROOT>train_audio/<ebird_code>/<filename>
df['path'] = ROOT+'train_audio/' + df['ebird_code'] + "/" + df['filename']

# Randomly pick 15 of the available bird classes and keep only their rows.
classes = set(random.sample(df['ebird_code'].unique().tolist(), 15))
df = df[df['ebird_code'].isin(classes)].reset_index(drop=True)

# Map each remaining class code to an integer label, assigned in sorted
# (alphabetical) order of the codes.
keys = set(df.ebird_code)
values = np.arange(0, len(keys))
code_dict = dict(zip(sorted(keys), values))
df['label'] = df['ebird_code'].map(code_dict)

# Preprocessing functions


In [None]:
samples = SR * 5  # fixed clip length: 5 seconds' worth of samples at rate SR
def read_audio(path):
    '''
    Read an audio file and return a fixed-length waveform
    that we can turn into a melspectrogram.

    The file is loaded at sampling rate SR, silence is trimmed, and the
    result is forced to exactly `samples` values: longer clips keep only
    their first `samples` values, shorter clips are zero-padded evenly on
    both sides.

    path: location of the audio file to load.
    Returns a 1-D array of length `samples`.
    '''
    # Load at the shared SR constant so the sampling rate always matches the
    # one `samples` was derived from.
    y, _ = librosa.load(path, sr=SR)
    # trim silence
    if len(y) > 0:  # workaround: 0 length causes error
        y, _ = librosa.effects.trim(y)
    if len(y) > samples:  # long enough: keep the leading window
        return y[:samples]
    # too short (or exact): centre the signal inside a zero-filled window
    padding = samples - len(y)
    offset = padding // 2
    return np.pad(y, (offset, padding - offset), 'constant')

def audio_to_melspectrogram(audio):
    '''
    Convert an audio signal (as returned by read_audio) into a
    mel spectrogram on a decibel scale.

    audio: 1-D waveform array.
    Returns the spectrogram as a float32 array.
    '''
    # The signal is passed by keyword (`y=`): recent librosa releases no
    # longer accept it as a positional argument.
    # NOTE(review): n_fft=N_MELS makes the FFT window only 128 samples long,
    # which yields fewer FFT bins than mel bands -- looks unintended, but it
    # is left unchanged so the generated features stay the same. Confirm.
    spectrogram = librosa.feature.melspectrogram(y=audio,
                                                 sr=SR,
                                                 n_mels=N_MELS,
                                                 hop_length=347,
                                                 n_fft=N_MELS,
                                                 fmin=20,
                                                 fmax=SR//2)
    return librosa.power_to_db(spectrogram).astype(np.float32)

def read_as_melspectrogram(path):
    '''
    Load the audio file at `path` and return its melspectrogram,
    ready to be used as model input.
    '''
    waveform = read_audio(path)
    return audio_to_melspectrogram(waveform)

def convert_wav_to_image(df, path):
    '''
    Build one transposed melspectrogram per row of `df`.

    df:   DataFrame with 'ebird_code' and 'filename' columns.
    path: sequence whose first element is the root audio directory; files
          are read from <path[0]>/<ebird_code>/<filename>.
    Returns a list of arrays. The file 'XC195038.mp3' is skipped, so the
    list can be shorter than `df`.
    '''
    X = []
    for _, record in tqdm_notebook(df.iterrows()):
        if row_is_skipped := (record['filename'] == 'XC195038.mp3'):
            continue
        audio_file = '{}/{}/{}'.format(path[0], str(record['ebird_code']), str(record['filename']))
        mels = read_as_melspectrogram(audio_file)
        X.append(mels.transpose())
    return X

def normalize(img):
    '''
    Standardizes an array: subtracts its mean and divides by its
    standard deviation. When the standard deviation is exactly zero,
    a small constant is used as the divisor instead.
    '''
    eps = 0.001
    mean = np.mean(img)
    std = np.std(img)
    divisor = std if std != 0 else eps
    return (img - mean) / divisor

def normalize_dataset(X):
    '''
    Applies normalize() to every array in X, independently of the others,
    and returns the results as a list in the same order.
    '''
    return [normalize(img) for img in X]

In [None]:
# One transposed melspectrogram per (non-skipped) recording.
X = np.array(convert_wav_to_image(df, ['../input/birdsong-recognition/train_audio']))
# Standardize every spectrogram on its own.
normalized_x = normalize_dataset(X)
# Stack into a single array and add a trailing channel axis of size 1.
reshape_x = np.array(normalized_x).reshape((-1,636,128,1))

# One-hot encoding the labels

In [None]:
# convert_wav_to_image() skips 'XC195038.mp3', so the same row has to be
# dropped here; otherwise every label after it would be shifted by one
# relative to the spectrograms. This is a no-op when the file is not in df.
y = df.loc[df['filename'] != 'XC195038.mp3', 'label'].values
# One-hot encode the integer labels (15 classes were sampled above).
labels = to_categorical(y, num_classes=15)

# Visualize the spectrograms

In [None]:
# Show the first (un-normalized) spectrogram as an image and print the
# first integer label.
plt.figure(figsize=(15,10))
plt.imshow(X[0]);
print(y[0])

# Divide into train and test

In [None]:
# Hold out the LAST 333 samples for testing and train on everything before
# them, so the two splits are disjoint. (Taking the test set from the front
# with [:333] would reuse samples that are also in the training slice.)
# NOTE(review): the data is not shuffled before splitting -- confirm that the
# row order gives an acceptable class mix in the held-out set.
X_train = reshape_x[:-333]
X_test = reshape_x[-333:]
y_train = labels[:-333]
y_test = labels[-333:]

In [None]:
# Report the shapes of both splits as a quick sanity check.
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

# Functions for feature creation
Each of the following functions takes a scalar input value and returns a `tf.train.Feature` holding it in the compatible list type (`BytesList`, `FloatList` or `Int64List`).

In [None]:
def _bytes_feature(value):
    '''
    Wrap a single bytes value in a tf.train.Feature backed by a BytesList.
    If `value` is a tensor of the type produced by tf.constant, its
    underlying value is extracted with .numpy() first.
    '''
    constant_tensor_type = type(tf.constant(0))
    if isinstance(value, constant_tensor_type):  # if value is a tensor
        value = value.numpy()  # get value of tensor
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
    '''Wrap a single float value in a tf.train.Feature backed by a FloatList.'''
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    '''Wrap a single integer value in a tf.train.Feature backed by an Int64List.'''
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def serialize_array(array):
    '''Serialize `array` with tf.io.serialize_tensor and return the result.'''
    return tf.io.serialize_tensor(array)

# Writing and Converting to TFRecord
Now we'll create a dictionary holding the image's height, width and depth, the image itself and (optionally) the label. The image and label arrays are first serialized and then wrapped in a bytes feature. All these key:value mappings make up the features for one Example.

In [None]:
def parse_single_image(image, label=None):
    '''
    Build a tf.train.Example for one image.

    image: array with at least three dimensions; its first three dimension
           sizes are stored as 'height', 'width' and 'depth', and the
           serialized array as 'raw_image'.
    label: optional array; when given it is serialized and stored as 'label'.
    Returns the assembled tf.train.Example.
    '''
    data = {
        'height' : _int64_feature(image.shape[0]),
        'width' : _int64_feature(image.shape[1]),
        'depth' : _int64_feature(image.shape[2]),
        'raw_image' : _bytes_feature(serialize_array(image)),
    }
    # The label entry is only present for labelled data.
    if label is not None:
        data['label'] = _bytes_feature(serialize_array(label))

    features = tf.train.Features(feature=data)
    return tf.train.Example(features=features)

In [None]:
import tqdm
def write_images_to_tfr(images, filename, labels=None, max_files=500, out_dir="./"):
    '''
    Write images (and optionally labels) to one or more TFRecord shards.

    Shards are named "<out_dir><shard number>_<shard count><filename>.tfrecords"
    and each holds at most `max_files` examples.

    images:    indexable collection of image arrays.
    filename:  name fragment used in every shard's file name.
    labels:    optional collection indexed in parallel with `images`.
    max_files: maximum number of examples per shard (must be positive).
    out_dir:   prefix prepended to each shard name.
    Returns the number of examples written.
    Raises ValueError if `max_files` is not positive.
    '''
    # Imported locally so the progress bar does not depend on whether the
    # module-level name `tqdm` is bound to the package or to the class.
    from tqdm import tqdm as progress_bar

    if max_files <= 0:
        raise ValueError("max_files must be a positive integer")

    n_images = len(images)
    splits = -(-n_images // max_files)  # ceiling division: number of shards needed
    print(f"\nUsing {splits} shard(s) for {n_images} files, with up to {max_files} samples per shard")

    file_count = 0

    for i in progress_bar(range(splits)):
        current_shard_name = "{}{}_{}{}.tfrecords".format(out_dir, i+1, splits, filename)
        first = i * max_files
        last = min(first + max_files, n_images)  # the final shard may be shorter

        # The context manager closes the writer even if serialization fails.
        with tf.io.TFRecordWriter(current_shard_name) as writer:
            for index in range(first, last):
                current_label = None if labels is None else labels[index]
                out = parse_single_image(image=images[index], label=current_label)
                writer.write(out.SerializeToString())
                file_count += 1

    print(f"\nWrote {file_count} elements to TFRecord")
    return file_count

In [None]:
# Write the training images and their one-hot labels to TFRecord shards whose
# names contain "train_tfrecord".
write_images_to_tfr(X_train, "train_tfrecord", y_train)

In [None]:
# Write the test images and their one-hot labels to TFRecord shards whose
# names contain "test_tfrecord".
write_images_to_tfr(X_test, "test_tfrecord", y_test)