# Dataset Preparation

In [83]:
import sys
sys.path.insert(0, r"D:\\DATASCI\\TFM\\Repo\\drum-ai\\data")

import librosa
from IPython.display import Audio, Image
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

## 1. Sound Processing

In [84]:
# Absolute path of the sample clip used throughout this section.
# Written as a raw string so the Windows backslashes are taken literally
# instead of relying on unrecognised escape sequences such as "\D" or "\k".
SAMPLE_FILE = r'D:\DATASCI\TFM\Repo\drum-ai\data\test_bank_\kick\kick1.wav'
SAMPLE_RATE = 16000   # Samples per second
SAMPLE_LEN = 1   # Seconds
SAMPLE_SIZE = int(SAMPLE_RATE * SAMPLE_LEN)   # Samples per clip

### 1.1. Playing Audio

In [85]:
# Render an inline audio player for the sample clip, loaded from disk.
# NOTE(review): `rate` is presumably only meaningful when raw sample data is
# passed instead of a filename — confirm against the IPython.display.Audio docs.
Audio(filename=SAMPLE_FILE, rate=SAMPLE_RATE)

### 1.2. Decoding Audio

In [86]:
@tf.function
def decode_wav(wav_content, n_channels=1):
    """Decode WAV-encoded bytes into a waveform tensor.

    Args:
        wav_content: Contents of a WAV file, as accepted by
            `tf.audio.decode_wav` (e.g. the result of `tf.io.read_file`).
        n_channels: Number of channels requested from the decoder.

    Returns:
        The decoded audio samples, transposed. For the mono sample clip in
        this notebook the recorded result has shape (1, 16000).
    """
    decoded = tf.audio.decode_wav(contents=wav_content,
                                  desired_channels=n_channels)
    # Only the samples are kept; the decoder's other outputs are discarded.
    samples = decoded.audio
    return tf.transpose(samples)

In [87]:
# Read the raw bytes of the sample file and decode them as a
# single-channel waveform.
waveform = decode_wav(tf.io.read_file(SAMPLE_FILE), n_channels=1)
waveform  # last expression of the cell -> shown as its output

<tf.Tensor: shape=(1, 16000), dtype=float32, numpy=
array([[-3.0517578e-05, -3.0517578e-05, -3.0517578e-05, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00]], dtype=float32)>

### 1.3. Audio Preprocessing

#### 1.3.1. Cropping and Padding Data

In [88]:
@tf.function
def crop_and_pad(signal, size=8000, pad_value=-1):
    """Force the second axis of `signal` to be exactly `size` entries long.

    Longer signals are truncated to their first `size` entries; shorter ones
    are right-padded with `pad_value`. Signals already of length `size` come
    back with unchanged values.

    Args:
        signal: Rank-2 tensor; the second axis is the one cropped or padded.
        size: Target length of the second axis.
        pad_value: Constant used to fill the padded tail. Defaults to -1 to
            preserve the previous behaviour.
            NOTE(review): for audio in [-1, 1], -1 is not silence; passing 0
            is probably what most callers want — confirm.

    Returns:
        A tensor whose second axis has length `size`.
    """
    # Slicing past the end is a no-op, so this only shortens longer signals.
    cropped = signal[:, :size]
    # Entries still missing after the crop; clamped to 0 when nothing is needed.
    pad_size = tf.maximum(size - tf.shape(cropped)[1], 0)
    # Never pad the first axis; pad only the end of the second one.
    paddings = [[0, 0], [0, pad_size]]
    return tf.pad(cropped, paddings, 'CONSTANT', constant_values=pad_value)

In [89]:
# Fit the clip to exactly SAMPLE_SIZE samples. `size` is a sample count, so
# SAMPLE_SIZE (SAMPLE_RATE * SAMPLE_LEN) is passed rather than SAMPLE_LEN,
# which is a duration in seconds and would crop the clip to a single sample.
data = crop_and_pad(waveform, size=SAMPLE_SIZE)
Audio(data, rate=SAMPLE_RATE)

#### 1.3.2. Fourier Transform

In [124]:
@tf.function
def fourier_transform(signal, frame_length=255, frame_step=4):
    """Compute the magnitude of the short-time Fourier transform of `signal`.

    Args:
        signal: Tensor of samples passed to `tf.signal.stft`.
        frame_length: Window length handed to the STFT.
        frame_step: Hop between consecutive windows handed to the STFT.

    Returns:
        The absolute value of the STFT output. With the defaults, the
        recorded result for the sample clip has shape (1, 3937, 129).
    """
    spectrum = tf.signal.stft(signal,
                              frame_length=frame_length,
                              frame_step=frame_step)
    # Drop the phase and keep only the magnitude of each complex bin.
    magnitude = tf.abs(spectrum)
    return magnitude

In [125]:
# STFT magnitude of the cropped/padded clip, using the default frame settings.
spectral = fourier_transform(data)
spectral  # last expression of the cell -> shown as its output

<tf.Tensor: shape=(1, 3937, 129), dtype=float32, numpy=
array([[[0.26805413, 0.15847492, 0.06617221, ..., 0.00724136,
         0.00130331, 0.00095618],
        [0.2899716 , 0.17111452, 0.05560503, ..., 0.00722019,
         0.00133457, 0.00091755],
        [0.31132305, 0.18579511, 0.04324112, ..., 0.00724979,
         0.0012907 , 0.0009445 ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ]]], dtype=float32)>