In [1]:
import librosa as li
import numpy as np
import IPython.display as ipd
import os
from pydub import AudioSegment as aud
import tensorflow as tf

In [2]:
def get_features(path, sr=44100):
    y = li.load(path, sr=sr)[0]
    s = li.feature.melspectrogram(y=y, sr=sr)
    log_s = li.power_to_db(s)
    features = li.feature.mfcc(y=y, sr=sr, S=log_s, n_mfcc=40)
    features = np.mean(features, axis=1)
    return features

In [3]:
def stretch(y):
    faster = li.effects.time_stretch(y, 1.1)
    slower = li.effects.time_stretch(y, 0.9)
    return [slower, faster]

In [4]:
def pitch_shift(y, sr):
    y1 = li.effects.pitch_shift(y, sr, n_steps=-1)
    y2 = li.effects.pitch_shift(y, sr, n_steps=1)
    return [y1, y2]

In [5]:
def add_noise(y):
    wn = np.random.randn(len(y))
    y_wn = y + 0.0025*wn
    return [y_wn]

In [6]:
def augment_data(y, sr):
    data = [y,]
    data += pitch_shift(y, sr)
    new = []
    for i in data:
        new += stretch(i)
    data += new
    new = []
    for i in data:
        new += add_noise(i)
    data += new
    return data

In [7]:
def get_data(paths):
    data = []
    for path in paths:
        y, sr = li.load(path, sr=22050)
        data += augment_data(y, sr)
    return data

In [8]:
def get_paths(path):
    paths = [path + '/' + i for i in os.listdir(path)]
    paths.sort()
    return paths[1:]

In [9]:
def get_labeled_data(path):
    label = [path.split('/')[-1]]
    data = []
    paths = li.util.find_files(path)
    for i in paths:
        data.append(get_features(i))
    return (data, label * len(paths))

In [23]:
data, labels = get_labeled_data('/Users/dmitry/Documents/Work/ML/data/1')

In [None]:
data2, labels2 = get_labeled_data('/Users/dmitry/Documents/Work/ML/data/0')

In [142]:
len(data2)

4817

In [143]:
d = data[:2000] + data2[:4000]
l = labels[:2000] + labels2[:4000]
vd = data[2000:] + data2[4000:]
vl = labels[2000:] + labels2[4000:]
print(len(d), len(l))
print(len(vd), len(vl))

6000 6000
1336 1336


In [160]:
ll = list(map(lambda i: float(i), vl))
ll = np.reshape(ll, (len(vl), 1))
ll

array([[1.],
       [1.],
       [1.],
       ...,
       [0.],
       [0.],
       [0.]])

In [157]:
dataset = tf.data.Dataset.from_tensor_slices((d, ll))

In [161]:
len(ll)

1336

In [158]:
dataset.element_spec

(TensorSpec(shape=(40,), dtype=tf.float32, name=None),
 TensorSpec(shape=(1,), dtype=tf.float64, name=None))

In [162]:
dataset = dataset.repeat().shuffle(6000).batch(1500)

In [163]:
dataset

<BatchDataset shapes: ((None, 40), (None, 1)), types: (tf.float32, tf.float64)>

In [164]:
val_dataset = tf.data.Dataset.from_tensor_slices((vd, ll))

In [169]:
val_dataset.element_spec

(TensorSpec(shape=(40,), dtype=tf.float32, name=None),
 TensorSpec(shape=(1,), dtype=tf.float64, name=None))

In [172]:
val_dataset

<BatchDataset shapes: ((None, 40), (None, 1)), types: (tf.float32, tf.float64)>

In [171]:
val_dataset = val_dataset.shuffle(1336).batch(128)

In [223]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(40, activation='relu', input_shape=(40,)),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(20, activation='relu'),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax'),
])

In [224]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [225]:
history = model.fit(dataset, epochs=10, steps_per_epoch=tf.math.ceil(6000/1500), 
                    validation_data=val_dataset,
                    validation_steps=1336/128)

Train for 4.0 steps, validate for 10.4375 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [232]:
model.save('Feature_v4.h5')

In [236]:
test = get_features('/Users/dmitry/Documents/Work/ML/data/0/1.wav')



In [237]:
test = np.reshape(test, (1,40))

In [238]:
model.predict(test)

array([[9.9996805e-01, 3.1998836e-05]], dtype=float32)