In [92]:
import tensorflow as tf
import librosa
import numpy as np
import IPython.display as ipd
import os
import pandas as pd
import pyaudio
import wave

## Loading samples

In [57]:
# Build the paths with os.path.join so the notebook is not tied to Windows
# path separators (the previous raw string r"data\\samples.csv" also carried a
# doubled backslash). Both files are opened as zip archives
# (compression='zip') and their first column is used as the index.
X = pd.read_csv(os.path.join("data", "samples.csv"), compression='zip', index_col=0)
Y = pd.read_csv(os.path.join("data", "samples_y.csv"), compression='zip', index_col=0)

In [114]:
# Hold out the first 120 rows for validation; every row after them is used
# for training. X and Y are cut at the same position.
X_val, X_train = X[:120], X[120:]
Y_val, Y_train = Y[:120], Y[120:]

In [118]:
# Display the (rows, columns) shape of the training split as a quick check.
X_train.shape

(3665, 2580)

## Preparing dataset

In [69]:
# One dataset element per row: (row of X, row of Y at the same position).
dataset = tf.data.Dataset.from_tensor_slices(
    (X.values, Y.values)
)

In [70]:
# Shuffle BEFORE repeating: with repeat() first, the shuffle buffer mixes rows
# from consecutive passes, so some rows can be seen twice before others are
# seen once. shuffle(len(X)) uses a buffer covering the whole table, and
# repeat() then makes the stream endless before it is cut into batches of 128.
shuffled_dataset = dataset.shuffle(len(X)).repeat().batch(128)

In [87]:
# shuffled_dataset repeats forever, so take(700)/skip(700) on it would draw
# validation and training batches from the very same rows. Split the finite,
# ordered `dataset` first (same 120-row hold-out as the X_val/X_train split),
# then shuffle, repeat and batch each part on its own.
n_val = 120
val_dataset = dataset.take(n_val).shuffle(n_val).repeat().batch(128)
train_dataset = dataset.skip(n_val).shuffle(len(X) - n_val).repeat().batch(128)

In [183]:
# Input pipelines for the two splits. shuffle() comes before repeat() so every
# pass over a split is a complete permutation of it; with repeat() first the
# shuffle buffer would mix rows from consecutive passes. Both streams are
# endless, so the number of steps has to be given when fitting.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val.values, Y_val.values))
    .shuffle(len(X_val))
    .repeat()
    .batch(32)
)
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train.values, Y_train.values))
    .shuffle(len(X_train))
    .repeat()
    .batch(256)
)

## Preparing model

In [208]:
# Small fully connected classifier: 20 -> 20 -> 10 ReLU units followed by a
# 2-way softmax. The input width is the number of columns in X.
# Dropout(0.5) after each 20-unit layer is left disabled.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(20, activation='relu', input_shape=(X.shape[1],)))
model.add(tf.keras.layers.Dense(20, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

In [209]:
# Adam optimizer with sparse categorical cross-entropy, tracking accuracy.
# NOTE(review): this loss expects integer class ids; Y is assumed to hold them.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Train model

In [210]:
# Both datasets repeat indefinitely, so the length of an epoch is set here:
# 15 training batches and 4 validation batches per epoch, for 10 epochs.
model.fit(
    train_dataset,
    epochs=10,
    steps_per_epoch=15,
    validation_data=val_dataset,
    validation_steps=4,
)

Train for 15 steps, validate for 4 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x27fdca790c8>

## Obtaining a prediction

#### Recording sample

In [319]:
# Record a short mono clip from the default input device and store it as WAV.
CHUNK = 1024                        # frames requested per stream.read() call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100                        # frames per second requested from the device
RECORD_SECONDS = 1.5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
try:
    # Ask for the sample width while the PyAudio instance is still alive
    # (previously this was queried after p.terminate()).
    sample_width = p.get_sample_size(FORMAT)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        print("* recording")

        # Whole chunks only: int() truncates, so the clip is at most
        # RECORD_SECONDS long.
        n_chunks = int(RATE / CHUNK * RECORD_SECONDS)
        frames = [stream.read(CHUNK) for _ in range(n_chunks)]

        print("* done recording")
    finally:
        # Release the stream even if a read fails part-way through.
        stream.stop_stream()
        stream.close()
finally:
    # Always release the audio backend, including when p.open() raises.
    p.terminate()

# The context manager closes the file even if a write fails.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

* recording
* done recording


#### Sample processing

In [320]:
# Load the recorded clip and turn it into one flat MFCC feature vector.
x, sr = librosa.load(WAVE_OUTPUT_FILENAME)

# hop_length is derived from the clip length (len(x) // 128), so the number of
# frames does not depend on how many samples were loaded; 20 coefficients are
# computed per frame and the matrix is flattened to 1-D.
mfcc = librosa.feature.mfcc(
    y=x,
    sr=sr,
    n_mfcc=20,
    hop_length=len(x) // 128,
).flatten()
print(mfcc.shape)

# Last expression of the cell: renders an audio player for the recording.
ipd.Audio(WAVE_OUTPUT_FILENAME)

(2580,)


#### Model testing

In [321]:
# Alternative check against a stored sample instead of the fresh recording:
#   model.predict(X.values[2200].reshape(1, X.shape[1]))
# mfcc is 1-D, so reshape(1, -1) turns it into a batch holding a single row.
model.predict(mfcc.reshape(1, -1))

array([[0.88833755, 0.11166241]], dtype=float32)

#### Saving model

In [322]:
# Make sure the target directory exists and build the path portably; the
# previous hard-coded 'models\\...' separator only resolved to a sub-directory
# on Windows. The .h5 suffix selects the HDF5 format.
os.makedirs("models", exist_ok=True)
model.save(os.path.join("models", "128_hop_20_nmfcc(almost_almost_success).h5"))