In [26]:
#path
import os
from os.path import isdir, join
from pathlib import Path

# Scientific Math 
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import tensorflow as tf
import plotly.offline as py
import plotly.graph_objs as go

#Deep learning
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras import Input, layers
from tensorflow.keras import backend as K
import tensorflow as tf

import random
import copy
import librosa


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%matplotlib inline

In [2]:
pwd

'C:\\Users\\Michal\\Desktop\\semestr2\\MGU\\project3\\notebooks'

## Load data from scratch

In [4]:
dirs = ['../data/test/test/audio']

all_wavs = []
all_names = []
for direct in dirs:
    waves = [f for f in os.listdir(direct) if f.endswith('.wav')]
    
    for i, wav in enumerate(waves):
        if i%100 == 0:
            print(i/len(waves))
        samples, sample_rate = librosa.load(join(join(direct), wav), sr = 16000)
        samples = librosa.resample(samples, sample_rate, 8000)
        all_wavs.append([wav, samples])    

0.0
0.0006307636024170862
0.0012615272048341723
0.0018922908072512583
0.0025230544096683447
0.003153818012085431
0.0037845816145025165
0.004415345216919603
0.005046108819336689
0.0056768724217537755
0.006307636024170862
0.006938399626587948
0.007569163229005033
0.00819992683142212
0.008830690433839206
0.009461454036256292
0.010092217638673379
0.010722981241090465
0.011353744843507551
0.011984508445924637
0.012615272048341723
0.01324603565075881
0.013876799253175896
0.01450756285559298
0.015138326458010066
0.015769090060427152
0.01639985366284424
0.017030617265261325
0.017661380867678413
0.018292144470095497
0.018922908072512585
0.01955367167492967
0.020184435277346757
0.02081519887976384
0.02144596248218093
0.022076726084598014
0.022707489687015102
0.023338253289432186
0.023969016891849274
0.02459978049426636
0.025230544096683447
0.02586130769910053
0.02649207130151762
0.027122834903934703
0.02775359850635179
0.028384362108768876
0.02901512571118596
0.029645889313603048
0.0302766529160

split wav, label

In [6]:
print(len(all_wavs))

158538


In [7]:
np.save("../data/test/all_wavs.npy", all_wavs)

## Load data fast

In [3]:
saved = np.load("../data/test/all_wavs.npy", allow_pickle=True)
saved.shape

(158538, 2)

In [4]:
saved = saved.tolist()

In [7]:
wavs_names = []
wavs = []
for i in range(len(saved)):
    wavs_names.append(saved[i][0])
    wavs.append(saved[i][1])

In [10]:
wavs = np.asarray(wavs)
wavs.shape

(158538, 8000)

## Load model

In [13]:
model = tf.keras.models.load_model('../models/rec/model-098-0.842355.h5')

In [12]:
wavs = np.expand_dims(wavs, axis=2)
wavs.shape

(158538, 8000, 1)

In [15]:
predictions = model.predict(wavs, batch_size=1024)

In [17]:
predictions.shape

(158538, 12)

## Predictions to submission file

In [18]:
label_dict = {'yes': 0,
             'no': 1,
             'up': 2,
             'down': 3,
             'left': 4,
             'right': 5,
             'on': 6,
             'off': 7,
             'stop': 8,
             'go': 9,
             'unknown': 10,
             'silence': 11}

In [22]:
num_to_label = {value: key for key, value in label_dict.items()}
num_to_label

{0: 'yes',
 1: 'no',
 2: 'up',
 3: 'down',
 4: 'left',
 5: 'right',
 6: 'on',
 7: 'off',
 8: 'stop',
 9: 'go',
 10: 'unknown',
 11: 'silence'}

In [24]:
predicted_labels = [num_to_label[num] for num in predictions.argmax(1)]

['no',
 'unknown',
 'unknown',
 'silence',
 'unknown',
 'on',
 'unknown',
 'unknown',
 'go',
 'unknown',
 'off',
 'unknown',
 'unknown',
 'yes',
 'no',
 'stop',
 'right',
 'down',
 'go',
 'stop',
 'unknown',
 'unknown',
 'right',
 'unknown',
 'unknown',
 'yes',
 'unknown',
 'on',
 'off',
 'down',
 'unknown',
 'unknown',
 'unknown',
 'stop',
 'down',
 'unknown',
 'go',
 'yes',
 'silence',
 'right',
 'off',
 'down',
 'unknown',
 'off',
 'unknown',
 'off',
 'yes',
 'no',
 'unknown',
 'off',
 'unknown',
 'unknown',
 'stop',
 'go',
 'on',
 'unknown',
 'on',
 'unknown',
 'left',
 'unknown',
 'unknown',
 'stop',
 'unknown',
 'up',
 'unknown',
 'off',
 'yes',
 'left',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'off',
 'unknown',
 'silence',
 'unknown',
 'no',
 'unknown',
 'off',
 'unknown',
 'no',
 'unknown',
 'unknown',
 'unknown',
 'go',
 'unknown',
 'down',
 'no',
 'off',
 'silence',
 'on',
 'left',
 'unknown',
 'down',
 'on',
 'unknown',
 'go',
 'unknown',
 'unknown',
 'on',
 'silenc

In [28]:
submission_file = pd.read_csv(os.path.join("../data/test", "sample_submission.csv"))
submission_file.head()

Unnamed: 0,fname,label
0,clip_000044442.wav,silence
1,clip_0000adecb.wav,silence
2,clip_0000d4322.wav,silence
3,clip_0000fb6fe.wav,silence
4,clip_0001d1559.wav,silence


## Check if order is allright

In [32]:
submission_file.fname.values.tolist() == wavs_names

True

In [33]:
submission_file["label"] = predicted_labels

In [34]:
submission_file.to_csv(os.path.join("../submissions", "submission_lstm.csv"), index=False)