In [None]:
# Sanity-check the runtime: print every device TensorFlow can see, then the GPU count.
import tensorflow as tf
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
# Length of the list of physical devices of type 'GPU'.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


In [None]:

import os

# Walk the Kaggle input directory recursively and print the full path of every file.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

In [None]:
from os.path import isdir, join
from pathlib import Path
import pandas as pd
import tensorflow as tf
# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import librosa

from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import pandas as pd

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [None]:
# Print the installed Keras version.
import keras
print(keras.__version__)

In [None]:
# Install the 7-Zip command-line tool, then extract the competition's train.7z archive
# (with its directory structure) into the current working directory.
!apt-get install -y p7zip-full
!7z x ../input/tensorflow-speech-recognition-challenge/train.7z


In [None]:
# Root folder of the extracted training audio; print the entries it contains.
train_audio_path = 'train/audio/'
print(os.listdir(train_audio_path))

In [None]:
# Collect the sub-directories of the training-audio folder and sort them by name.
dirs = [f for f in os.listdir(train_audio_path) if isdir(join(train_audio_path, f))]
dirs.sort()
# NOTE(review): dirs[1:] leaves the first sorted entry out of the count — presumably the
# `_background_noise_` folder, which is not a spoken command; confirm against the dataset.
print('Number of labels: ' + str(len(dirs[1:])))
print(dirs)

In [None]:
# Every sub-directory of the training folder is one label. Filtering on isdir() keeps a
# stray file from crashing the os.listdir() call in the loop below, and sorting makes the
# bar order reproducible (os.listdir returns entries in arbitrary order).
labels = sorted(f for f in os.listdir(train_audio_path) if isdir(join(train_audio_path, f)))

# Count the .wav recordings available for each label.
no_of_recordings = []
for label in labels:
    waves = [f for f in os.listdir(join(train_audio_path, label)) if f.endswith('.wav')]
    no_of_recordings.append(len(waves))

# Bar chart: one bar per label, height = number of recordings.
plt.figure(figsize=(30,5))
index = np.arange(len(labels))
plt.bar(index, no_of_recordings)
plt.xlabel('Commands', fontsize=12)
plt.ylabel('No of recordings', fontsize=12)
plt.xticks(index, labels, fontsize=15, rotation=60)
plt.title('No. of recordings for each command')
plt.show()

# From here on `labels` is rebound to the four commands the model is trained on.
labels=["up","down","left","right"]

In [None]:
# Load every clip of the selected commands at 16 kHz (no further resampling) and keep
# only the clips that are exactly 16000 samples long, together with their label.
all_wave = []
all_label = []
for label in labels:
    print(label)
    label_dir = train_audio_path + '/'+ label
    waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
    for wav in waves:
        samples, sample_rate = librosa.load(label_dir + '/' + wav, sr = 16000)
        # Clips of any other length are skipped so all rows can be stacked later.
        if len(samples) != 16000:
            continue
        all_wave.append(samples)
        all_label.append(label)


In [None]:
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    """Compute a log-scaled spectrogram of ``audio``.

    Args:
        audio: samples to analyse; handed unchanged to ``scipy.signal.spectrogram``.
        sample_rate: sampling frequency in Hz.
        window_size: length of the Hann analysis window, in milliseconds.
        step_size: duration in milliseconds that is converted to samples and passed
            as ``noverlap`` (the overlap between consecutive windows).
        eps: constant added before the logarithm so zero-valued bins stay finite.

    Returns:
        Tuple ``(freqs, times, log_spec)``: the frequency and time axes reported by
        SciPy, and the natural log of the transposed spectrogram cast to float32.
    """
    def _ms_to_samples(duration_ms):
        # Milliseconds -> whole number of samples at the given sampling rate.
        return int(round(duration_ms * sample_rate / 1e3))

    freqs, times, spec = signal.spectrogram(
        audio,
        fs=sample_rate,
        window='hann',
        nperseg=_ms_to_samples(window_size),
        noverlap=_ms_to_samples(step_size),
        detrend=False,
    )
    log_spec = np.log(spec.T.astype(np.float32) + eps)
    return freqs, times, log_spec

In [None]:
# Stack the loaded clips into one array and compute the log spectrogram of all of them
# in a single log_specgram() call, using a 16000 Hz sample rate.
raw_file=np.array(all_wave)
freqs, times, spectrogram =log_specgram(raw_file, 16000)


In [None]:
# Show the dimensions of the batched spectrogram array.
np.shape(spectrogram)

In [None]:
# Histogram of clip durations (in seconds) over all recordings of the selected labels.
duration_of_recordings=[]
for label in labels:
    waves = [f for f in os.listdir(train_audio_path + '/'+ label) if f.endswith('.wav')]
    for wav in waves:
        # wavfile.read returns (rate, data); duration = number of samples / rate.
        # Note: this rebinds the notebook-level `sample_rate` and `samples`.
        sample_rate, samples = wavfile.read(train_audio_path + '/' + label + '/' + wav)
        duration_of_recordings.append(float(len(samples)/sample_rate))
    
plt.hist(np.array(duration_of_recordings))

In [None]:
# Target clip length in samples. The rest of this notebook works on clips of 16000
# samples loaded at 16 kHz; the former value of 8000 was a leftover from an 8 kHz variant.
L = 16000
def pad_audio(samples, target_len=None):
    """Left-pad ``samples`` with zeros until it is ``target_len`` samples long.

    Args:
        samples: 1-D sequence of audio samples.
        target_len: minimum length of the result; defaults to the notebook-level ``L``
            (looked up at call time).

    Returns:
        ``samples`` itself when it already has at least ``target_len`` elements
        (longer clips are NOT truncated); otherwise a new array of exactly
        ``target_len`` elements with the zeros placed before the original samples.
    """
    if target_len is None:
        target_len = L
    if len(samples) >= target_len:
        return samples
    # pad_width=(before, after): all padding goes in front of the signal.
    return np.pad(samples, pad_width=(target_len - len(samples), 0), mode='constant', constant_values=(0, 0))

In [None]:
# Pad one clip and plot its duration (length / sample rate) as a single-value histogram.
# NOTE(review): `samples` and `sample_rate` are whatever an earlier cell's loop left
# behind, so this result depends on which cells ran before it.
po=pad_audio(samples)
plr=[]
plr.append(float(len(po)/sample_rate))
plt.hist(plr)

In [None]:
from __future__ import print_function, division
from builtins import range, input


from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob

In [None]:
# Rearrange the batched spectrogram from (time, freq, clip) to (clip, freq, time, 1).
# The axes must be *permuted*: reshape() only reinterprets the flat element order, so
# it would fill every "image" with values taken from many different clips.
temp3 = np.transpose(spectrogram, (2, 1, 0))[..., np.newaxis]
print(np.shape(temp3))



# Plot the spectrogram of one clip (index 2061 on the last axis) as an image.
temp = spectrogram[:,:,2061]
fig = plt.figure(figsize=(14, 8))
ax2 = fig.add_subplot(111)
ax2.imshow(temp, aspect='auto', origin='lower')
np.shape(temp)


In [None]:
# Convert the 1-channel spectrograms to 3 identical channels, the layout an
# ImageNet-pretrained backbone expects. np.repeat sizes the result from temp3 itself
# (no hard-coded clip count) and keeps temp3's dtype instead of allocating float64.
x = np.repeat(temp3, 3, axis=-1)
x.shape

In [None]:
# Encode the string labels as integers, then as one-hot rows for the softmax output.
from sklearn.preprocessing import LabelEncoder
# `keras.utils.np_utils` has been removed from current Keras releases;
# `keras.utils.to_categorical` is the stable public entry point for the same function.
from keras.utils import to_categorical

le = LabelEncoder()
y = le.fit_transform(all_label)
# classes[i] is the label string that was encoded as integer i.
classes = list(le.classes_)
y = to_categorical(y, num_classes=len(labels))

In [None]:
# Stratified 80/20 train/validation split with a fixed seed.
# x and y are already NumPy arrays; wrapping them in np.array() again would only
# create a second full copy of the (large) spectrogram tensor in memory.
from sklearn.model_selection import train_test_split
x_tr, x_val, y_tr, y_val = train_test_split(x, y, stratify=y, test_size=0.2, random_state=777, shuffle=True)

In [None]:
import tensorflow.keras as K
import tensorflow as tf
from keras.regularizers import l2

# ResNet50 backbone with ImageNet weights and no classification head, fed with
# 161 x 99 x 3 inputs.
input_t = K.Input(shape=(161, 99, 3))
res_model = K.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_tensor=input_t,
)

# Freeze the first 143 layers of the backbone; the remaining layers stay trainable.
for layer in res_model.layers[:143]:
    layer.trainable = False

# Print index, name and trainable flag of every layer to verify the freeze.
for i, layer in enumerate(res_model.layers):
    print(i, layer.name, "-", layer.trainable)

to_res = (161, 99)

# Classifier: resize -> backbone -> flatten -> 3 x (BatchNorm, Dense, Dropout)
# -> BatchNorm -> 4-way softmax.
model = K.models.Sequential()
model.add(K.layers.Lambda(lambda image: tf.image.resize(image, to_res)))
model.add(res_model)
model.add(K.layers.Flatten())
for units in (256, 128, 64):
    model.add(K.layers.BatchNormalization())
    model.add(K.layers.Dense(units, activation='relu'))
    model.add(K.layers.Dropout(0.5))
model.add(K.layers.BatchNormalization())
model.add(K.layers.Dense(4, activation='softmax'))


#model.summary()




In [None]:
# Train with Adam.
# The callbacks come from the same Keras package (tensorflow.keras) as the model.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
model.compile(loss=K.losses.categorical_crossentropy,
             # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
             optimizer=K.optimizers.Adam(learning_rate=0.0001),
             metrics=['accuracy'])

# Stop when the validation loss has not improved by at least min_delta for 10 epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0.00001)
# With metrics=['accuracy'] the validation metric is logged as 'val_accuracy'.
# Monitoring the non-existent 'val_acc' makes the callback skip every save.
mc = ModelCheckpoint('best_model.hdf5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
history = model.fit(x_tr, y_tr, validation_data=(x_val, y_val),
          batch_size=32,
          epochs=20,
          callbacks=[es,mc],
          verbose=1)

In [None]:
# Train with SGD.
# Re-compiling swaps the optimizer but keeps the current weights, so this run continues
# from wherever the model currently is. It also rebinds `history` and writes to the
# same 'best_model.hdf5' checkpoint path.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0.00001)
# With metrics=['accuracy'] the validation metric is logged as 'val_accuracy'.
# Monitoring the non-existent 'val_acc' makes the callback skip every save.
mc = ModelCheckpoint('best_model.hdf5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
history=model.fit(x_tr, y_tr ,epochs=20, callbacks=[es,mc],  batch_size=32, validation_data=(x_val,y_val))

In [None]:
# Print the layer-by-layer summary of the current model.
model.summary() 

In [None]:
from matplotlib import pyplot

# Loss curves of the most recent fit() call: training loss and validation loss
# (the validation curve carries the legend entry 'test').
for _history_key, _legend_name in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[_history_key], label=_legend_name)
pyplot.legend()
pyplot.show()

In [None]:
# Write the current weights to weights.h5 and immediately load them back from that file.
model.save_weights("weights.h5")
model.load_weights("weights.h5")

In [None]:
# Save the whole model to a single .h5 file.
model.save('tfsrfyp_model.h5')

In [None]:
# Load the saved model back under a new name and print its summary.
# NOTE(review): the model is built with `tensorflow.keras` and contains a Lambda layer;
# confirm that loading through the standalone `keras` package works in this environment.
from keras.models import load_model
new_model = load_model('tfsrfyp_model.h5')
new_model.summary()

In [None]:
# Save the weights a second time under another name.
# NOTE(review): the file name starts with a space ('./ tfsrfyp_model.pt') and uses a
# '.pt' suffix — looks like a typo; confirm the intended path and format.
model.save_weights('./ tfsrfyp_model.pt') 

In [None]:
# Save the current weights to my_model_weights.h5.
model.save_weights('my_model_weights.h5')

In [None]:
# Load the weights stored in my_model_weights.h5 into the current model.
model.load_weights('my_model_weights.h5')

In [None]:
# Replace `model` with whatever is stored at ./checkpoint.
# Note: this import rebinds the name `keras` to `tensorflow.keras`.
# NOTE(review): no visible cell writes to './checkpoint' — confirm where it comes from.
from tensorflow import keras
model = keras.models.load_model('./checkpoint')

In [None]:
import soundfile as sd
import matplotlib.pyplot as plt
import time
#import tensorflow.keras.backend as K
import numpy as np 
from scipy.io.wavfile import write
from scipy.io.wavfile import read
from scipy.io import wavfile
from pydub import AudioSegment

In [None]:
 pip install pyaudio 

In [None]:
pip install sounddevice

In [None]:
# Record a short stereo clip with sounddevice and save it as output.wav.
# Note: this import rebinds `sd`, which an earlier cell bound to `soundfile`.
# The recording is 3 s at 44100 Hz with 2 channels, whereas the training clips above are
# 16000 samples loaded at 16 kHz — it cannot be fed to the model without conversion.
import sounddevice as sd
from scipy.io.wavfile import write

fs = 44100  # Sample rate
seconds = 3  # Duration of recording

myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
sd.wait()  # Wait until recording is finished
write('output.wav', fs, myrecording)  # Save as WAV file 

In [None]:
import pyaudio
import wave

# Recording parameters: 16-bit samples, stereo, 44.1 kHz, read in 1024-frame chunks.
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 1
WAVE_OUTPUT_FILENAME = "file.wav"

frames = []
audio = pyaudio.PyAudio()
try:
    # Query the sample width while the PyAudio instance is still alive.
    sample_width = audio.get_sample_size(FORMAT)

    # start Recording
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        print ("recording")
        # Read enough chunks to cover RECORD_SECONDS of audio.
        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            frames.append(data)
        print ("finished recording")
    finally:
        # stop Recording — release the input stream even if a read failed.
        stream.stop_stream()
        stream.close()
finally:
    # Always shut PortAudio down, otherwise the audio device can stay locked.
    audio.terminate()

# The context manager closes the file even if writing fails.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(sample_width)
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(frames))

In [None]:
#test data
# NOTE(review): machine-specific absolute path — point this at the local copy of the
# test clip before running.
test_audio_path = r'D:\IST\FYP\dataset\test\audio\clip_0000adecb.wav'
print(test_audio_path)
# Let librosa resample to 16 kHz while loading, the same rate the training clips were
# loaded with. This replaces the separate positional resample() call to 8 kHz, which
# mismatched the training data, left `sample_rate` stale and fails on current librosa.
samples, sample_rate = librosa.load(test_audio_path, sr=16000)

In [None]:
def predict(test_audio_path):
    """Return the class name the model predicts for one input.

    Args:
        test_audio_path: despite its name, this is the model input itself, not a file
            path. Either a batch the model accepts as-is, or a single 3-D NumPy
            example, which gets a leading batch axis added.

    Returns:
        The entry of the notebook-level ``classes`` list with the highest predicted
        probability for the first example of the batch.
    """
    batch = test_audio_path
    # model.predict works on batches; a lone (height, width, channels) example needs
    # a leading batch axis.
    if isinstance(batch, np.ndarray) and batch.ndim == 3:
        batch = batch[np.newaxis, ...]
    prob = model.predict(batch)
    index = np.argmax(prob[0])
    return classes[index]

import random
# Pick a random validation example and compare its true label with the prediction.
index=random.randint(0,len(x_val)-1)
# Keep the example's original shape: flattening it with ravel() produces an input the
# model cannot accept.
samples=x_val[index]
print("Audio:",classes[np.argmax(y_val[index])])
# x_val holds spectrogram images, not waveforms, so there is nothing to play back here
# (the former ipd.Audio call was built from spectrogram values and never displayed).
# The explicit batch axis gives model.predict a batch of one.
print("Text:",predict(samples[np.newaxis, ...]))

In [None]:

# Load the test clip at 16 kHz, the rate the training clips were loaded with.
# (`testwav[]` was a syntax error.)
testwav, testsr = librosa.load(test_audio_path, sr=16000)
        

In [None]:
# The network takes (batch, 161, 99, 3) log-spectrogram images of 16000-sample clips at
# 16 kHz, so the raw waveform must go through the same preparation before predict().
if testsr != 16000:
    testwav = librosa.resample(testwav, orig_sr=testsr, target_sr=16000)
    testsr = 16000
# Force exactly 16000 samples: trim long clips, zero-pad short ones at the front
# (the same side pad_audio pads on).
testwav = testwav[:16000]
testwav = np.pad(testwav, (16000 - len(testwav), 0), mode='constant')
# For a single clip log_specgram returns (time, freq) = (99, 161).
_test_freqs, _test_times, test_spec = log_specgram(testwav, testsr)
# Transpose to (freq, time), add batch and channel axes, replicate to 3 channels.
test_input = np.repeat(test_spec.T[np.newaxis, :, :, np.newaxis], 3, axis=-1)
test=model.predict(test_input)

In [None]:
# Print the version string of the running Python interpreter.
import sys
print(sys.version)

In [None]:
# Print the installed TensorFlow version.
import tensorflow as tf
print(tf.__version__)

In [None]:
# Print the installed Keras version.
import keras
print(keras.__version__)