In [17]:
# from mobile_net import get_model
import librosa
import tensorflow as tf
import glob
import numpy as np
import random
import wave
import pickle

In [18]:
# Ask TensorFlow to grow GPU memory on demand rather than with a fixed
# up-front reservation.
gpus = tf.config.list_physical_devices('GPU')
# Loop instead of indexing gpus[0]: an empty list (CPU-only machine) no longer
# raises IndexError, and every visible GPU gets the same memory-growth setting,
# which TensorFlow requires when more than one GPU is present.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
tf.config.list_logical_devices('GPU')

[LogicalDevice(name='/device:GPU:0', device_type='GPU')]

In [19]:
# Glob patterns for the audio sources that are later labelled 0 (negative class).
word_path = '/shareddrive/working/data_code/data/neg_data/spcmd/all_words/original/*'
silence_path = '/shareddrive/working/data_code/data/neg_data/silence/chunks/1s_chunks/*'
noise_path = '/shareddrive/working/data_code/data/neg_data/_background_noise_/chunks/1s_chunks/*'

In [20]:
# Expand each negative-class glob pattern into a list of matching paths.
noise_files, silence_files, word_files = (
    glob.glob(pattern) for pattern in (noise_path, silence_path, word_path)
)

In [21]:
def get_duration(audio_path):
    """Return the length of a WAV file in seconds.

    The duration is the frame count reported by the ``wave`` reader divided
    by its frame rate.

    Args:
        audio_path: Anything ``wave.open`` accepts in ``'rb'`` mode.

    Returns:
        float: ``nframes / framerate``.
    """
    reader = wave.open(audio_path, 'rb')
    try:
        return reader.getnframes() / reader.getframerate()
    finally:
        # Mirrors leaving a `with` block: the reader is closed on every path.
        reader.close()

In [22]:
# Upper bound in seconds for the optional duration filter described below;
# nothing in this cell applies it at the moment.
limit = 1.5

# Each `augmented/*` entry is treated as a folder and every item inside it is
# collected. To keep only short clips, append
# `if get_duration(clip) <= limit` to the comprehensions.
adele_path = '/shareddrive/working/data_code/data/adele/augmented/*'
adele_folders = glob.glob(adele_path)
adele_files = [
    clip
    for folder in adele_folders
    for clip in glob.glob(f'{folder}/*')
]

hilfe_path = '/shareddrive/working/data_code/data/hilfe/augmented/*'
hilfe_folders = glob.glob(hilfe_path)
hilfe_files = [
    clip
    for folder in hilfe_folders
    for clip in glob.glob(f'{folder}/*')
]

In [23]:
# Sanity check: how many clips were collected for (adele, hilfe).
tuple(len(files) for files in (adele_files, hilfe_files))

(10950, 10935)

In [24]:
# Build the (path, label) table used for feature extraction.
# Labels: 1 = adele, 2 = hilfe, 0 = everything else (other words and noise).
file_path_and_labels = list()
file_path_and_labels.extend([(i,1) for i in adele_files])
file_path_and_labels.extend([(i,2) for i in hilfe_files])

# Draw roughly as many "other word" negatives as the average keyword class.
avg_files = (len(adele_files) + len(hilfe_files)) // 2
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of available word files.
num_word_samples = min(avg_files, len(word_files))
file_path_and_labels.extend([(i,0) for i in random.sample(word_files,num_word_samples)])
file_path_and_labels.extend([(i,0) for i in noise_files])
# file_path_and_labels.extend([(i,0) for i in silence_files])

# Shuffle in place so the later positional train/val/test slices mix all classes.
random.shuffle(file_path_and_labels)

In [25]:
sr = 16000       # sample rate handed to librosa when loading and featurising
max_seconds = 1  # every waveform is forced to sr * max_seconds samples


def pad_or_trunc(a, i):
    """Force a sequence to exactly ``i`` items.

    Args:
        a: Input sequence (a 1-D array in this notebook).
        i: Target length.

    Returns:
        ``a[0:i]`` when ``a`` is longer than ``i``, ``a`` itself when the
        length already matches, otherwise ``a`` zero-padded at the end via
        ``np.pad``.
    """
    current = len(a)
    if current > i:
        return a[0:i]
    if current == i:
        return a
    return np.pad(a, (0, (i - current)))

def process_data(y,sr,max_seconds):
    """Turn a waveform into fixed-size mel-spectrogram features.

    Args:
        y: Audio samples; padded or truncated with ``pad_or_trunc``.
        sr: Sample rate, also forwarded to librosa.
        max_seconds: Target clip length; the waveform is forced to
            ``sr * max_seconds`` samples.

    Returns:
        The result of ``librosa.feature.melspectrogram`` with ``n_fft=1024``
        and librosa's remaining defaults.
    """
    target_len = sr * max_seconds
    fixed_len_audio = pad_or_trunc(y, target_len)
    return librosa.feature.melspectrogram(y=fixed_len_audio, sr=sr, n_fft=1024)
    
def get_processed_data(audio_file):
    """Load one audio file and return its mel-spectrogram features.

    Uses the module-level ``sr`` and ``max_seconds`` settings: the file is
    loaded at ``sr`` and then passed through ``process_data``.

    Args:
        audio_file: Path handed to ``librosa.load``.

    Returns:
        Whatever ``process_data`` returns for the loaded waveform.
    """
    waveform = librosa.load(audio_file, sr=sr)[0]  # the second element is not needed
    return process_data(waveform, sr, max_seconds)
# Load cached features when available; otherwise compute them once and cache.
# NOTE(security): pickle.load can execute arbitrary code. Only load a cache
# file that this notebook wrote itself.
# NOTE: the cache is never invalidated. Delete 'f_and_l.pickle' after changing
# the file lists or the feature settings to force a recompute.
try:
    with open('f_and_l.pickle','rb') as f:
        features_and_labels = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    # Only a missing, unreadable or truncated/corrupt cache triggers a rebuild.
    # A bare `except:` would also swallow KeyboardInterrupt and genuine bugs.
    features_and_labels = [(get_processed_data(i),j) for i,j in file_path_and_labels]
    with open('f_and_l.pickle','wb') as f:
        pickle.dump(features_and_labels,f)

# Printed outside a `finally` so that a failure above surfaces as itself rather
# than as a NameError on an unassigned `features_and_labels`.
print(features_and_labels[0][0].shape)

(128, 32)


In [26]:


class DataSequenceRaw(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches from in-memory (features, label) pairs.

    All examples are stacked into two NumPy arrays at construction time;
    ``__getitem__`` returns contiguous slices of them.
    """

    def __init__(self,data,batch_size,**kwargs):
        """Stack ``data`` into arrays.

        Args:
            data: Non-empty sequence of ``(features, label)`` pairs.
            batch_size: Number of examples per batch.
            **kwargs: Forwarded to the Keras base class (optional).
        """
        # The Keras 3 base class expects its constructor to be called and
        # warns otherwise; with no kwargs this is also valid for older Keras.
        super().__init__(**kwargs)
        self.data_size = len(data)
        audios,labels = zip(*data)
        self.X = np.array(audios)
        self.Y = np.array(labels)
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(self.data_size / self.batch_size))

    def __getitem__(self,idx):
        """Return the ``(X, Y)`` slice for batch ``idx``; the last one may be shorter."""
        s = idx * self.batch_size
        e = (idx + 1) * self.batch_size
        X = self.X[s:e]
        Y = self.Y[s:e]
        return X,Y


In [27]:
def _get_train_val_size(total_examples,train_percent=70):
    
    train_ratio = round(train_percent)/100
    remaining_percent = 100 - train_percent
    test_percent = round(remaining_percent/3)*2
    val_percent = remaining_percent - test_percent
    val_ratio = val_percent/100
    train_size = round(total_examples * train_ratio)
    val_size = round(total_examples * val_ratio)
    return train_size,val_size

def get_data_raw(data,train_percent=70,batch_size=32):
    """Slice ``data`` positionally into train / validation / test sequences.

    Args:
        data: Sequence of ``(features, label)`` pairs. It is split in its
            given order and is not shuffled here.
        train_percent: Percentage of examples used for training.
        batch_size: Batch size for the train and validation sequences; the
            test sequence always uses a batch size of 1.

    Returns:
        tuple: ``(train, test, val)`` ``DataSequenceRaw`` objects. Note the
        order: test comes before val.
    """
    n_train, n_val = _get_train_val_size(len(data), train_percent)
    val_end = n_train + n_val

    train_part = data[:n_train]
    val_part = data[n_train:val_end]
    test_part = data[val_end:]

    train_seq = DataSequenceRaw(train_part, batch_size=batch_size)
    test_seq = DataSequenceRaw(test_part, batch_size=1)
    val_seq = DataSequenceRaw(val_part, batch_size=batch_size)
    return train_seq, test_seq, val_seq

# 80 % of the examples go to training; the sequences come back in
# (train, test, val) order.
train, test, val = get_data_raw(data=features_and_labels, train_percent=80)

In [28]:
# Feature shape of a single example: first batch -> X -> first sample.
shape = train[0][0][0].shape
# Append a trailing axis of size 1 to form the model's input shape.
input_shape = list(shape) + [1]

In [29]:
def get_model(
        input_shape,
        output_neurons=1,
        output_activation='sigmoid',
        loss=tf.keras.losses.binary_crossentropy,
        lr=0.0001
):
    """Build and compile the CNN + dense classifier.

    Architecture, in order: four 3x3 'valid' ReLU convolutions
    (512, 256, 128, 128 filters), dropout 0.5, four more convolutions
    (64, 32, 16, 16), flatten, then a dense stack 1024 -> 512 -> 64 -> 32 -> 10
    with dropout 0.5 after the 1024, 64 and 32 unit layers, and finally an
    L2-regularised output layer.

    Args:
        input_shape: Shape of one input example, without the batch axis.
        output_neurons: Units in the output layer.
        output_activation: Activation of the output layer.
        loss: Loss passed to ``model.compile``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Model`` that tracks the ``accuracy`` metric.
    """
    layers = tf.keras.layers

    def conv_relu(filters):
        # Every convolution in the network shares these settings.
        return layers.Conv2D(filters,kernel_size=3,padding='valid',activation='relu')

    _input = layers.Input(shape=input_shape)

    # Convolutional feature extractor.
    x = _input
    for filters in (512, 256, 128, 128):
        x = conv_relu(filters)(x)
    x = layers.Dropout(0.5)(x)
    for filters in (64, 32, 16, 16):
        x = conv_relu(filters)(x)
    x = layers.Flatten()(x)

    # Dense head: (units, whether a Dropout(0.5) follows).
    for units, add_dropout in ((1024, True), (512, False), (64, True), (32, True)):
        x = layers.Dense(units,activation='relu')(x)
        if add_dropout:
            x = layers.Dropout(0.5)(x)
    x = layers.Dense(10,activation='relu')(x)

    outputs = layers.Dense(
        output_neurons,
        activation=output_activation,
        kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
    )(x)
    model = tf.keras.Model(inputs=_input,outputs=outputs)

    model.compile(
        loss=loss,
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=['accuracy'],
    )

    return model

# Three-way classifier: softmax over 3 outputs, trained with sparse
# categorical cross-entropy because the labels are the integers 0, 1 and 2.
model = get_model(
    input_shape=input_shape,
    output_neurons=3,
    output_activation='softmax',
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    lr=0.001,
)



In [30]:
# Both callbacks monitor validation accuracy, where higher is better.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.1,
    patience=5,
)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=1,
    start_from_epoch=10,
    restore_best_weights=True,
)

with tf.device('/gpu'):
    history = model.fit(
        train,
        validation_data=val,
        epochs=100,
        callbacks=[reduce_lr, early_stopping],
        verbose=1,
    )

Epoch 1/100
[1m837/838[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 32ms/step - accuracy: 0.5649 - loss: 1.0464






[1m838/838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 38ms/step - accuracy: 0.5653 - loss: 1.0457 - val_accuracy: 0.8955 - val_loss: 0.3371 - learning_rate: 0.0010
Epoch 2/100
[1m838/838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 26ms/step - accuracy: 0.8967 - loss: 0.3564 - val_accuracy: 0.9532 - val_loss: 0.1746 - learning_rate: 0.0010
Epoch 3/100
[1m838/838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 26ms/step - accuracy: 0.9470 - loss: 0.1996 - val_accuracy: 0.9567 - val_loss: 0.1577 - learning_rate: 0.0010
Epoch 4/100
[1m838/838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 26ms/step - accuracy: 0.9615 - loss: 0.1525 - val_accuracy: 0.9761 - val_loss: 0.0958 - learning_rate: 0.0010
Epoch 5/100
[1m838/838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 26ms/step - accuracy: 0.9713 - loss: 0.1152 - val_accuracy: 0.9816 - val_loss: 0.0641 - learning_rate: 0.0010
Epoch 6/100
[1m838/838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [31]:
# Destination of the trained model; a plain string, since the path has no
# placeholders to interpolate.
model_path = '/shareddrive/working/model_code/models/mobile_net/trail_1/2/16k_1s_melspec-nfft-1024_a_h_cnn_dense_model.keras'
model.save(model_path)

In [32]:
# Score the held-out test sequence; the displayed list is [loss, accuracy].
model.evaluate(x=test)

[1m4688/4688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - accuracy: 0.9823 - loss: 0.0987


[0.1046655997633934, 0.9801621437072754]