In [1]:
import pandas as pd
import os
import numpy as np
import librosa 
import librosa.display
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D, Flatten
from tensorflow.keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
# Training metadata; cohe() below merges it on 'id' and reads its 'label' column.
dftrain = pd.read_csv('traintwd.csv')

In [20]:
def process_audiofiles(path, duration=2.00, n_fft=2048, hop_length=512, n_frames=86):
    """
    Build a fixed-size magnitude-spectrogram tensor for the audio files in a directory.

    Each file is loaded with librosa (at most `duration` seconds), converted to
    an STFT magnitude spectrogram, and zero-padded or trimmed along the time
    axis to exactly `n_frames` columns.

    Arguments:
    path -- directory containing the audio files
    duration -- number of seconds of audio loaded from each file
    n_fft -- STFT window size, in samples
    hop_length -- STFT hop size, in samples
    n_frames -- number of time frames kept per file

    Returns:
    data -- float array of shape (n_files, 1 + n_fft // 2, n_frames); row i holds
            the i-th processed file, in the order of os.listdir(path)[1:]
    """
    # The first directory entry is skipped, exactly as cohe() does when it
    # builds the label list from the same listing.
    # NOTE(review): presumably entry 0 is a hidden non-audio file; os.listdir
    # gives no ordering guarantee, so confirm this holds on the target machine.
    audionames = os.listdir(path)[1:]

    n_bins = 1 + n_fft // 2  # number of frequency rows returned by librosa.stft
    data = np.zeros((len(audionames), n_bins, n_frames))

    # Rows are filled densely from index 0 so that row i lines up with the
    # i-th label derived from the same listing (no empty leading row).
    for row, audio in enumerate(audionames):
        file = os.path.join(path, audio)
        signal, _ = librosa.load(file, duration=duration)
        stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
        spectrogram = np.abs(stft)
        # `size` is passed by keyword: it is keyword-only in recent librosa releases.
        data[row] = librosa.util.fix_length(spectrogram, size=n_frames)
    return data
# Spectrogram tensor for the training audio; only the first 144 rows are kept,
# matching the 144 label rows reported by Y.shape.
X = process_audiofiles("TWDAUDIOS")[:144]

In [6]:
def cohe(df,path):
    """
    Build the one-hot label matrix for the audio files found in a directory.

    Arguments:
    df -- DataFrame with an 'id' column (file name without '.wav') and a 'label' column
    path -- directory whose entries (all but the first one listed) define the ids

    Returns:
    Y -- result of to_categorical on the labels of the ids present in both the
         listing and df, in listing order
    """
    # Strip the '.wav' suffix from every listed entry except the first one.
    stems = [entry.removesuffix('.wav') for entry in os.listdir(path)[1:]]
    listing = pd.DataFrame({'id': stems})
    # Default (inner) merge: ids missing from df are dropped, left order is kept.
    labelled = listing.merge(df, on='id')
    return to_categorical(labelled['label'])
# One-hot training labels for the files listed in TWDAUDIOS.
Y=cohe(dftrain,"TWDAUDIOS")

In [7]:
# Sanity check: (number of labelled files, number of one-hot columns).
Y.shape

(144, 2)

In [8]:
def modelff(input_shape):
    """
    Function creating the model's graph in Keras.

    The network is a strided Conv1D front end, two stacked GRU layers, and a
    flattened dense head that outputs a probability for each of the 2 classes.

    Argument:
    input_shape -- shape of the model's input data (using Keras conventions)

    Returns:
    model -- Keras model instance (uncompiled)
    """

    X_input = tf.keras.Input(shape = input_shape)

    # Step 1: convolutional front end.
    # Conv1D with 196 filters, kernel size 15 and stride 4 shortens the first
    # (sequence) axis before the recurrent layers.
    X = Conv1D(filters=196, kernel_size=15, strides=4)(X_input)
    X = BatchNormalization()(X)
    X = Activation("relu")(X)
    # NOTE(review): Keras `rate` is the fraction of units dropped, so 0.8 keeps
    # only 20% of the activations during training -- confirm this is intended.
    X = Dropout(rate=0.8)(X)

    # Step 2: first GRU layer (128 units, full sequence returned).
    X = GRU(units=128, return_sequences=True)(X)
    X = Dropout(rate=0.8)(X)
    X = BatchNormalization()(X)

    # Step 3: second GRU layer (128 units, full sequence returned).
    X = GRU(units=128, return_sequences=True)(X)
    X = Dropout(rate=0.8)(X)
    X = BatchNormalization()(X)

    # Step 4: classification head.
    # The whole sequence is flattened and mapped to 2 class scores. Softmax
    # makes the two outputs a probability distribution, which is what
    # categorical cross-entropy on one-hot labels expects.
    X = Flatten()(X)
    X = Dense(2, activation="softmax")(X)

    model = Model(inputs = X_input, outputs = X)

    return model

In [9]:
# Build the network for inputs of shape (1025, 86), the per-file shape produced
# by process_audiofiles, and compile it for one-hot classification.
# NOTE(review): the saved summary below reports an input of (None, 3501, 86),
# which does not match this call -- the output looks stale; re-run the cell.
model= modelff((1025, 86)) 
model.compile(loss = "categorical_crossentropy", optimizer= 'adam', metrics = ['accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3501, 86)]        0         
                                                                 
 conv1d (Conv1D)             (None, 872, 196)          253036    
                                                                 
 batch_normalization (BatchN  (None, 872, 196)         784       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 872, 196)          0         
                                                                 
 dropout (Dropout)           (None, 872, 196)          0         
                                                                 
 gru (GRU)                   (None, 872, 128)          125184    
                                                             

In [13]:
# Train for 50 epochs with mini-batches of 2 samples.
# No validation data is passed, so only training metrics are reported.
model.fit(X, Y, epochs = 50, batch_size = 2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x23cf3c56c10>

In [17]:
# Continue training the same model instance for 10 more epochs
# (fit resumes from the current weights; it does not reinitialise them).
model.fit(X, Y, epochs = 10, batch_size = 2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23c8a69b2b0>

In [18]:

# Spectrogram tensor for the test audio, built the same way as the training tensor.
Xtest = process_audiofiles("TWDtest")


In [19]:
# Read the submission template first so the number of predictions is taken
# from the file instead of a hard-coded row count.
df1 = pd.read_csv('testtwd.csv')
# Predict class scores for the first len(df1) test spectrograms and keep the
# index of the highest-scoring class for each one.
# NOTE(review): labels are assigned by row position, which assumes the rows of
# testtwd.csv are in the same order as the rows of Xtest -- confirm.
res = model.predict(Xtest[:len(df1)])
res = np.argmax(res, axis = 1)
df1['label'] = res
df1.to_csv('twd_1.csv', index=False)

In [None]:
# Display the submission frame, including the 'label' column written above.
df1