In [None]:
import os
import numpy as np
import pandas as pd
import pydub
from scipy.fft import fft
import cv2
from scipy.io import wavfile
import scipy.signal as signal
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.applications.resnet50 import ResNet50
from sklearn.utils import shuffle

In [None]:

validated_file = 'validation_list.txt'
%matplotlib inline
# Load the validated.tsv file into a pandas DataFrame
df = pd.read_csv(validated_file, sep=" ", header=None)

labels = []
feature_vectors = []

# Loop through each row of the DataFrame
for index, row in df.iterrows():
    file_name = row[0]
    file_count = index + 1
    print(f'Processing file {file_count} of 10000: {file_name}')
    
    # Extract the path and sentence from the row
    path = file_name
    sentence = file_name.split("/")[0]
    
    # Load the audio clip and extract the FFT coefficients
    
    sampFreq, sound = wavfile.read(path)
    sound = sound / 2.0**15
    
    frequencies, times, spectrogram = signal.spectrogram(sound, fs=sampFreq, nperseg=1024, noverlap=512)

    # Convert the spectrogram to a logarithmic scale
    spectrogram = np.log(spectrogram + 1e-9)

    # Resize the spectrogram to a fixed size, if necessary
    spectrogram_resized = cv2.resize(spectrogram, (200, 200))

    labels.append(sentence)
    feature_vectors.append(spectrogram_resized)
    
# Stack the feature vectors into a single numpy array
feature_matrix = np.stack(feature_vectors)

# Save the labels and feature matrix to disk as numpy arrays
np.save('labels_specto.npy', labels)
np.save('features_specto.npy', feature_matrix)

In [None]:
# Data Processor
#
# Loads the cached spectrogram features and labels, pools the two saved sets,
# shuffles them, scales the features to [0, 1], one-hot encodes the labels,
# re-splits into train / held-out test arrays and builds Keras data generators.

# Imported here because the notebook's import cell does not provide it.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Number of shuffled samples kept for training; everything after it is held out.
TRAIN_SIZE = 15000

# Load the features and labels
features = np.load('features_specto.npy')
labels = np.load('labels_specto.npy')

test_features = np.load('features_specto_test.npy')
test_labels = np.load('labels_specto_test.npy')

# Pool both saved sets; the train/test split is re-drawn after shuffling
features = np.concatenate([features, test_features], axis=0)
labels = np.concatenate([labels, test_labels], axis=0)

# Fail early instead of training and then evaluating on an empty test split
if len(features) <= TRAIN_SIZE:
    raise ValueError(
        f'Need more than {TRAIN_SIZE} samples to leave a non-empty test split, '
        f'got {len(features)}'
    )

# Two shuffle passes are kept so the resulting sample order (and therefore the
# split below) stays the same as before.
features, labels = shuffle(features, labels, random_state=92)
features, labels = shuffle(features, labels, random_state=42)

# NOTE(review): the imputer is created but never applied in this cell.
imputer = SimpleImputer(strategy='mean')

scaler = MinMaxScaler(feature_range=(0, 1))

# Flatten to a single column so one global min/max is used for every pixel.
# NOTE(review): the min/max are computed over all samples, including the ones
# that end up in the held-out split.
features = scaler.fit_transform(features.reshape(-1, 1)).reshape(features.shape)

# Ensure float64 without forcing a second full copy when it already is
features = features.astype('float64', copy=False)

# Map each distinct label string to an integer class index
unique_labels = np.unique(labels)
label_map = {label: i for i, label in enumerate(unique_labels)}
num_classes = len(unique_labels)

# Convert labels to integers, then to one-hot vectors
labels = np.array([label_map[label] for label in labels])
labels = tf.keras.utils.to_categorical(labels, num_classes=num_classes)

# Add the trailing channel axis expected by Conv2D, then split train / test
features = features.reshape(-1, 200, 200, 1)
test_features = features[TRAIN_SIZE:]
test_labels = labels[TRAIN_SIZE:]
features = features[:TRAIN_SIZE]
labels = labels[:TRAIN_SIZE]

# Create an ImageDataGenerator object and specify the desired transformations
# NOTE(review): presumably rows are frequency bins and columns are time frames,
# so the flips reverse the frequency / time axes -- confirm this is intended.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True
)

# datagen.fit(...) is intentionally not called: it is only required for the
# feature-wise centering / std-normalization / ZCA options, none of which are
# enabled above.

# Define the batch size
batch_size = 32

# Create a generator for the training data using the ImageDataGenerator
train_generator = datagen.flow(
    features,
    labels,
    batch_size=batch_size
)

# Create a generator for the test data without data augmentation
test_generator = ImageDataGenerator().flow(
    test_features,
    test_labels,
    batch_size=batch_size
)


In [None]:
# Custom Deep CNN Network
#
# Five Conv2D -> MaxPooling2D -> Dropout stages followed by a dense
# classifier head, trained directly on the in-memory arrays.

# Stage 1 is the only stage that declares the input shape and keeps the
# default padding.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(200, 200, 1)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
])

# Stages 2-5 share one layout; only the filter count and dropout rate vary.
for stage_filters, stage_dropout in ((64, 0.2), (128, 0.5), (256, 0.2), (512, 0.2)):
    model.add(Conv2D(stage_filters, (3, 3), activation='relu', padding="same"))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(stage_dropout))

# Classifier head: flatten, one wide hidden layer, softmax over the classes
model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
cnn_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=cnn_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train; Keras holds out the last 25% of the training arrays for validation
history = model.fit(features, labels, epochs=20, validation_split=0.25)

# Evaluate the model on the held-out test arrays
test_loss, test_acc = model.evaluate(test_features, test_labels, verbose=2)
print('Test accuracy:', test_acc)


In [None]:
# Plot the per-epoch training curves stored in `history` on a single axis.
fig, ax = plt.subplots()
for metric_key, legend_text in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    ax.plot(history.history[metric_key], label=legend_text)
ax.set_xlabel('Epoch')
ax.set_ylabel('Metric')
ax.set_title("Custom Deep CNN Score")
ax.legend()
plt.show()

In [None]:
# Second model (labelled "Resnet" in this notebook)
# NOTE(review): this is a plain Sequential stack with no residual/skip
# connections; the layout (11x11 stride-4 conv, 5x5 conv, three 3x3 convs,
# two 4096-unit dense layers) resembles AlexNet rather than a ResNet.
keras_layers = tf.keras.layers

mod = tf.keras.models.Sequential()

# Large-kernel strided stem
mod.add(keras_layers.Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(200,200,1)))
mod.add(keras_layers.BatchNormalization())
mod.add(keras_layers.MaxPool2D(pool_size=(3,3), strides=(2,2)))

# 5x5 convolution stage
mod.add(keras_layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"))
mod.add(keras_layers.BatchNormalization())
mod.add(keras_layers.MaxPool2D(pool_size=(3,3), strides=(2,2)))

# Three 3x3 convolutions, each followed by batch normalization
for conv_filters in (512, 512, 256):
    mod.add(keras_layers.Conv2D(filters=conv_filters, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"))
    mod.add(keras_layers.BatchNormalization())
mod.add(keras_layers.MaxPool2D(pool_size=(3,3), strides=(2,2)))

# Dense classifier head
mod.add(keras_layers.Flatten())
mod.add(keras_layers.Dense(4096, activation='relu'))
mod.add(keras_layers.Dropout(0.5))
mod.add(keras_layers.Dense(4096, activation='relu'))
mod.add(keras_layers.Dropout(0.3))
mod.add(keras_layers.Dense(num_classes, activation='softmax'))

mod_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
mod.compile(
    optimizer=mod_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train; Keras holds out the last 15% of the training arrays for validation.
# This rebinds `history`, replacing the record from any earlier fit.
history = mod.fit(features, labels, epochs=20, validation_split=0.15)

# Evaluate the model on the held-out test arrays
test_loss, test_acc = mod.evaluate(test_features, test_labels, verbose=2)
print('Test accuracy:', test_acc)
print(num_classes)


In [None]:
# Plot the per-epoch training curves stored in `history` on a single axis.
# NOTE(review): the model defined in the preceding cell is a plain sequential
# CNN without residual connections; the title text is kept as-is.
fig, ax = plt.subplots()
for metric_key, legend_text in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    ax.plot(history.history[metric_key], label=legend_text)
ax.set_xlabel('Epoch')
ax.set_ylabel('Metric')
ax.set_title("Resnet Score")
ax.legend()
plt.show()