In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are reloaded
# before each cell executes and edits to local .py files take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting jedi>=0.16 (from ipython->ipython-autotime)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl (7.0 kB)
Using cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.2 jedi-0.19.1
time: 2.28 ms (started: 2024-08-19 10:31:23 +00:00)


In [None]:
!pip install essentia

Collecting essentia
  Downloading essentia-2.1b6.dev1110-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)
Downloading essentia-2.1b6.dev1110-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.7/13.7 MB[0m [31m59.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: essentia
Successfully installed essentia-2.1b6.dev1110
time: 10.8 s (started: 2024-08-19 10:31:23 +00:00)


In [None]:
# Mount Google Drive into the Colab filesystem; the data paths used in this notebook
# all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
time: 19.6 s (started: 2024-08-19 10:31:34 +00:00)


### 1. Extract Spectrograms from audio files and create dataset with computer audio and error condition labels

In [None]:
import logging
import os

# log_file_path: where per-file extraction errors are recorded.
# processed_files_log: one image filename per line for spectrograms that were already
# generated, so a later run can skip them.
log_file_path = '/content/drive/MyDrive/THESIS/thesis_data/THESIS_DATA/spectrogram_images_dataset_p9/spec_processing_errors_img.log'
processed_files_log = '/content/drive/MyDrive/THESIS/thesis_data/THESIS_DATA/spectrogram_images_dataset_p9/processed_files_img.txt'

# Attach a file handler so that logging.error(...) calls made during extraction actually
# end up in log_file_path; without this the path above is never used.
# The handler is added only once, so re-running this cell does not duplicate log lines,
# and any handlers the notebook runtime already installed are left in place.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
_root_logger = logging.getLogger()
_already_attached = any(
    isinstance(_handler, logging.FileHandler)
    and _handler.baseFilename == os.path.abspath(log_file_path)
    for _handler in _root_logger.handlers
)
if not _already_attached:
    _error_file_handler = logging.FileHandler(log_file_path)
    _error_file_handler.setLevel(logging.ERROR)
    _error_file_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    )
    _root_logger.addHandler(_error_file_handler)


time: 2.6 ms (started: 2024-08-19 10:31:54 +00:00)


In [None]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
import logging
import essentia.standard as es

def extract_spectrogram(file_path, frame_size=2048, hop_size=512):
    """Compute a spectrogram for one audio file with Essentia.

    The file is loaded with MonoLoader, cut into frames of ``frame_size`` samples
    every ``hop_size`` samples, and each frame is Hann-windowed and passed through
    Essentia's Spectrum algorithm.

    Args:
        file_path: Path of the audio file to load.
        frame_size: Number of samples per analysis frame.
        hop_size: Number of samples between the starts of consecutive frames.

    Returns:
        A NumPy array in which each column is the spectrum of one frame (the
        per-frame spectra are stacked row-wise and then transposed), or ``None``
        if anything fails; the failure is reported through ``logging.error``.
    """
    try:
        samples = es.MonoLoader(filename=file_path)()

        apply_window = es.Windowing(type='hann')
        compute_spectrum = es.Spectrum()

        frames = es.FrameGenerator(
            samples,
            frameSize=frame_size,
            hopSize=hop_size,
            startFromZero=True,
        )
        per_frame_spectra = [compute_spectrum(apply_window(frame)) for frame in frames]

        # Stacked as (frames, bins); transpose so that frames run along the second axis.
        return np.array(per_frame_spectra).T
    except Exception as e:
        # Best effort: one unreadable file must not abort a whole dataset build.
        logging.error(f"Error processing file {file_path}: {e}")
        return None

def save_spectrogram_as_image(spectrogram, output_image_path):
    """Render a spectrogram array to an image file without axes or padding.

    Args:
        spectrogram: 2-D array to draw with ``imshow``; it is drawn with the first
            row at the bottom (``origin='lower'``) using the 'viridis' colormap.
        output_image_path: Destination path handed to ``savefig``.

    Raises:
        Whatever matplotlib raises while drawing or saving. The figure is closed
        in every case, so a failure on one file does not leave an open figure
        behind when this is called in a loop over many files.
    """
    # NOTE(review): the values are plotted exactly as passed in; if these are linear
    # magnitudes, a log/dB scaling may give more informative images — confirm intent.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.imshow(spectrogram, aspect='auto', origin='lower', cmap='viridis')
        ax.axis('off')  # Remove axes
        fig.savefig(output_image_path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

def _iter_computer_clips(root_directory):
    """Yield every 'computer' .wav file stored in the clip folders under root_directory.

    The layout walked is
    ``root_directory/<phonation_mode>/<error_condition>/<phrase_type>/<clip_number>/<file>``.
    Entries that are not directories are ignored at each of the four folder levels,
    and listings are sorted so files are visited in the same order on every run.

    Yields:
        Tuples ``(phonation_mode, error_condition, phrase_type, clip_number, file_path)``.
    """
    def _subdirs(parent):
        # (name, full path) for each sub-directory of `parent`, in sorted order.
        for name in sorted(os.listdir(parent)):
            path = os.path.join(parent, name)
            if os.path.isdir(path):
                yield name, path

    for phonation_mode, phonation_mode_path in _subdirs(root_directory):
        for error_condition, error_condition_path in _subdirs(phonation_mode_path):
            for phrase_type, phrase_type_path in _subdirs(error_condition_path):
                for clip_number, clip_number_path in _subdirs(phrase_type_path):
                    for file_name in sorted(os.listdir(clip_number_path)):
                        if "computer" in file_name and file_name.endswith('.wav'):
                            yield (
                                phonation_mode,
                                error_condition,
                                phrase_type,
                                clip_number,
                                os.path.join(clip_number_path, file_name),
                            )


def create_dataset_with_spectrograms(root_directory, csv_file_path, output_image_dir):
    """Build a spectrogram-image dataset from the 'computer' recordings under root_directory.

    For every 'computer' .wav file found in a clip folder, the spectrogram is extracted,
    saved as ``<phonation_mode>_<error_condition>_<phrase_type>_<clip_number>.png`` in
    ``output_image_dir``, and a row ``[image_path, error_condition]`` is appended to the CSV.

    The run is resumable: image filenames listed in the module-level
    ``processed_files_log`` are skipped, and each newly generated image is appended to
    that log (and flushed, together with its CSV row) as soon as it is done. An
    interrupted run therefore keeps its progress instead of re-processing files and
    appending duplicate CSV rows on the next run.

    Args:
        root_directory: Top of the phonation_mode/error_condition/phrase_type/clip_number tree.
        csv_file_path: CSV to append to; a header row is written if the file is empty.
        output_image_dir: Directory that receives the PNG files (created if missing).

    Files whose spectrogram cannot be extracted are not recorded as processed, so they
    are retried on the next run. Errors for individual files are logged and skipped.
    """
    # Ensure the directory for storing images exists
    os.makedirs(output_image_dir, exist_ok=True)

    # Load the names of images generated by previous runs
    processed_files = set()
    if os.path.exists(processed_files_log):
        with open(processed_files_log, 'r') as f:
            processed_files = {line.strip() for line in f if line.strip()}

    with open(csv_file_path, mode='a', newline='') as csv_file, \
            open(processed_files_log, mode='a') as processed_log:
        writer = csv.writer(csv_file)
        if os.path.getsize(csv_file_path) == 0:
            writer.writerow(["image_path", "label"])  # Write header if the file is empty

        for (phonation_mode, error_condition, phrase_type,
             clip_number, file_path) in _iter_computer_clips(root_directory):
            image_filename = f"{phonation_mode}_{error_condition}_{phrase_type}_{clip_number}.png"

            # Skip clips whose image was already generated (in this run or an earlier one)
            if image_filename in processed_files:
                continue

            image_filepath = os.path.join(output_image_dir, image_filename)

            try:
                spectrogram = extract_spectrogram(file_path)
                if spectrogram is None:
                    # Extraction failed and was already logged; leave the clip unrecorded
                    # so that it is retried next time.
                    continue

                save_spectrogram_as_image(spectrogram, image_filepath)

                # Persist the CSV row first, then the progress marker, flushing both so
                # that an interruption right after this point loses nothing.
                writer.writerow([image_filepath, error_condition])
                csv_file.flush()
                processed_log.write(f"{image_filename}\n")
                processed_log.flush()
                processed_files.add(image_filename)
            except Exception as e:
                logging.error(f"Error processing file {file_path}: {e}")
                continue


time: 1.9 s (started: 2024-08-19 10:31:54 +00:00)


In [None]:
# Inputs and outputs of the extraction step for participant folder p_9:
# the tree of recordings to read, the folder that receives the PNG images,
# and the CSV (inside that folder) listing each image path with its label.
root_directory = "/content/drive/MyDrive/THESIS/thesis_data/THESIS_DATA/intermediate/p_9"
output_dir = "/content/drive/MyDrive/THESIS/thesis_data/THESIS_DATA/spectrogram_images_dataset_p9"
csv_file_path = "/content/drive/MyDrive/THESIS/thesis_data/THESIS_DATA/spectrogram_images_dataset_p9/spectrogram_images_dataset_p9.csv"

create_dataset_with_spectrograms(
    root_directory=root_directory,
    csv_file_path=csv_file_path,
    output_image_dir=output_dir,
)

time: 10min 58s (started: 2024-08-18 20:05:01 +00:00)


## Build and Train Model

In [None]:
# CSV with the "image_path" and "label" columns read by the training cells below.
# Same value as in the extraction section; presumably repeated so this section can be
# run without re-running the extraction cell — confirm.
csv_file_path = "/content/drive/MyDrive/THESIS/thesis_data/THESIS_DATA/spectrogram_images_dataset_p9/spectrogram_images_dataset_p9.csv"

time: 2.94 ms (started: 2024-08-19 10:31:56 +00:00)


In [None]:
import os
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.optimizers import Adam

# Parameters
image_size = (128, 128)  # Resize all images to this (height, width)
test_size = 0.2          # Fraction of the samples held out as the test set

# Load CSV file.
# The extraction step opens this CSV in append mode, so re-running it after an
# interrupted run can list the same image more than once. Keep one row per image;
# otherwise an identical spectrogram could end up in both the training and test split.
data = (
    pd.read_csv(csv_file_path)
    .drop_duplicates(subset='image_path')
    .reset_index(drop=True)
)

# Extract image paths and labels
image_paths = data['image_path'].values
labels = data['label'].values

# Encode labels: strings -> integer ids -> one-hot rows (one column per class)
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)
categorical_labels = to_categorical(encoded_labels)

# Load and preprocess images
images = []
for img_path in image_paths:
    img = load_img(img_path, target_size=image_size, color_mode='rgb')
    img_array = img_to_array(img) / 255.0  # Normalize to [0, 1]
    images.append(img_array)

images = np.array(images)

# Split the data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(images, categorical_labels, test_size=test_size, random_state=42)

time: 8min 51s (started: 2024-08-19 10:31:56 +00:00)


### Trying Keras CNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from tensorflow.keras.layers import Input

# Define the model: three Conv2D + MaxPooling stages followed by a small dense head.
# The input shape is declared with an explicit Input layer instead of passing
# `input_shape` to the first Conv2D, which is the form Keras recommends for
# Sequential models and avoids the warning emitted for the `input_shape` argument.
model = Sequential([
    Input(shape=(image_size[0], image_size[1], 3)),  # RGB images of size image_size

    # Convolutional layers
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the output and add dense layers
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(le.classes_), activation='softmax'),  # One output per encoded label class
])

# Compile the model; categorical cross-entropy matches the one-hot encoded labels
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Summary of the model
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


time: 779 ms (started: 2024-08-19 10:40:48 +00:00)


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Define early stopping.
# NOTE(review): this monitors validation accuracy, not validation loss — confirm that
# accuracy is the intended stopping criterion.
early_stopping = EarlyStopping(monitor='val_accuracy',  # Monitor validation accuracy
                               patience=10,          # Number of epochs with no improvement before stopping
                               restore_best_weights=True)  # Restore the weights of the best epoch

time: 2.91 ms (started: 2024-08-18 20:20:54 +00:00)


In [None]:
# Train the model with early stopping.
# validation_split=0.2 holds out part of X_train/y_train for validation (Keras takes it
# from the end of the arrays, before shuffling); X_test/y_test are not used here.
# epochs=200 is only an upper bound, since the early_stopping callback can end training sooner.
history = model.fit(X_train, y_train,
                    epochs=200,
                    batch_size=32,
                    validation_split=0.2,
                    callbacks=[early_stopping])

Epoch 1/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1s/step - accuracy: 0.1001 - loss: 1.9608 - val_accuracy: 0.1915 - val_loss: 1.9457
Epoch 2/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.1486 - loss: 1.9507 - val_accuracy: 0.1489 - val_loss: 1.9443
Epoch 3/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1s/step - accuracy: 0.1155 - loss: 1.9474 - val_accuracy: 0.1489 - val_loss: 1.9452
Epoch 4/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.1370 - loss: 1.9486 - val_accuracy: 0.1489 - val_loss: 1.9442
Epoch 5/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.1466 - loss: 1.9464 - val_accuracy: 0.1489 - val_loss: 1.9455
Epoch 6/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.1480 - loss: 1.9462 - val_accuracy: 0.1489 - val_loss: 1.9469
Epoch 7/200
[1m12/12[0m [32m━━━

In [None]:
# Evaluate the model on the held-out test split; evaluate() returns the loss followed
# by the metrics given to compile(), here accuracy.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}")

# Save the model (currently disabled)
# model.save('spectrogram_classification_model.h5')

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 424ms/step - accuracy: 0.1355 - loss: 1.9493
Test accuracy: 0.1356
Test loss: 1.9478
time: 2.71 s (started: 2024-08-18 20:24:20 +00:00)


Without early stopping callback

In [None]:
# Train the model WITHOUT early stopping: no callbacks are passed, so all 200 epochs run.
# NOTE(review): `model` is not rebuilt before this call, so if the early-stopping cell
# above was run first, training continues from those weights rather than from a fresh
# initialisation — confirm that is intended for this comparison.
history = model.fit(X_train, y_train,
                    epochs=200,
                    batch_size=32,
                    validation_split=0.2)

In [None]:
# Evaluate the model trained without early stopping on the same held-out test split.
# This overwrites test_loss / test_accuracy from the earlier evaluation cell.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}")