In [4]:
import os
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import pickle
from google.colab import drive

# Mount Google Drive so the dataset folders referenced below are reachable under /content/drive
drive.mount('/content/drive')

# Paths
# NOTE: the 'Deep Learning ' folder name ends with a space before the slash; that space is
# part of every path literal below.
# CSV whose 'Filename' and 'Diagnosis' columns are used to label each WAV file
labels_path = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/labeled wavs/final_labels.csv'
# Two directories that are scanned for '.wav' files
augmented_wav_dir = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/augmented_wav_files'
initial_wav_dir = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/initial wav files'
# Destination directory for the generated spectrogram PNGs
output_dir = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/processed_melspectrograms'
# Destination of the pickled array of encoded labels written at the end of this cell
encoded_labels_path = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/encoded_labels.pkl'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Load the labels table once; get_label() reads this module-level DataFrame on every lookup
labels_df = pd.read_csv(labels_path)

# Normalise a filename so that lookups ignore case and surrounding whitespace
def sanitize_filename(filename):
    """Return ``filename`` with leading/trailing whitespace removed and lowercased.

    Only whitespace at the ends is trimmed; spaces inside the name are kept.
    """
    trimmed = filename.strip()
    return trimmed.lower()

# Look up the diagnosis recorded in labels_df for a given WAV file
def get_label(wav_file):
    """Return the 'Diagnosis' value for ``wav_file``, or ``None`` if it has no label row.

    The file's base name and the 'Filename' column of the module-level ``labels_df``
    are both stripped and lowercased before comparison. When several rows match,
    the first one is used. A warning is printed when nothing matches.
    """
    key = sanitize_filename(os.path.basename(wav_file))
    normalised_names = labels_df['Filename'].str.strip().str.lower()
    matches = labels_df.loc[normalised_names == key]

    # Guard clause: unlabeled files are reported and skipped by the caller
    if matches.shape[0] == 0:
        print(f"Warning: No label found for {key}")
        return None

    return matches['Diagnosis'].values[0]

# Function to load a WAV file and extract Mel Spectrogram
def extract_melspectrogram(wav_file):
    """Render ``wav_file`` as a 224x224 RGB log-mel spectrogram PNG in ``output_dir``.

    The audio is loaded at 4 kHz, converted to a 64-band mel power spectrogram
    (Hann window, 1024-point FFT, hop length 512), mapped to decibels, drawn with
    the 'hot' colormap without axes or margins, and finally resized to 224x224.

    Parameters
    ----------
    wav_file : str
        Path of the WAV file to process.

    Returns
    -------
    str or None
        Path of the written PNG (``output_dir`` / ``<wav base name without its
        last extension>.png``), or ``None`` when the loaded audio has no samples.

    Notes
    -----
    The output name depends only on the base name of ``wav_file``, so two inputs
    with the same base name in different directories write to the same PNG.
    """
    # Load the audio file, resampled to 4 kHz
    y, sr = librosa.load(wav_file, sr=4000)

    # Nothing to render for a file without samples
    if len(y) == 0:
        print(f"Warning: {wav_file} is empty.")
        return None

    # Hann window, FFT of 1024 points, hop length of 512, 64 mel bands
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024, hop_length=512, n_mels=64, window='hann')

    # Log-scale the power values (decibels)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    # Output path: same base name as the WAV file, with its last extension replaced by .png
    img_filename = f"{os.path.splitext(os.path.basename(wav_file))[0]}.png"
    img_path = os.path.join(output_dir, img_filename)

    # Make sure the destination directory exists
    os.makedirs(os.path.dirname(img_path), exist_ok=True)

    # Draw only the spectrogram: no axes, ticks, colorbar or surrounding whitespace
    fig = plt.figure(figsize=(10, 4))
    try:
        plt.imshow(log_mel_spectrogram, cmap='hot', aspect='auto', origin='lower')
        plt.axis('off')
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
        plt.savefig(img_path, format='png', dpi=300, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if rendering or saving fails, so that a
        # long batch run does not accumulate open figures
        plt.close(fig)

    # Matplotlib writes PNGs with an alpha channel; drop it so the stored image is
    # really 224x224x3. The context manager closes the file handle before the same
    # path is overwritten below.
    with Image.open(img_path) as img:
        rgb_pixels = np.array(img.convert('RGB'))

    # Resize with TensorFlow, then cast back to uint8 for saving as PNG
    img_resized = tf.image.resize(rgb_pixels, (224, 224))
    img_resized = tf.cast(img_resized, tf.uint8)
    Image.fromarray(img_resized.numpy()).save(img_path)

    return img_path

# Function to save the mel spectrogram images and labels
def save_melspectrograms():
    """Generate a spectrogram PNG for every labeled WAV file and return the labels.

    '.wav' files are collected from ``augmented_wav_dir`` first and then from
    ``initial_wav_dir``. Each directory listing is sorted by name so that the
    processing order, and therefore the order of the returned labels, is the
    same on every run (``os.listdir`` itself makes no ordering promise).

    Files for which ``get_label`` returns ``None`` are skipped, as are files for
    which ``extract_melspectrogram`` returns no image path.

    Returns
    -------
    numpy.ndarray
        One label per successfully processed file, in processing order.

    Notes
    -----
    The returned array carries no file names. ``extract_melspectrogram`` names
    each PNG after the WAV base name only, so two files sharing a base name
    across the two directories produce one PNG but two labels.
    """
    # Augmented files first, then the initial ones; sorted within each directory
    all_wav_files = [
        os.path.join(wav_dir, filename)
        for wav_dir in (augmented_wav_dir, initial_wav_dir)
        for filename in sorted(os.listdir(wav_dir))
        if filename.endswith('.wav')
    ]

    labels = []

    # Process all WAV files with a single progress bar
    for wav_path in tqdm(all_wav_files, desc="Processing WAV files", unit="file"):
        label = get_label(wav_path)

        if label is None:  # Skip files without labels
            continue

        # Only keep the label when an image was actually written
        if extract_melspectrogram(wav_path):
            labels.append(label)

    # Convert labels to numpy array
    return np.array(labels)

# Generate every spectrogram PNG and collect the label of each processed file.
# NOTE: this is the long-running step of the cell (one rendered image per WAV file).
labels = save_melspectrograms()

# Encode the diagnosis strings as integer class ids (this step always runs)
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Save encoded labels to a pickle file.
# NOTE(review): only the integer array is written. Neither the fitted encoder nor the
# file names are stored alongside it, so the pickle alone does not say which class or
# which image each entry belongs to.
with open(encoded_labels_path, 'wb') as f:
    pickle.dump(labels_encoded, f)

# The mel spectrogram images are saved in the 'processed_melspectrograms' directory
# and the encoded labels are saved as a pickle file at 'encoded_labels_path'.


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Processing WAV files:  91%|█████████ | 11307/12400 [1:41:32<09:23,  1.94file/s]



Processing WAV files:  91%|█████████▏| 11317/12400 [1:41:40<13:13,  1.36file/s]



Processing WAV files:  91%|█████████▏| 11319/12400 [1:41:40<10:32,  1.71file/s]



Processing WAV files:  91%|█████████▏| 11322/12400 [1:41:42<10:08,  1.77file/s]



Processing WAV files:  91%|█████████▏| 11324/12400 [1:41:43<08:54,  2.01file/s]



Processing WAV files:  91%|█████████▏| 11330/12400 [1:41:46<10:13,  1.75file/s]



Processing WAV files:  91%|█████████▏| 11335/12400 [1:41:56<26:27,  1.49s/file]



Processing WAV files:  91%|█████████▏| 11337/12400 [1:41:56<18:15,  1.03s/file]



Processing WAV files:  92%|█████████▏| 11347/12400 [1:42:03<13:20,  1.32file/s]



Processing WAV files:  92%|█████████▏| 11351/12400 [1:42:06<12:00,  1.46file/s]



Processing WAV files:  92%|█████████▏| 11359/12400 [1:42:11<13:05,  1.32file/s]



Processing WAV files:  92%|█████████▏| 11362/12400 [1:42:12<08:13,  2.10file/s]



Processing WAV files:  92%|█████████▏| 11366/12400 [1:42:14<08:05,  2.13file/s]



Processing WAV files:  92%|█████████▏| 11369/12400 [1:42:15<08:25,  2.04file/s]



Processing WAV files:  92%|█████████▏| 11400/12400 [1:42:37<13:03,  1.28file/s]



Processing WAV files:  92%|█████████▏| 11414/12400 [1:42:47<12:24,  1.33file/s]



Processing WAV files:  92%|█████████▏| 11416/12400 [1:42:48<10:05,  1.62file/s]



Processing WAV files:  92%|█████████▏| 11426/12400 [1:42:55<12:27,  1.30file/s]



Processing WAV files:  92%|█████████▏| 11439/12400 [1:43:02<11:18,  1.42file/s]



Processing WAV files:  92%|█████████▏| 11444/12400 [1:43:05<10:42,  1.49file/s]



Processing WAV files:  92%|█████████▏| 11448/12400 [1:43:07<09:40,  1.64file/s]



Processing WAV files:  92%|█████████▏| 11458/12400 [1:43:14<11:27,  1.37file/s]



Processing WAV files:  92%|█████████▏| 11464/12400 [1:43:18<10:54,  1.43file/s]



Processing WAV files:  92%|█████████▏| 11467/12400 [1:43:20<09:23,  1.65file/s]



Processing WAV files:  93%|█████████▎| 11472/12400 [1:43:22<08:57,  1.73file/s]



Processing WAV files:  93%|█████████▎| 11477/12400 [1:43:25<10:13,  1.50file/s]



Processing WAV files:  93%|█████████▎| 11481/12400 [1:43:27<08:14,  1.86file/s]



Processing WAV files:  93%|█████████▎| 11492/12400 [1:43:35<11:05,  1.36file/s]



Processing WAV files:  93%|█████████▎| 11505/12400 [1:43:45<11:26,  1.30file/s]



Processing WAV files:  93%|█████████▎| 11523/12400 [1:43:58<11:54,  1.23file/s]



Processing WAV files:  93%|█████████▎| 11525/12400 [1:43:59<09:20,  1.56file/s]



Processing WAV files:  93%|█████████▎| 11537/12400 [1:44:07<11:19,  1.27file/s]



Processing WAV files:  93%|█████████▎| 11540/12400 [1:44:08<09:14,  1.55file/s]



Processing WAV files:  93%|█████████▎| 11544/12400 [1:44:11<08:57,  1.59file/s]



Processing WAV files:  93%|█████████▎| 11548/12400 [1:44:13<09:02,  1.57file/s]



Processing WAV files:  93%|█████████▎| 11560/12400 [1:44:22<11:04,  1.26file/s]



Processing WAV files:  93%|█████████▎| 11569/12400 [1:44:27<09:03,  1.53file/s]



Processing WAV files:  93%|█████████▎| 11576/12400 [1:44:31<09:30,  1.44file/s]



Processing WAV files:  93%|█████████▎| 11587/12400 [1:44:38<10:18,  1.31file/s]



Processing WAV files:  94%|█████████▎| 11598/12400 [1:44:46<10:44,  1.24file/s]



Processing WAV files:  94%|█████████▎| 11604/12400 [1:44:50<09:46,  1.36file/s]



Processing WAV files:  94%|█████████▎| 11609/12400 [1:44:53<09:12,  1.43file/s]



Processing WAV files:  94%|█████████▎| 11620/12400 [1:45:00<10:44,  1.21file/s]



Processing WAV files: 100%|██████████| 12400/12400 [1:56:07<00:00,  1.78file/s]


In [23]:
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import pickle

# Define paths
output_dir = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/processed_melspectrograms'
labels_path = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/labeled wavs/final_labels.csv'
encoded_labels_path = '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/labeled wavs/encoded_labels.pkl'

# Load the labels CSV
labels_df = pd.read_csv(labels_path)

# Drop the last extension so label names are comparable with the PNG stems
labels_df['Filename'] = labels_df['Filename'].apply(lambda x: os.path.splitext(x)[0])

# List of all PNG files in the output directory, and their stems
png_files = [f for f in os.listdir(output_dir) if f.endswith('.png')]
png_filenames = [os.path.splitext(f)[0] for f in png_files]

# Match the filenames between the labels and the PNG files.
# The PNGs were produced for WAV files whose names matched a label row after
# strip() + lower(), so the same normalisation is applied here. An exact
# comparison would silently drop rows that differ only in case or surrounding
# whitespace even though their PNG exists.
png_stem_by_key = {stem.strip().lower(): stem for stem in png_filenames}
label_keys = labels_df['Filename'].str.strip().str.lower()
matched = label_keys.isin(list(png_stem_by_key))
merged_df = labels_df[matched].copy()

# Rows whose name already equals a PNG stem keep it unchanged; the remaining
# matches are given the on-disk PNG stem so 'Filename' always points at a real file.
exact = merged_df['Filename'].isin(png_filenames)
merged_df['Filename'] = merged_df['Filename'].where(exact, label_keys[matched].map(png_stem_by_key))

# Create the spectrograms_labeled_df
spectrograms_labeled_df = merged_df[['ID', 'Filename', 'Diagnosis']]

# Encode the diagnosis strings as integer class ids, in the row order of spectrograms_labeled_df
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(spectrograms_labeled_df['Diagnosis'])


# Save the encoded labels to a pickle file
with open(encoded_labels_path, 'wb') as f:
    pickle.dump(labels_encoded, f)

# Display the DataFrame
spectrograms_labeled_df


Unnamed: 0,ID,Filename,Diagnosis
0,0,101_1b1_Al_sc_Meditron,URTI
1,1,101_1b1_Pr_sc_Meditron,URTI
2,2,102_1b1_Ar_sc_Meditron,Healthy
3,3,103_2b2_Ar_mc_LittC2SE,Asthma
4,4,104_1b1_Al_sc_Litt3200,COPD
...,...,...,...
12338,12338,164_1b1_Ll_sc_Meditron_snippet_4.wav_stretch_0.4,URTI
12339,12339,164_1b1_Ll_sc_Meditron_snippet_4.wav_stretch_0.17,URTI
12340,12340,164_1b1_Ll_sc_Meditron_snippet_4.wav_pitch_-2,URTI
12341,12341,164_1b1_Ll_sc_Meditron_snippet_4.wav_pitch_1,URTI


In [24]:
# Write the labeled-spectrogram table to Drive as CSV, without the row index
spectrograms_labeled_df.to_csv(
    '/content/drive/MyDrive/Data Science AUEB/Deep Learning /datasets/labeled wavs/spectrograms_labeled.csv',
    index=False,
)
