In [1]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os

In [2]:
def melspec_to_rgb_image(audio_dir, image_dir, img_width=224, img_height=224, dpi=100):
    """
    Converts an audio file to a dB-scaled mel spectrogram,
    Transforms it into an RGB 3D tensor, and saves it.
    Args:
        audio_dir (str): Parent directory of audio files
        image_dir (str): Parent directory to save the output RGB image
        img_width (int): Desired width of the output image in pixels
        img_height (int): Desired height of the output image in pixels
        dpi (int): Dots per inch for figure resolution
    """

    audio_files = os.listdir(audio_dir)

    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
    
    for i in range(len(audio_files)):
        audio = audio_files[i]
        audio_path = os.path.join(audio_dir, audio)
        image_path = os.path.join(image_dir, audio.replace(".mp3", ".png"))
        # Loading the audio file
        y, sr = librosa.load(audio_path, sr=None)
    
        # Computing the mel spectrogram
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    
        # Converting to decibels (dB) scale
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    
        # Plotting the spectrogram as an image without axes or borders
        # Adjusting figure size to match desired pixel dimensions at specified DPI
        fig = plt.figure(figsize=(img_width/dpi, img_height/dpi), dpi=dpi, frameon=False)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(mel_spectrogram_db, sr=sr, ax=ax, cmap='viridis') # 'viridis' is a good colormap
        
        # Saving the plot to a file
        plt.savefig(image_path, bbox_inches='tight', pad_inches=0)
        plt.close(fig) # Close the figure to free memory
        if (i+1) == len(audio_files):
            print(f"Successfully created and saved ({len(audio_files)}) RGB images in {image_dir}")
    

In [3]:
parent_dir_audio = "data/splitted/phishing"
parent_dir_image = "data/input_features/image/phishing"
audio_dir_list = [os.path.join(parent_dir_audio, f) for f in os.listdir(parent_dir_audio) if not f.startswith('.')]
image_dir_list = [os.path.join(parent_dir_image, f) for f in os.listdir(parent_dir_audio) if not f.startswith('.')]

In [4]:
for i in range(len(audio_dir_list)):
    audio_dir = audio_dir_list[i]
    image_dir = image_dir_list[i]
    print(f"*********** Converting {i+1}/{len(audio_dir_list)} ***********")
    if audio_dir.rsplit('/', 1)[-1] == image_dir.rsplit('/', 1)[-1]:
        melspec_to_rgb_image(audio_dir, image_dir)
    else:
        print(f"Source file (audio) is different from destination file (image)")

*********** Converting 1/412 ***********
Successfully created and saved (8) RGB images in data/input_features/image/phishing/data_agency_183
*********** Converting 2/412 ***********
Successfully created and saved (8) RGB images in data/input_features/image/phishing/data_agency_86
*********** Converting 3/412 ***********
Successfully created and saved (2) RGB images in data/input_features/image/phishing/data_agency_91
*********** Converting 4/412 ***********
Successfully created and saved (2) RGB images in data/input_features/image/phishing/data_loan_177
*********** Converting 5/412 ***********
Successfully created and saved (1) RGB images in data/input_features/image/phishing/data_loan_32
*********** Converting 6/412 ***********
Successfully created and saved (2) RGB images in data/input_features/image/phishing/data_loan_130
*********** Converting 7/412 ***********
Successfully created and saved (8) RGB images in data/input_features/image/phishing/data_loan_24
*********** Converting 8/

In [5]:
parent_dir_audio = "data/splitted/nonphishing"
parent_dir_image = "data/input_features/image/nonphishing"
audio_dir_list = [os.path.join(parent_dir_audio, f) for f in os.listdir(parent_dir_audio) if not f.startswith('.')]
image_dir_list = [os.path.join(parent_dir_image, f) for f in os.listdir(parent_dir_audio) if not f.startswith('.')]

In [6]:
for i in range(len(audio_dir_list)):
    audio_dir = audio_dir_list[i]
    image_dir = image_dir_list[i]
    print(f"*********** Converting {i+1}/{len(audio_dir_list)} ***********")
    if audio_dir.rsplit('/', 1)[-1] == image_dir.rsplit('/', 1)[-1]:
        melspec_to_rgb_image(audio_dir, image_dir)
    else:
        print(f"Source file (audio) is different from destination file (image)")

*********** Converting 1/500 ***********
Successfully created and saved (4) RGB images in data/input_features/image/nonphishing/data_transaction_hist_97
*********** Converting 2/500 ***********
Successfully created and saved (3) RGB images in data/input_features/image/nonphishing/data_subscription_cancellation_53
*********** Converting 3/500 ***********
Successfully created and saved (3) RGB images in data/input_features/image/nonphishing/data_transaction_hist_2
*********** Converting 4/500 ***********
Successfully created and saved (3) RGB images in data/input_features/image/nonphishing/data_loan_service_124
*********** Converting 5/500 ***********
Successfully created and saved (3) RGB images in data/input_features/image/nonphishing/data_loan_service_144
*********** Converting 6/500 ***********
Successfully created and saved (2) RGB images in data/input_features/image/nonphishing/data_loan_service_98
*********** Converting 7/500 ***********
Successfully created and saved (3) RGB imag