In [4]:
import json

# Read the combined JSON dataset; the file handle is closed as soon as parsing is done.
with open('data/mixed_data.json', mode='r') as f:
    data = json.load(f)

# Collect the top-level keys of the dataset as a list of entry identifiers.
ids = [entry_id for entry_id in data.keys()]

In [6]:
# Display how many entry identifiers were collected from the JSON file.
len(ids)

633

In [10]:
#Importing the necessary libraries
from functions_audio_model import *
from moviepy.editor import VideoFileClip
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
from scipy.ndimage import zoom
import numpy as np
import os
import json
import librosa
import timm

In [8]:
#Sarcasm labels are read from the JSON metadata file (they are not stored in the spectrograms folder)
with open('data/mixed_data.json') as f:
    text_data = json.load(f)

#Map every entry id to its 'sarcasm' field converted to an integer
sarcasm_labels = dict(
    (entry_id, int(entry['sarcasm']))
    for entry_id, entry in text_data.items()
)

In [11]:
# Check for missing files (to make sure every label has a matching spectrogram, not taking colour channels into account)
spectrogram_dir = 'spectrograms/'

# List the directory once: calling os.listdir inside the comprehension would rescan the
# folder for every single label, and a set gives constant-time membership tests.
existing_files = set(os.listdir(spectrogram_dir))

# NOTE(review): the generation cell in this notebook writes '<id>_red.npy', '<id>_green.npy'
# and '<id>_blue.npy', whereas this check looks for '<id>.npy' - confirm which naming is intended.
missing_files = [label for label in sarcasm_labels if label + '.npy' not in existing_files]

print(f'Missing files: {missing_files}')
print(f'Number of missing files: {len(missing_files)}')

Missing files: ['1_60', '1_70', '1_80', '1_90', '1_105', '1_162', '1_175', '1_182', '1_213', '1_276', '1_340', '1_410', '1_427', '1_430', '1_467', '1_506', '1_507', '1_533', '1_536', '1_537', '1_672', '1_971', '1_1001', '1_1003', '1_1144', '1_1180', '1_1185', '1_1189', '1_1262', '1_1296', '1_1466', '1_1470', '1_1478', '1_1484', '1_1549', '1_1560', '1_1638', '1_1666', '1_1678', '1_1722', '1_1772', '1_1798', '1_1803', '1_1931', '1_1973', '1_1987', '1_2075', '1_2198', '1_2216', '1_2354', '1_2361', '1_2420', '1_2423', '1_2464', '1_2575', '1_2580', '1_2614', '1_2616', '1_2664', '1_2669', '1_2778', '1_2792', '1_2797', '1_2819', '1_2830', '1_2837', '1_2842', '1_2853', '1_3064', '1_3069', '1_3125', '1_3177', '1_3204', '1_3287', '1_3293', '1_3333', '1_3348', '1_3419', '1_3476', '1_3545', '1_3573', '1_3599', '1_3649', '1_3660', '1_3707', '1_3766', '1_3837', '1_3840', '1_3842', '1_4031', '1_4145', '1_4281', '1_4286', '1_4290', '1_4352', '1_4544', '1_4552', '1_4576', '1_4603', '1_4743', '1_4760', 

In [None]:
#NOT TO BE RE-RUN
#Creation of spectrograms from the audio files: for every .wav file the waveform is padded or
#truncated to a fixed length, converted to a log-scaled STFT magnitude, resized to 224x224,
#colour-mapped, and saved as one .npy array per colour channel.


#Directory containing audio files
audio_dir = 'audio_files/'

#Directory to save spectrograms
spectrogram_dir = 'spectrograms/'

#np.save does not create missing directories, so make sure the output folder exists
os.makedirs(spectrogram_dir, exist_ok=True)

#Length of the maximum waveform found in the dataset
# 1_213.wav
max_length = 882882

#The colormap does not depend on the file being processed, so look it up once
#NOTE(review): `plt` is not imported in the visible cells; presumably it comes from the star
#import of functions_audio_model - confirm
viridis = plt.get_cmap('viridis')


for file_name in os.listdir(audio_dir):
    if file_name.endswith('.wav'):
        file_path = os.path.join(audio_dir, file_name)
        #librosa.load is called with its default arguments
        y, sr = librosa.load(file_path)

        #Zero-pad the waveform up to the maximum length, or truncate it if it is longer
        if len(y) < max_length:
            y_padded = np.pad(y, (0, max_length - len(y)), 'constant')
        else:
            y_padded = y[:max_length]

        #Compute the short-time Fourier transform
        D = librosa.stft(y_padded)

        #Convert the amplitude to decibels (logarithmic scale)
        D_log = librosa.amplitude_to_db(abs(D))

        #Resize the spectrogram to 224x224 -> wanted dimensions for the Beit model
        resize_factor_x = 224 / D_log.shape[1]
        resize_factor_y = 224 / D_log.shape[0]
        D_log_resized = zoom(D_log, (resize_factor_y, resize_factor_x))

        #Conversion to colour image
        #NOTE(review): dividing by the maximum only maps the data into [0, 1] when every dB value
        #is non-negative; negative values fall below the colormap's 0..1 input range. Left
        #unchanged on purpose so regenerated files match any produced earlier by this cell.
        D_log_resized_color = viridis(D_log_resized / np.amax(D_log_resized))

        #Remove the alpha channel of the RGBA image
        D_log_resized_color = D_log_resized_color[:, :, :3]

        #Split the RGB image into one 2-D array per colour channel
        red_channel = D_log_resized_color[:, :, 0]
        green_channel = D_log_resized_color[:, :, 1]
        blue_channel = D_log_resized_color[:, :, 2]

        #Strip only the extension: str.replace('.wav', ...) would also rewrite any other
        #'.wav' occurrence inside the file name
        stem = os.path.splitext(file_name)[0]

        #Save the spectrograms separately for each colour channel
        red_spectrogram_path = os.path.join(spectrogram_dir, stem + '_red.npy')
        green_spectrogram_path = os.path.join(spectrogram_dir, stem + '_green.npy')
        blue_spectrogram_path = os.path.join(spectrogram_dir, stem + '_blue.npy')

        np.save(red_spectrogram_path, red_channel)
        np.save(green_spectrogram_path, green_channel)
        np.save(blue_spectrogram_path, blue_channel)