In [1]:
#Importing the necessary libraries
from functions_audio_model_tiny import *
from moviepy.editor import VideoFileClip
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
from scipy.ndimage import zoom
import numpy as np
import os
import json
import librosa
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy, Mean

2024-05-20 20:23:05.037805: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import numpy as np
import torch
from torch.utils.data import Dataset
import os

class SpectrogramDataset(Dataset):
    """Dataset of RGB spectrograms stored as one ``.npy`` file per colour channel.

    Each sample ``<base>`` is expected to exist in ``spectrogram_dir`` as
    ``<base>_red.npy``, ``<base>_green.npy`` and ``<base>_blue.npy``. Samples
    are discovered through their ``_red.npy`` file.

    Args:
        spectrogram_dir: Directory containing the per-channel ``.npy`` files.
        labels: Dictionary mapping base file names (without the channel
            suffix) to labels.
    """

    # Suffix used to discover samples and to recover their base name.
    _RED_SUFFIX = '_red.npy'

    def __init__(self, spectrogram_dir, labels):
        self.spectrogram_dir = spectrogram_dir
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping stable across runs and machines, which any
        # index-based split of this dataset relies on.
        self.file_list = sorted(
            f for f in os.listdir(spectrogram_dir) if f.endswith(self._RED_SUFFIX)
        )
        self.labels = labels  #dictionary mapping base file names to labels

    def __len__(self):
        """Return the number of samples (one per ``_red.npy`` file)."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(spectrogram, label)``.

        The three channel arrays are stacked along a new last axis (axis 2),
        converted to a torch tensor and rescaled with ``(x - 0.5) * 2``.

        Raises:
            KeyError: If the sample's base name is missing from ``labels``.
        """
        # Strip only the trailing suffix: str.replace would also delete any
        # '_red.npy' occurring inside the base name itself.
        base_name = self.file_list[idx][:-len(self._RED_SUFFIX)]

        #All colour channels separately, in R, G, B order
        channels = [
            np.load(os.path.join(self.spectrogram_dir, base_name + suffix))
            for suffix in ('_red.npy', '_green.npy', '_blue.npy')
        ]

        #Stack the color channels to create RGB image (channels last)
        spectrogram = np.stack(channels, axis=2)

        #Conversion to tensor
        spectrogram = torch.from_numpy(spectrogram)

        #Normalisation: maps [0, 1] onto [-1, 1].
        # NOTE(review): assumes the stored channel values already lie in
        # [0, 1] - TODO confirm against the code that wrote the .npy files.
        spectrogram = (spectrogram - 0.5) * 2

        #Label
        label = self.labels[base_name]

        return spectrogram, label

In [3]:
#Hyperparameters
# NOTE(review): the value quoted after each setting is the bare number the
# original cell carried beneath it - presumably an earlier setting; confirm.
learning_rate = 1e-4  # optimizer step size (noted alternative: 0.001)
N_EPOCHS = 20         # number of training epochs (noted alternative: 5)
batchsize = 32        # samples per batch (noted alternative: 8)

In [4]:
#Load labels for sarcasm detection (from a different file, not part of the spectrograms folder)
# The encoding is given explicitly: open() otherwise uses the locale's
# preferred encoding, which is not UTF-8 on every platform.
with open('data/sarcasm_data.json', encoding='utf-8') as f:
    text_data = json.load(f)

# Map every top-level key of the JSON to its 'sarcasm' field, cast to int.
# Built after the file is closed, since only the parsed data is needed.
sarcasm_labels = {k: int(v['sarcasm']) for k, v in text_data.items()}

In [5]:
# Sanity check: display how many entries the label mapping contains.
len(sarcasm_labels)

690

In [6]:
#Initialisation of the dataset
dataset = SpectrogramDataset('data/spectrograms/', sarcasm_labels)

# DataLoader over the full, unsplit dataset. It is not passed to the training
# call in the cells visible here; kept in case other cells rely on the name.
dataloader = DataLoader(dataset, batch_size=batchsize, shuffle=True)


#Splitting the dataset into training and testing data

# Fixed seed so the train/test partition is the same on every run; without it
# random_split draws from the global torch RNG and results from different runs
# are evaluated on different test sets.
SPLIT_SEED = 42

train_size = int(0.8 * len(dataset))  # 80% of the data for training
test_size = len(dataset) - train_size  # 20% of the data for testing

train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

#DataLoaders
# Training batches are reshuffled every epoch (with a seeded generator so the
# order is repeatable); the test loader keeps a fixed order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batchsize,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_dataloader = DataLoader(test_dataset, batch_size=batchsize)

In [None]:
# Set seed for reproducibility (TensorFlow-side randomness only)
tf.random.set_seed(42)

# Where the trained model is written. The directory is created up front so a
# missing folder is detected before training rather than after it.
# NOTE(review): the file name mentions "tiny_bert" although the network built
# below is MobileNetV2; the path is left unchanged in case other code loads it.
model_path = 'models_weights/tiny_bert_spectrograms.h5'
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Load pre-trained MobileNetV2 (ImageNet weights) without its classification
# head, then attach a new head: global average pooling -> Dense(1024) -> softmax
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)  #Two classes for classification

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model layers so only the newly added head is trainable
for layer in base_model.layers:
    layer.trainable = False

# Create the optimizer
optimizer = Adam(learning_rate=learning_rate)

# Define the loss function (integer class labels against softmax probabilities)
loss_fn = SparseCategoricalCrossentropy()

# Define the metrics
train_acc_metric = SparseCategoricalAccuracy()
val_acc_metric = SparseCategoricalAccuracy()

# train_cycle and plot_metrics are not defined in this notebook; presumably
# they come from the wildcard import of functions_audio_model_tiny - verify.
train_accs, val_accs, train_losses, val_losses, train_f1s, val_f1s = train_cycle(model, optimizer, loss_fn, train_acc_metric, val_acc_metric, train_dataloader, test_dataloader, epochs=N_EPOCHS)
plot_metrics(train_accs, val_accs, train_losses, val_losses, train_f1s, val_f1s)

# Save the trained model. model.save stores the whole model (architecture and
# weights), not only the weights.
model.save(model_path)