In [1]:
#Importing the necessary libraries
from functions_audio_model_tiny import *
from moviepy.editor import VideoFileClip
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
from scipy.ndimage import zoom
import numpy as np
import os
import json
import librosa
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy, Mean

2024-05-19 18:23:59.961665: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import numpy as np
import torch
from torch.utils.data import Dataset
import os

class SpectrogramDataset(Dataset):
    def __init__(self, spectrogram_dir, labels):
        self.spectrogram_dir = spectrogram_dir
        self.file_list = [f for f in os.listdir(spectrogram_dir) if f.endswith('_red.npy')]
        self.labels = labels  #dictionary mapping base file names to labels

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        #to get the base name of the file (without the color channel)
        base_name = self.file_list[idx].replace('_red.npy', '')

        #All colour channels separately
        red_channel = np.load(os.path.join(self.spectrogram_dir, base_name + '_red.npy'))
        green_channel = np.load(os.path.join(self.spectrogram_dir, base_name + '_green.npy'))
        blue_channel = np.load(os.path.join(self.spectrogram_dir, base_name + '_blue.npy'))

        #Stack the color channels to create RGB image
        spectrogram = np.stack((red_channel, green_channel, blue_channel), axis=2)

        #Conversion to tensor
        spectrogram = torch.from_numpy(spectrogram)

        #Normalisation to range [-1, 1]
        spectrogram = (spectrogram - 0.5) * 2

        #Label
        label = self.labels[base_name]

        return spectrogram, label

In [3]:
#Hyperparameters
learning_rate = 0.0001
#0.001
N_EPOCHS = 10
#5
batchsize = 64
#8

In [4]:
#Load labels for sarcasm detection (from a different file, not part of the spectrograms folder)
with open('data/mixed_data.json') as f:
    text_data = json.load(f)
    sarcasm_labels = {k: int(v['sarcasm']) for k, v in text_data.items()}

In [5]:
#Initialisation of the dataset
dataset = SpectrogramDataset('data/spectrograms/', sarcasm_labels)

# Initialize a DataLoader
dataloader = DataLoader(dataset, batch_size=batchsize, shuffle=True)


#Splitting the dataset into training and testing data

train_size = int(0.8 * len(dataset))  # 80% of the data for training
test_size = len(dataset) - train_size  # 20% of the data for testing

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

#DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batchsize)
test_dataloader = DataLoader(test_dataset, batch_size=batchsize)

In [11]:
# Set seed for reproducibility
tf.random.set_seed(42)

# Load pre-trained MobileNetV2 model
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)  #Two classes for classification

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False

# Create the optimizer
optimizer = Adam(learning_rate=learning_rate)

# Define the loss function
loss_fn = SparseCategoricalCrossentropy()

# Define the metrics
train_acc_metric = SparseCategoricalAccuracy()
val_acc_metric = SparseCategoricalAccuracy()

# Define the training function
def train_cycle(model, optimizer, loss_fn, train_acc_metric, val_acc_metric, train_dataloader, val_dataloader, epochs):
    for epoch in range(epochs):
        print("\nStart of epoch %d" % (epoch,))

        # Iterate over the batches of the dataset.
        for step, (x_batch_train, y_batch_train) in enumerate(train_dataloader):
            with tf.GradientTape() as tape:
                logits = model(x_batch_train, training=True)
                loss_value = loss_fn(y_batch_train, logits)
            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            # Update training metric.
            train_acc_metric.update_state(y_batch_train, logits)

            # Log every 200 batches.
            if step % 200 == 0:
                print(
                    "Training loss (for one batch) at step %d: %.4f"
                    % (step, float(loss_value))
                )
                print("Seen so far: %s samples" % ((step + 1) * 64))

        # Display metrics at the end of each epoch.
        train_acc = train_acc_metric.result()
        print("Training acc over epoch: %.4f" % (float(train_acc),))

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in val_dataloader:
            val_logits = model(x_batch_val, training=False)
            # Update val metrics
            val_acc_metric.update_state(y_batch_val, val_logits)
        val_acc = val_acc_metric.result()
        print("Validation acc: %.4f" % (float(val_acc),))

# Train the mode
train_cycle(model, optimizer, loss_fn, train_acc_metric, val_acc_metric, train_dataloader, test_dataloader, epochs=N_EPOCHS)

# Save the model weights
model.save('models_weights/beit_model_spectrograms.h5')


Start of epoch 0
Training loss (for one batch) at step 0: 0.7246
Seen so far: 64 samples
Training acc over epoch: 0.5593
Validation acc: 0.7165

Start of epoch 1
Training loss (for one batch) at step 0: 0.5972
Seen so far: 64 samples
Training acc over epoch: 0.6156
Validation acc: 0.7087

Start of epoch 2
Training loss (for one batch) at step 0: 0.5609
Seen so far: 64 samples
Training acc over epoch: 0.6337
Validation acc: 0.7087

Start of epoch 3
Training loss (for one batch) at step 0: 0.5289
Seen so far: 64 samples
Training acc over epoch: 0.6517
Validation acc: 0.7087

Start of epoch 4
Training loss (for one batch) at step 0: 0.5185
Seen so far: 64 samples
Training acc over epoch: 0.6656
Validation acc: 0.7102

Start of epoch 5
Training loss (for one batch) at step 0: 0.5037
Seen so far: 64 samples
Training acc over epoch: 0.6769
Validation acc: 0.7113

Start of epoch 6
Training loss (for one batch) at step 0: 0.4914
Seen so far: 64 samples
Training acc over epoch: 0.6858
Validati



Validation acc: 0.7063
