In [1]:
import os
import librosa
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
def concatenate_numpy_arrays_from_folder(directory):
    """
    Load every ``.npy`` file in ``directory`` and concatenate them along axis 0.

    Files are loaded in sorted filename order. ``os.listdir`` returns entries
    in arbitrary order, so without sorting the row order of the result (and
    therefore any slice a caller takes from it) is not reproducible.

    Args:
        directory (str): The path to the directory containing the numpy array files.

    Returns:
        np.ndarray: The concatenated numpy array.

    Raises:
        ValueError: If ``directory`` contains no ``.npy`` files.
    """
    # Keep only numpy array files, in a deterministic order
    npy_filenames = sorted(
        filename for filename in os.listdir(directory) if filename.endswith('.npy')
    )

    # Fail with a message that names the folder instead of numpy's generic
    # "need at least one array to concatenate"
    if not npy_filenames:
        raise ValueError(f'No .npy files found in directory: {directory}')

    # Load each array from disk
    arrays_list = [
        np.load(os.path.join(directory, filename)) for filename in npy_filenames
    ]

    # Concatenate all numpy arrays in the list into a single array
    concatenated_array = np.concatenate(arrays_list, axis=0)  # Change axis if needed

    # Print the shape of the concatenated array
    print(f'Shape of the concatenated array: {concatenated_array.shape}')

    return concatenated_array

In [None]:
# Load the OpenL3 embeddings computed from 5-second samples.
# All six folders live under the same Drive root, so build the paths from it.
EMBEDDINGS_ROOT = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_5sec'
ADS_ROOT = os.path.join(EMBEDDINGS_ROOT, 'grouped_embeddings_ad')
PODS_ROOT = os.path.join(EMBEDDINGS_ROOT, 'grouped_embeddings_podcast')

test_ads_directory = os.path.join(ADS_ROOT, 'test_embeddings_ad')
ad_test_embeddings = concatenate_numpy_arrays_from_folder(test_ads_directory)

val_ads_directory = os.path.join(ADS_ROOT, 'val_embeddings_ad')
ad_val_embeddings = concatenate_numpy_arrays_from_folder(val_ads_directory)

train_ads_directory = os.path.join(ADS_ROOT, 'train_embeddings_ad')
ad_train_embeddings = concatenate_numpy_arrays_from_folder(train_ads_directory)


test_pods_directory = os.path.join(PODS_ROOT, 'test_embeddings_pod')
pod_test_embeddings = concatenate_numpy_arrays_from_folder(test_pods_directory)

val_pods_directory = os.path.join(PODS_ROOT, 'val_embeddings_pod')
pod_val_embeddings = concatenate_numpy_arrays_from_folder(val_pods_directory)

train_pod_directory = os.path.join(PODS_ROOT, 'train_embeddings_pod')
# Keep only as many podcast training rows as there are ad training rows.
# This was previously the hard-coded literal 507, which is the ad training row
# count in the recorded output; deriving it keeps the two in sync if the data changes.
pod_train_embeddings = concatenate_numpy_arrays_from_folder(train_pod_directory)[:ad_train_embeddings.shape[0], :]

print(pod_train_embeddings.shape)


Shape of the concatenated array: (59, 23552)
Shape of the concatenated array: (62, 23552)
Shape of the concatenated array: (507, 23552)
Shape of the concatenated array: (625, 23552)
Shape of the concatenated array: (621, 23552)
Shape of the concatenated array: (2860, 23552)
(507, 23552)


In [None]:
# for 1 second samples
# test_ads_directory = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_1sec/grouped_embeddings_ad/test_embeddings_ads'
# ad_test_embeddings = concatenate_numpy_arrays_from_folder(test_ads_directory)

# val_ads_directory = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_1sec/grouped_embeddings_ad/val_embeddings_ads'
# ad_val_embeddings = concatenate_numpy_arrays_from_folder(val_ads_directory)

# train_ads_directory = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_1sec/grouped_embeddings_ad/train_embeddings_ads'
# ad_train_embeddings = concatenate_numpy_arrays_from_folder(train_ads_directory)


# test_pods_directory = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_1sec/grouped_embeddings_podcast/test_embeddings_pods'
# pod_test_embeddings = concatenate_numpy_arrays_from_folder(test_pods_directory)

# val_pods_directory = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_1sec/grouped_embeddings_podcast/val_embeddings_pods'
# pod_val_embeddings = concatenate_numpy_arrays_from_folder(val_pods_directory)

# train_pod_directory = '/content/drive/MyDrive/AD-Blocker Project/grouped_embeddings/OpenL3_1sec/grouped_embeddings_podcast/train_embeddings_pods'
# pod_train_embeddings = concatenate_numpy_arrays_from_folder(train_pod_directory)[:2597, :]

# print(pod_train_embeddings.shape)

Shape of the concatenated array: (302, 3072)
Shape of the concatenated array: (319, 3072)
Shape of the concatenated array: (2597, 3072)
Shape of the concatenated array: (668, 3072)
Shape of the concatenated array: (1940, 3072)
Shape of the concatenated array: (6115, 3072)
(2597, 3072)


In [3]:
# # for 3 second samples
# def load_and_concatenate_npy_files(file_paths):
#     """
#     Load multiple np.array files from Google Drive and concatenate them into a single array.

#     Parameters:
#     file_paths (list of str): List of paths to the np.array files on Google Drive.

#     Returns:
#     np.array: Concatenated array.
#     """
#     # Initialize an empty list to hold arrays
#     arrays = []

#     # Iterate through the file paths, load each file, and append to the list
#     for file_path in file_paths:
#         # Load the np.array file
#         array = np.load(file_path)
#         # Append the loaded array to the list
#         arrays.append(array)

#     # Concatenate all arrays into a single array
#     concatenated_array = np.concatenate(arrays, axis=0)

#     return concatenated_array

# # Example usage:
# file_paths = [
#     '/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_pod_train_embeddings_1.npy',
#     '/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_pod_train_embeddings_2.npy',
#     '/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_pod_train_embeddings_3.npy',
#     '/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_pod_train_embeddings_4.npy']


# pod_train_embeddings =load_and_concatenate_npy_files(file_paths)[:852, :]
# ad_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_ad_train_embeddings.npy")

# pod_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_pod_val_embeddings.npy")
# ad_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_ad_val_embeddings.npy")

# pod_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_pod_test_embeddings.npy")
# ad_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/OpenL3_3sec/OpenL3_3sec_ad_test_embeddings.npy")

# print("pod_train", pod_train_embeddings.shape)
# print("ad_train", ad_train_embeddings.shape)

# print("pod_val", pod_val_embeddings.shape)
# print("ad_val", ad_val_embeddings.shape)

# print("pod_test", pod_test_embeddings.shape)
# print("ad_test", ad_test_embeddings.shape)



In [None]:
# For each split, stack the podcast rows first and the ad rows second.
train_embeddings = np.concatenate([pod_train_embeddings, ad_train_embeddings], axis=0)
val_embeddings = np.concatenate([pod_val_embeddings, ad_val_embeddings], axis=0)
test_embeddings = np.concatenate([pod_test_embeddings, ad_test_embeddings], axis=0)

In [None]:
def concatenate_zeros_and_ones(podcast_length, commercials_length):
    """
    Build an integer label vector: zeros for podcasts followed by ones for commercials.

    Parameters:
    - podcast_length: Number of leading 0 labels.
    - commercials_length: Number of trailing 1 labels.

    Returns:
    - np.ndarray of ints with podcast_length zeros followed by commercials_length ones.
    """
    # Label 0 for every podcast sample
    podcast_labels = np.full(podcast_length, 0, dtype=int)

    # Label 1 for every commercial sample
    commercial_labels = np.full(commercials_length, 1, dtype=int)

    # Podcasts first, commercials second
    return np.concatenate([podcast_labels, commercial_labels])


# Row counts of the two training classes (podcast first, ads second).
podcast_length, ads_length = (
    pod_train_embeddings.shape[0],
    ad_train_embeddings.shape[0],
)

In [None]:
# Label vectors for each split: 0 for every podcast row, then 1 for every ad row.
train_labels = concatenate_zeros_and_ones(pod_train_embeddings.shape[0], ad_train_embeddings.shape[0])
val_labels = concatenate_zeros_and_ones(pod_val_embeddings.shape[0], ad_val_embeddings.shape[0])
test_labels = concatenate_zeros_and_ones(pod_test_embeddings.shape[0], ad_test_embeddings.shape[0])

# Alias for the original misspelled name, which other cells still reference.
test_lables = test_labels

In [None]:
# Define the class weights used when training the SVM.
# Binary classification problem with labels 0 (podcast) and 1 (commercial).
# Both classes are currently given weight 1, i.e. neither class is up-weighted.
# NOTE: despite the name, this dict is keyed by class label and is passed on as
# SVC's `class_weight`; it does not hold one weight per sample.
sample_weights = {0: 1, 1: 1}

def train_svm_model(embeddings, labels, weights=None):
    """
    Fit a linear-kernel SVC on the given embeddings and labels.

    Parameters:
    - embeddings: Training feature rows passed to ``SVC.fit``.
    - labels: Target label for each row of ``embeddings``.
    - weights: Value forwarded as the SVC ``class_weight`` argument. Default is None.

    Returns:
    - model: The fitted SVC instance.
    """
    # fit() returns the estimator itself, so construction and training chain together
    return SVC(kernel='linear', class_weight=weights).fit(embeddings, labels)

In [None]:
# Fit the linear SVM on the training split, forwarding the class-weight mapping
model = train_svm_model(
    train_embeddings,
    train_labels,
    weights=sample_weights,
)

In [None]:
def validate_model(model, val_embeddings, val_labels):
    """
    Score a fitted model on the validation split and print a short report.

    Parameters:
    - model: Fitted estimator exposing ``predict``.
    - val_embeddings: Embeddings of the validation set.
    - val_labels: Labels of the validation set.

    Returns:
    - A 3-tuple ``(accuracy, predictions, val_labels)``: the accuracy on the
      validation set, the model's predictions, and the labels that were passed in.
    """
    predictions = model.predict(val_embeddings)
    accuracy = accuracy_score(val_labels, predictions)

    # Report: accuracy first, then each label array under its own heading line
    print("Validation Accuracy:", accuracy)
    print("Predicted labels:", predictions, sep="\n")
    print("Actual labels:", val_labels, sep="\n")

    return accuracy, predictions, val_labels

In [None]:
# Evaluate the fitted model on the validation split (the call prints its own report)
validation_results = validate_model(model, val_embeddings, val_labels)
validation_accuracy, validation_predicted_labels, validation_actual_labels = validation_results

# Echo the accuracy from the returned value
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 1.0
Predicted labels:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

In [None]:
def test_model(model, test_embeddings, test_labels):
    """
    Score a fitted model on the test split and print a short report.

    Parameters:
    - model: Fitted estimator exposing ``predict``.
    - test_embeddings: Embeddings of the test dataset.
    - test_labels: Labels of the test dataset.

    Returns:
    - A 3-tuple ``(accuracy, predictions, test_labels)``: the accuracy on the
      test dataset, the model's predictions, and the labels that were passed in.
    """
    predictions = model.predict(test_embeddings)
    accuracy = accuracy_score(test_labels, predictions)

    # Report: accuracy first, then each label array under its own heading line
    print("Test Accuracy:", accuracy)
    print("Predicted labels:", predictions, sep="\n")
    print("Actual labels:", test_labels, sep="\n")

    return accuracy, predictions, test_labels

In [None]:
# Evaluate the fitted model on the held-out test split (the call prints its own report)
test_results = test_model(model, test_embeddings, test_lables)
test_accuracy, test_predicted_labels, test_actual_labels = test_results

# Echo the accuracy from the returned value
print("Test Accuracy:", test_accuracy)

Test Accuracy: 1.0
Predicted labels:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

In [None]:
def calculate_confusion_matrix(actual_labels, predicted_labels):
    """
    Count the four confusion-matrix cells for binary 0/1 labels.

    Label 1 is the positive class and label 0 the negative class (in this
    notebook the label vectors use 0 for podcasts and 1 for ads). Pairs in
    which either label is neither 0 nor 1 are not counted in any cell.

    Parameters:
    actual_labels (sequence): Ground-truth labels (0 or 1).
    predicted_labels (sequence): Predicted labels (0 or 1), aligned with actual_labels.

    Returns:
    tuple: A tuple containing true positives, false positives, true negatives, and false negatives.

    Raises:
    ValueError: If both inputs have a length and the lengths differ.
    """
    from collections import Counter

    # zip() would silently drop the tail of the longer input, hiding a
    # misaligned labels/predictions pair, so reject mismatched lengths up front.
    if hasattr(actual_labels, '__len__') and hasattr(predicted_labels, '__len__'):
        if len(actual_labels) != len(predicted_labels):
            raise ValueError(
                'actual_labels and predicted_labels must have the same length, '
                f'got {len(actual_labels)} and {len(predicted_labels)}'
            )

    # Tally every (actual, predicted) pair once; missing pairs count as 0.
    pair_counts = Counter(zip(actual_labels, predicted_labels))

    true_positives = pair_counts[(1, 1)]
    false_positives = pair_counts[(0, 1)]
    true_negatives = pair_counts[(0, 0)]
    false_negatives = pair_counts[(1, 0)]

    return true_positives, false_positives, true_negatives, false_negatives

In [None]:
# Confusion-matrix counts on the test split (label 1 is the positive class)
true_positives, false_positives, true_negatives, false_negatives = calculate_confusion_matrix(test_lables, test_predicted_labels)

# Print the results
print("True Positives:", true_positives)
print("False Positives:", false_positives)
print("True Negatives:", true_negatives)
print("False Negatives:", false_negatives)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Calculate evaluation values.
# Each denominator can be zero (e.g. the model never predicts the positive
# class, or the split contains no positives); report 0.0 in that case
# instead of raising ZeroDivisionError.
precision = _safe_ratio(true_positives, true_positives + false_positives)
recall = _safe_ratio(true_positives, true_positives + false_negatives)
specificity = _safe_ratio(true_negatives, true_negatives + false_positives)
f1_score = _safe_ratio(2 * (precision * recall), precision + recall)

# Print the evaluation values
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", specificity)
print("F1 Score:", f1_score)

True Positives: 59
False Positives: 0
True Negatives: 625
False Negatives: 0
Precision: 1.0
Recall: 1.0
Specificity: 1.0
F1 Score: 1.0
