# ***Establishing Connection to Google Drive***

To initiate the project, the primary step entails establishing a seamless connection to Google Drive. This connection is pivotal for accessing and utilizing the requisite files and datasets essential for the project's execution.

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the .npy embedding
# files loaded by the later cells (all under /content/drive/MyDrive) are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# ***Importing Essential Libraries***

Following the establishment of the Google Drive connection, the subsequent step involves importing the essential libraries necessary for executing the code. These libraries serve as the foundational framework, providing the functionality and tools required to implement various tasks and analyses within the project.

In [None]:
import os
import librosa
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# ***Loading Data for Processing and Testing***

In this pivotal stage, we load all pertinent data into the project environment for comprehensive processing and testing. By importing the datasets integral to our analysis, we ensure a robust foundation for conducting experiments and evaluations crucial to the project's objectives.

In [None]:

# Podcast (label 0) training embeddings. Exactly one np.load below is active;
# the commented-out lines are alternative embedding sets kept for reference.

# dast_1sec embeddings (first 2574 rows only) -- disabled
#pod_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_1sec/dast_1sec_pod_train_embeddings.npy")[:2574,:]
# dast_5sec embeddings -- active
pod_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_5sec/dast_5sec_pod_train_embeddings.npy")
# vggish_10sec embeddings (train / val / test, both classes) -- disabled
# pod_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/vggish_10sec/vggish_10sec_pod_train_embeddings.npy")
# ad_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/vggish_10sec/vggish_10sec_ad_train_embeddings.npy")

# pod_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/vggish_10sec/vggish_10sec_pod_val_embeddings.npy")
# ad_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/vggish_10sec/vggish_10sec_ad_val_embeddings.npy")

# pod_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/vggish_10sec/vggish_10sec_pod_test_embeddings.npy")
# ad_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/vggish_10sec/vggish_10sec_ad_test_embeddings.npy")

In [None]:
from sklearn.decomposition import PCA

# Define the function to reduce dimensions using PCA
def reduce_dimensions(embeddings, n_components=320, pca=None):
    """
    Project embeddings onto a lower-dimensional PCA basis.

    Parameters:
    - embeddings: 2-D array of shape (n_samples, n_features).
    - n_components: Number of principal components to keep when a new PCA is
      fitted. Ignored when `pca` is supplied.
    - pca: Optional, already fitted PCA-like object exposing `transform`. When
      given, `embeddings` are projected with it and nothing is re-fitted.

    Returns:
    - reduced_embeddings: One row per input row, expressed in the reduced space.

    Note:
    Without `pca`, every call fits a fresh projection on the data it receives,
    so the output columns of two separate calls are different features and
    must not be mixed in one model. To keep both classes and the
    train/validation/test splits in the same feature space, fit one PCA on the
    training data and pass it here as `pca` for every other matrix.
    """
    if pca is not None:
        # Reuse the caller's fitted projection so these rows land in the same
        # feature space as the data the projection was fitted on.
        return pca.transform(embeddings)

    # Legacy path: learn a new projection from `embeddings` alone.
    fresh_pca = PCA(n_components=n_components)
    return fresh_pca.fit_transform(embeddings)

In [None]:
# Show the raw podcast training matrix size before compression
print("Original embeddings shape:", pod_train_embeddings.shape)


# Apply dimensionality reduction
# NOTE(review): reduce_dimensions fits a brand-new PCA on whatever it is given,
# so this projection is learned from the podcast training rows alone. The ad,
# validation and test matrices are each reduced by their own separate fit in
# later cells, which means the 320 columns are not the same features across
# those matrices. Consider fitting one PCA on the training data and reusing it.
reduced_pod_train_embeddings = reduce_dimensions(pod_train_embeddings, n_components=320)

# Check the shape of the reduced embeddings
print("Reduced embeddings shape:", reduced_pod_train_embeddings.shape)

Original embeddings shape: (3284, 131072)
Reduced embeddings shape: (3284, 320)


In [None]:
# Keep only the PCA-reduced matrix under the working name. Rebinding drops the
# reference this name held to the full-width array.
pod_train_embeddings = reduced_pod_train_embeddings


In [None]:
# Ad / commercial (label 1) training embeddings.
# dast_1sec embeddings -- disabled
#ad_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_1sec/dast_1sec_ad_train_embeddings.npy")
# dast_5sec embeddings -- active, same set as the podcast training load
ad_train_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_5sec/dast_5sec_ad_train_embeddings.npy")

In [None]:
# Show the raw ad training matrix size before compression
print("Original embeddings shape:", ad_train_embeddings.shape)

# Apply dimensionality reduction
# NOTE(review): this call fits its own PCA on the ad rows only, independent of
# the PCA fitted on the podcast rows. The two reduced matrices therefore live
# in different 320-dimensional bases even though they are stacked into a
# single training matrix afterwards.
reduced_ad_train_embeddings = reduce_dimensions(ad_train_embeddings, n_components=320)

# Check the shape of the reduced embeddings
print("Reduced embeddings shape:", reduced_ad_train_embeddings.shape)

Original embeddings shape: (502, 131072)
Reduced embeddings shape: (502, 320)


In [None]:
# Swap the full-width ad matrix for its PCA-reduced version, then drop the
# temporary alias so only one name refers to the reduced data.
ad_train_embeddings = reduced_ad_train_embeddings
del reduced_ad_train_embeddings

In [None]:
# Sanity check: both class matrices need the same column count to be stacked.
print(pod_train_embeddings.shape, ad_train_embeddings.shape, sep="\n")

(3284, 320)
(502, 320)


In [None]:
# Stack podcast rows first, then ad rows (row-wise, i.e. along axis 0).
# The label vector built later relies on this podcast-then-ad ordering.
train_embeddings = np.concatenate([pod_train_embeddings, ad_train_embeddings])

In [None]:
def concatenate_zeros_and_ones(podcast_length, commercials_length):
    """
    Build the label vector for podcast rows followed by commercial rows.

    Parameters:
    - podcast_length: Number of leading 0 labels (podcast samples).
    - commercials_length: Number of trailing 1 labels (commercial samples).

    Returns:
    - 1-D integer array: `podcast_length` zeros followed by
      `commercials_length` ones.
    """
    # Repeat label 0 and label 1 by their respective counts in a single call
    return np.repeat([0, 1], [podcast_length, commercials_length])


# podcast_length = pod_train_embeddings.shape[0]
# ads_length = ad_train_embeddings.shape[0]


In [None]:
# Read the per-class row counts from the matrices that were stacked into
# train_embeddings instead of hard-coding them, so the labels stay aligned
# with the data if a different embedding set or slice is loaded.
train_labels = concatenate_zeros_and_ones(
    pod_train_embeddings.shape[0], ad_train_embeddings.shape[0]
)
# val_labels = concatenate_zeros_and_ones(pod_val_embeddings.shape[0], ad_val_embeddings.shape[0])
# test_labels = concatenate_zeros_and_ones(pod_test_embeddings.shape[0], ad_test_embeddings.shape[0])

# ***Training Support Vector Machine (SVM) Model***

In this phase, we train our SVM model on the data allocated to the training set. We first define a helper function that builds and fits the model. Calling this function with the training embeddings, labels, and class weights then gives us a model tailored to our specifications, which is used in the subsequent stages of our analysis.

In [None]:
# Define class weights
# Binary classification problem with labels 0 (podcast) and 1 (commercial).
# Despite its name, this dict holds per-class weights, not per-sample weights:
# it is handed to SVC through the `class_weight` argument. Podcast gets weight 1
# and commercial gets weight 6 (presumably to offset the class imbalance in the
# training data -- confirm the intended ratio).
sample_weights = {0: 1, 1: 6}

def train_svm_model(embeddings, labels, weights=None):
    """
    Fit a linear-kernel support vector classifier and hand it back.

    Parameters:
    - embeddings: Feature rows to learn from, one row per sample.
    - labels: Target label for each row of `embeddings`.
    - weights: Optional per-class weighting forwarded to SVC as `class_weight`.
      Default is None.

    Returns:
    - model: The fitted SVC instance.
    """
    # Linear decision boundary; class weighting is whatever the caller supplied
    classifier = SVC(class_weight=weights, kernel='linear')
    classifier.fit(embeddings, labels)
    return classifier


In [None]:
# from IPython import get_ipython

# ipython = get_ipython()
# ipython.magic('reset -sf')  # Resets the namespace by removing all names defined by the user


In [None]:
# Fit the classifier on the stacked training data, applying the class weights
model = train_svm_model(train_embeddings, train_labels, sample_weights)

In [None]:
# The model is fitted; remove the training matrix and labels from the namespace
del train_embeddings, train_labels

In [None]:

# Validation embeddings for both classes.
# dast_1sec embeddings -- disabled
# pod_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_1sec/dast_1sec_pod_val_embeddings.npy")
# ad_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_1sec/dast_1sec_ad_val_embeddings.npy")

# dast_5sec embeddings -- active, same set the model was trained on
pod_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_5sec/dast_5sec_pod_val_embeddings.npy")
ad_val_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_5sec/dast_5sec_ad_val_embeddings.npy")



In [None]:
# Show the raw validation matrix sizes (podcast, then ad) before compression
print("Original embeddings shape:", pod_val_embeddings.shape)
print("Original embeddings shape:", ad_val_embeddings.shape)

# Apply dimensionality reduction
# NOTE(review): each call below fits a separate PCA on validation data only.
# The resulting columns do not correspond to the columns the SVM was trained
# on, so validation scores computed from these features are not meaningful.
# The validation rows should be projected with the PCA fitted on training data.
reduced_pod_val_embeddings = reduce_dimensions(pod_val_embeddings, n_components=320)
reduced_ad_val_embeddings = reduce_dimensions(ad_val_embeddings, n_components=320)
# Check the shape of the reduced embeddings
print("Reduced embeddings shape:", reduced_pod_val_embeddings.shape)
print("Reduced embeddings shape:", reduced_ad_val_embeddings.shape)

Original embeddings shape: (2056, 131072)
Original embeddings shape: (321, 131072)
Reduced embeddings shape: (2056, 320)
Reduced embeddings shape: (321, 320)


In [None]:
# Stack reduced podcast rows first, then reduced ad rows (row-wise, axis 0),
# matching the zeros-then-ones order of the validation labels.
val_embeddings = np.concatenate([reduced_pod_val_embeddings, reduced_ad_val_embeddings])

NameError: name 'reduced_pod_val_embeddings' is not defined

In [None]:
# The per-class reduced validation matrices are no longer needed once stacked
del reduced_pod_val_embeddings, reduced_ad_val_embeddings

In [None]:

# Held-out test embeddings for both classes.
# The dast_1sec loads are disabled: their results were overwritten by the
# dast_5sec loads right below, so they only cost Drive I/O and memory.
# pod_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_1sec/dast_1sec_pod_test_embeddings.npy")
# ad_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_1sec/dast_1sec_ad_test_embeddings.npy")

# dast_5sec embeddings -- active, same set used for training and validation
pod_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_5sec/dast_5sec_pod_test_embeddings.npy")
ad_test_embeddings = np.load("/content/drive/MyDrive/AD-Blocker Project/embeddings/dast_5sec/dast_5sec_ad_test_embeddings.npy")



In [None]:
# Show the raw test matrix sizes (podcast, then ad) before compression
print("Original embeddings shape:", pod_test_embeddings.shape)
print("Original embeddings shape:", ad_test_embeddings.shape)

# Apply dimensionality reduction
# NOTE(review): each call below fits a separate PCA on test data only. The
# resulting columns do not correspond to the columns the SVM was trained on,
# so test scores computed from these features are not meaningful. The test
# rows should be projected with the PCA fitted on training data.
reduced_pod_test_embeddings = reduce_dimensions(pod_test_embeddings, n_components=320)
reduced_ad_test_embeddings = reduce_dimensions(ad_test_embeddings, n_components=320)
# Check the shape of the reduced embeddings
print("Reduced embeddings shape:", reduced_pod_test_embeddings.shape)
print("Reduced embeddings shape:", reduced_ad_test_embeddings.shape)

Original embeddings shape: (2057, 131072)
Original embeddings shape: (323, 131072)
Reduced embeddings shape: (2057, 320)
Reduced embeddings shape: (323, 320)


In [None]:
# Stack reduced podcast rows first, then reduced ad rows (row-wise, axis 0),
# matching the zeros-then-ones order of the test labels.
test_embeddings = np.concatenate([reduced_pod_test_embeddings, reduced_ad_test_embeddings])

In [None]:
# Remove the full-width validation and test matrices from the namespace;
# only their reduced, stacked versions are used from here on.
del pod_val_embeddings, ad_val_embeddings
del pod_test_embeddings, ad_test_embeddings

In [None]:
# The per-class validation arrays were deleted in earlier cells, so their row
# counts stay literal here. The test counts are read from the reduced test
# matrices, which are still in scope.
val_labels = concatenate_zeros_and_ones(2056, 321)
test_labels = concatenate_zeros_and_ones(
    reduced_pod_test_embeddings.shape[0], reduced_ad_test_embeddings.shape[0]
)

# Fail fast if the literal validation counts ever drift from the stacked matrix
if val_labels.shape[0] != val_embeddings.shape[0]:
    raise ValueError(
        "val_labels has {} entries but val_embeddings has {} rows".format(
            val_labels.shape[0], val_embeddings.shape[0]
        )
    )

# ***Model Validation and Adjustment***

In this stage, we evaluate the performance of our model on the data assigned to the validation set. This step lets us examine the model's efficacy and identify any necessary adjustments. By checking the model's predictions against the validation labels, we can iteratively refine its parameters to improve its accuracy and robustness.

In [None]:
def validate_model(model, val_embeddings, val_labels):
    """
    Score a fitted model on the validation split and echo the results.

    Parameters:
    - model: Fitted estimator exposing `predict`.
    - val_embeddings: Feature rows of the validation split.
    - val_labels: Ground-truth label for each validation row.

    Returns:
    - validation_accuracy: Fraction of validation rows predicted correctly.
    - predicted_labels: The model's prediction for each validation row.
    - actual_labels: `val_labels`, passed through unchanged.
    """
    predictions = model.predict(val_embeddings)
    score = accuracy_score(val_labels, predictions)

    # Console report: accuracy first, then both label vectors for eyeballing
    print("Validation Accuracy:", score)
    print("Predicted labels:", predictions, sep="\n")
    print("Actual labels:", val_labels, sep="\n")

    return score, predictions, val_labels

In [None]:
# Evaluate on the validation split and keep all three results for later cells
(
    validation_accuracy,
    validation_predicted_labels,
    validation_actual_labels,
) = validate_model(model, val_embeddings, val_labels)

# Echo the headline number once more (validate_model has already printed it)
print("Validation Accuracy:", validation_accuracy)

Validation Accuracy: 0.48548590660496427
Predicted labels:
[0 1 1 ... 1 0 0]
Actual labels:
[0 0 0 ... 1 1 1]
Validation Accuracy: 0.48548590660496427


# ***Final Model Evaluation with Test Data***

In this final phase, we run the trained model on the test data to evaluate its performance on examples it has not seen before. This step lets us assess how well the model generalizes beyond the training and validation stages. By comparing the model's predictions against the held-out test labels, we gain insight into its real-world applicability and overall efficacy.

In [None]:
def test_model(model, test_embeddings, test_labels):
    """
    Score a fitted model on the held-out test split and echo the results.

    Parameters:
    - model: Fitted estimator exposing `predict`.
    - test_embeddings: Feature rows of the test split.
    - test_labels: Ground-truth label for each test row.

    Returns:
    - test_accuracy: Fraction of test rows predicted correctly.
    - predicted_labels: The model's prediction for each test row.
    - actual_labels: `test_labels`, passed through unchanged.
    """
    predictions = model.predict(test_embeddings)
    score = accuracy_score(test_labels, predictions)

    # Console report: accuracy first, then both label vectors for eyeballing
    print("Test Accuracy:", score)
    print("Predicted labels:", predictions, sep="\n")
    print("Actual labels:", test_labels, sep="\n")

    return score, predictions, test_labels

In [None]:
# Evaluate on the test split and keep all three results for the metrics cell
(
    test_accuracy,
    test_predicted_labels,
    test_actual_labels,
) = test_model(model, test_embeddings, test_labels)

# Echo the headline number once more (test_model has already printed it)
print("Test Accuracy:", test_accuracy)

Test Accuracy: 0.47478991596638653
Predicted labels:
[1 0 0 ... 0 1 1]
Actual labels:
[0 0 0 ... 1 1 1]
Test Accuracy: 0.47478991596638653


# ***Presentation of Model Evaluation Metrics***

In this section, we present the evaluation metrics used to gauge the performance of our model on the test set. Starting from the confusion-matrix counts (true/false positives and negatives, with ads as the positive class), we compute precision, recall, specificity, and F1-score; accuracy was reported in the previous section. Together these metrics give a more nuanced picture of the model's strengths and limitations than accuracy alone, and they inform future iterations or enhancements.

In [None]:
# def calculate_confusion_matrix(actual_labels, predicted_labels):
#     """
#     Calculate the confusion matrix metrics.

#     Parameters:
#     - actual_labels: Actual labels.
#     - predicted_labels: Predicted labels.

#     Returns:
#     - true_positives: Number of true positives.
#     - false_positives: Number of false positives.
#     - true_negatives: Number of true negatives.
#     - false_negatives: Number of false negatives.
#     """
#     true_positives = 0
#     false_positives = 0
#     true_negatives = 0
#     false_negatives = 0

#     for actual, predicted in zip(actual_labels, predicted_labels):
#         if actual == 0:
#             if predicted == 0:
#                 true_positives += 1
#             else:
#                 false_negatives += 1
#         else:
#             if predicted == 0:
#                 false_positives += 1
#             else:
#                 true_negatives += 1

#     return true_positives, false_positives, true_negatives, false_negatives

In [None]:
def calculate_confusion_matrix(actual_labels, predicted_labels):
    """
    Count confusion-matrix cells for binary labels, with 1 as the positive class.

    Parameters:
    actual_labels (iterable): Ground-truth labels (0 for podcasts, 1 for ads).
    predicted_labels (iterable): Predicted labels, same encoding and order.

    Returns:
    tuple: (true_positives, false_positives, true_negatives, false_negatives).

    Pairs containing any value other than 0 or 1 are not counted. If the two
    inputs differ in length, the extra items of the longer one are ignored.
    """
    from collections import Counter

    # Tally every (actual, predicted) pair in one pass; absent pairs read as 0
    pair_counts = Counter(zip(actual_labels, predicted_labels))

    true_positives = pair_counts[(1, 1)]
    false_positives = pair_counts[(0, 1)]
    true_negatives = pair_counts[(0, 0)]
    false_negatives = pair_counts[(1, 0)]

    return true_positives, false_positives, true_negatives, false_negatives

In [None]:
# def calculate_confusion_matrix(actual_labels, predicted_labels):
#     tp = sum([1 for actual, predicted in zip(actual_labels, predicted_labels) if actual == 0 and predicted == 0])
#     fn = sum([1 for actual, predicted in zip(actual_labels, predicted_labels) if actual == 0 and predicted != 0])
#     fp = sum([1 for actual, predicted in zip(actual_labels, predicted_labels) if actual != 0 and predicted == 0])
#     tn = sum([1 for actual, predicted in zip(actual_labels, predicted_labels) if actual != 0 and predicted != 0])
#     return tp, fn, fp, tn

In [None]:
# Count the confusion-matrix cells on the test split (1 = ad is the positive class)
true_positives, false_positives, true_negatives, false_negatives = calculate_confusion_matrix(test_labels, test_predicted_labels)

# Print the results
print("True Positives:", true_positives)
print("False Positives:", false_positives)
print("True Negatives:", true_negatives)
print("False Negatives:", false_negatives)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Calculate evaluation values. A zero denominator (e.g. the model never predicts
# the positive class) yields 0.0 instead of raising ZeroDivisionError.
precision = _safe_ratio(true_positives, true_positives + false_positives)
recall = _safe_ratio(true_positives, true_positives + false_negatives)
specificity = _safe_ratio(true_negatives, true_negatives + false_positives)
f1_score = _safe_ratio(2 * (precision * recall), precision + recall)

# Print the evaluation values
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", specificity)
print("F1 Score:", f1_score)

True Positives: 168
False Positives: 1095
True Negatives: 962
False Negatives: 155
Precision: 0.1330166270783848
Recall: 0.5201238390092879
Specificity: 0.467671366067088
F1 Score: 0.21185372005044137
