In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so files
# stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install openl3
!pip install librosa

Collecting openl3
  Downloading openl3-0.4.2.tar.gz (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting kapre>=0.3.5 (from openl3)
  Downloading kapre-0.3.7.tar.gz (26 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting resampy<0.3.0,>=0.2.1 (from openl3)
  Downloading resampy-0.2.2.tar.gz (323 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.4/323.4 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: openl3, kapre, resampy
  Building wheel for openl3 (setup.py) ... [?25l[?25hdone
  Created wheel for openl3: filename=openl3-0.4.2-py2.py3-none-any.whl size=249327032 sha256=344074772d0167314ac82008a13b866daaf80af19b3b8ce3e079668b2be56e64
  Stored in directory: /root/.cache/pip/wheels/d0/4d/0a/e57b1dc8ead91b3c5709d9de4f02

# **Import relevant libraries**
Next, we import the libraries this notebook relies on: standard modules for operating-system and file handling, and third-party libraries for loading audio files and computing audio embeddings.

In [4]:
# Import relevant libraries to work with files and with the operating system
import zipfile
import os

# Import relevant libraries for working with audio files as well as audio
# embedding
import librosa
import openl3


In [5]:
import numpy as np

# ***Extracting Audio Embeddings***
The first building block is a function that turns an audio signal into a meaningful embedding. The function **"extract_audio_embedding"** uses OpenL3 to compute an embedding from the audio signal and its sample rate, and returns it as a flattened vector. Let's explore how it's done.

In [6]:
def extract_audio_embedding(audio_data, sample_rate, content_type="music",
                            embedding_size=512):
    """
    Extract a flattened audio embedding using OpenL3.

    Parameters:
    - audio_data: The audio signal.
    - sample_rate: The sample rate of the audio signal.
    - content_type: OpenL3 model variant to use (defaults to "music", the
      value this notebook has always used).
    - embedding_size: Dimensionality requested from OpenL3 (defaults to 512,
      the value this notebook has always used).

    Returns:
    - flat_embedding: The embedding flattened to one dimension with
      `np.ravel`, or None if the inputs are missing or extraction fails.
    """

    # A failed load yields None for the signal and/or the sample rate. Stop
    # here with a clear message instead of letting OpenL3 fail internally.
    if audio_data is None or sample_rate is None:
        print("Error occurred during audio embedding extraction:",
              "audio data or sample rate is missing")
        return None

    try:
        # OpenL3 returns the embedding plus a second value (the frame
        # timestamps, per the OpenL3 API docs) that is not needed here.
        embedding, _ = openl3.get_audio_embedding(audio_data, sample_rate,
                                                  content_type=content_type,
                                                  embedding_size=embedding_size)

        # Flatten the embedding into a single 1-D vector
        flat_embedding = np.ravel(embedding)

        return flat_embedding

    except Exception as e:
        # Best-effort behaviour: report the failure and signal it with None
        # so the caller can decide how to proceed.
        print("Error occurred during audio embedding extraction:", e)
        return None

# **Loading Audio Files**
Before processing any audio data, it's crucial to load the audio files correctly. This function, load_audio, accomplishes precisely that. It takes a file path as input and returns the audio signal along with its sample rate. Let's delve into the details of this function.

In [7]:
def load_audio(file_path):
    """
    Read an audio file from disk with librosa.

    Parameters:
    - file_path: Location of the audio file to read.

    Returns:
    - A `(signal, sample_rate)` pair on success. `sr=None` is passed to
      librosa so the file's own sample rate is kept rather than resampled.
    - `(None, None)` if loading raised any exception (the error is printed).
    """
    try:
        signal, native_rate = librosa.load(file_path, sr=None)
    except Exception as err:
        # Report the problem and hand back a sentinel pair instead of raising
        print("Error occurred during audio loading:", err)
        return None, None
    else:
        return signal, native_rate

Converting an audio file into an embedding (only `.wav` files of at least 5 seconds are processed)

In [8]:

def convert_file_to_embedding(file_name, file_path, min_duration=5.0):
    """
    Convert a WAV file into a flattened OpenL3 embedding.

    Parameters:
    - file_name: Name of the file; only names ending in '.wav'
      (case-insensitive) are processed.
    - file_path: Path of the audio file to read.
    - min_duration: Minimum clip length in seconds (defaults to 5.0).

    Returns:
    - The value returned by `extract_audio_embedding` for the clip.
    - 0 if the clip is shorter than `min_duration` seconds (kept as-is for
      backward compatibility with existing callers).
    - None if the file is not a WAV file or its audio could not be loaded.
    """
    # Skip anything that is not a WAV file. Without this early return the
    # function reached `return embedding` with the variable never assigned
    # and raised UnboundLocalError.
    if not file_name.lower().endswith('.wav'):
        return None

    # `path=` replaces the deprecated `filename=` alias, which librosa warns
    # will be removed in version 1.0.
    duration = librosa.get_duration(path=file_path)
    if duration < min_duration:
        return 0

    audio_data, sample_rate = load_audio(file_path)
    if audio_data is None:
        # load_audio already reported the failure; do not try to embed None.
        return None

    return extract_audio_embedding(audio_data, sample_rate)




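Compute the embeddings of the demo audio segments stored on Google Drive. (The original note describes these as samples of one second each; since `convert_file_to_embedding` returns `0` for any file shorter than 5 seconds, the segment lengths should be verified.)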

In [9]:
# Embed the demo ad segment stored on Google Drive.
ad_demo_embedding = convert_file_to_embedding(
    "ad_segment.wav",
    "/content/drive/MyDrive/AD-Blocker Project/DEMO files/ad_segment.wav",
)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)




In [10]:
# Embed the first "pod" demo segment stored on Google Drive.
first_pod_demo_embedding = convert_file_to_embedding(
    "first_pod_segment.wav",
    "/content/drive/MyDrive/AD-Blocker Project/DEMO files/first_pod_segment.wav",
)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)




In [11]:
# Embed the second "pod" demo segment stored on Google Drive.
second_pod_demo_embedding = convert_file_to_embedding(
    "second_pod_segment.wav",
    "/content/drive/MyDrive/AD-Blocker Project/DEMO files/second_pod_segment.wav",
)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)




In [13]:
# Gather the three demo embeddings in the order pod, ad, pod
# (the `pap` suffix presumably abbreviates that ordering).
demo_embeddings_pap = [
    first_pod_demo_embedding,
    ad_demo_embedding,
    second_pod_demo_embedding,
]

In [14]:
import joblib

# Persist the list of demo embeddings to a pickle file with joblib.
# (What is saved here is the embeddings list, not a trained model.)
joblib_file = "OpenL3_for_demo.pkl"
joblib.dump(demo_embeddings_pap, joblib_file)


['OpenL3_for_demo.pkl']

In [15]:
from google.colab import files

# Pass the saved embeddings file to Colab's download helper
files.download(joblib_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>