In [26]:
import os
import cv2
import random
import numpy as np
import librosa
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

In [None]:
# Code copied and edited from https://www.kaggle.com/code/davids1992/specaugment-quick-implementation

def spec_augment(original_melspec,
                 freq_masking_max_percentage = 0.08,
                 time_masking_max_percentage = 0.15,
                 rng = None):
    """Return a copy of a spectrogram with one frequency band and one time band zeroed.

    Simplified SpecAugment: a single contiguous run of frequency bins and a
    single contiguous run of frames are set to 0. The width of each run is
    drawn uniformly between 0 and the given maximum fraction of that axis, and
    its start is drawn uniformly among the positions where the run still fits.

    Parameters
    ----------
    original_melspec : numpy.ndarray
        2-D array indexed as ``[frame, frequency_bin]`` (time on axis 0).
        It is not modified. An array stored as ``[frequency_bin, frame]`` must
        be transposed before the call, otherwise the two maximum percentages
        are applied to the wrong axes.
    freq_masking_max_percentage : float
        Upper bound, as a fraction in [0, 1], on the share of frequency bins
        that may be zeroed.
    time_masking_max_percentage : float
        Upper bound, as a fraction in [0, 1], on the share of frames that may
        be zeroed.
    rng : numpy.random.Generator, optional
        When given, every random draw comes from this generator, so a seeded
        generator makes the result reproducible. When ``None`` (default) the
        draws come from the global ``random`` and ``numpy.random`` states, in
        the same order as before; both must be seeded to reproduce a result.

    Returns
    -------
    numpy.ndarray
        Masked copy with the same shape and dtype as the input.

    Raises
    ------
    ValueError
        If the input is not 2-D or a maximum percentage is outside [0, 1].
    """
    for name, value in (
        ("freq_masking_max_percentage", freq_masking_max_percentage),
        ("time_masking_max_percentage", time_masking_max_percentage),
    ):
        # Values outside [0, 1] would yield negative or oversized mask widths.
        if not 0.0 <= value <= 1.0:
            raise ValueError(f"{name} must be between 0 and 1, got {value!r}")

    augmented_melspec = original_melspec.copy()
    if augmented_melspec.ndim != 2:
        raise ValueError(
            "spec_augment expects a 2-D (frames, freqs) array, "
            f"got shape {augmented_melspec.shape}"
        )
    all_frames_num, all_freqs_num = augmented_melspec.shape

    if rng is None:
        # Legacy behaviour: widths from `random`, start offsets from `np.random`.
        draw_width = random.uniform
        draw_start = np.random.uniform
    else:
        draw_width = draw_start = rng.uniform

    # Frequency masking
    freq_percentage = draw_width(0.0, freq_masking_max_percentage)
    num_freqs_to_mask = int(freq_percentage * all_freqs_num)
    # Upper bound keeps the whole band inside the array.
    f0 = int(draw_start(0.0, all_freqs_num - num_freqs_to_mask))

    augmented_melspec[:, f0:(f0 + num_freqs_to_mask)] = 0

    # Time masking
    time_percentage = draw_width(0.0, time_masking_max_percentage)
    num_frames_to_mask = int(time_percentage * all_frames_num)
    t0 = int(draw_start(0.0, all_frames_num - num_frames_to_mask))

    augmented_melspec[t0:(t0 + num_frames_to_mask), :] = 0

    return augmented_melspec

In [None]:
# Path of the recording to analyse.
audio = '/content/intra (2).m4a'

# Decode the file with librosa's default loading arguments.
y, sr = librosa.load(audio)

# Mel spectrogram of the whole clip, computed with librosa's defaults.
mels = librosa.feature.melspectrogram(y=y, sr=sr)

# Display the spectrogram in decibels relative to its maximum value.
p = plt.imshow(
    librosa.power_to_db(mels, ref=np.max)
)

In [None]:
# spec_augment indexes its input as [frame, frequency_bin], whereas
# librosa.feature.melspectrogram returns [mel_bin, frame]. Transpose on the way
# in and back on the way out so the frequency and time mask limits hit the
# intended axes and aug_mels keeps the same layout as mels.
aug_mels = spec_augment(mels.T).T

# Display the augmented spectrogram in decibels relative to its maximum value.
p = plt.imshow(librosa.power_to_db(aug_mels, ref=np.max))