In [1]:
import os
import cv2
import random
import sklearn
import librosa
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
from sklearn import preprocessing

In [None]:
# https://towardsdatascience.com/data-augmentation-techniques-for-audio-data-in-python-15505483c63c
def spec_augment2(original_melspec,
                  freq_masking_max_percentage = 0.15,
                  time_masking_max_percentage = 0.3):
    """Return a copy of a 2-D feature matrix with one column band and one row band zeroed.

    The input's shape is unpacked as ``(frames, freqs)``: axis 0 is treated as
    time frames and axis 1 as frequency bins. The "frequency mask" therefore
    zeroes a contiguous run of columns and the "time mask" a contiguous run of
    rows. The input array itself is never modified.

    NOTE(review): librosa feature matrices are usually laid out the other way
    round, (n_features, n_frames) -- confirm the layout at each call site.

    Parameters
    ----------
    original_melspec : array with a ``copy()`` method and a 2-element ``shape``
        Feature matrix to augment.
    freq_masking_max_percentage : float
        Upper bound, as a fraction of axis 1, on the width of the column band.
    time_masking_max_percentage : float
        Upper bound, as a fraction of axis 0, on the height of the row band.

    Returns
    -------
    A new array of the same shape with the two bands set to 0.

    Band widths come from ``random.uniform`` and band positions from
    ``np.random.uniform``, so both generators have to be seeded to reproduce
    a result.
    """

    def _pick_band(max_fraction, axis_len):
        # Width is drawn first (stdlib RNG), then a start position that keeps
        # the whole band inside the axis (NumPy RNG).
        width = int(random.uniform(0.0, max_fraction) * axis_len)
        start = int(np.random.uniform(low = 0.0, high = (axis_len - width)))
        return slice(start, start + width)

    masked = original_melspec.copy()
    n_frames, n_freqs = masked.shape

    # The frequency band is drawn before the time band.
    masked[:, _pick_band(freq_masking_max_percentage, n_freqs)] = 0
    masked[_pick_band(time_masking_max_percentage, n_frames), :] = 0

    return masked

In [2]:
# Number of masked (augmented) images mfcc_generate() saves for each audio file,
# on top of the unmasked one. With 0 the augmentation loop never runs.
no_of_augmented_images_tobe_formed = 0


def mfcc_generate(audio,saving_location):
  """Save the standardised MFCCs of one audio file as an image, plus optional augmented copies.

  Parameters
  ----------
  audio : path handed to ``librosa.load``; the file is read at its native
      sample rate (``sr=None``).
  saving_location : destination path for the image. It is passed to
      ``savefig`` as given; the augmented copies are written to
      ``saving_location + '--augmented--' + str(i)``.

  Returns
  -------
  None. The only results are the image files written to disk.

  The number of augmented copies is read from the module-level
  ``no_of_augmented_images_tobe_formed`` at call time.
  """
  samples, sample_rate = librosa.load(audio, sr=None)
  mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate)

  # Standardise every coefficient (row) to zero mean and unit variance over time.
  mfcc = sklearn.preprocessing.scale(mfcc, axis=1)
  _save_mfcc_image(mfcc, sample_rate, saving_location)

  # Augmentation: each pass draws new random masks and saves one more image.
  for i in range(no_of_augmented_images_tobe_formed):
    # The matrix here is (n_mfcc, n_frames) while spec_augment2 unpacks its
    # input as (frames, freqs). Transposing in and out makes the frequency
    # mask land on coefficients and the time mask on frames.
    aug_mfcc = spec_augment2(mfcc.T).T
    aug_saving_location = saving_location+'--augmented--'+str(i)
    _save_mfcc_image(aug_mfcc, sample_rate, aug_saving_location)


def _save_mfcc_image(mfcc, sample_rate, image_path):
  """Draw one MFCC matrix on its own figure, save it to ``image_path``, then free the figure."""
  # A fresh figure per image: drawing on the implicit pyplot figure would keep
  # stacking meshes onto the same axes for every file of a batch run.
  fig, ax = plt.subplots()
  try:
    librosa.display.specshow(mfcc, sr=sample_rate, ax=ax)
    fig.savefig(image_path, bbox_inches='tight', pad_inches=0)
  finally:
    # Close even when drawing or saving fails, so figures never accumulate.
    plt.close(fig)

In [3]:
# Input root: each entry directly under it is taken as a word label, and that
# word's folder is listed for the audio files to convert.
audio_folder_path = '/content/drive/MyDrive/Colab Notebooks/test2/new_dataset_2/'
# Output root: the images for each word are saved in a same-named sub-folder here.
saving_dir = '/content/drive/MyDrive/Colab Notebooks/test2/MFCC_techno&semi/'

# Echo both paths as the cell output.
audio_folder_path, saving_dir

('/content/drive/MyDrive/Colab Notebooks/test2/new_dataset_2/',
 '/content/drive/MyDrive/Colab Notebooks/test2/MFCC_techno&semi/')

In [4]:
import warnings
# NOTE(review): with no category or module given, this hides every warning from
# every library for the rest of the session, including ones raised while
# loading audio or computing features. Narrow the filter once the specific
# noisy warning is known.
warnings.filterwarnings('ignore')

In [None]:
# Walk <audio_folder_path>/<word>/<clip> and write one MFCC image per clip to
# <saving_dir>/<word>/<clip name up to its first '.'>.
word_names = os.listdir(audio_folder_path)
for word in word_names:
  audio_path = os.path.join(audio_folder_path, word)
  # os.listdir() also returns plain files; listing one of those as a folder
  # would raise NotADirectoryError and abort the whole run.
  if not os.path.isdir(audio_path):
    continue
  audio_names = os.listdir(audio_path)

  # Creates missing parents (saving_dir included) and does nothing when the
  # folder already exists.
  os.makedirs(os.path.join(saving_dir, word), exist_ok=True)

  for audio in audio_names:
    audio_file = os.path.join(audio_path, audio)
    # Everything before the first '.', so "a.b.wav" is saved as "a". The stem
    # is kept dot-free because mfcc_generate saves to it without adding an
    # extension of its own.
    stem = audio.split('.')[0]
    # Skip sub-folders and hidden entries (names starting with '.', whose
    # stem is empty): neither is an audio clip.
    if not stem or not os.path.isfile(audio_file):
      continue
    print(audio)
    saving_location = os.path.join(saving_dir, word, stem)
    mfcc_generate(audio_file, saving_location)