<a href="https://colab.research.google.com/github/pietrodileo/Python_for_MD_thesis/blob/main/CreateMelSpectrogram_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔊 **Audio Signal Visualization** 
## Easily generate different plots from audio signals stored in Google Drive

In [366]:
# Mount Google Drive under /content/drive; the dataset and output paths used
# later in this notebook live below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import libraries 📚

In [367]:
#importing the libraries
import os 
import cv2
import numpy as np
import matplotlib
#This backend of matplotlib doesn't show plots to the user, but we can save them to Google Drive
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pdb
import librosa
import librosa.display
from scipy.io import wavfile
import seaborn as sns
from pathlib import Path
import pylab
import sys
import soundfile as sf
import tensorflow as tf
!pip install audiomentations
import torch
import torchaudio
from torchaudio import transforms
import IPython.display as ipd
# Define augmentation functions
!pip install pyroomacoustics
from audiomentations import AddGaussianNoise, PitchShift, TimeStretch, RoomSimulator
from scipy import signal
from scipy.signal import butter, lfilter, freqz, filtfilt
!pip install pedalboard
from pedalboard import Pedalboard, Reverb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# ➗ Define local functions


## Control functions 
This section defines the functions that build the output path and select the analysis.

In [368]:
def outputPath(subfolder,filename,OUTPUT_DIR,destination):
  """Build the input and output locations for one audio file.

  Args:
    subfolder: Directory containing the audio file. Its stem (the last path
      component without any suffix) is used as the class name.
    filename: Name of the audio file inside `subfolder`.
    OUTPUT_DIR: Root directory of all generated images.
    destination: Sub-directory of `OUTPUT_DIR` for this kind of plot.

  Returns:
    A tuple `(file_path, destination_dir, imageName)`:
      * `file_path` is `subfolder` joined with `filename`;
      * `destination_dir` is `OUTPUT_DIR/destination/class_<subfolder stem>`;
      * `imageName` is `destination_dir` joined with the audio file name
        without its extension (callers append their own suffix and `.png`).
  """
  # Class folder, named after the folder the recording comes from
  class_dir_name = 'class_' + Path(subfolder).stem
  destination_dir = os.path.join(OUTPUT_DIR, destination, class_dir_name)

  # Image base name: same as the audio file, minus its extension
  file_path = os.path.join(subfolder, filename)
  imageName = os.path.join(destination_dir, Path(file_path).stem)

  return file_path, destination_dir, imageName

## Plot Spectrogram and Mel-Spectrogram functions



In [369]:
def MelSpectrogramPlot(y,sample_rate,save_plot,imageName,overwriteControl,CMAP):
  """Compute the dB-scaled mel-spectrogram of a signal and optionally plot it.

  Args:
    y: Audio samples, passed to `librosa.feature.melspectrogram`.
    sample_rate: Sampling rate of `y`.
    save_plot: If 1, the figure is written to `<imageName>_regular.png` and
      then closed. For any other value the figure is left open as the
      current matplotlib figure, so the caller can save it under another name.
    imageName: Output path without suffix and extension.
    overwriteControl: If 1 and `<imageName>_regular.png` already exists,
      nothing is drawn and only the spectrogram is returned.
    CMAP: Matplotlib colormap name used for the image.

  Returns:
    The mel-spectrogram converted to dB relative to its own maximum.
  """
  # n_fft is fixed at 1024; the hop length is left at librosa's default
  M = librosa.feature.melspectrogram(y=y, sr=sample_rate, n_fft=1024)
  M_db = librosa.power_to_db(M, ref=np.max)

  outputName = f'{imageName}_regular.png'
  if overwriteControl == 1 and os.path.exists(outputName):
    return M_db

  # Create exactly one figure. An additional empty figure used to be opened
  # here and was never closed, which leaked one figure per call.
  fig = plt.figure(figsize=(20, 20), frameon=False)
  librosa.display.specshow(M_db, sr=sample_rate, x_axis='time',
                           y_axis='mel',cmap=CMAP)
  #plt.colorbar()
  plt.clim(-80,0)       # fixed colour range (in dB) so all images are comparable
  plt.ylim([0, 12000])  # upper limit of the displayed frequency axis
  plt.axis('off')

  # save the plot
  if save_plot == 1:
    fig.savefig(outputName,bbox_inches='tight',pad_inches=0)
    plt.close(fig)

  return M_db

# 🏗 Data Normalization 

In [370]:
# Data normalization transform, applied to every signal before plotting.
from audiomentations import Normalize, Compose
# p=1.0 is presumably the probability of applying the transform (i.e. always);
# see the audiomentations documentation for what Normalize does exactly.
DataNormalization = Compose([Normalize(p=1.0)])  

# 🎨 Data augmentation Functions

## Data Augmentation to the Signal

In [371]:
def DataAugmentation(data,sample_rate,save_plot,imageName,overwriteControl,CMAP,result):
  """Produce every augmented image for one recording.

  First the waveform-level augmentations are plotted via
  `Signal_Augmentation`, then three masked variants of the already computed
  spectrogram `result` are plotted: frequency masking, time masking and both
  combined.

  Args:
    data: Audio samples of the recording.
    sample_rate: Sampling rate of `data`.
    save_plot: Forwarded to the plotting functions (1 means save to disk).
    imageName: Output path without suffix and extension.
    overwriteControl: Forwarded to the plotting functions (1 means skip
      images that already exist).
    CMAP: Matplotlib colormap name.
    result: Spectrogram array of the non-augmented signal.
  """
  # Arguments shared by the three spectrogram-masking plots
  plot_args = dict(sample_rate=sample_rate, save_plot=save_plot,
                   imageName=imageName, overwriteControl=overwriteControl,
                   CMAP=CMAP)

  ## 1. Augmentations applied to the waveform
  Signal_Augmentation(data, sample_rate, save_plot, imageName,
                      overwriteControl, CMAP)

  ## 2. Augmentations applied to the spectrogram
  # Frequency masking: 5 bands, width parameter 10
  FrequencyMasking(result, param=10, FreqNum=5, **plot_args)

  # Time masking: 30 bands, width parameter 80
  TimeMasking(result, param=80, TimeNum=30, **plot_args)

  # Time + frequency masking: 5 time bands and 3 frequency bands,
  # both using the same width parameter 20
  Freq_and_Time_Masking(result, param=20, TimeNum=5, FreqNum=3, **plot_args)

In [372]:
def my_GaussianNoiseAddition(data, sample_rate, noise_level=0.05, rng=None):
  """Add Gaussian noise, scaled to the signal, to an audio signal.

  The noise is drawn from a normal distribution whose mean is
  `noise_level * RMS(data)` and whose standard deviation is
  `noise_level * std(data)`.

  NOTE(review): because the mean is not zero, the noise also adds a small
  constant (DC) offset to the signal - confirm that this is intended.

  Args:
    data: 1-D NumPy array of audio samples.
    sample_rate: Sampling rate of `data`. Unused; kept so the signature
      matches the other augmentation callables.
    noise_level: Fraction of the signal RMS / standard deviation used for the
      noise mean / standard deviation. Defaults to 0.05 (5%).
    rng: Optional `numpy.random.Generator` used to draw the noise, which
      makes the result reproducible. When None, the global NumPy random
      state is used.

  Returns:
    A new array `data + noise` with the same length as `data`.
  """
  rms = np.sqrt(np.mean(data**2))
  # mean and standard deviation of the normal distribution the noise is drawn from
  noiseMean = noise_level*rms
  stdDevNoise = noise_level*np.std(data)
  # number of noise samples: one per signal sample
  noiseLen = len(data)

  sampler = np.random if rng is None else rng
  noise = sampler.normal(noiseMean,stdDevNoise,noiseLen)

  noisyData = data+noise
  return noisyData

In [373]:
def Signal_Augmentation(data,sample_rate,save_plot,imageName,overwriteControl,CMAP):
  """Plot the mel-spectrogram of four augmented versions of a signal.

  The augmentations and the suffix of the image each one produces are:
    * `_GaussianNoise`: noise added by `my_GaussianNoiseAddition`;
    * `_PitchShifted`: pitch shift of -2 semitones (min and max are both -2,
      so the shift is not random);
    * `_Stretched`: time stretch with a rate between 0.8 and 1.25, keeping
      the signal length unchanged;
    * `_LargeRoom`: reverb from a pedalboard `Reverb` with `room_size=0.25`.

  Args:
    data: Audio samples of the non-augmented signal.
    sample_rate: Sampling rate of `data`.
    save_plot: If 1, each figure is saved to `<imageName>_<suffix>.png` and
      closed; otherwise the figures are only drawn.
    imageName: Output path without suffix and extension.
    overwriteControl: If 1, augmentations whose image already exists are
      skipped entirely (the augmented signal is not even computed).
    CMAP: Matplotlib colormap name.
  """
  # shift pitch (min_semitones == max_semitones, so always -2 semitones)
  pitchShifting = Compose([PitchShift(min_semitones = -2, max_semitones = -2, p=1)])

  # stretch time
  timeStretch = Compose([TimeStretch(min_rate=0.8,max_rate=1.25,leave_length_unchanged=True,p=1.0)])

  # Make a Pedalboard object containing a reverb plugin (see pedalboard by Spotify)
  board = Pedalboard([Reverb(room_size=0.25)])

  # (file suffix, function producing the augmented signal). The signal is
  # computed lazily so that skipped images cost nothing and the random
  # draws happen in the same order as the images are produced.
  augmentations = (
    ('GaussianNoise', lambda: my_GaussianNoiseAddition(data,sample_rate)),
    ('PitchShifted', lambda: pitchShifting(data,sample_rate)),
    ('Stretched', lambda: timeStretch(data,sample_rate)),
    ('LargeRoom', lambda: board(data,sample_rate)),
  )

  for suffix, augment in augmentations:
    outputName = f'{imageName}_{suffix}.png'
    if overwriteControl == 1 and os.path.exists(outputName):
      continue

    augmented_signal = augment()
    # save_plot=0 here, or the picture would overwrite the non-augmented
    # "_regular" one; overwriteControl=0 because the check was done above.
    # The figure is left open as the current figure and saved below.
    MelSpectrogramPlot(augmented_signal,sample_rate,0,imageName, 0,CMAP)
    if save_plot == 1:
      plt.savefig(outputName,bbox_inches='tight',pad_inches=0)
      plt.close()

## Data Augmentation to the Spectrogram/Plot 🤿

### Time Masking

In [374]:
def my_time_mask(input, param, TimeNum, name=None):
    """
    Apply masking to a spectrogram in the time domain.

    `TimeNum` bands of consecutive time frames (columns) are replaced by the
    minimum value of the spectrogram. The bands accumulate: each one is
    applied on top of the previous ones.

    Args:
      input: An audio spectrogram; axis 1 is treated as the time axis.
      param: Parameter of masking: exclusive upper bound of the width (in
        frames) of each band. It is capped at the number of time frames.
      TimeNum: Number of bands to mask. With 0 the input is returned unmasked.
      name: A name for the operation (optional, unused).
    Returns:
      A tensor of spectrogram.
    """
    masked = tf.convert_to_tensor(input)
    minElement = tf.reduce_min(masked)
    # TODO: Support audio with channel > 1.
    time_max = tf.shape(masked)[1]
    # index of every time frame, shaped to broadcast over the frequency axis
    indices = tf.reshape(tf.range(time_max), (1, -1))
    # A band cannot be wider than the spectrogram. Capping the width also
    # keeps `time_max - t` positive, which tf.random.uniform requires.
    max_width = tf.minimum(param, time_max)
    for _ in range(TimeNum):
      # t is the band width, drawn uniformly from [0, max_width)
      t = tf.random.uniform(shape=(), minval=0,
                            maxval=max_width, dtype=tf.dtypes.int32)
      # t0 is the first masked frame, drawn uniformly from [0, time_max - t)
      # so that the whole band lies inside the spectrogram
      t0 = tf.random.uniform(shape=(), minval=0,
                             maxval=time_max - t, dtype=tf.dtypes.int32)
      # select all the frames in [t0, t0 + t)
      cond1 = tf.math.greater_equal(indices, t0)
      cond2 = tf.math.less(indices, t0 + t)
      condition = tf.math.logical_and(cond1, cond2)
      # mask on top of the previous bands, not on the original input
      masked = tf.where(condition, minElement, masked)
    return masked

In [375]:
def TimeMasking(result,param,TimeNum,sample_rate,save_plot,imageName,overwriteControl,CMAP):
  """Plot a time-masked version of a spectrogram and optionally save it.

  Args:
    result: Spectrogram as a NumPy array.
    param: Masking width parameter forwarded to `my_time_mask`.
    TimeNum: Number of time bands forwarded to `my_time_mask`.
    sample_rate: Sampling rate used to label the plot axes.
    save_plot: If 1, the figure is saved to `<imageName>_TimeMasked.png`
      and closed.
    imageName: Output path without suffix and extension.
    overwriteControl: If 1 and the output image already exists, nothing is done.
    CMAP: Matplotlib colormap name.
  """
  target_png = f'{imageName}_TimeMasked.png'
  skip = overwriteControl == 1 and os.path.exists(target_png)
  if not skip:
    # Time masking: the ndarray is wrapped in a torch tensor before being
    # handed to the masking function
    spec_tensor = torch.from_numpy(result)
    masked_spec = my_time_mask(spec_tensor, param, TimeNum)

    # TODO: the plot is limited to the mel-spectrogram; define data
    # augmentation also for MFCC, spectrogram, etc.
    plt.figure(figsize=(20, 20), frameon=False)
    librosa.display.specshow(masked_spec.numpy(), sr=sample_rate,
                             x_axis='time', y_axis='mel', cmap=CMAP)
    #plt.colorbar()
    plt.clim(-80, 0)      # fixed colour range, same as the non-augmented plot
    plt.ylim([0, 12000])
    plt.axis('off')

    if save_plot == 1:
      plt.savefig(target_png, bbox_inches='tight', pad_inches=0)
      plt.close()

### Frequency Masking

In [376]:
def my_freq_mask(input, param, FreqNum, name=None):
    """
    Apply masking to a spectrogram in the frequency domain.

    `FreqNum` bands of consecutive frequency bins (rows) are replaced by the
    minimum value of the spectrogram. The bands accumulate: each one is
    applied on top of the previous ones.

    Args:
      input: An audio spectrogram; axis 0 is treated as the frequency axis.
      param: Parameter of masking: exclusive upper bound of the width (in
        bins) of each band. It is capped at the number of frequency bins.
      FreqNum: Number of bands to mask. With 0 the input is returned unmasked.
      name: A name for the operation (optional, unused).
    Returns:
      A tensor of spectrogram.
    """
    # code is quite similar to my_time_mask, but works along axis 0
    masking = tf.convert_to_tensor(input)
    minElement = tf.reduce_min(masking)
    # TODO: Support audio with channel > 1.
    freq_max = tf.shape(masking)[0]
    # index of every frequency bin, shaped to broadcast over the time axis
    indices = tf.reshape(tf.range(freq_max), (-1, 1))
    # A band cannot be wider than the spectrogram. Capping the width also
    # keeps `freq_max - f` positive, which tf.random.uniform requires.
    max_width = tf.minimum(param, freq_max)
    for _ in range(FreqNum):
      # f is the band width, f0 the first masked bin
      f = tf.random.uniform(shape=(), minval=0,
                          maxval=max_width, dtype=tf.dtypes.int32)
      f0 = tf.random.uniform(shape=(), minval=0,
                          maxval=freq_max - f, dtype=tf.dtypes.int32)
      # select all the bins in [f0, f0 + f)
      cond1 = tf.math.greater_equal(indices, f0)
      cond2 = tf.math.less(indices, f0 + f)
      condition = tf.math.logical_and(cond1, cond2)
      masking = tf.where(condition, minElement, masking)
    return masking

In [377]:
def FrequencyMasking(result,param,FreqNum,sample_rate,save_plot,imageName,overwriteControl,CMAP):
  """Plot a frequency-masked version of a spectrogram and optionally save it.

  Args:
    result: Spectrogram as a NumPy array.
    param: Masking width parameter forwarded to `my_freq_mask`.
    FreqNum: Number of frequency bands forwarded to `my_freq_mask`.
    sample_rate: Sampling rate used to label the plot axes.
    save_plot: If 1, the figure is saved to `<imageName>_FreqMasked.png`
      and closed.
    imageName: Output path without suffix and extension.
    overwriteControl: If 1 and the output image already exists, nothing is done.
    CMAP: Matplotlib colormap name.
  """
  target_png = f'{imageName}_FreqMasked.png'
  skip = overwriteControl == 1 and os.path.exists(target_png)
  if not skip:
    # Frequency masking: the ndarray is wrapped in a torch tensor before
    # being handed to the masking function
    spec_tensor = torch.from_numpy(result)
    masked_spec = my_freq_mask(spec_tensor, param, FreqNum)

    plt.figure(figsize=(20, 20), frameon=False)
    librosa.display.specshow(masked_spec.numpy(), sr=sample_rate,
                             x_axis='time', y_axis='mel', cmap=CMAP)
    #plt.colorbar()
    plt.clim(-80, 0)      # fixed colour range, same as the non-augmented plot
    plt.ylim([0, 12000])
    plt.axis('off')

    if save_plot == 1:
      plt.savefig(target_png, bbox_inches='tight', pad_inches=0)
      plt.close()

### Time + Frequency Masking 

In [378]:
def Freq_and_Time_Masking(result,param,TimeNum,FreqNum,sample_rate,save_plot,imageName,overwriteControl,CMAP):
  """Plot a spectrogram masked in both time and frequency, optionally saving it.

  Time masking is applied first and frequency masking is applied to its
  output. The same `param` is used as width parameter for both.

  Args:
    result: Spectrogram as a NumPy array.
    param: Masking width parameter forwarded to both masking functions.
    TimeNum: Number of time bands forwarded to `my_time_mask`.
    FreqNum: Number of frequency bands forwarded to `my_freq_mask`.
    sample_rate: Sampling rate used to label the plot axes.
    save_plot: If 1, the figure is saved to
      `<imageName>_Time_and_Freq_Masked.png` and closed.
    imageName: Output path without suffix and extension.
    overwriteControl: If 1 and the output image already exists, nothing is done.
    CMAP: Matplotlib colormap name.
  """
  target_png = f'{imageName}_Time_and_Freq_Masked.png'
  skip = overwriteControl == 1 and os.path.exists(target_png)
  if not skip:
    # Step 1, time masking: the ndarray is wrapped in a torch tensor first
    spec_tensor = torch.from_numpy(result)
    time_masked = my_time_mask(spec_tensor, param, TimeNum)

    # Step 2, frequency masking on top of the time-masked spectrogram
    fully_masked = my_freq_mask(time_masked, param, FreqNum)

    plt.figure(figsize=(20, 20), frameon=False)
    librosa.display.specshow(fully_masked.numpy(), sr=sample_rate,
                             x_axis='time', y_axis='mel', cmap=CMAP)
    #plt.colorbar()
    plt.clim(-80, 0)      # fixed colour range, same as the non-augmented plot
    plt.ylim([0, 12000])
    plt.axis('off')

    if save_plot == 1:
      plt.savefig(target_png, bbox_inches='tight', pad_inches=0)
      plt.close()

# ⛹ Define Input and Output directory 

In [379]:
fileName = 'DatasetTesiFinale_vowelE_FineCut_Short_perColab'
zipfile = fileName + '.zip'
directory = '/content/drive/MyDrive/TesiMagistrale/'

zipPath = os.path.join(directory,zipfile)
OUTPUT_DIR = os.path.join(directory,'outputSpectrogram')

if not os.path.exists(OUTPUT_DIR):
   # Create a new directory because it does not exist
   os.makedirs(OUTPUT_DIR)
   print("The output folder has been created!")

# Location of Zip File
drive_path = zipPath
local_path = '/content'

zipCopyPath = os.path.join(local_path,zipfile)
if not os.path.exists(zipCopyPath):
  # Copy the zip file and move it up one level (AKA out of the drive folder)
  !cp '{drive_path}' .
else:
  print('Files already transferred from Drive')

if not os.path.exists(fileName):
  # Navigate to the copied file and unzip it quietly
  os.chdir(local_path)
  !unzip -q '{zipfile}'
else:
  print('Files already unzipped')
# change directory to the new one
INPUT_DIR = os.path.join(local_path,fileName)

Files already transferred from Drive
Files already unzipped


# 🔎 Plot Mel-Spectrogram




In [380]:
# Plot configuration
# Name of the sub-folder of OUTPUT_DIR that receives the images
destination = 'Mel-Spectrogram'
# 1 -> save every plot on Google Drive
save_plot = 1
# 1 -> skip the images that already exist instead of regenerating them
overwriteControl = 0
# Matplotlib colormap: plasma, jet, Greys, Greys_r (reverse), ...
CMAP = 'jet'

# 🏋 Choose whether to augment the data or not

In [381]:
# When True, the main loop also calls DataAugmentation for every recording.
doAugmentation = True

If it doesn't exist, create the output directory

In [382]:
# if not os.path.exists(os.path.join(OUTPUT_DIR, destination)):
#     os.makedirs(os.path.join(OUTPUT_DIR, destination))

# ✔ Generate Plot! 

In [None]:
# Walk INPUT_DIR/<class folder>/<recording>.wav and generate, for every
# recording, the mel-spectrogram image and (optionally) its augmented versions.
valid_formats = [".wav"]
for folders in os.listdir(INPUT_DIR):
  # select a subfolder
  subfolder = os.path.join(INPUT_DIR,folders)
  # skip stray files at the top level of the dataset (listing them would fail)
  if not os.path.isdir(subfolder):
    continue
  # select all the records in the subfolder
  for filename in os.listdir(subfolder):
    file_format = os.path.splitext(filename)[1]
    if file_format.lower() not in valid_formats:
      continue

    # define input and output paths
    file_path, destination_dir, imageName = outputPath(subfolder,filename,OUTPUT_DIR,destination)
    print('Now reading', filename)

    # sr = 44100 converts all the audio files to a sample frequency of 44100
    # sr = None would leave the fs untouched
    data, sample_rate = librosa.load(file_path,sr=44100)

    # Create the destination folder together with any missing parent:
    # OUTPUT_DIR/<destination> is not created anywhere else, so a plain
    # os.mkdir failed on the first run.
    os.makedirs(destination_dir, exist_ok=True)

    # Normalize data
    data = DataNormalization(data,sample_rate)

    # PreEmphasis
    #data = librosa.effects.preemphasis(data)

    # Plot the signal; result is the ndarray holding the dB mel-spectrogram
    result = MelSpectrogramPlot(data,sample_rate,save_plot,imageName, overwriteControl,CMAP)

    if doAugmentation:
      DataAugmentation(data,sample_rate,save_plot,
                   imageName,overwriteControl,CMAP,result)

Now reading PD_ON_TesiPDL_VowelE_0021_FineCut_Short.wav
Now reading PD_ON_TesiPDL_VowelE_0024_FineCut_Short.wav
Now reading PD_ON_TesiPDL_VowelE_0054_SAP_2_FineCut_Short.wav
Now reading PD_ON_TesiPDL_VowelE_0026_FineCut_Short.wav


  if sys.path[0] == '':
  # This is added back by InteractiveShellApp.init_path()
  app.launch_new_instance()


Now reading PD_ON_TesiPDL_VowelE_0017_FineCut_Short.wav


  # Remove the CWD from sys.path while we load stuff.


Now reading PD_ON_TesiPDL_VowelE_0029_FineCut_Short.wav
Now reading PD_ON_TesiPDL_VowelE_0042_FineCut_Short.wav
Now reading PD_ON_TesiPDL_VowelE_0039_FineCut_Short.wav
