<a href="https://colab.research.google.com/github/pietrodileo/Python_for_MD_thesis/blob/main/CreateSpectrogram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Audio Signal Visualization**
## Generate easily different plot from audio signals stored in Google Drive

## Import libraries

In [581]:
#importing the libraries
import os 
import cv2
import numpy as np
import matplotlib
#This backend of matplotlib doesn't show plots to the user, but we can save them to Google Drive
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pdb
import librosa
import librosa.display
from scipy.io import wavfile
import seaborn as sns
from pathlib import Path
import pylab
import sys
import soundfile as sf

# Define local functions
In this section: the functions that define the outputPath and the analysis selection

In [582]:
def outputPath(subfolder,filename,OUTPUT_DIR,destination):
  file_path = os.path.join(subfolder, filename)
  file_stem = Path(subfolder).stem
  target_dir = f'class_{file_stem}'
  destination_dir = os.path.join(os.path.join(OUTPUT_DIR, destination), target_dir)
  # generate image name
  file_stem = Path(file_path).stem
  imageName = os.path.join(destination_dir, file_stem)
  return file_path, destination_dir,imageName;

def SelectDestination(selection_var):
  if selection_var == 1:
    destination = 'Sig_Spect'
  elif selection_var == 2:
    destination = 'Spectrogram' 
  elif selection_var == 3:
    destination = 'Mel-Spectrogram' 
  elif selection_var == 4:
    destination = 'Scalogram' 
  elif selection_var == 5:
    destination = 'Chromagram'
  elif selection_var == 6:
    destination = 'MFCC'
  elif selection_var == 7:
    destination = 'RASTAMAT'
  else: 
    # If an exact match is not confirmed, this last case will be used if provided
    sys.exit("Assign a proper value to the selection variable!")
  return destination

def signalAnalysis(selection_var,data,sample_rate,save_plot,imageName):
  if selection_var == 1:
    # Plot the signal
    plotSignal(data,sample_rate,save_plot,imageName)
    result = 0;
  elif selection_var == 2:
    # Plot Spectrogram
    result = spectrogramPlot(data,sample_rate,save_plot,imageName)
  elif selection_var == 3:
    result = MelSpectrogramPlot(data,sample_rate,save_plot,imageName)
  elif selection_var == 4:
    result = Scalogram(data,sample_rate,save_plot,imageName)
  elif selection_var == 5:
    result = Chromagram(data,sample_rate,save_plot,imageName)
  elif selection_var == 6:
    result = plotMFCC(data,sample_rate,save_plot,imageName)
  elif selection_var == 7:
    result = plotRASTAMAT(data,sample_rate,save_plot,imageName)
  else: 
    # If an exact match is not confirmed, this last case will be used if provided
    sys.exit("Assign a proper value to the selection variable!")
  return result

## Plot Signal Function

In [583]:
def plotSignal(data,sample_rate,save_plot,imageName):
    plt.figure(figsize=(20,20))
    
    plot_a = plt.subplot(211)
    #plot_a.set_title('Title')
    plot_a.plot(data)
    plot_a.set_xlabel('sample rate * time')
    plot_a.set_ylabel('energy')

    plot_b = plt.subplot(212)
    plot_b.specgram(data, NFFT=1024, Fs=sample_rate, noverlap=900)
    plot_b.set_xlabel('Time')
    plot_b.set_ylabel('Frequency')
    
    plt.show()

    # save the plot
    if save_plot == 1:
      fig1 = plt.gcf()
      pylab.savefig(f'{imageName}.png')
      pylab.close() 


## Plot Spectrogram and Mel-Spectrogram functions



In [584]:
def spectrogramPlot(y,sample_rate,save_plot,imageName):
  #D_highres: numpy array
  D_highres = librosa.stft(y, hop_length=256, n_fft=4096)
  #converting into energy levels(dB)
  S_db_hr = librosa.amplitude_to_db(np.abs(D_highres), ref=np.max)

  fig1 = plt.figure()
  plt.figure(figsize=(20, 20))

  librosa.display.specshow(S_db_hr, hop_length=256, sr=sample_rate, 
                           x_axis='time', y_axis='log',cmap='jet')
  #plt.colorbar()
  plt.clim(-80,0)  # identical to caxis([-4,4]) in MATLAB
  plt.ylim([0, 14000])
  plt.axis('off')

  # save the plot
  if save_plot == 1:
    fig1 = plt.gcf()
    pylab.savefig(f'{imageName}.png')
    pylab.close()

  return S_db_hr

def MelSpectrogramPlot(y,sample_rate,save_plot,imageName):
  M = librosa.feature.melspectrogram(y=y, sr=sample_rate, n_fft=4096)
  M_db = librosa.power_to_db(M, ref=np.max)

  fig1 = plt.figure()

  plt.figure(figsize=(20, 20))
  librosa.display.specshow(M_db, sr=sample_rate, x_axis='time', 
                           y_axis='mel',cmap='jet')
  #plt.colorbar()
  plt.clim(-80,0)  # identical to caxis([-4,4]) in MATLAB
  plt.ylim([0, 14000])
  plt.axis('off')
  
  # save the plot
  if save_plot == 1:
    fig1 = plt.gcf()
    pylab.savefig(f'{imageName}.png')
    pylab.close()
  
  return M_db

## Plot Scalogram Function

In [585]:
def Scalogram(data,sample_rate,save_plot,imageName):
  print('Working on it...')

## Plot Chromagram

In [586]:
def Chromagram(data,sample_rate,save_plot,imageName):
  print('Working on it...')

## Plot MFCC

In [587]:
def plotMFCC(data,sample_rate,save_plot,imageName):
  n_mfcc = 13
  n_mels = 40
  n_fft = 4096 
  hop_length = 160
  fmin = 0
  fmax = None
  mfcc_librosa = librosa.feature.mfcc(y=data, sr=sample_rate, n_fft=n_fft,
                                      n_mfcc=n_mfcc, n_mels=n_mels,
                                      hop_length=hop_length,
                                      fmin=fmin, fmax=fmax, htk=False)
  plt.figure(figsize=(20, 20))
  sns.heatmap(mfcc_librosa, vmin=-500, vmax=300, cbar=False)
  plt.axis('off')
  
  # save the plot
  if save_plot == 1:
    fig1 = plt.gcf()
    pylab.savefig(f'{imageName}.png')
    pylab.close()
    
  return mfcc_librosa

## Plot RASTAMAT

In [588]:
def plotRASTAMAT(data,sample_rate,save_plot,imageName):
  n_mfcc = 13
  n_mels = 40
  n_fft = 4096 
  hop_length = 160
  fmin = 0
  fmax = None
  mfcc_librosa = librosa.feature.mfcc(y=data, sr=sample_rate, n_fft=n_fft,
                                      n_mfcc=n_mfcc, n_mels=n_mels,
                                      hop_length=hop_length,
                                      fmin=fmin, fmax=fmax, dct_type = 2,
                                      htk=False)
  plt.figure(figsize=(20, 20))
  sns.heatmap(mfcc_librosa, vmin=-500, vmax=300, cbar=False)
  plt.axis('off')

  # save the plot
  if save_plot == 1:
    fig1 = plt.gcf()
    pylab.savefig(f'{imageName}.png')
    pylab.close()

  return mfcc_librosa

# Data augmentation Functions

##Data Augmentation to the Signal

In [589]:
from audiomentations.augmentations.time_mask import TimeMask
from audiomentations import TimeStretch
import torch
import torchaudio
from torchaudio import transforms

doAugmentation = True
if doAugmentation == True:
  #!pip install audiomentations
  ## Data Augmentation
  from audiomentations import Compose, AddGaussianNoise, PitchShift, HighPassFilter
  # add gaussian noise
  AddNoise = Compose([AddGaussianNoise(min_amplitude = 0.1, max_amplitude = 0.2, p=1)])
  # shift pitch
  pitchShifting = Compose([PitchShift(min_semitones = -8, max_semitones = -8, p=1)])
  # stretch time
  timeStretch = Compose([TimeStretch(min_rate=0.8,max_rate=1.25,leave_length_unchanged=True,p=1.0)])
  # time Mask
  timeMask = Compose([TimeMask(min_band_part=0.2, max_band_part=0.5, p=1.0)])

  augment = Compose([AddGaussianNoise(min_amplitude = 0.1, max_amplitude = 0.2, p=1),
      PitchShift(min_semitones = -8, max_semitones = -8, p=1),
      TimeStretch(min_rate=0.8,max_rate=1.25,leave_length_unchanged=True,p=1.0),
      #HighPassFilter(min_cutoff_freq = 2000, max_cutoff_freq=4000, p=1) 
      ])

In [590]:
def Signal_Augmentation(selection_var,data,sample_rate,save_plot,imageName):
  #Save plot = 0 or the picture will overwrite another one
  noisy_signal = AddNoise(data,sample_rate)
  signalAnalysis(selection_var,noisy_signal,sample_rate,0,imageName)
  # picture are saved now!
  if save_plot == 1:
    fig = plt.gcf()
    pylab.savefig(f'{imageName}_GaussianNoise.png')
    pylab.close()

  pitchy_signal = pitchShifting(data,sample_rate)
  signalAnalysis(selection_var,pitchy_signal,sample_rate,0,imageName)
  if save_plot == 1:
    fig = plt.gcf()
    pylab.savefig(f'{imageName}_PitchShifted.png')
    pylab.close()

  stretchy_signal = timeStretch(data,sample_rate)
  signalAnalysis(selection_var,stretchy_signal,sample_rate,0,imageName)
  if save_plot == 1:
    fig = plt.gcf()
    pylab.savefig(f'{imageName}_Stretched.png')
    pylab.close()

  augmented_signal = augment(data,sample_rate)
  signalAnalysis(selection_var,augmented_signal,sample_rate,0,imageName)
  if save_plot == 1:
    fig = plt.gcf()
    pylab.savefig(f'{imageName}_SignalAugmented.png')
    pylab.close()

##Data Augmentation to the Spectrogram/Plot

### Frequency Masking

In [591]:
import tensorflow as tf

def my_freq_mask(input, param, FreqNum, name=None):
    """
    Apply masking to a spectrogram in the freq domain.
    Args:
      input: An audio spectogram.
      param: Parameter of freq masking.
      name: A name for the operation (optional).
    Returns:
      A tensor of spectrogram.
    """
    input = tf.convert_to_tensor(input)
    # TODO: Support audio with channel > 1.
    freq_max = tf.shape(input)[1]
    # calculate the indexes for all the frequencies
    indices = tf.reshape(tf.range(freq_max), (1, -1))
    for x in range(FreqNum):
      # param is the max value of a uniform random distribution
      f = tf.random.uniform(shape=(), minval=0, 
                            maxval=param, dtype=tf.dtypes.int32)
      # f0 is a random selected number from a uniform distribution 
      # with range (0, freq_max-f)
      f0 = tf.random.uniform(shape=(), minval=0, 
                             maxval=freq_max - f, dtype=tf.dtypes.int32)
      # select all the frequencies greater or equal to f0
      cond1 = tf.math.greater_equal(indices, f0)
      # select all the frequencies smaller than f0 + f
      cond2 = tf.math.less(indices, f0 + f)
      # select all the frequencies between f0 and f0+f
      condition = tf.math.logical_and(cond1, cond2)
      masking = tf.where(condition,  tf.cast(0, input.dtype), input)
      input = masking
    return masking

In [592]:
def FrequencyMasking(result,param,FreqNum,sample_rate,save_plot,imageName):
  # Freq masking
  # Convert ndarray representing the spectrogram to a tensor for pytorch
  result_tensor = torch.from_numpy(result)        
  freq_mask = my_freq_mask(result_tensor, param, FreqNum)
  plt.figure(figsize=(20, 20))
  librosa.display.specshow(freq_mask.numpy(), sr=sample_rate, x_axis='time', 
                           y_axis='mel',cmap='jet')   
  #plt.colorbar()
  plt.clim(-80,0)  # identical to caxis([-4,4]) in MATLAB
  plt.ylim([0, 14000])
  plt.axis('off')   
  
  if save_plot == 1:
    fig2 = plt.gcf()
    pylab.savefig(f'{imageName}_FreqMasked.png')
    pylab.close()

### Time Masking

# Define Input and Output directory

In [593]:
#let the plot appear and store it with the notebook
%matplotlib inline
#setting the path to the directory containing the pics
INPUT_DIR = '/content/drive/MyDrive/DatasetTesi/Vowel_E_REC'
OUTPUT_DIR = '/content/drive/MyDrive/DatasetTesi/outputSpectrogram/'
valid_formats = [".wav"]
#audio_data = []

# Choose the plot to be generated
Change the value of selection variable to plot:
1. Signal + Spectrogram
2. Spectrogram
3. Mel-Spectrogram
4. CWT (Scalogram)
5. Chromagram
6. MFCC
7. RASTAMAT Coefficients




In [594]:
selection_var = 3
destination = SelectDestination(selection_var)
save_plot = 1; # if 1, save plot on Google Drive
overwriteControl = 0;

Creo una cartella dove salvare l'output

In [595]:
# For every recording, make a spectogram and save it as label_speaker_no.png
if not os.path.exists(os.path.join(OUTPUT_DIR, destination)):
    os.makedirs(os.path.join(OUTPUT_DIR, destination))

# Generate Plot! ✔

In [596]:
for folders in os.listdir(INPUT_DIR):
  # select a subfolder
  subfolder = os.path.join(INPUT_DIR,folders)
  # select all the records in the subfolder
  for filename in os.listdir(subfolder):
    file_format = os.path.splitext(filename)[1] 
    if file_format.lower() in valid_formats:
      data, sample_rate = librosa.load(os.path.join(subfolder,filename))
      # define output path
      file_path, destination_dir, imageName = outputPath(subfolder,filename,OUTPUT_DIR,destination)
      
      print('Now reading', filename)
      # if we don't want to overwrite data
      if overwriteControl == 1:
        if os.path.exists(imageName + '.png'): 
          continue 
      
      # create destination folder if it doesn't exist
      if not os.path.exists(destination_dir):
        os.mkdir(destination_dir)

      # Plot the signal, result is an ndarray containing the plot
      result = signalAnalysis(selection_var,data,sample_rate,save_plot,imageName)
      
      if doAugmentation == True:
        # 1. Data Augmentation to the signal
        Signal_Augmentation(selection_var,data,sample_rate,save_plot,imageName)

        # 2. Data Augmentation to the spectrogram
        # Freq masking
        param = 10 
        FreqNum = 5
        FrequencyMasking(result,param,FreqNum,sample_rate,save_plot,imageName)

      #   # Time masking

      #   time_mask = tfio.audio.time_mask(dbscale_mel_spectrogram, param=10)

Now reading 028_leonori_e.wav
Now reading 045_puggi_emma_PD_OFF_e.wav
Now reading 038_bobisse_e.wav
Now reading 046_gallucci_PD_OFF_e.wav


  


Now reading 005_sweeney_e.wav




Now reading 015_braghese_e.wav
Now reading 031_manzolini_e.wav
Now reading 052_gubitosi_e.wav
Now reading 047_graziano_gina_PD_OFF_e.wav
Now reading 042_zaccarelli_aldo_PD_OFF_e.wav
Now reading 033_tirabassi_e.wav
Now reading 029_Galloni_e.wav
Now reading 002_deleonardis_e.wav
Now reading 044_colavovo_anna_PD_OFF_e.wav
Now reading 032_piacentini_e.wav
Now reading 021_turchi_e.wav
Now reading 022_romeo_e.wav
Now reading 007_Giuliani_e.wav
Now reading 018_fratoni_e.wav
Now reading 004_Sopranzetti_e.wav
Now reading 011_puggi_e.wav
Now reading 030_Ferrari_e.wav
Now reading 035_vellitri_e.wav
Now reading 013_coccia_sergio_e.wav
Now reading 009_giov_paz_berardell_e.wav
Now reading 008_fubelli_e.wav
Now reading 034_graziani_e.wav
Now reading 025_genovese_e.wav


KeyboardInterrupt: ignored

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>