# FSD Audio Classifier
##  02. Data Wrangling & Feature Engineering
In this notebook we will engineer features from the raw audio files in the following formats:

    - Mel-Frequency Spectrograms
    - Mel-Frequency Cepstrum Spectrograms
    - Mean Mel-Frequency Cepstrum Coefficients
    
In this notebook we will also create synthetic data by Randomly Oversampling the Minority Classes, adding Gaussian Noise with pitch augmentation. We will also Randomly Undersample the Majority Classes to create a balanced dataset for modeling.

In [29]:
import pandas as pd
import numpy as np
import time
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
from PIL import Image
import skimage
import skimage.io
import os
import random
from random import choices
from random import sample
matplotlib inline

In [30]:
pwd

'/Users/Mateo/Springboard/FSD50k'

In [31]:
# Load the labelled metadata for the development and evaluation splits.
# The paths are relative, so they resolve against the current working directory.
dev_info = pd.read_json('data/data/labelled_dev_info.json')
eval_info = pd.read_json('data/data/labelled_eval_info.json')

In [32]:
# Preview the first rows of the development-split metadata.
dev_info.head()

Unnamed: 0,title,description,tags,license,uploader,track_num,wav_name,png_name,labels_15,labels_2,labels_4,labels
63,RalfHutterWorking.wav,"Ralf Hutter from Kraftwerk saying ""Working on ...","[male, voice]",http://creativecommons.org/licenses/by/3.0/,fectoper,63,63.wav,63.png,0,0,0,0
136,keyboard-rhymtic.wav,Noise of an average logitech keyboard. Pretty ...,"[keyboard, rhythmic, tap, type]",http://creativecommons.org/licenses/by/3.0/,Anton,136,136.wav,136.png,11,8,3,1
137,keyboard-typing.wav,Noise of an average logitech keyboard. Pretty ...,"[computer, environmental-sounds-research, key,...",http://creativecommons.org/licenses/by/3.0/,Anton,137,137.wav,137.png,8,7,3,1
221,bell.wav,simple *ting* sound,[bell],http://creativecommons.org/publicdomain/zero/1.0/,Erratic,221,221.wav,221.png,5,3,2,3
236,BUSSES.aiff,Departing busses\r\n at Utrecht Central Railwa...,"[bus, depart, drive, station]",http://creativecommons.org/licenses/by-nc/3.0/,hanstimm,236,236.wav,236.png,7,7,3,4


In [33]:
# Directory configuration used by the feature-extraction cells below.
# NOTE(review): these are absolute, machine-specific paths; they must be edited
# before the notebook can run anywhere else.
base_dir = '/Users/Mateo/Springboard/FSD50k/'
train_audio_dir = '/Users/Mateo/Springboard/FSD50k/data/FSD50K.dev_audio/'
test_audio_dir = '/Users/Mateo/Springboard/FSD50k/data/FSD50K.eval_audio/'
# File-name lists consumed by the (currently commented-out) extraction loops below;
# uncomment these together with those loops.
# train_wav_names = dev_info.wav_name.to_list()
# train_png_names = dev_info.png_name.to_list()
# test_wav_names = eval_info.wav_name.to_list()
# test_png_names = eval_info.png_name.to_list()

## Creating Mel-Frequency Spectrograms for Training/Test Data 

- extracting 128 Mel-Frequency bands as this is the same spectrum as human hearing
- padding/trimming audio to 5 seconds so that every image has equal dimensions for image analysis (shorter audio files are padded with silence at a random offset, split between the beginning and the end)
- extracting 216 audio events within the 5 second audio files (23 milliseconds intervals)

In [34]:
# # Spectrograms for Training Data

# # creating directory
# os.chdir(base_dir)
# if not os.path.exists('data/train_spectrograms'):
#     os.makedirs('data/train_spectrograms')
    
# # changing into desired directory to save spectrogram images
# path = 'data/train_spectrograms'
# os.chdir(path)


# start = time.time()

# for i in range(len(train_wav_names)):
#     file_path = train_audio_dir + train_wav_names[i]
#     data, sr = librosa.load(file_path)  # consider using 'kaiser_fast' instead of 'kaiser_best' for faster load time
                                          # data, sr = librosa.load(file_path, res_type='kaiser_fast')
    
#     # trimming to 5 seconds
#     # padding with random offset for shorter tracks to 5 seconds

#     input_length = 5 * 22050 # 5 seconds * 22050 samples per second (the sample_rate)

#     if len(data) > input_length:
#         data = data[:input_length]

#     elif input_length > len(data):
#         max_offset = input_length - len(data)
#         offset = np.random.randint(max_offset)
#         data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
    
#     # Fast Fourier Transform, a window for the results image
#     n_fft = 2048
#     # hop length slides the window (4:1 something er other?)
#     hop_length = 512
#     # converts audio spectrum into 128 evenly spaced groups based on human hearing
#     n_mels = 128

#     S = librosa.feature.melspectrogram(data, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
#     S_DB = librosa.power_to_db(S, ref=np.max)
    
    
#     S_DB = S_DB.astype(np.uint8) #converting from float32 to uint8 (more efficient file format)
    
#     # NOT SCALING IN THIS ITERATION....
# #     # # min-max scale to fit inside 8-bit range

# #     img = scale_minmax(S_DB, 0, 255).astype(np.uint8)
# #     img = np.flip(img, axis=0) # put low frequencies at the bottom in image
# #     img = 255-img # invert. make black==more energy

#     skimage.io.imsave(train_png_names[i], S_DB)  # removing .wav suffix with .png 
#     # to display impage: librosa.display.specshow(img, sr=sr, hop_length=hop_length)
    
# end = time.time()
# print(end - start)

In [35]:
# # Spectrograms for Test Data

# # creating directory
# os.chdir(base_dir)
# if not os.path.exists('data/test_spectrograms'):
#     os.makedirs('data/test_spectrograms')
    
# # changing into desired directory to save spectrogram images
# path = 'data/test_spectrograms'
# os.chdir(path)


# start = time.time()

# for i in range(len(test_wav_names)):
#     file_path = test_audio_dir + test_wav_names[i]
#     # consider using 'kaiser_fast' instead of 'kaiser_best' for faster load time
#     data, sr = librosa.load(file_path, res_type='kaiser_fast')  
    
#     # trimming to 5 seconds
#     # padding with random offset for shorter tracks to 5 seconds

#     input_length = 5 * 22050 # 5 seconds * 22050 samples per second (the sample_rate)

#     if len(data) > input_length:
#         data = data[:input_length]

#     elif input_length > len(data):
#         max_offset = input_length - len(data)
#         offset = np.random.randint(max_offset)
#         data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
    
#     # Fast Fourier Transform, a window for the results image
#     n_fft = 2048
#     # hop length slides the window (4:1 something er other?)
#     hop_length = 512
#     # converts audio spectrum into 128 evenly spaced groups based on human hearing
#     n_mels = 128

#     S = librosa.feature.melspectrogram(data, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
#     S_DB = librosa.power_to_db(S, ref=np.max)
    
    
#     S_DB = S_DB.astype(np.uint8) #converting from float32 to uint8 (more efficient file format)
#     skimage.io.imsave(test_png_names[i], S_DB)  
    
# end = time.time()
# print(end - start)

## Creating Mel-Frequency Cepstrum Spectrogram for Training/Test Data
- extracting 32 Mel-Frequency Cepstrum bands to account for tonal frequency bands beyond vocal range (12-20 bands are typical for speech analysis)
- padding/trimming audio to 5 seconds so that every image has equal dimensions for image analysis
- extracting 216 audio events within the 5 second audio files (23 milliseconds intervals)

In [36]:
# # Creating Train MFC Spectrograms 

# # creating directory
# os.chdir(base_dir)
# if not os.path.exists('data/train_mfcc'):
#     os.makedirs('data/train_mfcc')
    
# # changing into desired directory to save mfcc images
# path = 'data/train_mfcc'
# os.chdir(path)


# start = time.time()

# # intanitating numpy array for Mean MFCC values
# num_files = len(train_wav_names)
# num_mfcc_features = 32
# mfcc_mean_features = np.zeros(shape=(num_files, num_mfcc_features))


# for i in range(len(train_wav_names)):
#     file_path = train_audio_dir + train_wav_names[i]
#     data, sr = librosa.load(file_path, res_type='kaiser_fast')
    
#     #padding/trimming to 5 seconds with random offset for shorter tracks

#     input_length = 5 * 22050

#     if len(data) > input_length:
#         data = data[:input_length]

#     elif input_length > len(data):
#         max_offset = input_length - len(data)
#         offset = np.random.randint(max_offset)
#         data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
    
#     # extracting mfcc features, using 32 MFCC Bands
#     mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=32)
    
    
#     # extracting Mean for each MFCC band
#     mfcc_mean = np.mean(mfcc, axis= 1)  # orginally: np.mean(foo.T, axis= 0), but same no?
#     # appending mean values to features list
#     mfcc_mean_features[i] = mfcc_mean
        
    
#     # converting to uint8 num_type
#     mfcc = mfcc.astype(np.uint8)
#     skimage.io.imsave(train_png_names[i], mfcc)  # removing .wav suffix with .png 

    
#     # saving numpy array to disk    
#     mean_features_filepath = base_dir + 'data/train_mean_mfcc.npz'
#     np.savez(mean_features_filepath, mfcc_mean_features)    

    
# end = time.time()
# print(end - start)

### Extracting Test Data MFCC Spectrogram and Mean MFCC Values

In [37]:
# # Creating Test MFC Spectrograms 

# # creating directory
# os.chdir(base_dir)
# if not os.path.exists('data/test_mfcc'):
#     os.makedirs('data/test_mfcc')
    
# # changing into desired directory to save mfcc images
# path = 'data/test_mfcc'
# os.chdir(path)



# start = time.time()


# # intanitating numpy array for Mean MFCC values
# num_files = len(test_wav_names)
# num_mfcc_features = 32
# mfcc_mean_features = np.zeros(shape=(num_files, num_mfcc_features))


# for i in range(len(test_wav_names)):
#     file_path = test_audio_dir + test_wav_names[i]
#     data, sr = librosa.load(file_path, res_type = 'kaiser_fast')
    
#     #padding/trimming to 5 seconds with random offset for shorter tracks

#     input_length = 5 * 22050

#     if len(data) > input_length:
#         data = data[:input_length]

#     elif input_length > len(data):
#         max_offset = input_length - len(data)
#         offset = np.random.randint(max_offset)
#         data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
    
#     # extracting mfcc features
#     mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=32)
    
    
#     # extracting Mean for each MFCC band
#     mfcc_processed = np.mean(mfcc, axis= 1)  # orginally: np.mean(foo.T, axis= 0), but same no?
#     # appending mean values to features list
#     mfcc_mean_features[i] = mfcc_processed
    
    
#     # converting to uint8 num_type
#     mfcc = mfcc.astype(np.uint8)
#     skimage.io.imsave(test_png_names[i], mfcc)  # removing .wav suffix with .png 
    

# # saving numpy array to disk    
# mean_features_filepath = base_dir + 'data/test_mean_mfcc.npz'
# np.savez(mean_features_filepath, mfcc_mean_features)    

# end = time.time()
# print(end - start)

# Creating Balanced Dataframe with Random Over/Under Sampling
- There is significant class imbalance in the training dataset. We will first create a balanced dataframe by oversampling the minority classes and undersampling the majority classes. We will use synthetic data for the random oversampling. We will balance our classes at 5,000 samples each, which is the approximate mean of our class value counts.

In [38]:
# Work on an independent two-column copy (track number + class label) of the
# dev metadata, then show how many tracks fall in each class.
track_num_labels = dev_info.loc[:, ['track_num', 'labels']].copy()
track_num_labels['labels'].value_counts()

1    15105
3     9275
0     5355
4     5118
2     3239
5     1682
7      776
6      416
Name: labels, dtype: int64

In [39]:
# Balance every class at TARGET_PER_CLASS rows: majority classes are randomly
# undersampled (without replacement) and minority classes are randomly
# oversampled (with replacement) to make up their shortfall.
TARGET_PER_CLASS = 5000


def _track_nums_for(label):
    """Return the track_num values of every row in `track_num_labels` carrying `label`."""
    return track_num_labels[track_num_labels.labels == label].track_num.values


def _undersample(label, k=TARGET_PER_CLASS):
    """Draw `k` distinct track numbers of class `label`, without replacement."""
    # random.sample() requires a sequence: passing a set was deprecated in
    # Python 3.9 and raises TypeError from 3.11, so de-duplicate and sort first.
    return sample(sorted(set(_track_nums_for(label))), k=k)


def _oversample(label, target=TARGET_PER_CLASS):
    """Draw, with replacement, as many track numbers of class `label` as it is short of `target`."""
    population = _track_nums_for(label)
    return choices(population, k=target - len(population))


#undersampling majority classes

under_samp_0 = _undersample(0)
under_samp_1 = _undersample(1)
under_samp_3 = _undersample(3)
under_samp_4 = _undersample(4)

# over sampling minority classes

over_samp_label_2 = _oversample(2)
over_samp_label_5 = _oversample(5)
over_samp_label_6 = _oversample(6)
over_samp_label_7 = _oversample(7)

full_under_samps = under_samp_0 + under_samp_1 + under_samp_3 + under_samp_4
all_over_samps = over_samp_label_2 + over_samp_label_5 + over_samp_label_6 + over_samp_label_7


In [40]:
# Track numbers of every original row that belongs to a minority class (2, 5, 6, 7).
minority_mask = track_num_labels.labels.isin([2, 5, 6, 7])
minority_labels = track_num_labels.loc[minority_mask, 'track_num'].tolist()

In [41]:
# All original (non-synthetic) rows that stay in the balanced set: every
# minority-class track plus the undersampled majority-class tracks.
minority_plus_under_samps = minority_labels + full_under_samps

# The label-based lookup relies on the frame's index labels being the track
# numbers themselves (as the dev_info preview suggests) -- TODO confirm.
df_under_samps_plus_minority = (
    track_num_labels
    .loc[minority_plus_under_samps]
    .assign(png_name=lambda frame: frame['track_num'].astype(str) + '.png')
)
df_under_samps_plus_minority.head()

Unnamed: 0,track_num,labels,png_name
305,305,2,305.png
344,344,2,344.png
420,420,2,420.png
1729,1729,2,1729.png
1730,1730,2,1730.png


In [42]:
# Give every oversampled draw a unique image file name. `counter` remembers the
# zero-based occurrence index last used for each track number, so repeated
# draws of the same track become synth_0_<track>.png, synth_1_<track>.png, ...

counter = {}
synth_list = []

for samp in all_over_samps:
    occurrence = counter.get(samp, -1) + 1
    counter[samp] = occurrence
    synth_list.append(f'synth_{occurrence}_{samp}.png')
    

In [43]:
# creating a dataframe for just the synthetic data

# Build the frame column-wise. The earlier form created a 2 x N frame with N
# duplicate column labels and transposed it, which left `track_num` as an
# object-dtype column for the merge below.
synth_df = (
    pd.DataFrame({'track_num': all_over_samps, 'png_name': synth_list})
    # attach the class label of the source track
    .merge(track_num_labels, how='left', on='track_num')
)
# source audio file of each synthetic sample
synth_df['wav_name'] = synth_df['track_num'].astype(str) + '.wav'
# (wav_name, png_name) pair; unique per row because png_name is unique per draw
synth_df['wav_png_tuple'] = list(zip(synth_df['wav_name'], synth_df['png_name']))
synth_df.head()

Unnamed: 0,track_num,png_name,labels,wav_name,wav_png_tuple
0,233133,synth_0_233133.png,2,233133.wav,"(233133.wav, synth_0_233133.png)"
1,391175,synth_0_391175.png,2,391175.wav,"(391175.wav, synth_0_391175.png)"
2,341178,synth_0_341178.png,2,341178.wav,"(341178.wav, synth_0_341178.png)"
3,249724,synth_0_249724.png,2,249724.wav,"(249724.wav, synth_0_249724.png)"
4,141471,synth_0_141471.png,2,141471.wav,"(141471.wav, synth_0_141471.png)"


In [44]:
# Stack the retained original rows on top of the synthetic rows, order the
# result by track number, and (re)derive the source wav file name for every row.
# Original rows have no `wav_png_tuple`, so that column is empty for them.

full_synth_df = (
    pd.concat(objs=[df_under_samps_plus_minority, synth_df], ignore_index=True)
    .sort_values(by='track_num')
    .reset_index(drop=True)
    .assign(wav_name=lambda frame: frame['track_num'].astype(str) + '.wav')
)
full_synth_df.head()

Unnamed: 0,track_num,labels,png_name,wav_name,wav_png_tuple
0,63,0,63.png,63.wav,
1,136,1,136.png,136.wav,
2,137,1,137.png,137.wav,
3,236,4,236.png,236.wav,
4,237,4,237.png,237.wav,


In [27]:
# not saving anything to disk now!

# saving dataframe with the synthetic data and our undesampled majority classes to disk

######### full_synth_df.to_json('data/training_plus_synth_df.json')

## Creating Synthetic Audio Files for Mel-Frequency Spectrograms

In [45]:
# Paths and file-name lists used by the synthetic feature-extraction loops.
base_dir = '/Users/Mateo/Springboard/FSD50k/'
train_audio_dir = '/Users/Mateo/Springboard/FSD50k/data/FSD50K.dev_audio/'

# One entry per synthetic row, in synth_df row order.
synth_wav_names = synth_df['wav_name'].tolist()
synth_png_names = synth_df['png_name'].tolist()
synth_wav_png_names = synth_df['wav_png_tuple'].tolist()

In [46]:
# # # Synthetic Training Spectrograms 

# # creating directory
# os.chdir(base_dir)
# if not os.path.exists('data/synth_spectrograms4'):
#     os.makedirs('data/synth_spectrograms4')
    
# # changing into desired directory to save spectrogram images
# path = 'data/synth_spectrograms4'
# os.chdir(path)


# start = time.time()

# # empty list to append new track names and their corresponding dataframe I.D. key

# track_names_id_key = []

# for i in range(len(synth_wav_png_names)):
#     file_path = train_audio_dir + synth_wav_png_names[i][0]
#     data, sr = librosa.load(file_path, res_type='kaiser_fast')
    
#     # trimming to 5 seconds
#     # padding with random offset for shorter tracks to 5 seconds

#     input_length = 5 * 22050 # 5 seconds * 22050 samples per second (the sample_rate)

#     if len(data) > input_length:
#         data = data[:input_length]

#     elif input_length > len(data):
#         max_offset = input_length - len(data)
#         offset = np.random.randint(max_offset)
#         data = np.pad(data, (offset, input_length - len(data) - offset), "constant")

#     #########
    
    
#     #  ADD NOISE HERE!!!!
#     # shifting pitch and adding gaussian noise

#     data_shifted = librosa.effects.pitch_shift(data, sr, n_steps=random.randint(-4,4))
#     #Generating noise, with a Gaussian distribution with mean =0 and standard deviation = RMS_required (std of audio signal)
#     STD_noise=np.sqrt(np.mean(data_shifted**2)) 
#     noise=np.random.normal(0, STD_noise, data_shifted.shape[0])  
#     # dampening noise by a factor of 25
#     noise = noise / 25
#     data_plus_noise = data_shifted + noise
    
#     ########
    
    
#     # Fast Fourier Transform, a window for the results image
#     n_fft = 2048
#     # hop length slides the window (4:1 something er other?)
#     hop_length = 512
#     # converts audio spectrum into 128 evenly spaced groups based on human hearing
#     n_mels = 128

#     S = librosa.feature.melspectrogram(data_plus_noise, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
#     S_DB = librosa.power_to_db(S, ref=np.max)
       
#     S_DB = S_DB.astype(np.uint8) #converting from float32 to uint8 (more efficient file format)
    
#     new_file_name = 'synth_' + str(i) +'_id.png'
#     track_names_id_key.append((synth_wav_png_names[i], new_file_name))
#     skimage.io.imsave(new_file_name, S_DB) 
    
#     # counting loop:
#     if i % 3000 == 0:
#         print(i)
    
# end = time.time()
# print(end - start)

In [None]:
# adding column with New Filename I.D. Keys
# NOTE: `track_names_id_key` is only assigned inside the synthetic-spectrogram
# loop in the preceding cell, which is currently commented out. On a fresh
# kernel this cell raises NameError until that loop has been run.

track_names_id_key_df = pd.DataFrame(track_names_id_key, columns=['wav_png_tuple', 'new_id'])

# merging new dataframe to main dataframe
# Left merge keyed on the (wav_name, png_name) tuple; adds the `new_id` column.
# This rebinds synth_df, so the cell is not idempotent: run it once per kernel session.
synth_df = synth_df.merge(track_names_id_key_df, how='left', on = 'wav_png_tuple')

In [None]:
#not saving to disk!!!!

# # saving dataframe to disk
# os.chdir(base_dir)
############################ synth_df.to_json('data/synth_df_with_ids.json')

### Creating Synthetic MFC Spectrograms 


In [108]:
# # saving Test MFCC Spectrograms and Mean MFCC to disk

# # creating directory
# os.chdir(base_dir)
# if not os.path.exists('data/synth_mfcc4'):
#     os.makedirs('data/synth_mfcc4')
    
# # changing into desired directory to save mfcc images
# path = 'data/synth_mfcc4'
# os.chdir(path)

# start = time.time()

# mfcc_track_id_key = []

# for i in range(len(synth_wav_png_names)):
#     file_path = train_audio_dir + synth_wav_png_names[i][0]
#     data, sr = librosa.load(file_path, res_type = 'kaiser_fast')
    
#     #padding/trimming to 5 seconds with random offset for shorter tracks

#     input_length = 5 * 22050

#     if len(data) > input_length:
#         data = data[:input_length]

#     elif input_length > len(data):
#         max_offset = input_length - len(data)
#         offset = np.random.randint(max_offset)
#         data = np.pad(data, (offset, input_length - len(data) - offset), "constant")
    
#     #########
    
    
#     #  ADD NOISE HERE!!!!
#     # shifting pitch and adding gaussian noise

#     data_shifted = librosa.effects.pitch_shift(data, sr, n_steps=random.randint(-4, 4))
#     #Generating noise, with a Gaussian distribution with mean =0 and standard deviation = RMS_required (std of audio signal)
#     STD_noise=np.sqrt(np.mean(data_shifted**2)) 
#     noise=np.random.normal(0, STD_noise, data_shifted.shape[0])  
#     # dampening noise by a factor of 25
#     noise = noise / 25
#     data_plus_noise = data_shifted + noise
    
#     ########
    
#     # extracting mfcc features
#     mfcc = librosa.feature.mfcc(y=data_plus_noise, sr=sr, n_mfcc=32)
    
#     # converting to uint8 num_type
#     mfcc = mfcc.astype(np.uint8)
    
#     # saving mfcc to disk    
#     new_file_name = 'synth_' + str(i) +'_id.png'
#     mfcc_track_id_key.append((synth_wav_png_names[i], new_file_name))
#     skimage.io.imsave(new_file_name, mfcc) 
  
#     # counting loop:
#     if i % 3000 == 0:
#         print(i)
    
# end = time.time()
# print(end - start)

0
3000
6000
9000
12000
3669.7071709632874


In [None]:
# adding column with the synthetic MFCC Filename I.D. Keys
# Build the lookup from `mfcc_track_id_key`, the list filled by the
# synthetic-MFCC loop. Previously this cell created `mfcc_id_key_df` but never
# used it, and merged the spectrogram key list (`track_names_id_key`) under the
# `synth_mfcc_id` column name instead.
# NOTE: `mfcc_track_id_key` is only assigned inside that loop, which is
# currently commented out; run it before this cell.
mfcc_id_key_df = pd.DataFrame(mfcc_track_id_key, columns=['wav_png_tuple', 'synth_mfcc_id'])
synth_df = synth_df.merge(mfcc_id_key_df, how='left', on='wav_png_tuple')
synth_df.head()

In [115]:
# # saving dataframe to disk
# os.chdir(base_dir)
############################### synth_df.to_json('data/synth_mfcc_ids_df.json')

In [None]:
# Final training index: track number, label and image file name only.
# For synthetic rows the image name is the `new_id` written by the spectrogram
# loop, exposed here as `png_name` so it lines up with the original rows.

synth_df2 = (
    synth_df
    .loc[:, ['track_num', 'labels', 'new_id']]
    .rename(columns={'new_id': 'png_name'})
)
train_synth_df = pd.concat([df_under_samps_plus_minority, synth_df2], ignore_index=True)
train_synth_df.head()

In [136]:
# This DataFrame should be used for all modeling notebooks.

# os.chdir(base_dir)
########################### train_synth_df.to_json('data/train_synth_df.json')

In [54]:
# Reload the previously saved training index from disk. This replaces the
# in-memory `train_synth_df` built above. The path is relative to the current
# working directory.
train_synth_df =pd.read_json('data/train_synth_df.json')

In [55]:
# Preview the reloaded training index.
train_synth_df.head()

Unnamed: 0,track_num,labels,png_name
0,305,2,305.png
1,344,2,344.png
2,420,2,420.png
3,1729,2,1729.png
4,1730,2,1730.png


In [56]:
# Sanity check: total number of rows and columns in the balanced training index.
train_synth_df.shape

(40000, 3)

In [57]:
# Sanity check: number of distinct image file names; it should equal the row
# count if every row points at its own image.
train_synth_df.png_name.nunique()

40000

In [58]:
# Size of the synthetic-only frame.
synth_df2.shape

(13887, 5)

In [128]:
# Size of the retained original rows (minority rows plus undersampled majority rows).
df_under_samps_plus_minority.shape

(26113, 3)

## [EDIT] Extracting Mean MFCC  Values with Resampled Dataset 
    - We will extract the Mean MFCC  Values for the new synthetic data as well as the undersampled majority classes to create a new dataframe to use for modeling.


#### This needs to be run again (error in last iteration of feature extraction)

In [None]:
# Source wav file name for every row of full_synth_df, in row order.
full_synth_wav_names = full_synth_df['wav_name'].tolist()
#full_synth_png_names = full_synth_df.png_name.to_list()

In [None]:
# Number of audio files the extraction loop below will process.
len(full_synth_wav_names)

In [None]:
# saving Mean MFCC Values to disk
#
# For every row of full_synth_df (in row order), load the source wav, trim it
# to at most 5 seconds, apply a random pitch shift plus low-level Gaussian
# noise, and store the per-band mean of 32 MFCCs. Row i of
# `mfcc_mean_features` therefore corresponds to row i of full_synth_df.
#
# NOTE(review): the augmentation is applied to every row, including the
# original (non-synthetic) tracks -- confirm that this is intended.

start = time.time()


# instantiating numpy array for Mean MFCC values
num_files = len(full_synth_wav_names)
num_mfcc_features = 32
mfcc_mean_features = np.zeros(shape=(num_files, num_mfcc_features))

# 5 seconds * 22050 samples per second (the sample rate used throughout this notebook)
input_length = 5 * 22050

for i, wav_name in enumerate(full_synth_wav_names):
    file_path = train_audio_dir + wav_name
    data, sr = librosa.load(file_path, res_type='kaiser_fast')

    # Trim long tracks to 5 seconds. Shorter tracks are deliberately NOT
    # padded, as the added silence would throw off the mean values. (The
    # previous iteration did the opposite: it padded short tracks and never
    # trimmed long ones.)
    if len(data) > input_length:
        data = data[:input_length]

    # shifting pitch by a random whole number of semitones in [-4, 4];
    # `sr` is passed by keyword because librosa >= 0.10 rejects it positionally
    data_shifted = librosa.effects.pitch_shift(data, sr=sr, n_steps=random.randint(-4, 4))

    # Gaussian noise with mean 0 and standard deviation equal to the RMS of
    # the shifted signal, dampened by a factor of 25
    STD_noise = np.sqrt(np.mean(data_shifted ** 2))
    noise = np.random.normal(0, STD_noise, data_shifted.shape[0])
    noise = noise / 25
    data_plus_noise = data_shifted + noise

    # extracting mfcc features
    mfcc = librosa.feature.mfcc(y=data_plus_noise, sr=sr, n_mfcc=num_mfcc_features)

    # mean over the time axis -> one value per MFCC band
    mfcc_processed = np.mean(mfcc, axis=1)
    mfcc_mean_features[i] = mfcc_processed

# saving numpy array to disk
mean_features_filepath = base_dir + 'data/full_training_plus_synth_mean_mfcc.npz'
np.savez(mean_features_filepath, mfcc_mean_features)

end = time.time()
print(end - start)
print(mfcc_mean_features.shape)

# Next Step: EDA
    - In the next notebook we will perform Exploratory Data Analysis on these newly created features and the metadata of the audio files.