**Background:**

The purpose of this notebook is to transform a .wav file into a spectrogram and then feed it into a well known model such as InceptionV3 for feature generation

## Libraries and File Locations

In [1]:
import sys
sys.path.append("..")

In [11]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import soundfile
from scipy import signal
import librosa
import matplotlib.pyplot as plt
import specdisplay
from os import listdir
from os.path import isfile, join
import json

## Define file generation parameters

**Number of Mel-frequencies to keep in the spectrograms**

In [3]:
n_mels = 128

## Defining spectrogram transformation function

## NOTE!!

We're averaging across channels, come back and try a different approach if that's not working

In [4]:
def extract_spectrogram(samples,sample_rate,n_mels=128,n_fft=2048):
    feature_set = []
    if samples.shape[1:]:
        channels = samples.shape[1:][0]
    else:
        channels = 1
        samples = np.expand_dims(samples, axis=1)
    for i in range(channels):
        sample_channel_x = samples[:,i]
        mel_spectrogram = librosa.feature.melspectrogram(y=sample_channel_x, 
                                                     sr=sample_rate,
                                                     n_fft=n_fft, 
                                                     n_mels = n_mels
                                                        )
        decibel_spec = librosa.logamplitude(mel_spectrogram,ref_power=np.max)
        feature_set.append(decibel_spec)
    feature_set = np.array(feature_set)
    feature_set = np.mean(feature_set,axis=0)
    return feature_set

# Creating the PNG spectrograms

In [5]:
samples_folder = "/home/romulo/github/soundflux/samples"

# Read all of the json files

In [9]:
metadata = [samples_folder+"/"+ f for f in listdir(samples_folder) if isfile(join(samples_folder, f)) and ".json" in f]

In [19]:
data = []
error_count = 0
for fi in metadata:
    log = open(fi, "r").read()
    try:
        d = json.loads(log)
        data.append(d)
    except Exception as e:
        error_count +=1
        print("Error number {}".format(error_count))

Error number 1


In [24]:
dataset = pd.DataFrame(data)

In [25]:
dataset

Unnamed: 0,audio_file,class,id,index_file,metadata,prefix,sample_length,timestamp,vibration_file
0,randy_sample_id_1550376990.526841.wav,falling_dummy,1550376990.526841,randy_sample_id_1550376990.526841_index.json,"{'floor_type': 'wooden_floor', 'distance_from_...",randy_sample,4,"02/17/2019, 04:16:30",randy_sample_id_1550376990.526841_vibration.json
1,randy_sample_id_1550717099.515901.wav,falling_dummy,1550717099.515901,randy_sample_id_1550717099.515901_index.json,"{'floor_type': 'carpet_room3', 'distance_from_...",randy_sample,4,"02/21/2019, 02:44:59",
2,negative_sample_id_1551026129.0965.wav,general_noise,1551026129.0965,negative_sample_id_1551026129.0965_index.json,"{'distance_from_device': 72, 'floor_type': 'wo...",negative_sample,4,"02/24/2019, 16:35:29",
3,randy_sample_id_1551319808.967742.wav,falling_dummy,1551319808.967742,randy_sample_id_1551319808.967742_index.json,"{'distance_from_device': 180, 'floor_type': 'w...",randy_sample,4,"02/28/2019, 02:10:08",
4,negative_sample_id_1551538987.462724.wav,general_noise,1551538987.462724,negative_sample_id_1551538987.462724_index.json,"{'distance_from_device': 96, 'floor_type': 'wo...",negative_sample,4,"03/02/2019, 15:03:07",
5,negative_sample_id_1551026054.854264.wav,general_noise,1551026054.854264,negative_sample_id_1551026054.854264_index.json,"{'distance_from_device': 72, 'floor_type': 'wo...",negative_sample,4,"02/24/2019, 16:34:14",
6,random_sample_id_1549763978.971404.wav,background_music,1549763978.971404,random_sample_id_1549763978.971404_index.json,,random_sample,4,"02/10/2019, 01:59:38",random_sample_id_1549763978.971404_vibration.json
7,randy_sample_id_1550631021.192445.wav,falling_dummy,1550631021.192445,randy_sample_id_1550631021.192445_index.json,"{'distance_from_device': 108, 'floor_type': 'c...",randy_sample,4,"02/20/2019, 02:50:21",
8,randy_sample_id_1551319650.225114.wav,falling_dummy,1551319650.225114,randy_sample_id_1551319650.225114_index.json,"{'distance_from_device': 180, 'floor_type': 'w...",randy_sample,4,"02/28/2019, 02:07:30",
9,negative_sample_id_1551407094.99554.wav,general_noise,1551407094.99554,negative_sample_id_1551407094.99554_index.json,"{'distance_from_device': 84, 'floor_type': 'wo...",negative_sample,4,"03/01/2019, 02:24:54",


In [26]:
target_folder = "/media/romulo/6237-3231/randy_samples"

In [31]:
for index, row in dataset[::].iterrows():
    print("Transforming file {}".format(index))
    
    if not os.path.exists(target_folder + "/"+'spectrograms'):
        os.makedirs(target_folder + "/"+'spectrograms')
    
    if not os.path.exists(target_folder + "/"+'spectrograms/' + row['class']):
        os.makedirs(target_folder + "/"+'spectrograms/' + row['class'])

    # Convert to log scale (dB). We'll use the peak power as reference.
    y, sr = soundfile.read(samples_folder + "/"+ str(row['audio_file']))
    log_s = extract_spectrogram(y,sr,n_mels=n_mels,n_fft=2048)
    #new plot
    fig = plt.figure(figsize=(12,4))
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)

    #getting spectrogram
    specdisplay.specshow(log_s, sr=sr, x_axis='time', y_axis='mel')

    #Saving PNG
    plt.savefig(target_folder + "/"+'spectrograms/' + row['class'] + 
                '/' + row['id'] + '.png')
    plt.close()

Transforming file 0


  if np.issubdtype(data.dtype, np.complex):


Transforming file 1
Transforming file 2
Transforming file 3
Transforming file 4
Transforming file 5
Transforming file 6
Transforming file 7
Transforming file 8
Transforming file 9
Transforming file 10
Transforming file 11
Transforming file 12
Transforming file 13
Transforming file 14
Transforming file 15
Transforming file 16
Transforming file 17
Transforming file 18
Transforming file 19
Transforming file 20
Transforming file 21
Transforming file 22
Transforming file 23
Transforming file 24
Transforming file 25
Transforming file 26
Transforming file 27
Transforming file 28
Transforming file 29
Transforming file 30
Transforming file 31
Transforming file 32
Transforming file 33
Transforming file 34
Transforming file 35
Transforming file 36
Transforming file 37
Transforming file 38
Transforming file 39
Transforming file 40
Transforming file 41
Transforming file 42
Transforming file 43
Transforming file 44
Transforming file 45
Transforming file 46
Transforming file 47
Transforming file 48
T