In [1]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import os
import time

import torch
from torch.autograd import Variable

import librosa
import librosa.display
import numpy as np

import sys, inspect
sys.path.append("/audio")
# Use this if you want to include modules from a subfolder
# cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(os.path.split(inspect.getfile( inspect.currentframe() ))[0],"../../../audio/")))
# if cmd_subfolder not in sys.path:
#     sys.path.insert(0, cmd_subfolder)
    
import torchaudio

from PIL import Image

# Analysis of sound lengths by class

In [18]:
# Measure the duration (in seconds) of every training clip, grouped by class
# directory, and report how long the scan took (in minutes).
src_path = "/data/datasets/sound_datasets/pytorch_UrbanSound8K/audio/trainset/"

t0 = time.time()

duration_log = {}

# Class folders are the immediate sub-directories of src_path. Listing them
# directly (rather than walking the tree recursively) keeps nested folders
# from being joined onto src_path as if they were top-level classes.
class_dirs = sorted(entry.name for entry in os.scandir(src_path) if entry.is_dir())

for class_dir in class_dirs:

    print(class_dir)
    class_path = os.path.join(src_path, class_dir)

    # One list of clip durations per class
    class_durations = []

    for file_name in sorted(os.listdir(class_path)):

        sound_in = os.path.join(class_path, file_name)

        sound_tensor, sr = torchaudio.load(sound_in)

        # NOTE(review): `[:, 0]` treats the loaded tensor as (samples, channels)
        # and keeps the first channel. Confirm that the installed torchaudio
        # returns this layout; a (channels, samples) tensor would need `[0]`.
        waveform = sound_tensor.numpy()[:, 0]

        # Pass the signal by keyword: recent librosa releases no longer accept
        # it positionally, while older ones accept both forms.
        class_durations.append(librosa.core.get_duration(y=waveform, sr=sr))

    duration_log[class_dir] = class_durations

tn = time.time()
tt = tn - t0
print(tt/60)

air_conditioner
car_horn
children_playing
dog_bark
drilling
gun_shot
jack_hammer
siren
street_music
engine_idling
1.9357112367947897


In [2]:
# Plot colour (hex string) for each sound class. The keys double as the
# class directory names used when figures are generated.
class_colors = dict(
    gun_shot="#b00036",
    street_music="#ff8765",
    siren="#c35d03",
    dog_bark="#c2c331",
    engine_idling="#007008",
    car_horn="#00a069",
    drilling="#00b4f5",
    air_conditioner="#0164cd",
    jack_hammer="#bfa2ff",
    children_playing="#e44caa",
)

def generate_histograms(input_dict, color_key, out_dir="./manuscript/figures/"):
    '''Save one histogram PNG per key of ``input_dict``.

    Each figure is titled with the prettified key and the number of
    observations, and is written to ``<out_dir>/<key>.png``.

    Parameters
    ----------
    input_dict : dict
        Maps a class name to the sequence of values to histogram (the axis
        label describes them as excerpt lengths in seconds).
    color_key : dict
        Maps every class name in ``input_dict`` to the bar colour to use.
    out_dir : str, optional
        Directory the PNG files are written to. It is created when missing.
        Defaults to "./manuscript/figures/".

    Raises
    ------
    KeyError
        If a key of ``input_dict`` has no entry in ``color_key``.
    '''
    # savefig() does not create missing directories, so make sure the target
    # exists before any figure is rendered.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for key, values in input_dict.items():

        # A fresh figure per class; closed in `finally` so a failure while
        # plotting or saving does not leave 400-dpi figures open in memory.
        fig, ax = plt.subplots(figsize=(8, 6), dpi=400)
        try:
            ax.hist(values, color=color_key[key])

            n_obs = len(values)
            title_class = key.replace('_', ' ').title()
            ax.set_title(' '.join([title_class, "excerpt lengths", "(n=" + str(n_obs) + ")"]))
            ax.set_ylabel("Number of observations")
            ax.set_xlabel("Length of excerpt in seconds")
            fig.tight_layout()

            savefig_name = os.path.join(out_dir, key + '.png')
            fig.savefig(savefig_name)
        finally:
            plt.close(fig)

In [46]:
# Write one duration histogram per class, coloured by the class palette.
generate_histograms(input_dict=duration_log, color_key=class_colors)

# Image averages

In [3]:
def dir_avg(src_path):
    '''Generate the average image given a directory of images.

    Only files whose name ends in ".jpg" or ".JPG" are used. Every image is
    converted to RGB, the pixel values are summed as floats, and the mean is
    rounded to the nearest integer.

    Parameters
    ----------
    src_path : str
        Directory holding the images, with or without a trailing separator.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape (height, width, 3) with the per-pixel mean.

    Raises
    ------
    ValueError
        If the directory contains no JPEG files, or if the images do not all
        share the same dimensions.
    '''
    # Collect the JPEG files; sorted so the result does not depend on the
    # order in which the filesystem happens to list them.
    imlist = sorted(
        filename for filename in os.listdir(src_path)
        if filename[-4:] in (".jpg", ".JPG")
    )
    if not imlist:
        raise ValueError("no .jpg/.JPG images found in {!r}".format(src_path))

    total = None
    for filename in imlist:
        # The context manager releases the file handle as soon as the pixels
        # are copied; convert("RGB") keeps grayscale or RGBA inputs compatible
        # with the three-channel accumulator.
        with Image.open(os.path.join(src_path, filename)) as img:
            # np.float64 instead of the `np.float` alias, which was removed
            # from NumPy and raises AttributeError on current releases.
            imarr = np.array(img.convert("RGB"), dtype=np.float64)

        if total is None:
            total = np.zeros_like(imarr)
        elif imarr.shape != total.shape:
            raise ValueError(
                "image {!r} has shape {} but {} was expected".format(
                    filename, imarr.shape, total.shape))

        total += imarr

    # Divide once at the end, then round and cast to 8-bit integers
    return np.array(np.round(total / len(imlist)), dtype=np.uint8)

In [6]:
# Save the average training spectrogram of every class as a PNG figure.
figures_dir = "./manuscript/figures/"
# savefig() does not create missing directories
os.makedirs(figures_dir, exist_ok=True)

for target_class in class_colors:

    src_path = "/data/datasets/sound_datasets/pytorch_UrbanSound8K/image_80tr10va10te/trainset/" + target_class + "/"
    avg_img = dir_avg(src_path)
    # Count only the files dir_avg() averages (names ending in .jpg/.JPG) so
    # the "n=" in the title matches the image, not every directory entry.
    avg_n = len([name for name in os.listdir(src_path) if name[-4:] in (".jpg", ".JPG")])

    # Closed in `finally` so a failure does not leave 400-dpi figures open.
    fig, ax = plt.subplots(figsize=(8, 6), dpi=400)
    try:
        ax.imshow(avg_img)
        title_class = target_class.replace('_', ' ').title()
        ax.set_title(title_class + " average spectrogram" + " (n=" + str(avg_n) + ")")
        # Axes are labelled but tick marks are hidden on both of them
        ax.set_ylabel("Frequency (Hz)")
        ax.set_yticks([])
        ax.set_xlabel("Time")
        ax.set_xticks([])
        fig.tight_layout()

        savefig_name = os.path.join(figures_dir, target_class + '_train_avg.png')
        fig.savefig(savefig_name)
    finally:
        plt.close(fig)