In [2]:
import glob
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from tqdm import tqdm

The frequency range of right whale moans is typically between 100 and 500 Hz, with most energy concentrated around 200 Hz. The moans are usually produced at a fundamental frequency of around 100 Hz, but contain a series of harmonics and overtones that extend up to 500 Hz.

The spectrograms generated from the audio files will be filtered to only include frequencies in the range of 0 Hz to 500 Hz.

In [3]:
def get_images(samples, sr, output_path, hop_length=40, n_fft=200, n_mels=50, fmax=500, figsize=(2, 2), dpi=150):
	"""Render a mel spectrogram of an audio clip and save it as a borderless PNG.

	The clip is converted to a mel power spectrogram, expressed in dB relative
	to its own maximum, drawn without axes, and written to ``output_path``.

	Parameters
	----------
	samples : array-like
		Audio time series, passed to ``librosa.feature.melspectrogram`` as ``y``.
	sr : int or float
		Sampling rate of ``samples``.
	output_path : str or path-like
		Destination of the PNG file.
	hop_length : int, optional
		Hop between successive STFT frames, in samples.
	n_fft : int, optional
		FFT window length, in samples.
	n_mels : int, optional
		Number of mel bands.
	fmax : float, optional
		Highest frequency (Hz) covered by the mel filter bank.
	figsize : tuple, optional
		Figure size in inches, as ``(width, height)``.
	dpi : int, optional
		Resolution used when saving the figure.

	Returns
	-------
	None
		The function is called for its side effect of writing the image file.
	"""
	mel_power = librosa.feature.melspectrogram(
		y=samples,
		sr=sr,
		n_fft=n_fft,
		hop_length=hop_length,
		n_mels=n_mels,
		fmax=fmax,
	)
	# Scale to dB relative to the loudest bin of this clip.
	mel_db = librosa.power_to_db(mel_power, ref=np.max)

	# Work on an explicit figure/axes pair instead of pyplot's implicit
	# "current figure", so the right figure is always the one drawn and closed.
	fig, ax = plt.subplots(figsize=figsize)
	try:
		# NOTE(review): the rows are mel bands, yet y_axis='linear' is kept from
		# the original implementation; the axes are hidden, and switching the
		# scale might change the saved images, so it is left as-is.
		librosa.display.specshow(
			mel_db,
			x_axis='time',
			y_axis='linear',
			sr=sr,
			hop_length=hop_length,
			ax=ax,
		)
		ax.axis('off')
		fig.tight_layout()
		fig.savefig(
			output_path,
			dpi=dpi,
			format='png',
			bbox_inches='tight',
			pad_inches=0,
		)
	finally:
		# Always release the figure, even if drawing or saving fails; otherwise
		# figures accumulate in memory when this runs over many files.
		plt.close(fig)


In [4]:
# Directory that receives the rendered spectrogram PNGs.
output_path= './train_png/'
# exist_ok=True avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(output_path, exist_ok=True)

# Directory expected to contain the training clips (train*.aiff).
source_path= '../audio/data/train/'
# isdir (rather than exists) also rejects a regular file sitting at this path.
if not os.path.isdir(source_path):
	raise ValueError('Can not find the audio files, make sure you have followed the steps in the audio folder readme')

train_files= glob.glob(os.path.join(source_path, 'train*.aiff'))
# Fail loudly here instead of letting the conversion loop silently do nothing.
if not train_files:
	raise ValueError('No train*.aiff files found in ' + source_path)

# Preview a few of the matched paths.
train_files[:5]

['../audio/data/train/train8696.aiff',
 '../audio/data/train/train86.aiff',
 '../audio/data/train/train24166.aiff',
 '../audio/data/train/train13215.aiff',
 '../audio/data/train/train9984.aiff']

In [5]:
# Convert every training clip into a spectrogram PNG that shares the clip's base name.
for input_file in tqdm(train_files, desc='Proccesing files'):
	# Drop the directory and the extension to obtain the clip's base name.
	clip_filename = os.path.basename(input_file)
	clip_stem = os.path.splitext(clip_filename)[0]
	output_file = os.path.join(output_path, clip_stem + '.png')

	# Load the waveform together with its sampling rate, then render it.
	audio_data, sr = sf.read(input_file)
	get_images(audio_data, sr, output_file)

Proccesing files: 100%|██████████| 30000/30000 [1:07:11<00:00,  7.44it/s]
