Idea: what would happen if we tried to classify music using one of the well-understood object-classification networks, such as the application networks built into Keras? We would need to find some meaningful way of representing musical features in a 299x299 (or 224x224) pixel image. What rendering might work? Spectrally shaded audio waveforms carry so much information that a DJ can sometimes identify and mix in a new track without ever having listened to it. It seems plausible that a rendering designed for fine-grained style comparison might be even more informative.

In [None]:
import os, os.path
import numpy as np
from musictoys import audiofile, analysis

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Default every figure in this notebook to a wide, short 16x3 canvas.
matplotlib.rcParams["figure.figsize"] = (16, 3)

In [None]:
# Paths of the sample clips, all stored in the local "audio_files" directory.
audio_files = [
    os.path.join("audio_files", name)
    for name in (
        "jfb-back_home.wav",
        "kronfeld-dreamatic.wav",
        "liberty_chaps-get_up_get_down.wav",
    )
]
# Read each file and normalize it; every result unpacks as (data, samplerate).
normalized = [analysis.normalize(*audiofile.read(path)) for path in audio_files]
# Keep only the sample data of clips whose sample rate is 22050. Anything else
# is dropped silently, which shifts the indices used by the cells that index
# audio_clips directly.
audio_clips = [samples for samples, rate in normalized if rate == 22050]

In [None]:
# Work with the first loaded clip and sanity-check it by printing its shape
# followed by its minimum, maximum and mean sample values.
clip = audio_clips[0]
# A parenthesised single-argument print is valid both as the Python 2
# statement and as the Python 3 function; the items are str()-formatted and
# space-separated exactly as the old `print a, b, c, d` statement did.
print("%s %s %s %s" % (clip.shape, clip.min(), clip.max(), clip.mean()))


In [None]:
def plotspectrogram(spec):
    """Draw a 2-D array as an image on the current matplotlib axes.

    ``spec`` is transposed before drawing, so its first axis runs along the
    horizontal axis of the plot. Pixels are drawn without smoothing
    (nearest-neighbour) and stretched to fill the axes, and the y-axis is
    flipped afterwards with ``invert_yaxis()``.
    """
    # The default colormap is used; plt.set_cmap('afmhot') was an alternative
    # that the original author left disabled.
    image = spec.T
    plt.imshow(image, aspect='auto', interpolation='nearest')
    axes = plt.gca()
    axes.invert_yaxis()

In [None]:
def printspecrange(spec, name):
    """Print a one-line summary of the value distribution of ``spec``.

    The line has the form
    ``<name> range = <min>..<max>; mean=<mean>, stdev=<std>`` with every
    number shown to three decimal places.

    Args:
        spec: Object exposing ``min()``, ``max()``, ``mean()`` and ``std()``
            (for example a NumPy array).
        name: Label placed at the start of the printed line.
    """
    stats = (name, spec.min(), spec.max(), spec.mean(), spec.std())
    # A parenthesised single-argument print works both as the Python 2
    # statement and as the Python 3 function, with identical output.
    print("%s range = %.3f..%.3f; mean=%.3f, stdev=%.3f" % stats)


In [None]:
# Magnitude spectrogram of the clip. The 2048 and 1024 arguments are
# presumably the window and hop sizes -- confirm against analysis.stft.
specgram = np.absolute(analysis.stft(clip, 2048, 1024))
printspecrange(specgram, "spectrogram")
# Convert to power spectrum.
powerspec = np.square(specgram)
printspecrange(powerspec, "power spectrum")
# Get loudness, convert power to decibels.
# Bins with zero power make log10 return -inf (with a divide-by-zero warning),
# which turns the printed mean/stdev into -inf/nan. Everything quieter than
# -floor_db is discarded by the rescaling step anyway, so clamp the loudness
# to that floor here; levelspec is unaffected by the clamp.
floor_db = 120.0
with np.errstate(divide="ignore"):
    loudspec = np.maximum(10.0 * np.log10(powerspec), -floor_db)
printspecrange(loudspec, "loudness (dB)")
# Discard everything below -120 dB and rescale to 0..1: -120 dB maps to 0,
# 0 dB maps to 1, and anything louder than 0 dB is clipped to 1.
levelspec = np.clip(1.0 + loudspec / floor_db, 0, 1)
printspecrange(levelspec, "normalized")
plotspectrogram(levelspec)


In [None]:
import librosa
import librosa.display

# 13 MFCCs for the first clip, computed at the 22050 sample rate that the
# loading step filters on.
first_clip = audio_clips[0]
mfccs = librosa.feature.mfcc(y=first_clip, sr=22050, n_mfcc=13)
# plotspectrogram() transposes its argument, so hand it the transpose to get
# the MFCC matrix drawn in its original orientation.
plotspectrogram(mfccs.T)

In [None]:
# Same 13-coefficient MFCC plot for the second clip.
second_clip_mfccs = librosa.feature.mfcc(y=audio_clips[1], sr=22050, n_mfcc=13)
plotspectrogram(second_clip_mfccs.T)

In [None]:
# Same 13-coefficient MFCC plot for the third clip.
third_clip_mfccs = librosa.feature.mfcc(y=audio_clips[2], sr=22050, n_mfcc=13)
plotspectrogram(third_clip_mfccs.T)