Reads audio files with ffmpeg, computes [melspectrograms](https://librosa.github.io/librosa/generated/librosa.feature.melspectrogram.html) and [Mel-frequency cepstral coefficients (MFCCs)](https://librosa.github.io/librosa/generated/librosa.feature.mfcc.html), and outputs them to a pickle file. 

Includes examples for audio playback and feature visualization.

In [None]:
""" `imageio_ffmpeg` contains a pre-built `ffmpeg` binary, needed for mp3 decoding by `librosa`. 
    It is installed as a custom package on Kaggle. If no `ffmpeg` binary is found in `/usr/local/bin` 
    then create a softlink to the `imageio_ffmpeg` binary. 
"""
import os
# Make an ffmpeg binary available at the conventional location when none is
# installed there, using the one bundled with `imageio_ffmpeg`.
if not os.path.exists("/usr/local/bin/ffmpeg"):
    import imageio_ffmpeg
    # Use a symbolic link (a "softlink"), not os.link: a hard link fails with
    # OSError when the bundled binary lives on a different filesystem than
    # /usr/local/bin.
    os.symlink(imageio_ffmpeg.get_ffmpeg_exe(), "/usr/local/bin/ffmpeg")

### Read Audio Files and Compute Data Transforms

In [None]:
import pandas as pd
import numpy as np
from librosa.display import specshow
from librosa import feature
from glob import glob
import os
from IPython.display import Audio
from matplotlib import pyplot as plt
from zipfile import ZipFile
from tqdm import tqdm


# Directory holding the mp3 recordings.
# Alternative location: "../input/xeno-canto-ca-nv/"
sounds_dir = "../input/xenocanto-avian-vocalizations-canv-usa/xeno-canto-ca-nv/"


def get_full_path(sample):
    """Return the path of the audio file belonging to one index row.

    `sample` is any object indexable by 'file_name' (e.g. a row of the index
    DataFrame). The name is joined onto the module-level `sounds_dir`, which
    is looked up at call time.
    """
    file_name = sample['file_name']
    return os.path.join(sounds_dir, file_name)

# Load the recording index (one row per sample).
# Alternative index location: "../input/xeno-canto_ca-nv_index.csv"
df = pd.read_csv("../input/xenocanto-avian-vocalizations-canv-usa/xeno-canto_ca-nv_index.csv")

# Collect the mp3 files that are actually present in the sounds directory.
mp3_pattern = os.path.join(sounds_dir, "*.mp3")
files_list = glob(mp3_pattern)

# Report both counts so a mismatch between files and index is easy to spot.
print("%i mp3 files in %s" % (len(files_list), sounds_dir))
print("%i samples in index." % len(df))
df.head()

In [None]:
# Feature extraction is disabled here. The commented-out loop below shows how
# the `features` dict was built: one entry per `file_id`, each holding the
# melspectrogram, mfcc, spectral_centroid and spectral_bandwidth arrays.
# NOTE(review): the loop refers to `lr`, which is not imported in the visible
# cells - presumably `import librosa as lr`; confirm before re-enabling.
# from tqdm import tqdm
# features = {}
# for sample_idx, sample in tqdm(df.iterrows(), total=len(df)):
#     data, samplerate = lr.load(get_full_path(sample))
#     features[sample['file_id']] = {
#         'melspectrogram': lr.feature.melspectrogram(data, sr=samplerate),
#         'mfcc': lr.feature.mfcc(data, sr=samplerate ),
#         'spectral_centroid': lr.feature.spectral_centroid(data, sr=samplerate),
#         'spectral_bandwidth': lr.feature.spectral_bandwidth(data, sr=samplerate),
#     }
# Load the precomputed features from a pickle instead of recomputing them.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only use it on a file from a trusted source.
import pickle
with open('../input/avian-vocalizations-pickled-spectrograms-and-mfcc/features.pickle', 'rb') as f:
    features = pickle.load(f)


In [None]:
# Working directory for the per-recording memory-mapped feature files;
# created on first use, left alone when it already exists.
feature_dir = 'features'
os.makedirs(feature_dir, exist_ok=True)

In [None]:
# Persist every mel spectrogram as a raw float32 file in `feature_dir`,
# written through a memory map that has the same shape as the source array.
for file_id in tqdm(list(features)):
    spectrogram = features[file_id]['melspectrogram']
    dat_path = os.path.join(feature_dir, 'XC%s_melspectrogram.dat' % file_id)
    mapped = np.memmap(dat_path, dtype='float32', mode='w+',
                       shape=spectrogram.shape)
    mapped[:] = spectrogram[:].astype('float32')

# Disabled: bundling the raw .dat files into a zip archive.
# with ZipFile("melspectrograms.zip",'a') as zf:
#     for file_id in features:
#         zf.write(os.path.join(feature_dir,'XC%s_melspectrogram.dat'%file_id))

In [None]:
# for file_id in tqdm(list(features)):
#     mfcc = features[file_id]['mfcc']
#     mm_mfcc = np.memmap(os.path.join(feature_dir,'XC%s_mfcc.dat'%file_id), 
#                       shape=mfcc.shape, dtype='float32', mode='w+')
#     mm_mfcc[:] = mfcc[:].astype('float32')
    
# with ZipFile("mfccs.zip",'a') as zf:
#     for file_id in features:
#         zf.write(os.path.join(feature_dir,'XC%s_mfcc.dat'%file_id))

In [None]:
def plot_spectral_center(center, bw=None, log=False):
    """Draw a bar chart of a spectral-centroid series on a new 15x4 figure.

    Args:
        center: sequence of centroid values; one bar of width 1 per element.
        bw: optional bandwidth values drawn as error bars. When omitted,
            zero-length error bars are used.
        log: when true, plot the natural log of `center` (and of `bw`, if
            given) and label the axes/title accordingly.

    The figure is created but not shown; the caller calls `plt.show()`.
    """
    has_bw = bw is not None
    if has_bw and log:
        bw = np.log(bw)

    plt.figure(figsize=(15, 4))

    positions = np.arange(len(center))
    heights = np.log(center) if log else center
    errors = bw if has_bw else np.zeros(len(center))
    plt.bar(positions, heights, 1, yerr=errors)

    if log:
        plt.ylabel("log(Hz)")
        title = "log(Spectral Centroid)"
        bw_suffix = " +/- log(bandwidth)"
    else:
        plt.ylabel("HZ")
        title = "Spectral Centroid"
        bw_suffix = " +/- bandwidth"

    if has_bw:
        title += bw_suffix
    plt.title(title)

In [None]:
# Inspect one example recording (row 7 of the index): plot its spectral
# centroid with bandwidth error bars on a linear scale, then on a log scale.
sample = df.iloc[7]
sample_features = features[sample['file_id']]
center = sample_features['spectral_centroid'][0]
bw = sample_features['spectral_bandwidth'][0]

for use_log in (False, True):
    plot_spectral_center(center, bw, log=use_log)
    plt.show()

# Summary of the log-scaled centroid for this one recording.
log_center = np.log(center)
print("log(center) mean: %.3f, std deviation: %.3f" % (np.mean(log_center), np.std(log_center)))

In [None]:
# First row of each recording's spectral centroid / bandwidth arrays, in the
# iteration order of `features`.
spectral_centroids = [entry['spectral_centroid'][0] for entry in features.values()]
spectral_bandwidths = [entry['spectral_bandwidth'][0] for entry in features.values()]

# Open the mel spectrograms from the float32 .dat files in `feature_dir` as
# memory maps (numpy's default memmap mode), using the shape of the
# corresponding in-memory array. The MFCC equivalent is currently disabled.
melspectrogram = []
for file_id, entry in features.items():
    dat_path = os.path.join(feature_dir, 'XC%s_melspectrogram.dat' % file_id)
    melspectrogram.append(
        np.memmap(dat_path, dtype='float32', shape=entry['melspectrogram'].shape)
    )
def log_clipped(a, floor=1e-7):
    """Return the natural log of `a` after raising small values to `floor`.

    Args:
        a: array-like of values to log-scale.
        floor: smallest value allowed before taking the log (default 1e-7),
            which keeps zeros from producing -inf.

    Returns:
        Array of the same shape as `a` holding log(max(a, floor)).

    The previous form, ``np.clip(a, floor, a.max())``, did not enforce the
    floor when every value was below it (e.g. an all-zero spectrogram),
    because the upper bound then undercut the lower one; it also raised on
    empty input. ``np.maximum`` applies the floor unconditionally.
    """
    return np.log(np.maximum(a, floor))
def _summary_columns(arrays, transform=None):
    """Return per-array mean/max/min/std as a dict of parallel lists.

    `transform`, when given, is applied once to each array before the four
    statistics are taken, so an expensive transform is not recomputed for
    every statistic.
    """
    columns = {'mean': [], 'max': [], 'min': [], 'std': []}
    for values in arrays:
        if transform is not None:
            values = transform(values)
        columns['mean'].append(np.mean(values))
        columns['max'].append(np.max(values))
        columns['min'].append(np.min(values))
        columns['std'].append(np.std(values))
    return columns


# Add '<prefix>_mean/_max/_min/_std' columns to the index for each feature.
# The centroid minimum is stored as 'spectral_centroid_min' (it was previously
# misspelled 'spectral_centroid_mmin'), matching 'spectral_bandwidth_min'.
# NOTE(review): values are assigned positionally, so this assumes `features`
# iterates in the same order as the rows of `df` - confirm.
# MFCC statistics are currently disabled.
for _prefix, _arrays, _transform in (
    ('spectral_centroid', spectral_centroids, None),
    ('spectral_bandwidth', spectral_bandwidths, None),
    ('melspectrogram', melspectrogram, None),
    ('melspectrogram_log', melspectrogram, log_clipped),
):
    for _stat, _values in _summary_columns(_arrays, _transform).items():
        df['%s_%s' % (_prefix, _stat)] = _values

In [None]:
# Average, across recordings, of the per-recording melspectrogram mean and
# standard deviation columns (displayed as a tuple).
df['melspectrogram_mean'].mean(), df['melspectrogram_std'].mean()

In [None]:
# Pool every spectrogram value in the dataset into one flat array and show
# its size together with the overall mean and standard deviation.
flat_spectrograms = [sg.flatten() for sg in melspectrogram]
sg_agg = np.concatenate(flat_spectrograms)
sg_agg.shape, sg_agg.mean(), sg_agg.std()

In [None]:
# Average, across recordings, of the per-recording log-melspectrogram mean and
# standard deviation columns (displayed as a tuple).
df['melspectrogram_log_mean'].mean(), df['melspectrogram_log_std'].mean()

In [None]:
# Same pooling as for the raw spectrograms, but on the log-scaled values:
# one flat array for the whole dataset, then its size, mean and std.
flat_log_spectrograms = [log_clipped(sg.flatten()) for sg in melspectrogram]
log_sg_agg = np.concatenate(flat_log_spectrograms)
log_sg_agg.shape, log_sg_agg.mean(), log_sg_agg.std()

In [None]:
# df['mfcc_mean'].mean(), df['mfcc_std'].mean()

In [None]:
# mfcc_agg = np.concatenate([sg.flatten() for sg in mfcc])
# mfcc_agg.shape, mfcc_agg.mean(), mfcc_agg.std()

#### Spectral Centroid

In [None]:
# Dataset-level summary of the per-recording mean spectral centroid.
centroid_means = df['spectral_centroid_mean']
for line_template, value in (
    ("Mean spectral centroid of entire dataset:                      %6.0f", centroid_means.mean()),
    ("Max sample-wise average spectral centroid of entire dataset:   %6.0f", centroid_means.max()),
    ("Min sample-wise average spectral centroid of entire dataset:   %6.0f", centroid_means.min()),
    ("Std deviation of sample-wise average of entire dataset         %6.0f", centroid_means.std()),
):
    print(line_template % value)

#### Spectral Bandwidth

In [None]:
# Dataset-level summary of the per-recording mean spectral bandwidth.
bandwidth_means = df['spectral_bandwidth_mean']
for line_template, value in (
    ("Mean spectral bandwidth of entire dataset:                     %6.0f", bandwidth_means.mean()),
    ("Max sample-wise average spectral bandwidth of entire dataset:  %6.0f", bandwidth_means.max()),
    ("Min sample-wise average spectral bandwidth of entire dataset:  %6.0f", bandwidth_means.min()),
    ("Std deviation of sample-wise average of entire dataset:        %6.0f", bandwidth_means.std()),
):
    print(line_template % value)

#### Mel Spectrogram Power Magnitude

In [None]:
# Dataset-level summary of the per-recording mean melspectrogram magnitude.
melspec_means = df['melspectrogram_mean']
for line_template, value in (
    ("Mean melspectrogram magnitude of entire dataset:                           %8.4f", melspec_means.mean()),
    ("Max sample-wise average melspectrogram magnitude of entire dataset:         %8.1f", melspec_means.max()),
    ("Min sample-wise average melspectrogram magnitude of entire dataset:         %8.4f", melspec_means.min()),
    ("Std dev of sample-wise average  melspectrogram magnitude of entire dataset: %8.4f", melspec_means.std()),
):
    print(line_template % value)

#### MFCC statistics

In [None]:
# print("Mean MFCC of entire dataset:                               %9.4f"%df['mfcc_mean'].mean())
# print("Max sample-wise average MFCC of entire dataset:            %9.4f"%df['mfcc_mean'].max())
# print("Min sample-wise average MFCC of entire dataset:            %9.4f"%df['mfcc_mean'].min())
# print("Std dev of sample-wise average of MFCC of entire dataset:  %9.4f"%df['mfcc_mean'].std())

In [None]:
# Display the log-scaled mel spectrogram of the recording at position 7.
example_log_sg = log_clipped(melspectrogram[7])
specshow(example_log_sg)

In [None]:
# Shape of every recording's mel spectrogram, keyed by file id and kept in
# the iteration order of `features`. (MFCC shapes are currently disabled.)
shape_by_id = {
    file_id: entry['melspectrogram'].shape
    for file_id, entry in features.items()
}

# Attach the shapes to the index as a column.
melspec_shapes = list(shape_by_id.values())
df['melspectrogram_shape'] = melspec_shapes[:]

# Also save them as a standalone table; the shapes are needed to reopen the
# raw .dat files, which carry no shape information themselves.
shapes_df = pd.DataFrame([
    {'file_id': file_id, 'melspectrogram_shape': shape}
    for file_id, shape in shape_by_id.items()
])
shapes_df.to_csv("feature_shapes.csv")
shapes_df.head()

In [None]:
# One-row table of dataset-wide statistics over all pooled spectrogram values,
# on the raw and on the log scale; saved to stats.csv for later normalization.
# (MFCC statistics are currently disabled.)
dataset_stats = {
    'melspectrogram_mean': sg_agg.mean(),
    'melspectrogram_std': sg_agg.std(),
    'log_melspec_mean': log_sg_agg.mean(),
    'log_melspec_std': log_sg_agg.std(),
}
stats_df = pd.DataFrame([dataset_stats])
stats_df.to_csv('stats.csv')
stats_df

Now that we have pixel means and variances, let's make some room on the disk, and then output normalized data.

In [None]:
# Delete the intermediate files in the feature directory so that the Kaggle
# kernel output will save (avoids the "Too many files" error).
stale_files = glob(feature_dir + "/*")
for stale_path in stale_files:
    os.remove(stale_path)

In [None]:
# Write each mel spectrogram log-scaled and standardised with the dataset-wide
# log statistics, bundle the results into a zip archive, then clean up.

# Extract the two normalisation constants from the one-row stats table once,
# outside the loop. `.iloc[0]` is used because calling float() on a whole
# single-element Series is deprecated in recent pandas versions.
log_melspec_mean = float(stats_df['log_melspec_mean'].iloc[0])
log_melspec_std = float(stats_df['log_melspec_std'].iloc[0])

for file_id in tqdm(list(features)):
    sg = features[file_id]['melspectrogram']
    mm_sg = np.memmap(os.path.join(feature_dir,'XC%s_melspectrogram_logscaled_normalized.dat'%file_id),
                      shape=sg.shape, dtype='float32', mode='w+')
    # Subtract the mean and divide by standard deviation
    mm_sg[:] = ((log_clipped(sg[:]) - log_melspec_mean) / log_melspec_std).astype('float32')
    # Make sure the data is on disk before the file is read back for zipping.
    mm_sg.flush()

# NOTE: mode 'a' appends, so re-running this step against an existing archive
# adds duplicate entries; delete the zip first when regenerating.
with ZipFile("melspectrograms_logscaled_normalized.zip",'a') as zf:
    zf.write("stats.csv")
    zf.write("feature_shapes.csv")
    for file_id in features:
        zf.write(os.path.join(feature_dir,'XC%s_melspectrogram_logscaled_normalized.dat'%file_id))

# Remove files so that Kaggle kernel output will save (Avoid Too many files error)
for f in glob(feature_dir+'/*'): os.remove(f)
# The now-empty directory is removed by the cleanup step that follows. The
# former `os.removedirs(['features'])` call here passed a list instead of a
# path and raised TypeError, aborting this step.
    
# for file_id in tqdm(list(features)):
#     mfcc = features[file_id]['mfcc']
#     mm_mfcc = np.memmap(os.path.join(feature_dir,'XC%s_mfcc.dat'%file_id), 
#                       shape=mfcc.shape, dtype='float32', mode='w+')
#     # Subtract the mean and divide by standard deviation
#     mm_mfcc[:] = ((mfcc[:]-float(stats_df['mfcc_mean']))/float(stats_df['mfcc_std'])).astype('float32')
    
    
# with ZipFile("mfccs_normalized.zip",'a') as zf:
#     zf.write("stats.csv")
#     zf.write("feature_shapes.csv")
#     for file_id in features:
#         zf.write(os.path.join(feature_dir,'XC%s_mfcc.dat'%file_id))
        
# for f in glob('features/*'): os.remove(f)

In [None]:
# Remove files so that Keggle kernel output will save (Avoid Too many files error)
for f in glob(feature_dir+'/*'): os.remove(f)
os.removedirs(feature_dir)

In [None]:
# Display the contents of the current working directory.
os.listdir()