In [1]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import time
import pandas as pd
import seaborn as sns
%matplotlib inline
# Global plotting look-and-feel: ggplot base style plus font overrides.
plt.style.use('ggplot')

# Apply all font/size overrides in one pass; keys are set in the listed order.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

In [2]:
def extract_feature(file_name):
    """Compute time-averaged MFCC and log-mel features for one audio file.

    Parameters
    ----------
    file_name : str
        Path of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    mfccs : numpy.ndarray
        Mean over frames of 40 MFCC coefficients (``n_mfcc=40``).
    logmel : numpy.ndarray
        Mean over frames of the natural log of the mel spectrogram
        (librosa's default number of mel bands).

    Notes
    -----
    Only these two feature groups are returned. Chroma, spectral contrast
    and tonnetz were disabled in this function, so the STFT magnitude that
    fed them is no longer computed.
    """
    X, sample_rate = librosa.load(file_name)
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    # The audio must be passed as ``y=``: recent librosa releases made the
    # arguments of feature extractors keyword-only.
    mel = librosa.feature.melspectrogram(y=X, sr=sample_rate)
    # NOTE(review): a mel bin that is exactly 0 in any frame yields -inf here
    # and therefore -inf in the mean; confirm inputs are never fully silent.
    logmel = np.mean(np.log(mel.T), axis=0)
    return mfccs, logmel

def parse_audio_files(parent_dir, sub_dirs, file_ext='*.wav'):
    """Extract features, class labels and paths for every matching audio file.

    Parameters
    ----------
    parent_dir : str
        Directory that contains the fold sub-directories.
    sub_dirs : iterable of str
        Sub-directory names (relative to ``parent_dir``) to scan.
    file_ext : str, optional
        Glob pattern selecting the audio files inside each sub-directory.

    Returns
    -------
    features : numpy.ndarray
        One row per file: MFCC means followed by log-mel means, as returned
        by ``extract_feature``. Shape ``(0, 168)`` when no file matches.
    labels : numpy.ndarray of int
        Class id parsed from the second ``-``-separated field of each file's
        base name (e.g. ``7061-6-0-0.wav`` -> ``6``).
    namePath : numpy.ndarray of bytes
        Path of each processed file, in the same order as ``features``.

    Raises
    ------
    IndexError, ValueError
        If a file name has no second ``-`` field or that field is not an
        integer.
    """
    feature_rows, class_ids, paths = [], [], []
    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            mfccs, logmel = extract_feature(fn)
            feature_rows.append(np.hstack([mfccs, logmel]))
            # Use the base name so the label does not depend on how deep
            # parent_dir is or on the platform's path separator.
            class_ids.append(int(os.path.basename(fn).split('-')[1]))
            paths.append(fn)

    # Stack once at the end instead of re-allocating on every file.
    if feature_rows:
        features = np.vstack(feature_rows)
    else:
        features = np.empty((0, 168))

    # np.int / np.string_ were removed from NumPy; int / np.bytes_ are the
    # same dtypes under their supported names.
    return features, np.array(class_ids, dtype=int), np.array(paths, dtype=np.bytes_)

def one_hot_encode(labels):
    """Convert integer class ids into a one-hot matrix.

    Parameters
    ----------
    labels : array-like
        One class id per sample; values are cast to ``int`` and used as
        column indices.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, n_classes)`` with a single 1 per
        row in the column given by that sample's label.
    """
    labels = np.asarray(labels).astype(int)
    n_labels = len(labels)
    if n_labels == 0:
        return np.zeros((0, 0))
    # The label value is the column index, so the matrix must be at least
    # max(label) + 1 wide. Counting distinct labels alone raised IndexError
    # whenever a class was absent from this particular label set.
    n_classes = max(len(np.unique(labels)), int(labels.max()) + 1)
    encoded = np.zeros((n_labels, n_classes))
    encoded[np.arange(n_labels), labels] = 1
    return encoded

def one_hot_decode(labels):
    """Convert a one-hot matrix back into class indices.

    Parameters
    ----------
    labels : array-like, shape (n_samples, n_classes)
        Matrix in which a 1 marks the class of each row.

    Returns
    -------
    numpy.ndarray
        Float array of length ``n_samples`` holding, for each row, the index
        of the first column equal to 1. Rows without any 1 decode to 0.
    """
    n_labels, n_unique_labels = np.shape(labels)
    # Function-call form is valid on both Python 2 and Python 3.
    print(n_unique_labels)
    if n_unique_labels == 0:
        return np.zeros((n_labels))
    # argmax over a boolean mask returns the first True per row, and 0 when
    # the row has none, which matches the zero-initialised default.
    is_hot = np.asarray(labels) == 1
    return np.argmax(is_hot, axis=1).astype(float)

In [None]:
# Number of columns in each feature group.
# extract_feature returns only (mfccs, logmel), so the MFCC and mel sizes are
# measured from one sample file. Indexing x[2]..x[4] raised IndexError, and
# x[1] is the mel block rather than chroma.
x = extract_feature("audio/fold1/7061-6-0-0.wav")
nfMfccs = np.shape(x[0])[0]
nfMel = np.shape(x[1])[0]

# The remaining groups exist only in the stored 193-column feature files.
# Their sizes follow the column layout used when those columns are dropped
# later in this notebook: chroma occupies 40:52 and contrast + tonnetz occupy
# 180:193.
nfChr = 12
# NOTE(review): the 13 trailing columns are presumably 7 spectral-contrast
# + 6 tonnetz values (librosa defaults); verify against the DataFrame columns.
nfCon = 7
nfTon = 6

In [None]:
# Load the cached feature matrix, class labels and source file names
# (presumably for folds 1-5, judging by the file names).
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code; only load these files from a trusted source.
features = np.load("nn_features_fold_12345.npy", allow_pickle=True)
labels = np.load("nn_labels_fold_12345.npy", allow_pickle=True)
file_names = np.load("nn_file_names_fold_12345.npy", allow_pickle=True)

In [None]:
def _numbered_columns(prefix, count):
    """Return [prefix + "0", prefix + "1", ...] with `count` entries."""
    return [prefix + str(idx) for idx in range(count)]


# Column names for each feature group, sized by the counts computed above.
mfccs_col = _numbered_columns("mfcc_", nfMfccs)
chroma_stft_col = _numbered_columns("chroma_", nfChr)
melspec_col = _numbered_columns("melspec_", nfMel)
contrast_col = _numbered_columns("contrast_", nfCon)
tonnetz_col = _numbered_columns("tonnetz_", nfTon)

In [None]:
# Load the pre-computed DataFrames. In the cells below, features_df feeds the
# log-mel plots and norm_data_df feeds every other plot.
# NOTE(review): read_pickle unpickles the file and can execute arbitrary code;
# only load pickles from a trusted source.
features_df = pd.read_pickle("nn_features_df_fold_12345.pkl")
norm_data_df = pd.read_pickle("nn_data_df_fold_12345.pkl")
labels_df = pd.read_pickle("nn_labels_df_fold_12345.pkl")

In [None]:
# Preview the first five rows of the frame used for most plots below.
norm_data_df.head(n=5)

## Univariate distributions

#### MFCC

In [None]:
# One distribution panel per MFCC coefficient on an 8x5 grid.
fig = plt.figure(figsize=(50, 50))
for idx in range(nfMfccs):
    ax = fig.add_subplot(8, 5, idx + 1)  # subplot positions are 1-based
    sns.distplot(norm_data_df["mfcc_" + str(idx)], ax=ax)
# Match the other univariate sections so panel labels do not overlap.
fig.tight_layout()

#### Chroma

In [None]:
# One distribution panel per chroma column on a 3x4 grid.
fig = plt.figure(figsize=(35, 30))
for idx in range(nfChr):
    ax = fig.add_subplot(3, 4, idx + 1)  # subplot positions are 1-based
    sns.distplot(norm_data_df["chroma_" + str(idx)], ax=ax)
fig.tight_layout()

#### Log MelSpec

In [None]:
# Eight figures of 4x4 panels cover mel columns 0..127; each panel shows the
# log of the raw (un-normalised) mel column from features_df.
panels_per_fig = 16
for fig_idx in range(8):
    fig = plt.figure(figsize=(35, 30))
    first_col = fig_idx * panels_per_fig
    for panel, f_num in enumerate(range(first_col, first_col + panels_per_fig), start=1):
        ax = fig.add_subplot(4, 4, panel)
        sns.distplot(np.log(features_df["melspec_" + str(f_num)]), ax=ax)
    fig.tight_layout()

#### Spectral contrast

In [None]:
# One distribution panel per spectral-contrast column on a 4x2 grid.
fig = plt.figure(figsize=(10, 10))
for idx in range(nfCon):
    ax = fig.add_subplot(4, 2, idx + 1)  # subplot positions are 1-based
    sns.distplot(norm_data_df["contrast_" + str(idx)], ax=ax)
fig.tight_layout()

#### Tonnetz

In [None]:
# One distribution panel per tonnetz column on a 3x2 grid.
fig = plt.figure(figsize=(10, 8))
for idx in range(nfTon):
    ax = fig.add_subplot(3, 2, idx + 1)  # subplot positions are 1-based
    sns.distplot(norm_data_df["tonnetz_" + str(idx)], ax=ax)
fig.tight_layout()

## Bivariate distributions

#### MFCC

In [None]:
import random

In [None]:
# Draw two MFCC column names at random (f1 first, then f2); the two draws are
# independent, so they may coincide.
f1, f2 = ("mfcc_" + str(random.randrange(nfMfccs)) for _ in range(2))

In [None]:

# Scatter of the two selected MFCC columns, coloured by class label.
colors = sns.color_palette("Set2", 10)
# The per-label colours belong to the grid's `palette`. A palette list is not
# a valid matplotlib `cmap`, and FacetGrid already passes one colour per hue
# level to plt.scatter. `height` is the current name of the old `size`
# argument.
g = sns.FacetGrid(norm_data_df, hue="Label", palette=colors, height=8)
g.map(plt.scatter, f1, f2, s=5)
g.add_legend()

In [None]:
# Box plot of one randomly chosen MFCC column, grouped by class label.
f1 = "mfcc_" + str(random.randrange(nfMfccs))
sns.boxplot(data=norm_data_df, x="Label", y=f1)

In [None]:
# Swarm plot of a fixed MFCC column, grouped by class label. The random pick
# that preceded this assignment was immediately overwritten, so it is dropped.
f1 = "mfcc_6"
sns.swarmplot(x="Label", y=f1, data=norm_data_df);

In [None]:
# Column layout of `features` implied by the slices below:
#   0:40 MFCC | 40:52 chroma | 52:180 mel | 180:193 contrast + tonnetz.
# Drop the trailing block first so the chroma indices stay valid.
mfcc_mel_features = np.delete(features, obj=range(180,193),axis=1)
mfcc_mel_features = np.delete(mfcc_mel_features, obj=range(40,52),axis=1)

# Keep the MFCC columns as-is and log-transform the 128 mel columns.
mfcc_logmel_features = np.copy(mfcc_mel_features)
mfcc_logmel_features[:,40:168] = np.log(mfcc_logmel_features[:,40:168] )

# Carry each file name along as a last column so it follows its row through
# the shuffle; it is stripped again after the split.
# NOTE(review): concatenating numeric features with names changes the array
# dtype; confirm train_x / test_x are cast back to float before modelling.
mfcc_logmel_features_and_filenames = np.concatenate((mfcc_logmel_features, file_names[:,np.newaxis]), axis=1)

# Stratify on the labels being split; `xlabels` was never defined.
# NOTE(review): train_test_split is not imported in the visible cells
# (presumably sklearn.model_selection.train_test_split); add the import to
# the first cell.
train_x1, test_x1, train_y, test_y = train_test_split(mfcc_logmel_features_and_filenames, labels, test_size=0.3, random_state=5, stratify=labels)
train_x = train_x1[:,0:-1]
test_x = test_x1[:,0:-1]