In [3]:
# import libraries
import numpy as np
import pandas as pd
import scipy
import librosa
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import os

In [5]:
data_dir = 'data/'
wav_files = os.listdir(data_dir)
samp_rate = 22050
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

In [6]:
# create feature dataframe
feature_names = ["idx", "mean", "stdev", "skew", "kurtosis", "zcr_mean", "zcr_stdev",
                 "rmse_mean", "rmse_stdev", "tempo"] + \
                ['mfccs_' + str(i+1) + '_mean' for i in range(20)] + \
                ['mfccs_' + str(i+1) + '_stdev' for i in range(20)] + \
                ['chroma_' + str(i+1) + '_mean' for i in range(12)] + \
                ['chroma_' + str(i+1) + '_stdev' for i in range(12)] + \
                ["centroid_mean", "centroid_stdev"] + \
                ['contrast_' + str(i+1) + '_mean' for i in range(7)] + \
                ['contrast_' + str(i+1) + '_std' for i in range(7)] + \
                ["rolloff_mean", "rolloff_stdev", "genre"]

param_names = feature_names[1:-1]
label_names = feature_names[-1]

feature_frame = pd.DataFrame(columns=feature_names)

In [7]:
# populate feature dataframe

for wav in wav_files:

    # separate genre and idx from filename
    genre = "".join([i for i in wav[:-4] if i.isalpha()])
    idx = int("".join([i for i in wav[:-4] if i.isdigit()]))
    label = genres.index(genre)

    # extract waveform
    y, sr = librosa.load(data_dir + wav, sr = samp_rate)

    # calculate features

    # spectral moments
    mean = np.mean(abs(y))
    stdev = np.std(y)
    skew = scipy.stats.skew(abs(y))
    kurtosis = scipy.stats.kurtosis(y)

    # zero crossing
    zcr = librosa.feature.zero_crossing_rate(y + 0.0001, frame_length=2048, hop_length=512)[0]
    zcr_mean = np.mean(zcr)
    zcr_stdev = np.std(zcr)

    # root mean squared energy
    rmse = librosa.feature.rms(y + 0.0001)[0]
    rmse_mean = np.mean(rmse)
    rmse_stdev = np.mean(rmse)

    # tempo
    tempo = librosa.beat.tempo(y, sr=sr)

    # Mel-Frequency cepstral coefficients
    mfccs = librosa.feature.mfcc(y, sr=sr, n_mfcc=20)
    mfccs_mean = np.mean(mfccs, axis=1)
    mfccs_stdev = np.std(mfccs, axis=1)

    # chroma vector data
    chroma = librosa.feature.chroma_stft(y, sr=sr, hop_length=1024)
    chroma_mean = np.mean(chroma, axis=1)
    chroma_stdev = np.std(chroma, axis=1)

    # spectral centroids
    spectral_centroids = librosa.feature.spectral_centroid(y+0.01, sr=sr)[0]
    centroid_mean = np.mean(spectral_centroids)
    centroid_stdev = np.std(spectral_centroids)

    # spectral contrast
    spectral_contrast = librosa.feature.spectral_contrast(y, sr=sr, n_bands = 6, fmin = 200.0)
    contrast_mean = np.mean(spectral_contrast, axis=1)
    contrast_stdev = np.std(spectral_contrast, axis=1)

    # spectral rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(y+0.01, sr=sr, roll_percent = 0.85)[0]
    rolloff_mean = np.mean(spectral_rolloff)
    rolloff_stdev = np.std(spectral_rolloff)
    
    # Build feature vector
    feature_vect = []
    feature_vect.append(idx)
    feature_vect.extend([mean, stdev, skew, kurtosis])
    feature_vect.extend([zcr_mean, zcr_stdev])
    feature_vect.extend([rmse_mean, rmse_stdev])
    feature_vect.extend(tempo)
    feature_vect.extend(mfccs_mean)
    feature_vect.extend(mfccs_stdev)
    feature_vect.extend(chroma_mean)
    feature_vect.extend(chroma_stdev)
    feature_vect.extend([centroid_mean, centroid_stdev])
    feature_vect.extend(contrast_mean)
    feature_vect.extend(contrast_stdev)
    feature_vect.extend([rolloff_mean, rolloff_stdev])
    feature_vect.append(genre)

    # update dataframe
    feature_frame = feature_frame.append(pd.DataFrame(feature_vect, index=feature_names).transpose(), ignore_index=True)

In [8]:
# save features to csv
feature_frame.to_csv('features.csv', index=False)

In [5]:
# read features from csv
features = pd.read_csv('features.csv')

In [11]:
# reduce dimensionality using PCA
params = features.loc[:, param_names].values
labels = features.loc[:, label_names].values

params_norm = StandardScaler().fit_transform(params)

ValueError: could not convert string to float: '[123.046875]'