# Import statements

In [1]:
import os
from glob import glob

import librosa
import numpy as np
import pandas as pd


# Loading in raw input data

In [2]:
# Gather every path found under the training directory.
# NOTE(review): glob() yields paths in an arbitrary, filesystem-dependent
# order -- confirm that this is acceptable wherever position matters.
train_files = glob('train/*')

# Decode audio with librosa. librosa.load returns a pair; only its first item
# (the audio time series) is kept, the second item is discarded.
# range(1) means that only the file at index 0 is actually read.
time_series = [librosa.load(train_files[file_idx])[0] for file_idx in range(1)]


In [3]:
# Sanity check on the first decoded clip: its raw samples and sample count.
print(time_series[0])
print(len(time_series[0]))

# Break each clip into overlapping fixed-length windows, one starting at every
# whole second, and repeat the clip's genre label once per window.
SAMPLE_RATE = 22050   # samples per second (librosa.load's default rate)
WINDOW_SECONDS = 5    # duration of each window
MAX_WINDOWS = 26      # start offsets 0..25 s, i.e. full coverage of a 30 s clip

new_time_series = []
new_labels = []
train = pd.read_csv('train.csv')
y_train = train['Genre']
for index, clip in enumerate(time_series):
    # Only emit windows that fit entirely inside the clip. A fixed range(26)
    # would yield truncated (or empty) windows for clips shorter than 30 s
    # and still attach a label to them.
    full_windows = len(clip) // SAMPLE_RATE - WINDOW_SECONDS + 1
    for start_time in range(max(0, min(MAX_WINDOWS, full_windows))):
        first_sample = start_time * SAMPLE_RATE
        last_sample = (start_time + WINDOW_SECONDS) * SAMPLE_RATE
        new_time_series.append(clip[first_sample:last_sample])
        # Labels are looked up by the clip's position in time_series.
        # NOTE(review): this presumes row order in train.csv matches the order
        # in which the audio files were loaded -- confirm.
        new_labels.append(y_train[index])



[-0.02728271 -0.03622437 -0.04992676 ...  0.04800415  0.03561401
  0.03475952]
661504


# Basic feature extraction

In [11]:
def extract_features(series, sr=22050):
    """
    Uses Librosa to extract features from the time series.

    series: audio time series (sequence of floats) for one clip
    sr: sampling rate of `series` in Hz; defaults to 22050, the value that was
        previously hard-coded, so existing single-argument calls are unchanged
    returns (in this order):
    spectral_centroid: the center of mass of the spectrum
    spectral_rolloff: the frequency below which 85% of the magnitude distribution is concentrated
    spectral_bandwidth: the width of the band of frequencies
    spectral_contrast: the difference in amplitude between peaks and valleys in the spectrum
    spectral_flatness: the flatness of a signal
    rms: the root mean square of the signal
    tempo: tempo estimate returned by librosa.beat.beat_track
    beat_strengths: onset-strength envelope sampled at the detected beat frames
    key: output of librosa.feature.chroma_stft (a chromagram, despite the name)
    """
    # Pass the sampling rate to every extractor that accepts one, so that a
    # non-default `sr` is applied consistently instead of only to a few calls.
    spectral_centroid = librosa.feature.spectral_centroid(y=series, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=series, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=series, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=series, sr=sr)
    # spectral_flatness and rms take no sampling-rate argument.
    spectral_flatness = librosa.feature.spectral_flatness(y=series)
    rms = librosa.feature.rms(y=series)
    onset_env = librosa.onset.onset_strength(y=series, sr=sr)
    tempo, beats = librosa.beat.beat_track(y=series, sr=sr)
    # `beats` holds frame indices; index the onset envelope with them to get
    # one strength value per detected beat.
    beat_strengths = onset_env[beats]
    key = librosa.feature.chroma_stft(y=series, sr=sr)

    return spectral_centroid, spectral_rolloff, spectral_bandwidth, spectral_contrast, spectral_flatness, rms, tempo, beat_strengths, key

# One accumulator per value returned by extract_features, in return order.
centroids, rolloffs, bandwidths = [], [], []
contrasts, flatnesses, rms = [], [], []
tempos, beat_strengths, keys = [], [], []

_basic_buckets = (
    centroids, rolloffs, bandwidths,
    contrasts, flatnesses, rms,
    tempos, beat_strengths, keys,
)

# Run the extractor on every clip and route each returned item to the
# accumulator at the same position.
for series in time_series:
    for bucket, value in zip(_basic_buckets, extract_features(series)):
        bucket.append(value)

# Number of entries in the first row of the first clip's RMS feature
print(len(rms[0][0]))



1293


In [14]:
def extract_MORE_features(series):
    """
    Compute two further Librosa descriptors for a single time series.

    series: list of floats
    returns (in this order):
    zero_crossing_rate: the rate of sign changes in the signal
    mfcc: Mel-frequency cepstral coefficients
    """
    return (
        librosa.feature.zero_crossing_rate(y=series),
        librosa.feature.mfcc(y=series),
    )

# Extract (zero-crossing rate, MFCC) once per clip, then split the pairs into
# two parallel lists that follow the order of time_series.
_zcr_mfcc_pairs = [extract_MORE_features(series) for series in time_series]
zero_rates = [pair[0] for pair in _zcr_mfcc_pairs]
mfccs = [pair[1] for pair in _zcr_mfcc_pairs]


In [15]:
# Every per-clip feature list gathered so far; the order here fixes the order
# in which anything iterating over `features` will see them.
features = [
    centroids,
    rolloffs,
    bandwidths,
    contrasts,
    flatnesses,
    rms,
    tempos,
    beat_strengths,
    keys,
    zero_rates,
    mfccs,
]
# Show the chroma_stft output stored for the first clip.
print(keys[0])


[[0.6870512  0.8685882  0.9635946  ... 0.37126568 0.51484257 0.59994495]
 [0.47481188 0.5558943  0.58297354 ... 0.44635394 0.43633562 0.44802457]
 [0.45623416 0.48858538 0.47255298 ... 0.46851033 0.4473988  0.49899974]
 ...
 [0.28546423 0.29619884 0.43726018 ... 0.23843835 0.6287394  0.7065811 ]
 [0.4030487  0.47109658 0.5946139  ... 0.19010578 0.47997636 0.5546902 ]
 [0.6411027  0.98321337 1.         ... 0.2709052  0.5394487  0.6598817 ]]


# Export the feature data

In [33]:
def aggregate_features(features):
    """
    Summarise every item of `features` with four scalar statistics.

    features: iterable whose items are array-likes (or plain scalars); each
        item is reduced over all of its elements at once (no axis is used).
    returns:
    means, stds, maxs, mins: four lists aligned with `features`, holding the
        mean, standard deviation, maximum and minimum of each item.

    An item with no elements yields NaN in all four lists. Without this guard
    np.max / np.min raise ValueError on empty input while np.mean / np.std
    return NaN, so one empty feature would abort the whole aggregation.
    """
    means = []
    stds = []
    maxs = []
    mins = []
    for feature in features:
        values = np.asarray(feature)
        if values.size == 0:
            # Nothing to reduce: record NaN for every statistic.
            for bucket in (means, stds, maxs, mins):
                bucket.append(np.nan)
            continue
        means.append(np.mean(values))
        stds.append(np.std(values))
        maxs.append(np.max(values))
        mins.append(np.min(values))
    return means, stds, maxs, mins

def save_features(features, filename):
    """
    Saves the features to a csv file.

    features: values to store; they become the single column of the file
    filename: base name (without extension); used both as the column header
        and as the file name, written to 'features_test/<filename>.csv'

    The 'features_test' directory is created when it does not exist yet,
    because DataFrame.to_csv refuses to write into a missing directory.
    The row index is not written.
    """
    df = pd.DataFrame({filename: features})
    os.makedirs('features_test', exist_ok=True)
    df.to_csv('features_test/' + filename + '.csv', index=False)

In [17]:
# Reduce every feature list to per-clip statistics. aggregate_features returns
# (means, stds, maxs, mins); extend() appends those four lists in that order
# without binding the names `max` / `min`, which would otherwise replace the
# Python builtins for the rest of the kernel session.
aggregates = []
for feature in features:
    aggregates.extend(aggregate_features(feature))
# The raw tempo values are exported as well, after all the statistics.
aggregates.append(tempos)

# Write each aggregate to its own CSV, numbered consecutively from 0.
# feature_count is left at the next free number once the loop finishes.
feature_count = 0
for aggregate in aggregates:
    save_features(aggregate, 'feature_' + str(feature_count))
    feature_count += 1



In [25]:
def evenMOREfeatures(series):
    """
    Compute tonal and harmonic/percussive descriptors with Librosa.

    series: list of floats
    returns (in this order):
    tonnetz: output of librosa.feature.tonnetz on the full signal
    chroma: output of librosa.feature.chroma_stft on the full signal
    harmonic_chroma: output of librosa.feature.chroma_cqt on the harmonic part
    percussive_tempo: tempo from librosa.beat.beat_track on the percussive part
    """
    tonal_centroids = librosa.feature.tonnetz(y=series)
    stft_chroma = librosa.feature.chroma_stft(y=series)
    # hpss splits the signal into a (harmonic, percussive) pair.
    harmonic_part, percussive_part = librosa.effects.hpss(y=series)
    cqt_chroma = librosa.feature.chroma_cqt(y=harmonic_part)
    # beat_track returns a pair; only its first item (the tempo) is kept.
    tempo_estimate = librosa.beat.beat_track(y=percussive_part)[0]

    return tonal_centroids, stft_chroma, cqt_chroma, tempo_estimate

# One accumulator per value returned by evenMOREfeatures, in return order.
tonnetzs, chromas = [], []
harmonic_chromas, percussive_tempos = [], []
_tonal_buckets = (tonnetzs, chromas, harmonic_chromas, percussive_tempos)

# Process the clips by position and route each returned item to the
# accumulator at the same position.
for i in range(len(time_series)):
    for bucket, value in zip(_tonal_buckets, evenMOREfeatures(time_series[i])):
        bucket.append(value)


In [26]:
# Reduce the tonal / harmonic feature lists to per-clip statistics.
# aggregate_features returns (means, stds, maxs, mins); extend() appends those
# four lists in that order without rebinding the builtins `max` and `min`.
aggregate_more = []
for feature in [tonnetzs, chromas, harmonic_chromas, percussive_tempos]:
    aggregate_more.extend(aggregate_features(feature))

# Continue the running file numbering. feature_count is not initialised in
# this cell, so it must already hold the next free number from the earlier
# export. The file name has to use that counter: naming the files after a
# leftover loop index would give every aggregate the same name and each write
# would overwrite the previous one.
for aggregate in aggregate_more:
    save_features(aggregate, 'feature_' + str(feature_count))
    feature_count += 1


In [34]:
def more_features(series):
    """
    Compute the delta of the MFCCs of one time series with Librosa.

    series: list of floats
    returns:
    mfcc_delta: the change in mfcc (librosa.feature.delta applied to the
        output of librosa.feature.mfcc)
    """
    coefficients = librosa.feature.mfcc(y=series)
    return librosa.feature.delta(coefficients)

# MFCC deltas for every clip, in the order of time_series.
mfcc_deltas = [more_features(series) for series in time_series]

# Reduce them to per-clip statistics. aggregate_features returns
# (means, stds, maxs, mins); extend() appends those four lists in that order
# without rebinding the builtins `max` and `min`.
aggregate_mfcc_delta = []
for feature in [mfcc_deltas]:
    aggregate_mfcc_delta.extend(aggregate_features(feature))

# File numbering for this cell starts at a fixed offset.
# NOTE(review): 61 presumably equals the number of feature files written by
# the earlier export cells -- confirm if those cells change.
feature_count = 61
for aggregate in aggregate_mfcc_delta:
    save_features(aggregate, 'feature_' + str(feature_count))
    feature_count += 1