# import statements

In [1]:
import os
from glob import glob

import librosa
import numpy as np
import pandas as pd


# Loading in raw input data

In [7]:
# Collect every path in the training directory.
# NOTE(review): glob's ordering depends on the filesystem, and the exported
# feature rows follow this order -- confirm labels are aligned the same way.
train_files = glob('train/*')

# Upper bound on how many clips are decoded in this run.
MAX_TRAIN_FILES = 800

# Decode each clip with librosa; y is the audio time series and the returned
# sample rate is discarded. Slicing (instead of indexing 0..799) avoids an
# IndexError when the directory holds fewer than MAX_TRAIN_FILES files.
time_series = []
for path in train_files[:MAX_TRAIN_FILES]:
    y, _ = librosa.load(path)
    time_series.append(y)


# Basic feature extraction

In [3]:
def extract_features(series, sr=22050):
    """
    Uses Librosa to extract features from one audio time series.

    series: the audio samples, passed as `y` to every librosa call below
    sr: sample rate of `series` in Hz; defaults to 22050, the value that
        used to be hard-coded inside this function

    returns, in this order:
    spectral_centroid: the center of mass of the spectrum
    spectral rolloff: the frequency below which 85% of the magnitude distribution is concentrated
    spectral bandwidth: the width of the band of frequencies
    spectral contrast: the difference in amplitude between peaks and valleys in the spectrum
    spectral flatness: the flatness of a signal
    spectral rms: the root mean square of the signal
    tempo: the tempo estimate returned by librosa.beat.beat_track
    beat_strengths: the onset-strength envelope sampled at the detected beats
    key: the output of librosa.feature.chroma_stft (a chromagram, not a
         single key estimate, despite the name)
    """
    # Every extractor that accepts a sample rate receives the same `sr`, so a
    # non-default rate is applied consistently instead of only to some calls.
    spectral_centroid = librosa.feature.spectral_centroid(y=series, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=series, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=series, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=series, sr=sr)

    # Flatness and RMS are called without a sample rate, as before.
    spectral_flatness = librosa.feature.spectral_flatness(y=series)
    rms = librosa.feature.rms(y=series)

    # Rhythm: index the onset envelope with the beat positions to get one
    # strength value per detected beat.
    onset_env = librosa.onset.onset_strength(y=series, sr=sr)
    tempo, beats = librosa.beat.beat_track(y=series, sr=sr)
    beat_strengths = onset_env[beats]

    key = librosa.feature.chroma_stft(y=series, sr=sr)

    return spectral_centroid, spectral_rolloff, spectral_bandwidth, spectral_contrast, spectral_flatness, rms, tempo, beat_strengths, key

# One accumulator list per value returned by extract_features, in return order.
centroids, rolloffs, bandwidths = [], [], []
contrasts, flatnesses, rms = [], [], []
tempos, beat_strengths, keys = [], [], []

_accumulators = (
    centroids,
    rolloffs,
    bandwidths,
    contrasts,
    flatnesses,
    rms,
    tempos,
    beat_strengths,
    keys,
)

# Run the extractor on every loaded clip and file each result under its list.
for series in time_series:
    for bucket, value in zip(_accumulators, extract_features(series)):
        bucket.append(value)




In [None]:
def extract_MORE_features():
    """
    Placeholder for additional feature extractors.

    Nothing is implemented yet; the stub exists so the cell parses and the
    notebook can be run top to bottom. Calling it raises NotImplementedError.
    """
    raise NotImplementedError("extract_MORE_features has not been implemented yet")

In [4]:
# Bundle the per-clip feature lists into one list, one entry per feature family.
features = [
    centroids,
    rolloffs,
    bandwidths,
    contrasts,
    flatnesses,
    rms,
    tempos,
    beat_strengths,
    keys,
]

# Show the chroma_stft result of the first clip as a quick sanity check.
first_clip_chroma = keys[0]
print(first_clip_chroma)


[[0.6202782  0.28463265 0.52313125 ... 0.48861602 0.4922129  0.32770872]
 [0.79212254 0.30917493 0.5053827  ... 0.236012   0.45460925 0.4201854 ]
 [1.         0.43742636 0.6827227  ... 0.2439473  0.6008393  0.7393849 ]
 ...
 [0.70094    0.56869745 0.61966157 ... 0.8171984  0.2146829  0.15337887]
 [0.5535064  0.41045326 0.62554777 ... 1.         0.45614347 0.3133688 ]
 [0.9245443  0.4391839  0.61112124 ... 0.7873888  0.59380823 0.46977058]]


# Export the Feature data

In [5]:
def aggregate_features(features):
    """
    stores the mean, standard deviation, max, and min of the features

    features: iterable of array-likes; each entry is reduced over all of its
              elements to four scalars
    returns: (means, stds, maxs, mins), four lists with one value per entry
             of `features`, in input order

    An empty entry produces NaN for all four statistics instead of raising:
    np.max / np.min have no identity for zero-size input and would otherwise
    abort the whole aggregation.
    """
    means, stds, maxs, mins = [], [], [], []
    for feature in features:
        values = np.asarray(feature)
        if values.size == 0:
            # Keep the four lists aligned with the input by recording NaN.
            for stat_list in (means, stds, maxs, mins):
                stat_list.append(np.nan)
            continue
        means.append(np.mean(values))
        stds.append(np.std(values))
        maxs.append(np.max(values))
        mins.append(np.min(values))
    return means, stds, maxs, mins

def save_features(features, filename, out_dir='features_train'):
    """
    saves the features to a csv file

    features: the values for the single column that is written
    filename: used both as the column header and as the file stem
    out_dir: directory that receives '<filename>.csv'; defaults to
             'features_train', the location that used to be hard-coded

    The directory is created when missing so the export does not fail on a
    fresh checkout.
    """
    os.makedirs(out_dir, exist_ok=True)
    df = pd.DataFrame({filename: features})
    df.to_csv(os.path.join(out_dir, filename + '.csv'), index=False)

In [6]:
# Summarise every feature family, then write each summary column to its own CSV.
aggregates = []
for feature in features:
    # Descriptive local names: unpacking into `max` / `min` would rebind the
    # builtins for every cell that runs afterwards in this kernel.
    feat_means, feat_stds, feat_maxs, feat_mins = aggregate_features(feature)
    aggregates.extend([feat_means, feat_stds, feat_maxs, feat_mins])

# The raw tempo values are exported as one extra column after the summaries.
aggregates.append(tempos)

# Files are named feature_0, feature_1, ... in the order built above.
for index, aggregate in enumerate(aggregates):
    save_features(aggregate, 'feature_' + str(index))

