# import statements

In [13]:
import numpy as np
import pandas as pd
from glob import glob
import librosa

# For preprocessing
import wave
import os

# for avoiding bad audio files
from soundfile import LibsndfileError, SoundFileRuntimeError

# Loading in raw input data

In [52]:
# Load in the data from the specified directory
raw_train_files = glob('train/*')

# Load the audio file using librosa
# y = audio time series

# Method for splits valid data files
def split_data(input_dir, output_dir, segment_duration):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for root, dirs, files in os.walk(input_dir):
        for file in files:
            if file.endswith('.wav'):
                input_file = os.path.join(root, file)
                try:
                    with wave.open(input_file, 'rb') as wf:
                        framerate = wf.getframerate()
                        nframes = wf.getnframes()
                        # Gets the full length of the audio in seconds
                        duration = nframes / framerate

                        # Calc frames per seg
                        frames_per_seg = int(segment_duration * framerate)
                        num_segments = int(duration / segment_duration)

                        # split the file into segments
                        for i in range(num_segments):
                            start_frame = i * frames_per_seg
                            end_frame = (i + 1) * frames_per_seg

                            # Read audio data for this segment
                            segment_data = wf.readframes(frames_per_seg)

                            # Write the segment to a new file
                            output_file = os.path.join(output_dir, f'{os.path.splitext(os.path.basename(input_file))[0]}_segment_{i}.wav')
                            with wave.open(output_file, 'wb') as segment_wf:
                                segment_wf.setparams(wf.getparams())
                                segment_wf.writeframes(segment_data)
                except Exception as e:
                    print(f"Error splitting audio file {input_file}: {e}")
    return output_dir

train_files_dir = split_data("train", "split_output", 5)
train_files = glob(train_files_dir + "/*")
time_series = []
#load the files from the directory
for filename in train_files:
    try:
        y, _ = librosa.load(filename)
        # print("Iteration " + str(i) + " success")
        time_series.append(y)
    except Exception as e:
        print(f"Error loading audio file {filename}: {e}")

Error splitting audio file train/train312.wav: file does not start with RIFF id


In [53]:
# print(time_series)
print(time_series[0])
print(len(time_series[0]))
# Method to break up data into smaller parts

new_time_series = []
new_labels = []
train = pd.read_csv('train.csv')
y_train = train['Genre']
for index in range(len(time_series)):
    for start_time in range(26):
        new_time_series.append(time_series[index][start_time * 22050 : (start_time + 5) * 22050])
        new_labels.append(y_train[index])



[-0.02783203  0.03942871  0.01895142 ...  0.04376221  0.04745483
  0.00982666]
110250


KeyError: 800

# Basic feature extraction

In [54]:
def extract_features(series):
    """
    Uses Librosa to extract features from the time series.
    series: list of floats
    returns:
    spectral_centroid: the center of mass of the spectrum
    spectral rolloff: the frequency below which 85% of the magnitude distribution is concentrated
    spectral bandwidth: the width of the band of frequencies
    spectral contrast: the difference in amplitude between peaks and valleys in the spectrum
    spectral flatness: the flatness of a signal
    spectral rms: the root mean square of the signal
    """
    sr = 22050
    spectral_centroid = librosa.feature.spectral_centroid(y=series)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=series)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=series)
    spectral_contrast = librosa.feature.spectral_contrast(y=series)
    spectral_flatness = librosa.feature.spectral_flatness(y=series)
    rms = librosa.feature.rms(y=series)
    onset_env = librosa.onset.onset_strength(y=series, sr=sr)
    tempo, beats = librosa.beat.beat_track(y =series, sr=sr)
    beat_strengths = onset_env[beats]
    key = librosa.feature.chroma_stft(y=series, sr=sr)

    return spectral_centroid, spectral_rolloff, spectral_bandwidth, spectral_contrast, spectral_flatness, rms, tempo, beat_strengths, key

centroids = []
rolloffs = []
bandwidths = []
contrasts = []
flatnesses = []
rms = []
tempos = []
beat_strengths = []
keys = []


for i in range(0, len(time_series)):
    spectral_centroid, spectral_rolloff, spectral_bandwidth, spectral_contrast, spectral_flatness, spectral_rms, tempo, beat_strength, key = extract_features(time_series[i])
    centroids.append(spectral_centroid)
    rolloffs.append(spectral_rolloff)
    bandwidths.append(spectral_bandwidth)
    contrasts.append(spectral_contrast)
    flatnesses.append(spectral_flatness)
    rms.append(spectral_rms)
    tempos.append(tempo)
    beat_strengths.append(beat_strength)
    keys.append(key)




In [55]:
def extract_MORE_features(series):
    """
    Uses Librosa to extract features from the time series.
    series: list of floats
    returns:
    zero_crossing_rate: the rate of sign changes in the signal
    mfcc: Mel-frequency cepstral coefficients
    """
    zero_rate = librosa.feature.zero_crossing_rate(y=series)
    mfcc = librosa.feature.mfcc(y=series)


    return zero_rate, mfcc

zero_rates = []
mfccs = []

for i in range(0, len(time_series)):
    zero_rate, mfcc = extract_MORE_features(time_series[i])
    zero_rates.append(zero_rate)
    mfccs.append(mfcc)


In [57]:
features =[centroids, rolloffs, bandwidths, contrasts, flatnesses, rms, tempos, beat_strengths, keys, zero_rates, mfccs]
print(keys[0])


[[1.         1.         0.38291848 ... 0.43263552 0.37777305 0.39717636]
 [0.22910109 0.20459473 0.08079255 ... 0.28775373 0.22603354 0.26838958]
 [0.03219749 0.03264437 0.0251236  ... 0.37318882 0.22551163 0.20922756]
 ...
 [0.72905344 0.95025206 1.         ... 0.8150928  0.83707166 0.9120585 ]
 [0.3821179  0.4050591  0.38146168 ... 1.         1.         1.        ]
 [0.25871077 0.20428272 0.09758538 ... 0.59401184 0.5136056  0.49116886]]


# Export the Feature data

In [58]:
def aggregate_features(features):
    """
    stores the mean, standard deviation, max, and min of the features
    """
    means = []
    stds = []
    maxs = []
    mins = []
    for feature in features:
        means.append(np.mean(feature))
        stds.append(np.std(feature))
        maxs.append(np.max(feature))
        mins.append(np.min(feature))
    return means, stds, maxs, mins

def save_features(features, filename):
    """
    saves the features to a csv file
    """
    df = pd.DataFrame({filename: features})
    df.to_csv('features_test/' + filename + '.csv', index=False)

In [59]:
aggregates = []
for feature in features:
    mean, std, max, min = aggregate_features(feature)
    aggregates.append(mean)
    aggregates.append(std)
    aggregates.append(max)
    aggregates.append(min)
aggregates.append(tempos)
feature_count = 0
for aggregate in aggregates:
    save_features(aggregate, 'feature_' + str(feature_count))
    feature_count += 1



In [60]:
def evenMOREfeatures(series):
    """
    Uses Librosa to extract features from the time series.
    series: list of floats
    returns:
    chroma: the chroma of the signal
    """
    tonnetz = librosa.feature.tonnetz(y=series)
    chroma = librosa.feature.chroma_stft(y=series)
    harmonic, percussive = librosa.effects.hpss(y=series)
    harmonic_chroma = librosa.feature.chroma_cqt(y=harmonic)
    percussive_tempo, _ = librosa.beat.beat_track(y=percussive)

    return tonnetz, chroma, harmonic_chroma, percussive_tempo

tonnetzs = []
chromas = []
harmonic_chromas = []
percussive_tempos = []

for i in range(0, len(time_series)):
    tonnetz, chroma, harmonic_chroma, percussive_tempo = evenMOREfeatures(time_series[i])
    tonnetzs.append(tonnetz)
    chromas.append(chroma)
    harmonic_chromas.append(harmonic_chroma)
    percussive_tempos.append(percussive_tempo)


In [61]:
aggregate_more = []
for feature in [tonnetzs, chromas, harmonic_chromas, percussive_tempos]:
    mean, std, max, min = aggregate_features(feature)
    aggregate_more.append(mean)
    aggregate_more.append(std)
    aggregate_more.append(max)
    aggregate_more.append(min)
    
for aggregate in aggregate_more:
    save_features(aggregate, 'feature_' + str(i))
    feature_count += 1


In [62]:
def more_features(series):
    """
    Uses Librosa to extract features from the time series.
    series: list of floats
    returns:
    mfcc_delta: the change in mfcc
    """
    mfcc_delta = librosa.feature.delta(librosa.feature.mfcc(y=series))

    return mfcc_delta

mfcc_deltas = []

for i in range(0, len(time_series)):
    mfcc_delta = more_features(time_series[i])
    mfcc_deltas.append(mfcc_delta)

aggregate_mfcc_delta = []
for feature in [mfcc_deltas]:
    mean, std, max, min = aggregate_features(feature)
    aggregate_mfcc_delta.append(mean)
    aggregate_mfcc_delta.append(std)
    aggregate_mfcc_delta.append(max)
    aggregate_mfcc_delta.append(min)
feature_count = 61
for aggregate in aggregate_mfcc_delta:
    save_features(aggregate, 'feature_' + str(feature_count))
    feature_count += 1