In [1]:
import librosa
import neuro
import random
import numpy as np
import pandas as pd
import risp
import eons
import json
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
import wave
import os 
from scipy.signal import find_peaks


# original params
if False:
    eo_params = {
        "starting_nodes": 3,
        "starting_edges": 6,
        "merge_rate": 0,
        "population_size": 100,
        "multi_edges": 0,
        "crossover_rate": 0.5,
        "mutation_rate": 0.9,
        "selection_type": "tournament",
        "tournament_size_factor": 0.1,
        "tournament_best_net_factor": 0.9,
        "random_factor": 0.05,
        "num_mutations": 3,
        "node_mutations": { "Threshold": 1.0 },
        "net_mutations": { },
        "edge_mutations": { "Weight": 0.5, "Delay": 0.5 },
        "num_best" : 4
    }

eo_params = {
    "merge_rate": 0.1,
    "population_size": 100,
    "multi_edges": 0,
    "crossover_rate": 0.5,
    "mutation_rate": 0.8,
    "selection_type": "tournament",
    "tournament_size_factor": 0.1,
    "tournament_best_net_factor": 0.9,
    "random_factor": 0.05,
    "num_mutations": 4,
    "node_mutations": { "Threshold": 1.0 },
    "net_mutations": { },
    "edge_mutations": { "Weight": 0.5, "Delay": 0.5 },
    "num_best" : 4
}



In [2]:
import wave
import librosa
import numpy as np
import os 
from scipy.signal import find_peaks

unique_sample_rates = np.array([])
min_song_len = float('inf')

directory = 'audio_database'


def mel_binary_mean(mel_spectrum):
    # Computes the row-wise mean values (the mean of each channel)
    channel_means = np.mean(mel_spectrum, axis=1)

    # Applies thresholding to create binary representation
    binary_spectrum = np.where(mel_spectrum >= channel_means[:, np.newaxis], 1, 0)

    return binary_spectrum


def find_peaks_per_channel(spectrum, threshold=0):
    peaks = np.zeros_like(spectrum)
    
    for i in range(spectrum.shape[0]):
        channel_data = spectrum[i, :]
        channel_peaks, _ = find_peaks(channel_data, height=threshold)
        peaks[i, channel_peaks] = 1
    
    return peaks

# Create a new folder to save the npy files
new_folder = "npy_files"
new_folder_path = os.path.join(directory, new_folder)
os.makedirs(new_folder_path, exist_ok=True)

for folder_name in os.listdir(directory):
    unique_song_len = np.array([])

    folder_path = os.path.join(directory, folder_name)
    if os.path.isdir(folder_path):  # Check if the item is a directory
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav'):
                try: 
                    file_path = os.path.join(folder_path, filename)
                    audio_signal, sample_rate = librosa.load(file_path, sr=None)

                    unique_sample_rates = np.append(unique_sample_rates, sample_rate)
                    
                    n_fft = 2048
                    hop_length = n_fft // 16
                    magnitude_spectrum = np.abs(librosa.stft(audio_signal, n_fft=n_fft, hop_length=hop_length))
                    num_mels = 8
                    mel_spectrum = librosa.feature.melspectrogram(
                        sr=sample_rate,
                        S=magnitude_spectrum,
                        n_fft=n_fft,
                        hop_length=hop_length,
                        n_mels=num_mels
                    )
                    #mel_spectrum = mel_spectrum[:, :min_song_len]
                    unique_song_len = np.append(unique_song_len, np.shape(mel_spectrum[0]))
                    peak_spectrogram = mel_binary_mean(mel_spectrum) #<---------- to change encoder 
                    #peak_spectrogram = peak_spectrogram[:, :min_song_len]

                    output_filename = f"{filename}_peak_spectrogram.npy"  # Move this line here
                    output_path = os.path.join(new_folder_path, output_filename)
                    np.save(output_path, peak_spectrogram)
                    print(f"Processed {filename} in folder {folder_name}. Saved peak_spectrogram as {output_filename}")
                    
                    
                    if min_song_len > len(peak_spectrogram[1]):
                        min_song_len = len(peak_spectrogram[1])

                except Exception as e:
                    print(f"Error processing {filename} in folder {folder_name}: {str(e)}")
                    continue
        print(f"unique song length in {folder_name} is {np.unique(unique_song_len)}")

min_song_len = int(min_song_len)        
print(f"unique sample rates for all genres {np.unique(unique_sample_rates)}")
print(f"minimum song length is {min_song_len}")

Processed pop.00027.wav in folder pop. Saved peak_spectrogram as pop.00027.wav_peak_spectrogram.npy
Processed pop.00033.wav in folder pop. Saved peak_spectrogram as pop.00033.wav_peak_spectrogram.npy
Processed pop.00032.wav in folder pop. Saved peak_spectrogram as pop.00032.wav_peak_spectrogram.npy
Processed pop.00026.wav in folder pop. Saved peak_spectrogram as pop.00026.wav_peak_spectrogram.npy
Processed pop.00030.wav in folder pop. Saved peak_spectrogram as pop.00030.wav_peak_spectrogram.npy
Processed pop.00024.wav in folder pop. Saved peak_spectrogram as pop.00024.wav_peak_spectrogram.npy
Processed pop.00018.wav in folder pop. Saved peak_spectrogram as pop.00018.wav_peak_spectrogram.npy
Processed pop.00019.wav in folder pop. Saved peak_spectrogram as pop.00019.wav_peak_spectrogram.npy
Processed pop.00025.wav in folder pop. Saved peak_spectrogram as pop.00025.wav_peak_spectrogram.npy
Processed pop.00031.wav in folder pop. Saved peak_spectrogram as pop.00031.wav_peak_spectrogram.npy


  return f(*args, **kwargs)


Error processing jazz.00054.wav in folder jazz: 
Processed jazz.00040.wav in folder jazz. Saved peak_spectrogram as jazz.00040.wav_peak_spectrogram.npy
Processed jazz.00068.wav in folder jazz. Saved peak_spectrogram as jazz.00068.wav_peak_spectrogram.npy
Processed jazz.00097.wav in folder jazz. Saved peak_spectrogram as jazz.00097.wav_peak_spectrogram.npy
Processed jazz.00083.wav in folder jazz. Saved peak_spectrogram as jazz.00083.wav_peak_spectrogram.npy
Processed jazz.00082.wav in folder jazz. Saved peak_spectrogram as jazz.00082.wav_peak_spectrogram.npy
Processed jazz.00096.wav in folder jazz. Saved peak_spectrogram as jazz.00096.wav_peak_spectrogram.npy
Processed jazz.00069.wav in folder jazz. Saved peak_spectrogram as jazz.00069.wav_peak_spectrogram.npy
Processed jazz.00041.wav in folder jazz. Saved peak_spectrogram as jazz.00041.wav_peak_spectrogram.npy
Processed jazz.00055.wav in folder jazz. Saved peak_spectrogram as jazz.00055.wav_peak_spectrogram.npy
Processed jazz.00043.wav

In [3]:
'''
def load_npy_files_with_prefix(directory, prefix, min_song_len):
    npy_files = [file for file in os.listdir(directory) if file.startswith(prefix) and file.endswith('.npy')]
    npy_files.sort()  # Sort the files for consistent order

    if len(npy_files) == 0:
        raise ValueError(f"No npy files found with prefix '{prefix}' in directory '{directory}'")
    loaded_data = []

    
    for npy_file in npy_files:
        npy_path = os.path.join(directory, npy_file)
        data = np.load(npy_path)

        #trim the data array to the desired shape (min_song_len)
        if len(data[1]) > min_song_len:
            trimmed_data = data[:, :min_song_len]
            loaded_data.append(trimmed_data)
        else:
            loaded_data.append(data)
            
    return np.array(loaded_data)
'''

'\ndef load_npy_files_with_prefix(directory, prefix, min_song_len):\n    npy_files = [file for file in os.listdir(directory) if file.startswith(prefix) and file.endswith(\'.npy\')]\n    npy_files.sort()  # Sort the files for consistent order\n\n    if len(npy_files) == 0:\n        raise ValueError(f"No npy files found with prefix \'{prefix}\' in directory \'{directory}\'")\n    loaded_data = []\n\n    \n    for npy_file in npy_files:\n        npy_path = os.path.join(directory, npy_file)\n        data = np.load(npy_path)\n\n        #trim the data array to the desired shape (min_song_len)\n        if len(data[1]) > min_song_len:\n            trimmed_data = data[:, :min_song_len]\n            loaded_data.append(trimmed_data)\n        else:\n            loaded_data.append(data)\n            \n    return np.array(loaded_data)\n'

In [4]:
'''
directory = new_folder_path

# Loading hiphop songs
X_hiphop = load_npy_files_with_prefix(directory = directory, prefix = 'hiphop', min_song_len=min_song_len)
y_hiphop = ['hiphop'] * len(X_hiphop)

# Loading country songs
X_country = load_npy_files_with_prefix(directory = directory, prefix = 'country', min_song_len=min_song_len)
y_country = ['country'] * len(X_country)

# Combining the data and labels
X = np.concatenate((X_hiphop, X_country), axis=0)
y = np.concatenate((y_hiphop, y_country), axis=0)

X = (np.rint(X)).astype(int)
'''




"\ndirectory = new_folder_path\n\n# Loading hiphop songs\nX_hiphop = load_npy_files_with_prefix(directory = directory, prefix = 'hiphop', min_song_len=min_song_len)\ny_hiphop = ['hiphop'] * len(X_hiphop)\n\n# Loading country songs\nX_country = load_npy_files_with_prefix(directory = directory, prefix = 'country', min_song_len=min_song_len)\ny_country = ['country'] * len(X_country)\n\n# Combining the data and labels\nX = np.concatenate((X_hiphop, X_country), axis=0)\ny = np.concatenate((y_hiphop, y_country), axis=0)\n\nX = (np.rint(X)).astype(int)\n"

In [7]:
def load_npy_files(directory, min_song_len):
    npy_files = [file for file in os.listdir(directory) if file.endswith('.npy')]
    npy_files.sort()  # Sort the files for consistent order

    if len(npy_files) == 0:
        raise ValueError(f"No npy files found in directory '{directory}'")
    loaded_data = []
    labels = []

    for npy_file in npy_files:
        npy_path = os.path.join(directory, npy_file)
        data = np.load(npy_path)

        #trim the data array to the desired shape (min_song_len)
        if len(data[1]) > min_song_len:
            trimmed_data = data[:, :min_song_len]
            loaded_data.append(trimmed_data)
        else:
            loaded_data.append(data)

        # Extract label from the file name by splitting on underscore
        label = npy_file.split('.')[0]
        labels.append(label)
            
    return np.array(loaded_data), labels

directory = new_folder_path
min_song_len = 100  # replace with your value

# Loading all songs
X, y = load_npy_files(directory = directory, min_song_len=min_song_len)

X = (np.rint(X)).astype(int)
labels = np.unique(y)

print("There are " + str(len(labels)) + " labels: " + str(labels))



There are 10 labels: ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']


In [9]:
# Defining the number of folds (tunable perameter)
n_folds = 5

# Creating a KFold object
kf = KFold(n_splits=n_folds)

train_results = []
test_results = []

y = np.array(y)  # Convert list to NumPy array

# Iterate over each fold
for train_index, test_index in kf.split(X):
    # Split the data into training and testing sets for this fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]




In [None]:
'''
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=7)

labels = np.unique(y_train)
dmin = [np.min(X_train[i]) for i in range(X_train.shape[0])]
dmax = [np.max(X_train[i]) for i in range(X_train.shape[0])]

print(labels)

'''


['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']


# selecting sample scale 
for i in range(len(X_train)):
    # mel region (20 total) corresponds to id 
    for j in range(len(X_train[i])):
        # time bin selection 
        for k in range(len(X_train[i][j])): 
            if X_train[i][j][k] != 0:
                spike = neuro.Spike(id=j,time=0,value=X_train[i][j][k])
                proc .... 


In [10]:
risp_config = {
  "leak_mode": True,
  "min_weight": -1,
  "max_weight": 1,
  "min_threshold": -1,
  "max_threshold": 1,
  "max_delay": 5
}


proc = risp.Processor(risp_config)

temp_net = neuro.Network()
temp_net.set_properties(proc.get_network_properties())

In [11]:
def create_neuron(neuron_id, net, moa):
    neuron = net.add_node(neuron_id)
    temp_net.randomize_node_properties(moa, neuron)
    return neuron

In [12]:
n_inputs = num_mels
n_hidden = 200
n_outputs = len(labels)
n_neurons = n_inputs+n_hidden+n_outputs
n_synapses = 1200
seed = 42

moa = neuro.MOA()
moa.seed(seed)
random.seed(seed)

In [13]:
for i in range(n_inputs):
    neuron = create_neuron(i, temp_net, moa)
    neuron.set("Threshold",0.75)
    temp_net.add_input(neuron.id)
    
for i in range(n_outputs):
    neuron = create_neuron(i+n_inputs, temp_net, moa)
    neuron.set("Threshold",0.75)
    temp_net.add_output(neuron.id)
    
for i in range(n_hidden):
    neuron = create_neuron(i+n_inputs+n_outputs, temp_net, moa)




In [14]:
for i in range(n_synapses):
    source = random.randint(0,n_neurons-1)
    dest = random.randint(0,n_neurons-1)
    synapse = temp_net.add_or_get_edge(source, dest)
    temp_net.randomize_edge_properties(moa, synapse)

In [15]:
evolver = eons.EONS(eo_params)
evolver.set_template_network(temp_net)

pop = evolver.generate_population(eo_params,1)

In [16]:
def get_prediction(x):
    #print(x)
    #proc.clear_activity()
    for j in range(len(x)):
            # time bin selection 
            for k in range(len(x[j])): 
                if x[j][k] != 0:
                    spike = neuro.Spike(id=j,time=k,value=x[j][k])
                    #spike = neuro.Spike(id=j,time=0,value=x[j][k])
                    proc.apply_spike(spike)
    proc.run(10000)
    return labels[proc.output_count_max(n_outputs)[0]]

In [17]:
def fitness(net, X, y):
    proc.load_network(net)
    
    # Set up output tracking
    for i in range(n_outputs):
        proc.track_neuron_events(i)
    
    y_predict = [get_prediction(x) for x in X]
    #print(len(y_predict))
    return accuracy_score(y_predict, y)

In [22]:
vals = []
for i in range(50):
    # Calculate the fitnesses of all of the networks in the population
    fitnesses = [fitness(net.network, X_train, y_train) for net in pop.networks]
    # Track the best performing network throughout and print the current best result
    max_fit = max(fitnesses)
    mean_fit = np.mean(fitnesses)
    #print(fitnesses)
    vals.append(max_fit)
    print("Epoch ", i, " : ","max fit", max_fit, "mean fit",mean_fit)
    
    # Create the next population based on the fitnesses of the current population
    pop = evolver.do_epoch(pop, fitnesses, eo_params)

Epoch  0  :  max fit 0.18875 mean fit 0.18128749999999996
Epoch  1  :  max fit 0.18875 mean fit 0.17982499999999998
Epoch  2  :  max fit 0.18875 mean fit 0.17793749999999997
Epoch  3  :  max fit 0.18875 mean fit 0.17749999999999996
Epoch  4  :  max fit 0.18875 mean fit 0.18053749999999993
Epoch  5  :  max fit 0.19 mean fit 0.17871249999999997
Epoch  6  :  max fit 0.19 mean fit 0.17971250000000003
Epoch  7  :  max fit 0.195 mean fit 0.17828749999999996
Epoch  8  :  max fit 0.195 mean fit 0.18012499999999998
Epoch  9  :  max fit 0.195 mean fit 0.18085
Epoch  10  :  max fit 0.195 mean fit 0.1825375
Epoch  11  :  max fit 0.195 mean fit 0.18427500000000005
Epoch  12  :  max fit 0.195 mean fit 0.18345
Epoch  13  :  max fit 0.19625 mean fit 0.18377500000000005
Epoch  14  :  max fit 0.19625 mean fit 0.18257500000000004
Epoch  15  :  max fit 0.19625 mean fit 0.18362500000000004
Epoch  16  :  max fit 0.19625 mean fit 0.18519999999999995
Epoch  17  :  max fit 0.19625 mean fit 0.183725


In [20]:
best_net = pop.networks[fitnesses.index(max_fit)].network
train = fitness(best_net, X_train, y_train)
print("Training Accuracy: ", train)
test = fitness(best_net, X_test, y_test)
print("Testing Accuracy: ", test)

# Store the results
train_results.append(train)
test_results.append(test)


Training Accuracy:  0.1775
Testing Accuracy:  0.0


10 Genre Classifier with K-Fold Cross Classifier 