In [1]:
from fcrbm_utilities import FCRBM, spectrogram_to_audio, process_audio_files

2025-08-04 11:39:38.281785: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import os
import librosa
import numpy as np
import tensorflow as tf

In [7]:
# Load the audio segments and turn them into the three tensors used for
# FCRBM training: current frame (v), previous frame (u) and style (y).

audio_sample_dir = 'data/audio_segments/'

# FFT settings handed to process_audio_files
n_fft = 2048
hop = 512
sr = 44100

# process_audio_files returns the spectrogram frames, the per-frame style
# rows and the width of the visible layer.
(spectrogram_data_np,
 style_data_np,
 visible_dim) = process_audio_files(audio_sample_dir, n_fft, hop, sr)

# History = the frame one step earlier along axis 0. np.roll wraps the last
# frame around to index 0, so that row is overwritten with zeros (the first
# frame has no predecessor).
# NOTE(review): if frames from several files are concatenated, the first
# frame of every later file inherits the previous file's last frame as its
# history — confirm against process_audio_files.
history_data_np = np.roll(spectrogram_data_np, 1, axis=0)
history_data_np[0] = 0

# Convert all three arrays to float32 TensorFlow constants in one pass.
v_data, u_data, y_data = (
    tf.constant(array, dtype=tf.float32)
    for array in (spectrogram_data_np, history_data_np, style_data_np)
)

# Shape report, one line per item, as a quick sanity check.
print(
    "Data successfully loaded and preprocessed! ✅",
    f"Visible data (spectrogram frames) shape: {v_data.shape}",
    f"History data (previous frames) shape: {u_data.shape}",
    f"Style data (one-hot vectors) shape: {y_data.shape}",
    f"Visible layer dimension: {visible_dim}",
    f"Number of distinct styles (style_dim): {y_data.shape[1]}",
    sep="\n",
)

Data successfully loaded and preprocessed! ✅
Visible data (spectrogram frames) shape: (13644, 1025)
History data (previous frames) shape: (13644, 1025)
Style data (one-hot vectors) shape: (13644, 9)
Visible layer dimension: 1025
Number of distinct styles (style_dim): 9




In [12]:
# Model and optimizer configuration. Layer sizes are derived from the
# tensors prepared in the data-loading cell so they cannot drift from the data.

# Seed Python, NumPy and TensorFlow so weight initialisation, sampling and
# shuffling are repeatable across Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Frame width implied by the FFT size (1025 for n_fft = 2048, matching the
# shape reported when the data was loaded).
visible_dim = n_fft // 2 + 1
if int(v_data.shape[1]) != visible_dim:
    raise ValueError(
        f"Spectrogram frames have width {int(v_data.shape[1])}, "
        f"but n_fft={n_fft} implies {visible_dim}."
    )

hidden_dim = 100
style_dim = int(y_data.shape[1])  # one column per style in the one-hot style data
history_dim = visible_dim         # the history input is the previous frame

learning_rate = 0.001
epochs = 5
batch_size = 64

# Instantiate the FCRBM with dimensions matching the data.
fcrbm = FCRBM(
    visible_dim=visible_dim,
    hidden_dim=hidden_dim,
    style_dim=style_dim,
    history_dim=history_dim,
    k=1,  # presumably the number of Gibbs steps (CD-k) — confirm in fcrbm_utilities
)

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)


In [14]:
# v_data (spectrogram frames), u_data (history), y_data (style)

# Build the batched training dataset from the three aligned tensors.
# The shuffle buffer spans the whole dataset: tf.data only shuffles uniformly
# when the buffer is at least as large as the dataset. A smaller buffer
# (previously 1000 for ~13.6k frames) only mixes neighbouring frames, which
# presumably keeps each batch dominated by a single file/style — TODO confirm
# the frame ordering produced by process_audio_files.
# Cost: the buffer holds one extra copy of the samples in memory.
num_frames = int(v_data.shape[0])
dataset = (
    tf.data.Dataset.from_tensor_slices((v_data, u_data, y_data))
    .shuffle(buffer_size=max(num_frames, 1))  # buffer_size must be >= 1
    .batch(batch_size)
)

In [15]:
# Train the FCRBM: one pass over `dataset` per epoch, reporting the mean of
# the per-batch losses returned by fcrbm.train_step.
# NOTE(review): the recorded output of this cell shows the reported loss
# growing every epoch (about 132 -> 2866 over 5 epochs). Check what
# train_step returns and whether training is diverging.
for epoch in range(epochs):
    epoch_loss = 0
    num_batches = 0  # stays 0 if the dataset yields nothing

    # enumerate(start=1) leaves num_batches equal to the batch count
    for num_batches, (v_batch, u_batch, y_batch) in enumerate(dataset, start=1):
        # Run one update of the custom training step on this batch
        loss = fcrbm.train_step(v_batch, u_batch, y_batch, optimizer)
        epoch_loss += loss.numpy()

    if num_batches == 0:
        # Nothing was processed, so there is no average to report
        continue

    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss / num_batches:.4f}")

2025-08-04 11:59:05.105751: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 1/5, Loss: 132.1753


2025-08-04 11:59:16.627233: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 2/5, Loss: 392.6080


2025-08-04 11:59:28.889370: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 3/5, Loss: 953.6532


2025-08-04 11:59:40.316318: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 4/5, Loss: 1794.4773
Epoch 5/5, Loss: 2866.2771


2025-08-04 11:59:53.418748: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
