In [8]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from magenta.models.music_vae import configs

import numpy as np
from magenta.models.music_vae.trained_model import TrainedModel # We need this class

def build_standalone_encoder(hparams, input_shape):
    """
    Builds a standard Keras model that replicates the custom BidirectionalLstmEncoder.
    
    Args:
        hparams: The same hparams object used by the original model.
        input_shape: The shape of a single input sequence (e.g., (96, 64)).
    """
    
    # 1. Define the input layer
    input_tensor = tf.keras.layers.Input(shape=input_shape, name='encoder_input')
    
    # 2. Re-create the stacked LSTM cells as Keras layers
    # The original code uses a shared cell for fw and bw. The modern equivalent
    # is to create one stack of LSTM layers and wrap it in Bidirectional.
    
    # Start with the input tensor
    x = input_tensor
    
    # Create the stack of LSTM layers
    # The `return_sequences=True` is crucial for all but the last layer in a stack.
    num_lstm_layers = len(hparams.enc_rnn_size)
    for i, layer_size in enumerate(hparams.enc_rnn_size):
        lstm_layer = tf.keras.layers.LSTM(
            units=layer_size, 
            return_sequences=False, # The last (and only) LSTM returns a single vector
            name='multi_rnn_cell/cell_0/lstm_cell' # CRITICAL: Match TF1 variable scope
        )
        
        bi_lstm = tf.keras.layers.Bidirectional(
            lstm_layer,
            name='bidirectional_rnn' # CRITICAL: Match TF1 variable scope
        )

        # To create the 'cell_0' scope, we wrap the Bidirectional layer in a tiny sub-model
        # This is the key to matching the checkpoint's variable names.
        bi_lstm_model = tf.keras.Model(inputs=x, outputs=bi_lstm(x), name=f'cell_{i}')
        x = bi_lstm_model(x)

    # 3. Replicate the flatten operation
    # The original code flattens, but since the last LSTM returns a single vector per direction,
    # the output of Bidirectional is already "flat" in the time dimension.
    
    # 4. Re-create the final Dense layers
    # This layer needs to be inside an 'encoder' scope, which we will achieve by wrapping the whole thing.
    mu_layer = tf.keras.layers.Dense(hparams.z_size, name='mu')
    
    # The final output of our model is just the embedding (mu)
    mu_output = mu_layer(x)
    
    # 5. Create and return the final Keras model
    # We name the final model 'encoder' to add the last required scope.
    encoder_model = tf.keras.Model(inputs=input_tensor, outputs=mu_output, name='encoder')
    
    print("--- Standalone Keras Encoder Built with FINAL Corrected Naming ---")
    encoder_model.summary()
    
    return encoder_model

# --- Example Usage ---

# You would get `your_hparams` from the `mel_2bar._config.hparams` object
mel_2bar_config = configs.CONFIG_MAP['cat-mel_2bar_big']

# Derive the input_shape from the config
# At present mel_2bar_config.data_converter.input_depth is 90 (for 88 piano keys + 2 possibly special tokens)
# and mel_2bar_config.hparams.max_seq_len is 32 (for 2 bars of 16 steps each)
encoder_input_shape = (mel_2bar_config.hparams.max_seq_len, mel_2bar_config.data_converter.input_depth)

standalone_encoder = build_standalone_encoder(mel_2bar_config.hparams, input_shape=encoder_input_shape)


--- Standalone Keras Encoder Built with FINAL Corrected Naming ---
Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 32, 90)]          0         
                                                                 
 cell_0 (Functional)         (None, 4096)              35045376  
                                                                 
 mu (Dense)                  (None, 512)               2097664   
                                                                 
Total params: 37,143,040
Trainable params: 37,143,040
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Use tensorflow.compat.v1 and disable V2 behavior

print("--- Step 1: Loading original TF1-style MusicVAE model ---")
mel_2bar_config = configs.CONFIG_MAP['cat-mel_2bar_big']
BASE_DIR = "models/download.magenta.tensorflow.org/models/music_vae"
checkpoint_path = BASE_DIR + '/checkpoints/mel_2bar_big.ckpt'
mel_2bar = TrainedModel(mel_2bar_config, batch_size=4, checkpoint_dir_or_path=checkpoint_path)
sess = mel_2bar._sess
print("Original model loaded successfully.")

print("\n--- Step 2: Building the new standalone TF2 Keras encoder ---")
encoder_input_shape = (mel_2bar_config.hparams.max_seq_len, mel_2bar_config.data_converter.input_depth)
standalone_encoder = build_standalone_encoder(mel_2bar_config.hparams, input_shape=encoder_input_shape)

print("\n--- Step 3: Starting weight transfer from TF1 session to TF2 model ---")
tf1_variables = sess.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
tf1_variable_names = [v.name for v in tf1_variables]
tf1_variable_values = sess.run(tf1_variables)
tf1_weights_map = dict(zip(tf1_variable_names, tf1_variable_values))

# Iterate through the layers of our new Keras model to set their weights
for layer in standalone_encoder.layers:
    if layer.name == 'cell_0': # This is our Bidirectional sub-model
        for sub_layer in layer.layers:
            if isinstance(sub_layer, tf.keras.layers.Bidirectional):
                print(f"\nProcessing layer: {layer.name}/{sub_layer.name}")
                
                fw_kernel_name = 'encoder/cell_0/bidirectional_rnn/fw/multi_rnn_cell/cell_0/lstm_cell/kernel:0'
                fw_bias_name = 'encoder/cell_0/bidirectional_rnn/fw/multi_rnn_cell/cell_0/lstm_cell/bias:0'
                bw_kernel_name = 'encoder/cell_0/bidirectional_rnn/bw/multi_rnn_cell/cell_0/lstm_cell/kernel:0'
                bw_bias_name = 'encoder/cell_0/bidirectional_rnn/bw/multi_rnn_cell/cell_0/lstm_cell/bias:0'
                
                try:
                    # --- WEIGHT RE-ORDERING LOGIC ---
                    def reorder_lstm_weights(kernel, bias, num_units):
                        # TF1 format: [i, c, f, o] (input, cell, forget, output)
                        # TF2 format: [i, f, c, o] (input, forget, cell, output)
                        
                        # Split into the 4 gate weights
                        k_i, k_c, k_f, k_o = np.split(kernel, 4, axis=-1)
                        b_i, b_c, b_f, b_o = np.split(bias, 4, axis=-1)
                        
                        # Re-assemble in TF2 order (swap c and f)
                        reordered_kernel = np.concatenate([k_i, k_f, k_c, k_o], axis=-1)
                        reordered_bias = np.concatenate([b_i, b_f, b_c, b_o], axis=-1)
                        
                        return reordered_kernel, reordered_bias

                    # Get weights from checkpoint
                    fw_kernel_tf1, fw_bias_tf1 = tf1_weights_map[fw_kernel_name], tf1_weights_map[fw_bias_name]
                    bw_kernel_tf1, bw_bias_tf1 = tf1_weights_map[bw_kernel_name], tf1_weights_map[bw_bias_name]

                    # Re-order weights to match Keras's expected format
                    num_units = mel_2bar_config.hparams.enc_rnn_size[0]
                    fw_kernel_tf2, fw_bias_tf2 = reorder_lstm_weights(fw_kernel_tf1, fw_bias_tf1, num_units)
                    bw_kernel_tf2, bw_bias_tf2 = reorder_lstm_weights(bw_kernel_tf1, bw_bias_tf1, num_units)

                    # Split the re-ordered kernels for Keras
                    input_depth = mel_2bar_config.data_converter.input_depth
                    fw_input_kernel, fw_recurrent_kernel = np.split(fw_kernel_tf2, [input_depth], axis=0)
                    bw_input_kernel, bw_recurrent_kernel = np.split(bw_kernel_tf2, [input_depth], axis=0)
                    
                    # Assemble the final list of 6 weights
                    keras_weights = [
                        fw_input_kernel, fw_recurrent_kernel, fw_bias_tf2,
                        bw_input_kernel, bw_recurrent_kernel, bw_bias_tf2
                    ]
                    
                    sub_layer.set_weights(keras_weights)
                    print(f"  - Successfully re-ordered, split, and transferred 6 weights for Bidirectional LSTM.")

                except Exception as e:
                    print(f"  - FATAL ERROR: An exception occurred during weight transfer: {e}")

    if layer.name == 'mu' and isinstance(layer, tf.keras.layers.Dense):
        # This part remains the same
        print(f"\nProcessing layer: {layer.name}")
        kernel_name = f'encoder/mu/kernel:0'
        bias_name = f'encoder/mu/bias:0'
        try:
            layer.set_weights([tf1_weights_map[kernel_name], tf1_weights_map[bias_name]])
            print(f"  - Successfully transferred weights for Dense layer 'mu'.")
        except KeyError as e:
            print(f"  - FATAL ERROR: Could not find weight {e} for 'mu' layer!")

print("\n--- Step 4: Weight transfer complete. Saving the new Keras model. ---")
output_keras_model_path = 'models/music_vae_encoder_keras'
tf.keras.models.save_model(standalone_encoder, output_keras_model_path)
print(f"\nSuccessfully saved the new Keras model to: '{output_keras_model_path}'")



--- Step 1: Loading original TF1-style MusicVAE model ---
INFO:tensorflow:Building MusicVAE model with BidirectionalLstmEncoder, CategoricalLstmDecoder, and hparams:
{'max_seq_len': 32, 'z_size': 512, 'free_bits': 0, 'max_beta': 0.5, 'beta_rate': 0.99999, 'batch_size': 4, 'grad_clip': 1.0, 'clip_mode': 'global_norm', 'grad_norm_clip_to_zero': 10000, 'learning_rate': 0.001, 'decay_rate': 0.9999, 'min_learning_rate': 1e-05, 'conditional': True, 'dec_rnn_size': [2048, 2048, 2048], 'enc_rnn_size': [2048], 'dropout_keep_prob': 1.0, 'sampling_schedule': 'inverse_sigmoid', 'sampling_rate': 1000, 'use_cudnn': False, 'residual_encoder': False, 'residual_decoder': False, 'control_preprocessing_rnn_size': [256]}
INFO:tensorflow:
Encoder Cells (bidirectional):
  units: [2048]

INFO:tensorflow:
Decoder Cells:
  units: [2048, 2048, 2048]



  tf.layers.dense(
  self._kernel = self.add_variable(
  self._bias = self.add_variable(
  mu = tf.layers.dense(
  sigma = tf.layers.dense(


INFO:tensorflow:Restoring parameters from models/download.magenta.tensorflow.org/models/music_vae/checkpoints/mel_2bar_big.ckpt
Original model loaded successfully.

--- Step 2: Building the new standalone TF2 Keras encoder ---
--- Standalone Keras Encoder Built with FINAL Corrected Naming ---
Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 32, 90)]          0         
                                                                 
 cell_0 (Functional)         (None, 4096)              35045376  
                                                                 
 mu (Dense)                  (None, 512)               2097664   
                                                                 
Total params: 37,143,040
Trainable params: 37,143,040
Non-trainable params: 0
_________________________________________________________________

--- Step 3: Star

In [15]:
# Use tensorflow.compat.v1 and disable V2 behavior for the original model

# ==============================================================================
# 1. SETUP & MODEL LOADING
# ==============================================================================

print("--- Step 1: Loading original TF1-style MusicVAE model ---")
mel_2bar_config = configs.CONFIG_MAP['cat-mel_2bar_big']
BASE_DIR = "models/download.magenta.tensorflow.org/models/music_vae"
checkpoint_path = BASE_DIR + '/checkpoints/mel_2bar_big.ckpt'

# Use a batch size of 1 for easier comparison
BATCH_SIZE = 1
mel_2bar = TrainedModel(mel_2bar_config, batch_size=BATCH_SIZE, checkpoint_dir_or_path=checkpoint_path)
sess = mel_2bar._sess
print("Original model loaded.")

print("\n--- Step 2: Loading the new standalone Keras encoder ---")
keras_model_path = 'models/music_vae_encoder_keras'
# We can load the Keras model using the modern API
standalone_encoder = tf.keras.models.load_model(keras_model_path)
print("New Keras model loaded.")


# ==============================================================================
# 2. INFERENCE & COMPARISON
# ==============================================================================

print("\n--- Step 3: Generating a random input tensor ---")
seq_len = mel_2bar_config.hparams.max_seq_len
input_depth = mel_2bar_config.data_converter.input_depth
control_depth = mel_2bar_config.data_converter.control_depth # This will be 0
input_shape = (BATCH_SIZE, seq_len, input_depth)

random_input = np.random.rand(*input_shape).astype(np.float32)
print(f"Generated random input with shape: {random_input.shape}")

print("\n--- Step 4: Running inference on both models ---")

# --- Get embedding from the ORIGINAL TF1 model ---

# ** THE FIX IS HERE **
# Create an empty array for the `_controls` placeholder
empty_controls = np.zeros((BATCH_SIZE, seq_len, control_depth), dtype=np.float32)
print(f"Generated empty controls with shape: {empty_controls.shape}")

# Add the empty controls to the feed_dict
feed_dict = {
    mel_2bar._inputs: random_input,
    mel_2bar._inputs_length: [seq_len] * BATCH_SIZE,
    mel_2bar._controls: empty_controls # Add the required empty placeholder value
}
# `_mu` is the tensor that holds the embedding
original_embedding = sess.run(mel_2bar._mu, feed_dict)


# --- Get embedding from the NEW Keras model ---
keras_embedding = standalone_encoder.predict(random_input)


# ==============================================================================
# 3. VERIFICATION
# ==============================================================================

print("\n--- Step 5: Comparing the outputs ---")
print(f"Original model's embedding (first 5 values): {original_embedding[0, :5]}")
print(f"New Keras model's embedding (first 5 values):  {keras_embedding[0, :5]}")

are_close = np.allclose(original_embedding, keras_embedding, atol=1e-6)

print("\n--- VERIFICATION RESULT ---")
if are_close:
    print("✅ SUCCESS: The embeddings from both models are identical!")
    abs_diff = np.mean(np.abs(original_embedding - keras_embedding))
    print(f"   (Mean absolute difference: {abs_diff:.10f})")
else:
    print("❌ FAILURE: The embeddings do not match.")
    abs_diff = np.mean(np.abs(original_embedding - keras_embedding))
    print(f"   (Mean absolute difference: {abs_diff})")



--- Step 1: Loading original TF1-style MusicVAE model ---
INFO:tensorflow:Building MusicVAE model with BidirectionalLstmEncoder, CategoricalLstmDecoder, and hparams:
{'max_seq_len': 32, 'z_size': 512, 'free_bits': 0, 'max_beta': 0.5, 'beta_rate': 0.99999, 'batch_size': 1, 'grad_clip': 1.0, 'clip_mode': 'global_norm', 'grad_norm_clip_to_zero': 10000, 'learning_rate': 0.001, 'decay_rate': 0.9999, 'min_learning_rate': 1e-05, 'conditional': True, 'dec_rnn_size': [2048, 2048, 2048], 'enc_rnn_size': [2048], 'dropout_keep_prob': 1.0, 'sampling_schedule': 'inverse_sigmoid', 'sampling_rate': 1000, 'use_cudnn': False, 'residual_encoder': False, 'residual_decoder': False, 'control_preprocessing_rnn_size': [256]}
INFO:tensorflow:
Encoder Cells (bidirectional):
  units: [2048]

INFO:tensorflow:
Decoder Cells:
  units: [2048, 2048, 2048]

INFO:tensorflow:Restoring parameters from models/download.magenta.tensorflow.org/models/music_vae/checkpoints/mel_2bar_big.ckpt
Original model loaded.

--- Step 2:

  updates=self.state_updates,



--- Step 5: Comparing the outputs ---
Original model's embedding (first 5 values): [-0.9633615  1.2745852  1.700005  -1.7129043 -1.2040325]
New Keras model's embedding (first 5 values):  [-0.8934264  1.2289464  1.5655204 -1.4896607 -1.3244532]

--- VERIFICATION RESULT ---
❌ FAILURE: The embeddings do not match.
   (Mean absolute difference: 0.09183776378631592)
