In [None]:
import tensorflow.compat.v1 as tf
import numpy as np
import os
from magenta.models.music_vae import configs
from magenta.models.music_vae.trained_model import TrainedModel


# ======================================================================
#  1. SETUP: Configuration and Environment
# ======================================================================

# MusicVAE is written against the TF1 graph/session API, so TF2 behavior is
# switched off before any model code runs.
tf.disable_v2_behavior()

# Batch size handed to the MusicVAE TrainedModel wrapper and used to shape
# the test input.
BATCH_SIZE = 1

# Configuration entry for the 2-bar melody model.
mel_2bar_config = configs.CONFIG_MAP['cat-mel_2bar_big']

# Location of the MusicVAE checkpoint.
BASE_DIR = "models/download.magenta.tensorflow.org/models/music_vae"
MUSICVAE_CHECKPOINT_DIR = f"{BASE_DIR}/checkpoints/mel_2bar_big.ckpt"

# Saved standalone Keras encoder and the TFLite model converted from it.
KERAS_MODEL_PATH = 'models/music_vae_encoder_keras'
TFLITE_MODEL_PATH = 'models/music_vae_encoder_tf2.tflite'

# --- Model-specific tensor names for the original MusicVAE ---
# NOTE(review): these names come from earlier manual graph exploration;
# auto-generated placeholder names depend on graph construction order, so
# confirm them against the actual graph before relying on them.
MUSICVAE_INPUT_TENSOR_NAME = "Placeholder_2:0"
MUSICVAE_INPUT_LENGTH_NAME = "Placeholder_1:0"
MUSICVAE_OUTPUT_TENSOR_NAME = "encoder/mu/BiasAdd:0"


# ======================================================================
#  2. LOAD MODELS: Load each of the three models
# ======================================================================

print("--- Loading Models ---")

# --- Model A: Original MusicVAE (from Checkpoint) ---
print("Loading original MusicVAE model...")
# TrainedModel builds its own tf.Graph and tf.Session internally and restores
# the checkpoint into them. A graph/session pair created here would stay
# empty, and running the model's tensors through it fails with a
# "not an element of this graph" error. Expose the model's own session and
# graph under the names the rest of the notebook uses instead.
mel_2bar = TrainedModel(
    mel_2bar_config,
    batch_size=BATCH_SIZE,
    checkpoint_dir_or_path=MUSICVAE_CHECKPOINT_DIR,
)
sess_a = mel_2bar._sess
graph_a = sess_a.graph


# --- Model B: Standalone Encoder (saved Keras model) ---
print(f"\nLoading Keras model from: {KERAS_MODEL_PATH}...")
# The Keras model gets its own graph and session so that its variables and
# ops cannot collide with the MusicVAE graph.
graph_b = tf.Graph()
sess_b = tf.compat.v1.Session(graph=graph_b)
with graph_b.as_default(), sess_b.as_default():
    keras_encoder = tf.keras.models.load_model(KERAS_MODEL_PATH)
print("Keras model loaded successfully.")


# --- Model C: TFLite Encoder (from .tflite file) ---
print(f"\nLoading TFLite model from: {TFLITE_MODEL_PATH}...")
# The TFLite interpreter is independent of TF sessions and graphs.
interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL_PATH)
interpreter.allocate_tensors()
# Input/output details carry the tensor indices needed by set_tensor/get_tensor.
tflite_input_details = interpreter.get_input_details()
tflite_output_details = interpreter.get_output_details()
print("TFLite model loaded successfully.")


# ======================================================================
#  3. PREPARE INPUT: Create a single, common input sequence
# ======================================================================

print("\n--- Step 3: Generating a random input tensor ---")
seq_len = mel_2bar_config.hparams.max_seq_len
input_depth = mel_2bar_config.data_converter.input_depth
control_depth = mel_2bar_config.data_converter.control_depth
input_shape = (BATCH_SIZE, seq_len, input_depth)

# One shared random input so every model is evaluated on identical data.
random_input = np.random.rand(*input_shape).astype(np.float32)
print(f"Generated random input with shape: {random_input.shape}")

# The `_controls` placeholder must be fed even when the model has no control
# signal; in that case the last dimension is simply zero-sized.
empty_controls = np.zeros((BATCH_SIZE, seq_len, control_depth), dtype=np.float32)
print(f"Generated empty controls with shape: {empty_controls.shape}")

# Feed dict keyed by the placeholders that TrainedModel created in its own
# graph.
feed_dict = {
    mel_2bar._inputs: random_input,
    mel_2bar._inputs_length: [seq_len] * BATCH_SIZE,
    mel_2bar._controls: empty_controls,
}
# `_mu` is the tensor that holds the embedding. It lives in the graph owned
# by TrainedModel, so it has to be evaluated with the model's own session;
# a session bound to any other graph rejects it.
original_embedding = mel_2bar._sess.run(mel_2bar._mu, feed_dict)


# ======================================================================
#  4. RUN INFERENCE: Get embeddings from all three models
# ======================================================================

print("\n--- Running Inference ---")

# --- Get Embedding A: MusicVAE ---
# Evaluate through the session and tensor handles owned by TrainedModel.
# `feed_dict` already maps its input, length and controls placeholders to the
# shared random input, which avoids depending on auto-generated placeholder
# names.
musicvae_embedding = mel_2bar._sess.run(mel_2bar._mu, feed_dict)
print("Got embedding from MusicVAE.")

# --- Get Embedding B: Keras Encoder ---
# Feed the same `random_input` that was given to MusicVAE so the embeddings
# are directly comparable.
with graph_b.as_default():
    keras_embedding = sess_b.run(
        keras_encoder.output,
        feed_dict={keras_encoder.input: random_input},
    )
print("Got embedding from Keras model.")

# --- Get Embedding C: TFLite Encoder ---
# NOTE(review): assumes the TFLite input tensor has the same shape and dtype
# as `random_input` — confirm against `tflite_input_details`.
interpreter.set_tensor(tflite_input_details[0]['index'], random_input)
interpreter.invoke()
tflite_embedding = interpreter.get_tensor(tflite_output_details[0]['index'])
print("Got embedding from TFLite model.")


# ======================================================================
#  5. COMPARE RESULTS: Calculate and display the differences
# ======================================================================

print("\n--- Comparing Embeddings ---")

# Quick visual check: leading five components of the first row of each
# embedding.
print(f"MusicVAE Embedding (sample): {musicvae_embedding[0, :5]}")
print(f"Keras Embedding (sample):   {keras_embedding[0, :5]}")
print(f"TFLite Embedding (sample):  {tflite_embedding[0, :5]}")

# Pairwise element-wise differences; their norms are the Euclidean distances.
delta_musicvae_keras = musicvae_embedding - keras_embedding
delta_keras_tflite = keras_embedding - tflite_embedding
delta_musicvae_tflite = musicvae_embedding - tflite_embedding

dist_musicvae_vs_keras = np.linalg.norm(delta_musicvae_keras)
dist_keras_vs_tflite = np.linalg.norm(delta_keras_tflite)
dist_musicvae_vs_tflite = np.linalg.norm(delta_musicvae_tflite)

print("\n--- Embedding Distances (Euclidean) ---")
print(f"Distance (MusicVAE vs. Keras):   {dist_musicvae_vs_keras:.6f}")
print(f"Distance (Keras vs. TFLite):     {dist_keras_vs_tflite:.6f}")
print(f"Distance (MusicVAE vs. TFLite):  {dist_musicvae_vs_tflite:.6f}")

print("\n--- Analysis ---")
# Distances below 1e-5 are reported as "nearly identical"; anything else
# (including NaN) falls through to the warning/info message.
conversion_verdict = (
    "✅ The Keras and TFLite models produce nearly identical embeddings. The conversion was successful."
    if dist_keras_vs_tflite < 1e-5
    else "⚠️ The Keras and TFLite models show a numerical difference. This can be due to quantization or optimizations during conversion."
)
print(conversion_verdict)

extraction_verdict = (
    "✅ The original MusicVAE and the Keras standalone encoder produce nearly identical embeddings."
    if dist_musicvae_vs_keras < 1e-5
    else f"ℹ️ The original MusicVAE and Keras models have a notable difference (distance: {dist_musicvae_vs_keras:.4f}). This confirms the discrepancy we observed previously."
)
print(extraction_verdict)



--- Loading Models ---
Loading original MusicVAE model...
INFO:tensorflow:Building MusicVAE model with BidirectionalLstmEncoder, CategoricalLstmDecoder, and hparams:
{'max_seq_len': 32, 'z_size': 512, 'free_bits': 0, 'max_beta': 0.5, 'beta_rate': 0.99999, 'batch_size': 1, 'grad_clip': 1.0, 'clip_mode': 'global_norm', 'grad_norm_clip_to_zero': 10000, 'learning_rate': 0.001, 'decay_rate': 0.9999, 'min_learning_rate': 1e-05, 'conditional': True, 'dec_rnn_size': [2048, 2048, 2048], 'enc_rnn_size': [2048], 'dropout_keep_prob': 1.0, 'sampling_schedule': 'inverse_sigmoid', 'sampling_rate': 1000, 'use_cudnn': False, 'residual_encoder': False, 'residual_decoder': False, 'control_preprocessing_rnn_size': [256]}
INFO:tensorflow:
Encoder Cells (bidirectional):
  units: [2048]

INFO:tensorflow:
Decoder Cells:
  units: [2048, 2048, 2048]



  tf.layers.dense(
  self._kernel = self.add_variable(
  self._bias = self.add_variable(


INFO:tensorflow:Restoring parameters from models/download.magenta.tensorflow.org/models/music_vae/checkpoints/mel_2bar_big.ckpt


  mu = tf.layers.dense(
  sigma = tf.layers.dense(



Loading Keras model from: models/music_vae_encoder_keras...
Keras model loaded successfully.

Loading TFLite model from: models/music_vae_encoder_tf2.tflite...
TFLite model loaded successfully.

--- Step 3: Generating a random input tensor ---
Generated random input with shape: (1, 32, 90)
Generated empty controls with shape: (1, 32, 0)


In [None]:
# `_mu` lives in the graph that TrainedModel built for itself, so it must be
# evaluated with the model's own session rather than a session bound to a
# different graph.
original_embedding = mel_2bar._sess.run(mel_2bar._mu, feed_dict)


# ======================================================================
#  4. RUN INFERENCE: Get embeddings from all three models
# ======================================================================

print("\n--- Running Inference ---")

# --- Get Embedding A: MusicVAE ---
# The embedding just computed above is already the MusicVAE result for the
# shared input, so it is reused instead of running the graph a second time.
musicvae_embedding = original_embedding
print("Got embedding from MusicVAE.")

# --- Get Embedding B: Keras Encoder ---
# Feed the same `random_input` that was given to MusicVAE so the embeddings
# are directly comparable.
with graph_b.as_default():
    keras_embedding = sess_b.run(
        keras_encoder.output,
        feed_dict={keras_encoder.input: random_input},
    )
print("Got embedding from Keras model.")

# --- Get Embedding C: TFLite Encoder ---
# NOTE(review): assumes the TFLite input tensor has the same shape and dtype
# as `random_input` — confirm against `tflite_input_details`.
interpreter.set_tensor(tflite_input_details[0]['index'], random_input)
interpreter.invoke()
tflite_embedding = interpreter.get_tensor(tflite_output_details[0]['index'])
print("Got embedding from TFLite model.")


# ======================================================================
#  5. COMPARE RESULTS: Calculate and display the differences
# ======================================================================

print("\n--- Comparing Embeddings ---")

# Quick visual check: leading five components of the first row of each
# embedding.
print(f"MusicVAE Embedding (sample): {musicvae_embedding[0, :5]}")
print(f"Keras Embedding (sample):   {keras_embedding[0, :5]}")
print(f"TFLite Embedding (sample):  {tflite_embedding[0, :5]}")

# Pairwise element-wise differences; their norms are the Euclidean distances.
delta_musicvae_keras = musicvae_embedding - keras_embedding
delta_keras_tflite = keras_embedding - tflite_embedding
delta_musicvae_tflite = musicvae_embedding - tflite_embedding

dist_musicvae_vs_keras = np.linalg.norm(delta_musicvae_keras)
dist_keras_vs_tflite = np.linalg.norm(delta_keras_tflite)
dist_musicvae_vs_tflite = np.linalg.norm(delta_musicvae_tflite)

print("\n--- Embedding Distances (Euclidean) ---")
print(f"Distance (MusicVAE vs. Keras):   {dist_musicvae_vs_keras:.6f}")
print(f"Distance (Keras vs. TFLite):     {dist_keras_vs_tflite:.6f}")
print(f"Distance (MusicVAE vs. TFLite):  {dist_musicvae_vs_tflite:.6f}")

print("\n--- Analysis ---")
# Distances below 1e-5 are reported as "nearly identical"; anything else
# (including NaN) falls through to the warning/info message.
conversion_verdict = (
    "✅ The Keras and TFLite models produce nearly identical embeddings. The conversion was successful."
    if dist_keras_vs_tflite < 1e-5
    else "⚠️ The Keras and TFLite models show a numerical difference. This can be due to quantization or optimizations during conversion."
)
print(conversion_verdict)

extraction_verdict = (
    "✅ The original MusicVAE and the Keras standalone encoder produce nearly identical embeddings."
    if dist_musicvae_vs_keras < 1e-5
    else f"ℹ️ The original MusicVAE and Keras models have a notable difference (distance: {dist_musicvae_vs_keras:.4f}). This confirms the discrepancy we observed previously."
)
print(extraction_verdict)