In [None]:
import numpy as np
import librosa
import os
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from scipy.io.wavfile import write
import cv2

: 

In [2]:
def audio_to_fixed_size_mel_spectrogram(audio_path, target_shape=(256, 256)):
    """Turn an audio file into a fixed-size, dB-scaled Mel spectrogram.

    Parameters
    ----------
    audio_path : str
        Path of the audio file handed to ``librosa.load``.
    target_shape : tuple of int, optional
        Passed unchanged as the ``dsize`` argument of ``cv2.resize``.
        NOTE(review): OpenCV reads ``dsize`` as (width, height); this only
        matters for non-square shapes -- confirm the intended order.

    Returns
    -------
    tuple
        ``(resized_spectrogram, sr)`` where ``sr`` is the sampling rate
        reported by ``librosa.load``.
    """
    # sr=None asks librosa for the file's own sampling rate (no resampling).
    waveform, sample_rate = librosa.load(audio_path, sr=None)

    # 128 Mel bands, limited to frequencies up to 8 kHz.
    mel_power = librosa.feature.melspectrogram(
        y=waveform, sr=sample_rate, n_mels=128, fmax=8000
    )

    # Express power in decibels relative to the loudest bin.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    # Stretch/squash the spectrogram to the requested fixed size.
    return cv2.resize(mel_db, target_shape), sample_rate


In [3]:
# Module-level paths read by display_spectrogram(): the file at `source_path`
# is moved (os.rename) into `destination_folder`.
# NOTE(review): absolute, machine-specific Windows paths -- adjust before
# running on another machine.
source_path = r"E:\music\f_g1.mp3"
destination_folder = r"E:\ideas\output_audio"
def mel_spectrogram_to_audio(mel_spectrogram, sr, n_mels=128, fmax=8000):
    """Invert a dB-scaled Mel spectrogram back into a waveform.

    Parameters
    ----------
    mel_spectrogram : array-like
        2-D dB-scaled Mel spectrogram (Mel bands x frames). A 3-D array is
        also accepted and averaged over its last axis.
        NOTE(review): assumes the last axis of a 3-D input holds image
        channels, as produced by the 3-channel stacking in this notebook.
    sr : int
        Sampling rate of the audio the spectrogram was computed from.
    n_mels : int, optional
        Number of Mel bands to restore before inversion. The default matches
        ``audio_to_fixed_size_mel_spectrogram``.
    fmax : float or None, optional
        Upper frequency of the Mel filter bank. It must match the value used
        when the spectrogram was created (8000 in this notebook); otherwise
        the inversion maps Mel bands to the wrong frequencies.

    Returns
    -------
    numpy.ndarray
        Waveform returned by ``librosa.feature.inverse.mel_to_audio``.

    Raises
    ------
    ValueError
        If the input is neither 2-D nor 3-D.
    """
    mel_db = np.asarray(mel_spectrogram)

    # A generator fed with 3-channel input usually returns 3 channels too;
    # collapse them so the Mel axis is not misread by the inversion.
    if mel_db.ndim == 3:
        mel_db = mel_db.mean(axis=-1)
    if mel_db.ndim != 2:
        raise ValueError(
            f"Expected a 2-D or 3-D Mel spectrogram, got shape {mel_db.shape}"
        )

    # Restore the original number of Mel bands; cv2 expects (width, height),
    # so the frame count (columns) comes first and stays unchanged.
    mel_db = cv2.resize(mel_db, (mel_db.shape[1], n_mels))

    # Convert from decibels to power
    mel_power = librosa.db_to_power(mel_db)

    # Extra keyword arguments are forwarded to the Mel filter bank, so the
    # inverse uses the same frequency range as the forward transform.
    return librosa.feature.inverse.mel_to_audio(mel_power, sr=sr, fmax=fmax)


In [4]:
def _move_source_file(src_path, dst_folder):
    """Move ``src_path`` into ``dst_folder``; return the new path or None if nothing was moved."""
    if not os.path.isfile(src_path):
        # Already moved by an earlier call (or never there): nothing to do.
        return None
    os.makedirs(dst_folder, exist_ok=True)
    # os.path.join adds the separator itself, so no trailing-separator
    # fix-up of the folder name is required.
    moved_path = os.path.join(dst_folder, os.path.basename(src_path))
    os.rename(src_path, moved_path)
    return moved_path


def display_spectrogram(spectrogram, sr):
    """Plot a spectrogram after min-max normalising it to the range 0..1.

    Parameters
    ----------
    spectrogram : array-like
        2-D spectrogram, or a 3-D array whose last axis has three channels
        (these are averaged into a single channel for display).
    sr : int
        Sampling rate passed to ``librosa.display.specshow`` for axis scaling.

    Side effects
    ------------
    Moves the module-level ``source_path`` file into ``destination_folder``
    if that file still exists.
    NOTE(review): moving a file from inside a plotting helper is surprising;
    the behaviour is kept from the original code -- confirm it is wanted.
    """
    spectrogram = np.asarray(spectrogram, dtype=float)

    # If spectrogram has 3 channels (RGB), reduce it to a single channel.
    if spectrogram.ndim == 3 and spectrogram.shape[-1] == 3:
        spectrogram = spectrogram.mean(axis=-1)

    # Min-max normalise; a constant spectrogram would otherwise divide by 0.
    lowest = spectrogram.min()
    value_range = spectrogram.max() - lowest
    if value_range > 0:
        spectrogram = (spectrogram - lowest) / value_range
    else:
        spectrogram = np.zeros_like(spectrogram)

    # The globals are only read here. The original `destination_folder += ...`
    # rebound the name locally and raised UnboundLocalError on every call.
    _move_source_file(source_path, destination_folder)

    fig, ax = plt.subplots(figsize=(10, 4))
    image = librosa.display.specshow(
        spectrogram, sr=sr, x_axis='time', y_axis='mel', cmap='viridis', ax=ax
    )
    # Values are normalised to 0..1 at this point, so they are not decibels.
    fig.colorbar(image, ax=ax, format='%.2f', label='Normalized level')
    ax.set_title('Mel Spectrogram')
    fig.tight_layout()
    plt.show()


In [5]:
def load_model(model_path):
    """Load the trained CycleGAN generator stored at ``model_path``.

    The model is loaded with ``compile=False``. A status line is printed on
    success; on failure the error is printed and the original exception is
    re-raised to the caller.

    Parameters
    ----------
    model_path : str
        Location of the saved Keras model.

    Returns
    -------
    The object returned by ``tf.keras.models.load_model``.
    """
    try:
        generator = tf.keras.models.load_model(model_path, compile=False)
        print(f"Model loaded successfully from: {model_path}")
    except Exception as load_error:
        # Report the problem, then let the caller see the original exception.
        print(f"Error loading model from {model_path}: {load_error}")
        raise
    return generator


In [None]:
def process_audio_file(audio_path, model_path, output_path):
    """Run one audio file through the generator and save the result as WAV.

    Parameters
    ----------
    audio_path : str
        Input audio file.
    model_path : str
        Saved Keras generator model.
    output_path : str
        Either a ``.wav`` file name, or a folder (existing, or a path without
        a file extension). For a folder the result is written to
        ``<input stem>_generated.wav`` inside it.

    Returns
    -------
    None
    """
    # Step 1: Convert audio to fixed-size Mel spectrogram
    mel_spectrogram, sr = audio_to_fixed_size_mel_spectrogram(audio_path)
    print("Mel Spectrogram Shape:", mel_spectrogram.shape)
    display_spectrogram(mel_spectrogram, sr)

    # Step 2: Normalize the spectrogram for the model
    # Keep the scale so the generator output can be mapped back to decibels.
    db_scale = float(np.max(np.abs(mel_spectrogram)))
    if db_scale == 0.0:
        db_scale = 1.0  # all-zero spectrogram: avoid dividing by zero
    mel_spectrogram_input = mel_spectrogram / db_scale
    mel_spectrogram_input = np.stack([mel_spectrogram_input] * 3, axis=-1)  # Add 3 channels
    mel_spectrogram_input = np.expand_dims(mel_spectrogram_input, axis=0)  # Add batch dimension

    # Step 3: Load the trained CycleGAN model
    model = load_model(model_path)
    print("Model Input Shape:", model.input_shape)

    # Step 4: Generate the converted spectrogram
    generated_spectrogram = np.squeeze(model.predict(mel_spectrogram_input))
    # np.squeeze only drops size-1 axes; a 3-channel output must be collapsed
    # explicitly before it can be treated as a 2-D spectrogram.
    if generated_spectrogram.ndim == 3:
        generated_spectrogram = generated_spectrogram.mean(axis=-1)
    print("Generated Mel Spectrogram Shape:", generated_spectrogram.shape)

    # Undo the Step 2 normalization so the values are decibels again.
    # NOTE(review): assumes the generator emits values in the same normalized
    # range as its input -- confirm against the training pipeline.
    generated_spectrogram_db = generated_spectrogram * db_scale

    # Step 5: Convert the generated spectrogram back to audio
    generated_audio = mel_spectrogram_to_audio(generated_spectrogram_db, sr)

    # Step 6: Save the generated audio
    if os.path.isdir(output_path) or not os.path.splitext(output_path)[1]:
        # A folder was given: derive the file name from the input file.
        os.makedirs(output_path, exist_ok=True)
        input_stem = os.path.splitext(os.path.basename(audio_path))[0]
        output_file = os.path.join(output_path, f"{input_stem}_generated.wav")
    else:
        parent_folder = os.path.dirname(output_path)
        if parent_folder:
            os.makedirs(parent_folder, exist_ok=True)
        output_file = output_path

    # Keep samples inside [-1, 1]; larger values would wrap around in int16.
    peak = float(np.max(np.abs(generated_audio))) if generated_audio.size else 0.0
    if peak > 1.0:
        generated_audio = generated_audio / peak
    pcm_samples = (np.clip(generated_audio, -1.0, 1.0) * 32767).astype(np.int16)  # 16-bit PCM
    write(output_file, sr, pcm_samples)
    print(f"Generated audio saved to {output_file}")

# Example usage: run the full pipeline once with machine-specific paths.
# NOTE(review): these are absolute Windows paths -- adjust before running
# on another machine.
# NOTE(review): `audio_path` has no file extension -- confirm it is the real
# file name.
# NOTE(review): `output_path` looks like a folder rather than a .wav file
# name -- confirm what process_audio_file is expected to receive.
audio_path = r"E:\ideas\music2 - Copy_org\input_audio\p1"  # Path to the input audio file
model_path = r"E:\ideas\music2 - Copy_org\models\final_generator_guitar_to_piano.h5"  # Path to the trained CycleGAN model
output_path = r"E:\ideas\music2 - copy_org\output_audio"  # Path to save the generated audio

# Runs the whole conversion when this cell is executed.
process_audio_file(audio_path, model_path, output_path)