In [1]:
from tensorflow.keras.models import load_model
import soundfile as sf
import numpy as np
import librosa

# Load the trained generator model from 'generator.h5' (a relative path, so it
# is resolved against the current working directory). The loaded model is used
# further down via generator.predict().
generator = load_model('generator.h5')

def preprocess_audio(file_path, sample_rate=22050, duration=5):
    """Load an audio file and shape it into a single-item batch for the generator.

    The file is read with soundfile, resampled to ``sample_rate`` when its
    native rate differs, forced to exactly ``int(sample_rate * duration)``
    frames via ``pad_or_truncate``, and returned as a float32 array with a
    leading batch axis.

    Args:
        file_path: Path of the audio file to read.
        sample_rate: Target sample rate in Hz.
        duration: Target clip length in seconds.

    Returns:
        A float32 array of shape ``(1, frames, channels)``; mono input yields
        ``channels == 1``.
    """
    # soundfile returns mono as (frames,) and multi-channel as (frames, channels).
    audio, file_sample_rate = sf.read(file_path)

    # Resample if needed. librosa resamples along the LAST axis, whereas
    # soundfile puts time on the FIRST axis, so transpose around the call.
    # (.T is a no-op for 1-D mono data.)
    if file_sample_rate != sample_rate:
        audio = librosa.resample(
            audio.T, orig_sr=file_sample_rate, target_sr=sample_rate
        ).T

    # Pad or truncate audio to the fixed number of frames
    audio = pad_or_truncate(audio, int(sample_rate * duration))

    # Ensure audio is 2D: (length, 1) if mono, (length, channels) if stereo
    if audio.ndim == 1:
        audio = audio[:, np.newaxis]

    # Add the batch axis and convert to float32 for the model input
    return audio[np.newaxis, :, :].astype(np.float32)

def pad_or_truncate(audio, length):
    """Force ``audio`` to exactly ``length`` entries along its first axis.

    Longer input is cut to its first ``length`` entries; shorter input is
    zero-padded at the end; input that already has the right length is
    returned unchanged (same object).

    Args:
        audio: Array-like with time on axis 0, e.g. ``(frames,)`` or
            ``(frames, channels)``.
        length: Desired size of axis 0.

    Returns:
        The truncated slice, a zero-padded copy, or ``audio`` itself.
    """
    current = len(audio)
    if current > length:
        return audio[:length]
    if current < length:
        # Pad only the first (time) axis. A bare (0, k) pad spec is applied by
        # np.pad to EVERY axis, which would also append k zero "channels" to
        # a (frames, channels) array.
        pad_width = [(0, length - current)] + [(0, 0)] * (np.ndim(audio) - 1)
        return np.pad(audio, pad_width, 'constant')
    return audio

# Single source of truth for the sample rate: the same value is used to
# resample the input and to tag the output file, so the two cannot drift apart
# (a mismatch would make the saved audio play back at the wrong speed).
SAMPLE_RATE = 22050

# Example: Preprocess your input .wav file
input_file_path = "test.wav"
preprocessed_audio = preprocess_audio(input_file_path, sample_rate=SAMPLE_RATE)

# Run the generator on the preprocessed single-item batch
compressed_audio = generator.predict(preprocessed_audio)

# Example: Save the compressed audio as a .flac file
output_file_path = "test-gen.flac"

# Drop every length-1 axis from the prediction (e.g. the batch axis, and the
# channel axis when the output is mono) before writing it out
compressed_audio = compressed_audio.squeeze()

# Save using the soundfile library
sf.write(output_file_path, compressed_audio, samplerate=SAMPLE_RATE)




1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 580ms/step
