In [9]:
import torch
import torch.nn as nn
import numpy as np
import librosa
import soundfile as sf
import os

# ✅ Define the SAME model as in training
class VoiceConversionModel(nn.Module):
    def __init__(self, input_dim=13, hidden_dim=128):
        super(VoiceConversionModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        x, _ = self.lstm(x)
        return self.fc(x)

# ✅ Load trained model
model_path = "D:\\projects\\voice_conversion\\models\\voice_conversion.pth"
model = VoiceConversionModel()
model.load_state_dict(torch.load(model_path))
model.eval()

print("✅ Model loaded successfully!")

# ✅ Load input features
test_file = "D:\\projects\\voice_conversion\\data\\features\\LJ001-0004_features.npy"

if not os.path.exists(test_file):
    raise FileNotFoundError(f"Test file not found: {test_file}")

test_features = np.load(test_file)
print("Loaded Test Features Shape:", test_features.shape)  # Debugging

# ✅ Fix Dimension Issue (Ensure it has 13 features per frame)
if test_features.shape[-1] != 13:
    print("⚠️ Feature mismatch detected! Fixing dimensions...")
    test_features = test_features[:, :13]  # Take only the first 13 features

test_features = torch.tensor(test_features, dtype=torch.float32).unsqueeze(0)

print("✅ Fixed Test Features Shape:", test_features.shape)

# ✅ Run the model
with torch.no_grad():
    converted_features = model(test_features)

converted_features = converted_features.squeeze(0).numpy()

print("✅ Voice conversion completed!")

# ✅ Convert back to audio
mel_spectrogram = librosa.feature.inverse.mel_to_stft(converted_features.T)
waveform = librosa.griffinlim(mel_spectrogram)

# ✅ Save output
output_wav_path = "D:\\projects\\voice_conversion\\results\\LJ001-0004_converted_audio.wav"
sf.write(output_wav_path, waveform, samplerate=22050)

print(f"✅ Converted voice saved at: {output_wav_path}")


  model.load_state_dict(torch.load(model_path))


✅ Model loaded successfully!
Loaded Test Features Shape: (13, 222)
⚠️ Feature mismatch detected! Fixing dimensions...
✅ Fixed Test Features Shape: torch.Size([1, 13, 13])
✅ Voice conversion completed!
✅ Converted voice saved at: D:\projects\voice_conversion\results\LJ001-0004_converted_audio.wav
