# In [None]:  (notebook cell marker left over from the export)
import torch
import torch.nn as nn
import torch.optim as optim
import librosa
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Define the Diffusion Transformer model (you can adjust the architecture as needed)
class DiffusionTransformer(nn.Module):
    """Encoder-only transformer that predicts a denoised mel-spectrogram.

    The network is a stack of ``nn.TransformerEncoderLayer`` blocks followed
    by a linear projection.  An encoder-only stack is used because
    ``nn.Transformer.forward`` requires both a source and a target sequence,
    and only a single (noisy) sequence is available here.

    Args:
        dim: Size of the feature axis (number of mel bins); used as
            ``d_model``.  Must be divisible by ``nhead``.
        nhead: Number of attention heads.
        num_layers: Number of encoder layers.
    """

    def __init__(self, dim, nhead, num_layers):
        super().__init__()
        self.dim = dim
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim, nhead=nhead, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        # Projects the scalar diffusion timestep to a per-feature offset so
        # the network is actually conditioned on ``t``.
        self.time_embed = nn.Linear(1, dim)
        self.fc_out = nn.Linear(dim, dim)  # Output layer for denoised output

    def forward(self, x, t):
        """Denoise a batch of spectrograms.

        Args:
            x: 3-D tensor, either ``(batch, time, dim)`` or
                ``(batch, dim, time)``.  The second layout is detected only
                when the last axis differs from ``dim`` and the middle axis
                equals it; if both axes equal ``dim`` the input is treated
                as ``(batch, time, dim)``.
            t: Diffusion timestep: a Python number, or a tensor holding
                either one value (shared by the batch) or one value per
                batch element.

        Returns:
            Tensor with the same shape and layout as ``x``.

        Raises:
            ValueError: If ``x`` is not 3-D or has no axis of size ``dim``.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected a 3-D input, got shape {tuple(x.shape)}"
            )

        channels_first = x.size(-1) != self.dim and x.size(-2) == self.dim
        if channels_first:
            x = x.transpose(1, 2)
        if x.size(-1) != self.dim:
            raise ValueError(
                f"expected a feature axis of size {self.dim}, "
                f"got shape {tuple(x.shape)}"
            )

        # (n,) or scalar -> (n, 1, 1) so the embedding broadcasts over time.
        t = torch.as_tensor(t, dtype=x.dtype, device=x.device).reshape(-1, 1, 1)
        x = x + self.time_embed(t)

        out = self.fc_out(self.transformer(x))
        return out.transpose(1, 2) if channels_first else out

# Define a custom Dataset for loading mel-spectrograms
class MelSpectrogramDataset(Dataset):
    """Fixed-length mel-spectrograms computed from the WAV files in a folder.

    Files are kept in sorted order so that a given index always refers to the
    same file, regardless of the order ``os.listdir`` happens to return.

    Args:
        folder_path: Directory containing the audio files.
        target_length: Number of frames every spectrogram is padded or
            truncated to.
        sample_rate: Rate the audio is resampled to when loaded.
        n_mels: Number of mel bins in each spectrogram.
    """

    def __init__(self, folder_path, target_length=100, sample_rate=16000, n_mels=80):
        self.folder_path = folder_path
        self.target_length = target_length
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        # Case-insensitive match so ``.WAV`` files are not silently skipped.
        self.files = sorted(
            f for f in os.listdir(folder_path) if f.lower().endswith('.wav')
        )

    def __len__(self):
        """Return the number of audio files found in the folder."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load file ``idx`` and return its spectrogram.

        Returns:
            ``float32`` tensor of shape ``(n_mels, target_length)``.
        """
        file_path = os.path.join(self.folder_path, self.files[idx])
        audio, sr = librosa.load(file_path, sr=self.sample_rate)

        # The audio array must be passed by keyword (``y=``).
        mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=self.n_mels)

        # Pad or truncate the spectrogram to match the target length
        mel_spec = self.pad_or_truncate(mel_spec)

        return torch.tensor(mel_spec, dtype=torch.float32)

    def pad_or_truncate(self, mel_spec):
        """Force the frame axis (axis 1) to exactly ``target_length``.

        Shorter inputs are zero-padded on the right; longer inputs are cut
        off after ``target_length`` frames.

        Args:
            mel_spec: 2-D array whose second axis is the frame axis.

        Returns:
            Array with ``shape[1] == target_length``.
        """
        current_length = mel_spec.shape[1]
        if current_length < self.target_length:
            pad_width = self.target_length - current_length
            mel_spec = np.pad(mel_spec, ((0, 0), (0, pad_width)), mode='constant')
        elif current_length > self.target_length:
            mel_spec = mel_spec[:, :self.target_length]

        return mel_spec

# Define the training class
class DiffusionTrainer:
    """Runs single optimisation steps for a denoising model.

    Args:
        model: Module called as ``model(noisy_input, timestep)``.
        lr: Learning rate for the Adam optimiser.
    """

    def __init__(self, model, lr=1e-4):
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()

    def train_step(self, source, target):
        """Perform one training step and return the loss.

        Gaussian noise is added to ``source``, the model predicts a denoised
        spectrogram, and the mean-squared error against ``target`` is
        back-propagated.

        Args:
            source: Input mel-spectrograms (Indian accent).
            target: Target mel-spectrograms (American accent).

        Returns:
            The loss of this step as a Python float.

        Raises:
            ValueError: If the model output and ``target`` differ in shape.
                ``MSELoss`` would otherwise broadcast them and return a
                misleading value.
        """
        self.model.train()  # make sure dropout etc. are in training mode
        self.optimizer.zero_grad()

        noisy_source = source + torch.randn_like(source)

        # Fixed timestep (t=0.5), created on the same device as the batch so
        # the step also works when the data lives on an accelerator.
        timestep = torch.tensor([0.5], device=source.device)
        predicted = self.model(noisy_source, timestep)

        if predicted.shape != target.shape:
            raise ValueError(
                f"prediction shape {tuple(predicted.shape)} does not match "
                f"target shape {tuple(target.shape)}"
            )

        loss = self.loss_fn(predicted, target)
        loss.backward()
        self.optimizer.step()
        return loss.item()

# Paths to your Indian and American accent data
indian_folder = r"D:\Speech_Processing\Accentrix\Accentrix\data\cmu_arctic\indian"
american_folder = r"D:\Speech_Processing\Accentrix\Accentrix\data\cmu_arctic\american"

# Set target length for mel spectrograms
target_length = 100  # Adjust this depending on your data

# Initialize datasets
indian_dataset = MelSpectrogramDataset(indian_folder, target_length=target_length)
american_dataset = MelSpectrogramDataset(american_folder, target_length=target_length)

# Pairing below is by index, so both file lists must be in the same order.
# NOTE(review): assumes the two folders hold the same utterances under
# matching file names (parallel corpus) -- confirm against the data.
indian_dataset.files.sort()
american_dataset.files.sort()


class PairedMelDataset(Dataset):
    """Yield ``(source, target)`` items taken at the same index of two datasets.

    The length is that of the shorter dataset, so every index is valid in
    both.
    """

    def __init__(self, source_dataset, target_dataset):
        self.source_dataset = source_dataset
        self.target_dataset = target_dataset

    def __len__(self):
        return min(len(self.source_dataset), len(self.target_dataset))

    def __getitem__(self, idx):
        return self.source_dataset[idx], self.target_dataset[idx]


# Per-accent loaders, kept for code that iterates one accent on its own.
indian_loader = DataLoader(indian_dataset, batch_size=32, shuffle=True)
american_loader = DataLoader(american_dataset, batch_size=32, shuffle=True)

# One loader over pairs: shuffling two loaders independently and zipping them
# would match each source utterance with a random target utterance.
paired_loader = DataLoader(
    PairedMelDataset(indian_dataset, american_dataset), batch_size=32, shuffle=True
)

# Initialize the model
dim = 80  # Mel-spectrogram dimension
model = DiffusionTransformer(dim=dim, nhead=8, num_layers=6)
trainer = DiffusionTrainer(model)

# Training loop
for epoch in range(10):
    epoch_loss = 0.0
    num_batches = 0
    for indian, american in paired_loader:
        # Batches arrive as (batch, n_mels, time); the model's feature size
        # (dim) is the mel axis, so move it last: (batch, time, n_mels).
        loss = trainer.train_step(indian.transpose(1, 2), american.transpose(1, 2))
        epoch_loss += loss
        num_batches += 1

    # Average over the batches actually processed; guard against empty data.
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / max(num_batches, 1):.4f}")


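# Output of an earlier run, kept for reference. It was raised when the audio
# array was passed positionally; the melspectrogram call above passes it as
# `y=audio`, which avoids this error:
# TypeError: melspectrogram() takes 0 positional arguments but 1 positional argument (and 1 keyword-only argument) were given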