In [None]:
import os
import librosa
import numpy as np
from tqdm import tqdm
import random
from google.colab import drive
import torch
from torch.utils.data import Dataset, DataLoader

# Mount Google Drive
drive.mount('/content/drive')

# Reproducibility: input/target pairing uses `random`, and DataLoader shuffling
# and model weight initialisation use torch's RNG. Seed them all once, up front.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Path setup
dataset_path = "/content/drive/MyDrive/recordings"
target_accent = "english"
target_path = os.path.join(dataset_path, target_accent)

# Parameters
sr = 16000        # sample rate passed to librosa.load / melspectrogram
n_fft = 512       # FFT window size
hop_length = 256  # hop between successive frames
max_frames = 64   # every spectrogram is padded/truncated to this many frames

# Load target accent files.
# os.listdir returns entries in arbitrary order, so sort to keep the list
# (and therefore the seeded random pairing) stable across runs.
target_files = sorted(
    os.path.join(target_path, f)
    for f in os.listdir(target_path)
    if f.endswith('.mp3')
)

if len(target_files) == 0:
    raise RuntimeError("❌ No target files found!")

# Helper to pad/truncate specs
def fix_length(spec, max_len):
    """Force a 2-D spectrogram to exactly ``max_len`` columns.

    Args:
        spec: 2-D numpy array; the second axis is the one being resized.
        max_len: Desired size of the second axis.

    Returns:
        ``spec`` itself when it already has ``max_len`` columns, a slice of the
        first ``max_len`` columns when it is longer, or a copy zero-padded on
        the right when it is shorter.
    """
    n_cols = spec.shape[1]
    if n_cols == max_len:
        return spec
    if n_cols > max_len:
        return spec[:, :max_len]
    # Shorter than requested: append zero columns at the end of axis 1 only.
    missing = max_len - n_cols
    return np.pad(spec, ((0, 0), (0, missing)), mode='constant')

# Custom Dataset class
class AccentDataset(Dataset):
    """Pairs every non-target-accent recording with a random target-accent recording.

    ``dataset_path`` is expected to contain one sub-directory per accent, each
    holding ``.mp3`` files. Any sub-directory that contains one of
    ``target_files`` is treated as the target accent and is excluded from the
    inputs. Audio is decoded lazily in ``__getitem__``; only file paths are
    collected at construction time.

    Args:
        dataset_path: Root directory with one sub-directory per accent.
        target_files: Sequence of paths to target-accent audio files.
        sr: Sample rate used when decoding audio and computing spectrograms.
        n_fft: FFT window size for the mel spectrogram.
        hop_length: Hop length for the mel spectrogram.
        max_frames: Number of time frames each spectrogram is padded/cut to.
        n_mels: Number of mel bands (default 80).
        max_retries: How many samples ``__getitem__`` tries before giving up
            when files fail to load (default 10).

    Raises:
        RuntimeError: If ``target_files`` is empty or no input ``.mp3`` file is
            found under ``dataset_path``.
    """

    def __init__(self, dataset_path, target_files, sr, n_fft, hop_length, max_frames,
                 n_mels=80, max_retries=10):
        self.samples = []
        self.sr = sr
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.max_frames = max_frames
        self.n_mels = n_mels
        self.max_retries = max_retries
        self.target_files = target_files

        if len(self.target_files) == 0:
            raise RuntimeError("❌ No target files found!")

        # Derive the directories to exclude from the target files themselves
        # rather than from a notebook-level global, so the class is self-contained.
        target_dirs = {os.path.dirname(os.path.abspath(f)) for f in self.target_files}

        # Collect input file paths (sorted so that index -> file is deterministic).
        for accent in sorted(os.listdir(dataset_path)):
            accent_path = os.path.join(dataset_path, accent)
            if not os.path.isdir(accent_path):
                continue
            if os.path.abspath(accent_path) in target_dirs:
                continue

            for file in sorted(os.listdir(accent_path)):
                if file.endswith(".mp3"):
                    self.samples.append(os.path.join(accent_path, file))

        if len(self.samples) == 0:
            raise RuntimeError("❌ No valid input files found!")

    def __len__(self):
        """Return the number of input (non-target) recordings."""
        return len(self.samples)

    def _log_mel(self, audio):
        """Return the log1p mel spectrogram of ``audio``, fixed to ``max_frames`` frames."""
        mel = librosa.feature.melspectrogram(
            y=audio,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
        )
        return fix_length(np.log1p(mel), self.max_frames)

    def _load_pair(self, input_file, target_file):
        """Decode both files and return their ``[1, n_mels, max_frames]`` float32 tensors."""
        x_audio, _ = librosa.load(input_file, sr=self.sr)
        y_audio, _ = librosa.load(target_file, sr=self.sr)

        # Trim both clips to the shorter one so they cover the same duration.
        min_len = min(len(x_audio), len(y_audio))
        x_log = self._log_mel(x_audio[:min_len])
        y_log = self._log_mel(y_audio[:min_len])

        x_tensor = torch.tensor(x_log, dtype=torch.float32).unsqueeze(0)  # [1, freq, time]
        y_tensor = torch.tensor(y_log, dtype=torch.float32).unsqueeze(0)
        return x_tensor, y_tensor

    def __getitem__(self, idx):
        """Return ``(input, target)`` log-mel tensors for sample ``idx``.

        The target file is drawn at random on every call. If decoding or
        feature extraction fails, a warning is printed and another randomly
        chosen sample is tried, up to ``max_retries`` attempts in total.

        Raises:
            IndexError: If ``idx`` is out of range.
            RuntimeError: If no sample could be loaded within ``max_retries``.
        """
        last_error = None
        for _ in range(self.max_retries):
            input_file = self.samples[idx]
            target_file = random.choice(self.target_files)
            try:
                return self._load_pair(input_file, target_file)
            except Exception as e:
                # Decoder errors vary by audio backend, so the catch stays broad;
                # the loop is bounded so a fully broken dataset fails loudly
                # instead of recursing until the interpreter's recursion limit.
                print(f"⚠️ Skipping {input_file}: {e}")
                last_error = e
                idx = random.randint(0, len(self.samples) - 1)  # try another sample

        raise RuntimeError(
            f"Failed to load a valid sample after {self.max_retries} attempts"
        ) from last_error

# Dataset and DataLoader
accent_dataset = AccentDataset(
    dataset_path=dataset_path,
    target_files=target_files,
    sr=sr,
    n_fft=n_fft,
    hop_length=hop_length,
    max_frames=max_frames,
)
data_loader = DataLoader(accent_dataset, batch_size=1, shuffle=True)
print(f"✅ Loaded dataset with {len(accent_dataset)} samples.")

# One pass over the loader decodes every file; each item is an (X, Y) pair
# with a leading batch dimension of 1.
loaded_pairs = list(data_loader)
X_list = [pair[0] for pair in loaded_pairs]
Y_list = [pair[1] for pair in loaded_pairs]

# Concatenate the single-sample batches along the batch dimension.
X_tensor = torch.cat(X_list, dim=0)
Y_tensor = torch.cat(Y_list, dim=0)

print("X_tensor shape:", X_tensor.shape)  # [samples, 1, freq, time]
print("Y_tensor shape:", Y_tensor.shape)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Loaded dataset with 772 samples.
X_tensor shape: torch.Size([772, 1, 80, 64])
Y_tensor shape: torch.Size([772, 1, 80, 64])


In [None]:
import torch.nn as nn

class AccentCNN(nn.Module):
    """Small fully convolutional network mapping a 1-channel map to a 1-channel map.

    Three ``Conv2d -> BatchNorm2d -> ReLU`` stages (1->32, 32->64, 64->32
    channels) are followed by a final ``Conv2d`` back to one channel. Every
    convolution uses a 3x3 kernel with padding 1, so the spatial size of the
    output equals that of the input.
    """

    def __init__(self):
        super().__init__()
        widths = [1, 32, 64, 32]

        # Build the layers in a flat list so the Sequential indices
        # (net.0 ... net.9) are the same as when written out by hand.
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU())
        layers.append(nn.Conv2d(widths[-1], 1, kernel_size=3, padding=1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` (``[batch, 1, H, W]``) through the stack and return the result."""
        return self.net(x)


In [None]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = AccentCNN().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = nn.L1Loss()

# The model lives on `device`, so the data must too; otherwise the forward pass
# fails with a device-mismatch error whenever a GPU is available.
# Separate names are used so the tensors built earlier are not overwritten and
# this cell can be re-run safely.
X_train = X_tensor.to(device)
Y_train = Y_tensor.to(device)

num_epochs = 35  # each "epoch" is a single full-batch gradient step

model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    out = model(X_train)

    # Crop prediction and target to their common frequency/time extent
    # before computing the loss.
    min_freq = min(out.shape[2], Y_train.shape[2])
    min_time = min(out.shape[3], Y_train.shape[3])
    loss = loss_fn(
        out[:, :, :min_freq, :min_time],
        Y_train[:, :, :min_freq, :min_time],
    )

    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")


Epoch 1: Loss = 0.1304
Epoch 2: Loss = 0.2054
Epoch 3: Loss = 0.1423
Epoch 4: Loss = 0.0912
Epoch 5: Loss = 0.0597
Epoch 6: Loss = 0.0858
Epoch 7: Loss = 0.0878
Epoch 8: Loss = 0.0749
Epoch 9: Loss = 0.0573
Epoch 10: Loss = 0.0472
Epoch 11: Loss = 0.0549
Epoch 12: Loss = 0.0609
Epoch 13: Loss = 0.0580
Epoch 14: Loss = 0.0498
Epoch 15: Loss = 0.0403
Epoch 16: Loss = 0.0442
Epoch 17: Loss = 0.0494
Epoch 18: Loss = 0.0499
Epoch 19: Loss = 0.0459
Epoch 20: Loss = 0.0410
Epoch 21: Loss = 0.0372
Epoch 22: Loss = 0.0410
Epoch 23: Loss = 0.0406
Epoch 24: Loss = 0.0366
Epoch 25: Loss = 0.0365
Epoch 26: Loss = 0.0391
Epoch 27: Loss = 0.0391
Epoch 28: Loss = 0.0357
Epoch 29: Loss = 0.0353
Epoch 30: Loss = 0.0374
Epoch 31: Loss = 0.0364
Epoch 32: Loss = 0.0341
Epoch 33: Loss = 0.0347
Epoch 34: Loss = 0.0351
Epoch 35: Loss = 0.0336


In [None]:
from IPython.display import Audio, display

model.eval()

# Load one non-target-accent recording to morph
input_path = os.path.join(dataset_path, 'mallu', 'mallu1.mp3')
wave, _ = librosa.load(input_path, sr=sr)

# Compute mel spectrogram (same features as used for training)
n_mels = 80
mel_spec = librosa.feature.melspectrogram(y=wave, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
log_mel = np.log1p(mel_spec)

# Prepare input for model: [1, 1, n_mels, frames]
test_input = torch.tensor(log_mel, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)

# Predict log-mel of target (English-accented) version
with torch.no_grad():
    # Drop only the batch and channel axes; a bare .squeeze() would also
    # collapse a frequency/time axis that happened to have length 1.
    pred = model(test_input).squeeze(0).squeeze(0).cpu().numpy()

# Undo log1p. The network output is unconstrained, so expm1 can go negative;
# mel energies must be non-negative for the inversion below.
pred_mel = np.clip(np.expm1(pred), 0.0, None)
# Epsilon belongs in the denominator so an all-zero prediction cannot divide by zero.
pred_mel = pred_mel / (np.max(pred_mel) + 1e-8)

# Invert both mel spectrograms to waveforms using Griffin-Lim. The inverted
# input serves as a baseline for how much quality the inversion alone costs.
original_griffin = librosa.feature.inverse.mel_to_audio(mel_spec, sr=sr, n_fft=n_fft, hop_length=hop_length)
y_out = librosa.feature.inverse.mel_to_audio(pred_mel, sr=sr, n_fft=n_fft, hop_length=hop_length)

print("Griffin-Lim reconstruction of the input:")
display(Audio(original_griffin, rate=sr))

print("✅ Morphed audio ready.")
display(Audio(y_out, rate=sr))


  wave, _ = librosa.load(input_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [None]:
# Publish the working directory to the `origin` remote via shell commands:
# stage everything, commit, force-rename the current branch to `main`,
# then push it and set `origin/main` as its upstream.
!git add .
!git commit -m "Initial commit of accent morphing project"
!git branch -M main
!git push -u origin main


On branch main
nothing to commit, working tree clean
Enumerating objects: 3, done.
Counting objects: 100% (3/3), done.
Delta compression using up to 2 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 140.47 KiB | 8.78 MiB/s, done.
Total 3 (delta 0), reused 0 (delta 0), pack-reused 0
To https://github.com/snehastest/accent_morph.git
 * [new branch]      main -> main
Branch 'main' set up to track remote branch 'main' from 'origin'.


In [33]:
!cp /content/drive/MyDrive/Colab Notebooks/accent_morph.ipynb /content/accent_morph/



cp: target '/content/accent_morph/' is not a directory


In [None]:
# Persist only the model's parameters/buffers (its state_dict), not the module
# object itself. The filename is relative, so the file is written to the
# current working directory.
torch.save(model.state_dict(), "accent_morpher_good.pth")
