In [1]:
import librosa
import numpy as np
from multiprocessing import Manager
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.utils.data import DataLoader, Dataset

from pydub import AudioSegment
import soundfile as sf

import matplotlib.pyplot as plt
import librosa.display
import random
import torchaudio

import matplotlib.pyplot as plt
import torchaudio
import torchaudio.transforms as T
import os
from tqdm import tqdm
import torch.cuda.amp as amp
from tqdm import tqdm, trange
import math

In [2]:
# --- Audio / feature settings ---
SNIPPET_SECONDS = 2      # default snippet length (seconds) for the DDPM dataset
SR = 22050               # sample rate passed to librosa when loading audio
HOP_LENGTH = 512         # hop length passed to the chroma extraction
N_MELS = 80              # number of mel bands in the training spectrograms

# --- Batch / latent settings ---
BATCH_SIZE = 8           # batch size of the DDPM DataLoader
LATENT_DIM = (1, N_MELS, 512)  # not referenced by the visible cells

# --- Optimisation settings ---
TRAIN_STEPS = 5000       # number of optimiser steps requested from train_ddpm
OPT_STEPS = 70           # not referenced by the visible cells
LR = 1e-4                # Adam learning rate used by train_ddpm
WEIGHT_DECAY = 1e-5      # Adam weight decay used by train_ddpm

In [3]:
# Start a multiprocessing Manager and create a proxy dict from it. The dict is
# meant to be handed to the dataset as `shared_log`, which records one entry per
# sampled snippet.
# NOTE(review): the DDPM training cell further down creates a new Manager and
# rebinds both names, so the objects created here are replaced before training.
manager = Manager()
shared_snippet_log = manager.dict()  # key=(file_id, start), value=True


In [4]:
# Lower bound (in samples) enforced on the snippet length.
# NOTE(review): the melspectrogram call below does not pass n_fft, so librosa's
# own default FFT size is used there, not this constant.
N_FFT = 1024


class DynamicAudioMelodyDataset(Dataset):
    """Random fixed-length snippets from preloaded audio, as (mel, melody) pairs.

    All files are decoded once in the constructor and kept in memory. Every
    ``__getitem__`` call ignores ``idx`` and draws a random file and a random
    start offset instead, so an "epoch" is simply ``epoch_samples`` random draws.

    Each item is a tuple ``(mel_tensor, melody_tensor)``:
      * ``mel_tensor``  - float32, shape ``[1, N_MELS, T16]`` (mel power in dB)
      * ``melody_tensor`` - int64, shape ``[T16]`` (argmax chroma bin per frame)
    where ``T16`` is the frame count truncated down to a multiple of 16.

    Args:
        wav_paths: iterable of audio file paths; must not be empty.
        shared_log: mapping that receives ``(file_index, start)`` -> ``True``
            for every snippet drawn.
        snippet_duration: snippet length in seconds (raised to at least
            ``N_FFT / sr`` if shorter).
        sr: sample rate used for loading and feature extraction.
        epoch_samples: value returned by ``__len__``.

    Raises:
        ValueError: if ``wav_paths`` is empty.
    """

    def __init__(self, wav_paths, shared_log, snippet_duration=SNIPPET_SECONDS, sr=SR, epoch_samples=5000):
        wav_paths = list(wav_paths)
        if not wav_paths:
            # Fail here rather than with an opaque IndexError in a worker process.
            raise ValueError("wav_paths must contain at least one audio file")

        self.sr = sr
        self.snippet_duration = max(snippet_duration, N_FFT / sr)
        self.samples = max(int(self.sr * self.snippet_duration), N_FFT)
        self.data = []
        self.snippet_log = shared_log
        self.epoch_samples = epoch_samples

        for path in tqdm(wav_paths, desc="Preloading audio files"):
            y, _ = librosa.load(path, sr=sr)
            # Zero-pad short files so every stored array holds at least one full snippet.
            if len(y) < self.samples:
                y = np.pad(y, (0, self.samples - len(y)))
            self.data.append(y)

    def __len__(self):
        # Number of samples per epoch
        return self.epoch_samples

    def __getitem__(self, idx):
        # Use the list position as the file identifier: unlike id(array) it is
        # stable across worker processes and across runs.
        file_id = random.randrange(len(self.data))
        y = self.data[file_id]

        # len(y) >= self.samples is guaranteed by __init__, so this slice always
        # has exactly self.samples elements.
        start = random.randint(0, len(y) - self.samples)
        snippet = y[start:start + self.samples]

        self.snippet_log[(file_id, start)] = True

        # Feature extraction (use float32 for PyTorch). hop_length is passed to
        # both calls so the mel and chroma frame grids stay aligned.
        mel = librosa.feature.melspectrogram(y=snippet, sr=self.sr, n_mels=N_MELS, hop_length=HOP_LENGTH)
        mel_db = librosa.power_to_db(mel).astype(np.float32)
        chroma = librosa.feature.chroma_cqt(y=snippet, sr=self.sr, hop_length=HOP_LENGTH)
        chroma = librosa.decompose.nn_filter(chroma, aggregate=np.median)
        melody = np.argmax(chroma, axis=0).astype(np.int64)

        # Truncate both features to a common multiple of 16 along the time axis.
        n_frames = min(mel_db.shape[1], melody.shape[0])
        n_frames_16 = (n_frames // 16) * 16
        mel_tensor = torch.tensor(mel_db[:, :n_frames_16], dtype=torch.float32).unsqueeze(0)
        melody_tensor = torch.tensor(melody[:n_frames_16], dtype=torch.long)

        return mel_tensor, melody_tensor


In [5]:
def center_crop(tensor, target_shape):
    """Cut a centred window out of the last two axes of a 4-D tensor.

    Args:
        tensor: tensor of shape [B, C, H, W].
        target_shape: pair (target_h, target_w) giving the window size.

    Returns:
        The slice tensor[:, :, top:top + target_h, left:left + target_w], where
        top/left are half of the size difference (floor division).
    """
    _, _, height, width = tensor.shape
    out_h, out_w = target_shape

    top = (height - out_h) // 2
    left = (width - out_w) // 2

    rows = slice(top, top + out_h)
    cols = slice(left, left + out_w)
    return tensor[:, :, rows, cols]


In [6]:
class SinusoidalPosEmb(nn.Module):
    """Sinusoidal embedding of a 1-D batch of (time-step) values.

    For an input ``t`` of shape ``[B]`` the output has shape ``[B, dim]``: the
    first ``dim // 2`` columns are sines and the next ``dim // 2`` are cosines
    of ``t`` scaled by geometrically spaced frequencies between 1 and 1/10000.
    When ``dim`` is odd, one zero column is appended so the width is exactly
    ``dim``.

    Args:
        dim: embedding width; must be at least 2.

    Raises:
        ValueError: if ``dim`` is smaller than 2.
    """

    def __init__(self, dim):
        super().__init__()
        if dim < 2:
            raise ValueError(f"dim must be >= 2, got {dim}")
        self.dim = dim

    def forward(self, t):
        half_dim = self.dim // 2
        # max(..., 1) prevents a division by zero when dim is 2 or 3 (half_dim == 1).
        log_step = math.log(10000) / max(half_dim - 1, 1)
        freqs = torch.exp(torch.arange(half_dim, device=t.device) * -log_step)
        angles = t[:, None] * freqs[None, :]
        emb = torch.cat([angles.sin(), angles.cos()], dim=-1)
        if self.dim % 2 == 1:
            # sin/cos halves only cover dim - 1 columns for odd dim; pad the last one.
            emb = F.pad(emb, (0, 1))
        return emb  # [B, dim]
    

class StrongUNet(nn.Module):
    """Four-level 2-D U-Net whose bottleneck and decoder stages are time-conditioned.

    The encoder widens from ``base_channels`` to ``8 * base_channels`` with a 2x
    max-pool before each of the stages enc2..enc4 and the bottleneck (``16 *
    base_channels``). Each decoder stage upsamples 2x with a transposed
    convolution, adds a linear projection of the time embedding, and is
    concatenated with the centre-cropped encoder output of the same level. A
    1x1 convolution maps the last concatenation back to ``in_channels``.

    Args:
        in_channels: channels of the input (and of the output).
        base_channels: width of the first encoder stage.
        time_emb_dim: width of the sinusoidal time embedding.
    """

    def __init__(self, in_channels=1, base_channels=64, time_emb_dim=256):
        super().__init__()
        c = base_channels
        bottleneck_ch = 16 * c

        # Encoder
        self.enc1 = self.conv_block(in_channels, c)
        self.enc2 = self.conv_block(c, 2 * c)
        self.enc3 = self.conv_block(2 * c, 4 * c)
        self.enc4 = self.conv_block(4 * c, 8 * c)

        # Bottleneck
        self.middle = self.conv_block(8 * c, bottleneck_ch)

        # Decoder: dec3..dec1 take twice their predecessor's output width
        # because the skip connection is concatenated on the channel axis.
        self.dec4 = self.up_block(bottleneck_ch, 8 * c)
        self.dec3 = self.up_block(16 * c, 4 * c)
        self.dec2 = self.up_block(8 * c, 2 * c)
        self.dec1 = self.up_block(4 * c, c)

        self.out = nn.Conv2d(2 * c, in_channels, kernel_size=1)

        # Time embedding: sinusoidal features -> 2-layer MLP -> bottleneck width
        self.time_mlp = nn.Sequential(
            SinusoidalPosEmb(time_emb_dim),
            nn.Linear(time_emb_dim, 4 * time_emb_dim),
            nn.ReLU(),
            nn.Linear(4 * time_emb_dim, bottleneck_ch),
        )

        # One projection per decoder stage, down to that stage's output width
        self.tproj_dec4 = nn.Linear(bottleneck_ch, 8 * c)
        self.tproj_dec3 = nn.Linear(bottleneck_ch, 4 * c)
        self.tproj_dec2 = nn.Linear(bottleneck_ch, 2 * c)
        self.tproj_dec1 = nn.Linear(bottleneck_ch, c)

    def conv_block(self, in_ch, out_ch):
        """Two (3x3 conv -> BatchNorm -> ReLU) units; the first changes the width."""
        layers = []
        for src_ch in (in_ch, out_ch):
            layers += [
                nn.Conv2d(src_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ]
        return nn.Sequential(*layers)

    def up_block(self, in_ch, out_ch):
        """2x upsampling transposed convolution followed by ReLU."""
        upsample = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)
        return nn.Sequential(upsample, nn.ReLU(inplace=True))

    def forward(self, x, t):
        """Run the network on ``x`` ([B, C, H, W]) conditioned on ``t`` ([B])."""
        t_emb = self.time_mlp(t.float())  # [B, base_channels*16]

        # Encoder pass; keep every stage output for the skip connections.
        feat = self.enc1(x)
        skips = [feat]
        for encoder in (self.enc2, self.enc3, self.enc4):
            feat = encoder(F.max_pool2d(feat, 2))
            skips.append(feat)

        # Bottleneck, with the time embedding broadcast over H and W.
        feat = self.middle(F.max_pool2d(feat, 2))
        feat = feat + t_emb[:, :, None, None]

        # Decoder pass, deepest skip first.
        stages = (
            (self.dec4, self.tproj_dec4),
            (self.dec3, self.tproj_dec3),
            (self.dec2, self.tproj_dec2),
            (self.dec1, self.tproj_dec1),
        )
        for (upsample, t_proj), skip in zip(stages, reversed(skips)):
            up = upsample(feat)
            up = up + t_proj(t_emb)[:, :, None, None]
            skip_cropped = center_crop(skip, up.shape[2:])
            feat = torch.cat([up, skip_cropped], dim=1)

        return self.out(feat)


In [7]:
# Linear beta schedule, as commonly used for DDPM
def make_beta_schedule(T, beta_start=1e-4, beta_end=0.02):
    """Return ``T`` evenly spaced values from ``beta_start`` to ``beta_end`` (both included)."""
    return torch.linspace(start=beta_start, end=beta_end, steps=T)

# NOTE(review): binding `T` here replaces the `torchaudio.transforms as T`
# alias created in the import cell.
T = 1000  # number of diffusion steps
betas = make_beta_schedule(T)  # [T]
alphas = 1.0 - betas
alphas_cumprod = alphas.cumprod(dim=0)  # [T], running product of the alphas

def train_ddpm(model, dataloader, shared_log, device, train_steps=None):
    """Train ``model`` as a DDPM noise predictor for a fixed number of steps.

    Each step draws a random diffusion time-step per batch item, noises the
    clean mel batch accordingly and regresses the model output onto the noise
    with an MSE loss.

    The dataloader is re-iterated as often as needed: a single pass may hold
    fewer batches than the requested step count (e.g. 5000 samples at batch
    size 8 is only 625 batches), and stopping there would silently cut training
    short.

    Args:
        model: module called as ``model(x_t, t)`` returning a tensor shaped like ``x_t``.
        dataloader: yields ``(mel, _)`` pairs; ``mel`` is expected to be 4-D
            ([B, C, H, W]) because the schedule is broadcast as [B, 1, 1, 1].
        shared_log: sized container; only ``len(shared_log)`` is shown in the progress bar.
        device: device the batches and the noise schedule are moved to.
        train_steps: number of optimiser steps; defaults to ``TRAIN_STEPS``.

    Raises:
        ValueError: if the dataloader yields no batches at all.
    """
    total_steps = TRAIN_STEPS if train_steps is None else train_steps

    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1500, gamma=0.5)
    loss_fn = nn.MSELoss()
    model.train()

    num_timesteps = 1000
    betas = make_beta_schedule(num_timesteps).to(device)
    alphas_cumprod = torch.cumprod(1. - betas, dim=0)

    def _endless_batches():
        # Restart the dataloader whenever it is exhausted.
        while True:
            yielded = False
            for batch in dataloader:
                yielded = True
                yield batch
            if not yielded:
                raise ValueError("dataloader produced no batches")

    batches = _endless_batches()
    # range() comes first in zip so no extra batch is fetched after the last step.
    pbar = tqdm(zip(range(total_steps), batches), total=total_steps, desc="Training DDPM")
    try:
        for step, (mel, _) in pbar:
            mel = mel.to(device)  # shape: [B, ...]
            B = mel.size(0)

            # Sample random timesteps for each item in batch
            t = torch.randint(0, num_timesteps, (B,), device=device).long()  # [B]

            # Cumulative alpha for each sampled t, broadcastable over the mel batch
            alphas_cumprod_t = alphas_cumprod[t].view(B, 1, 1, 1)

            # Forward diffusion: mix clean data and Gaussian noise
            noise = torch.randn_like(mel)
            x_t = torch.sqrt(alphas_cumprod_t) * mel + torch.sqrt(1 - alphas_cumprod_t) * noise

            # Predict noise (most common DDPM target)
            pred_noise = model(x_t, t)

            loss = loss_fn(pred_noise, noise)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            if step % 10 == 0:
                pbar.set_postfix({
                    "loss": f"{loss.item():.4f}",
                    "unique": len(shared_log)
                })
    finally:
        # Drop the suspended dataloader iterator so its workers can shut down.
        batches.close()



In [8]:
# --- WAV loading ---
# Collect every entry of `wav_dir` whose name ends with ".wav" (case-sensitive).
wav_dir = "data3"
wav_files = [os.path.join(wav_dir, f) for f in os.listdir(wav_dir) if f.endswith(".wav")]

# --- Shared log for snippet tracking ---
# A new Manager and proxy dict are created here, rebinding the names from the
# earlier cell.
manager = Manager()
shared_snippet_log = manager.dict()

# --- Dataset and DataLoader ---
# The dataset decodes all files in its constructor; the DataLoader then uses
# 8 worker processes to build batches of BATCH_SIZE snippets.
dataset = DynamicAudioMelodyDataset(wav_files, shared_log=shared_snippet_log)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)

# --- Import your StrongUNet (with time conditioning) ---
# from your_model_file import StrongUNet

model = StrongUNet().cuda()  # <-- use the *patched* time-aware version

# --- Train ---
# The same proxy dict goes to the dataset (which writes to it) and to the
# trainer (which reports its size in the progress bar).
train_ddpm(model, dataloader, shared_log=shared_snippet_log, device=torch.device("cuda"))

# --- Save ---
# Create the checkpoint directory if needed and store only the weights (state_dict).
os.makedirs("checkpoints2", exist_ok=True)
torch.save(model.state_dict(), "checkpoints2/trained_ddpm_melody.pt")
print("Training complete. Model saved.")


Preloading audio files:   0%|          | 0/4 [00:00<?, ?it/s]

Preloading audio files: 100%|██████████| 4/4 [00:01<00:00,  2.65it/s]
Training DDPM:   0%|          | 0/5000 [00:01<?, ?it/s]


KeyboardInterrupt: 

# Vocoder

In [None]:
class HiFiGANGenerator(nn.Module):
    """Small convolutional mel-to-waveform generator.

    A width-7 convolution lifts the mel input to 512 channels; two transposed
    convolutions with stride 8 then upsample the time axis by 8 * 8 = 64, and a
    final width-7 convolution reduces to a single waveform channel.

    Input:  ``[B, n_mels, T]``
    Output: ``[B, 1, 64 * T]``

    NOTE(review): MelAudioDataset computes its mels with hop_length=256, i.e.
    256 audio samples per frame, while this network emits 64 samples per frame.
    The training loop truncates both signals to the shorter length, so output
    and target are not time-aligned - confirm whether that is intended.

    Args:
        n_mels: number of mel channels in the input (default 80).
    """

    def __init__(self, n_mels=80):
        super().__init__()
        self.mel_conv = nn.Conv1d(n_mels, 512, 7, padding=3)
        self.upsample = nn.Sequential(
            # kernel 16 / stride 8 / padding 4 gives exactly 8x the input length
            nn.ConvTranspose1d(512, 256, 16, stride=8, padding=4),
            nn.LeakyReLU(0.2),
            nn.ConvTranspose1d(256, 128, 16, stride=8, padding=4),
            nn.LeakyReLU(0.2),
            nn.Conv1d(128, 1, 7, padding=3)
        )

    def forward(self, mel):
        """Map a mel batch ``[B, n_mels, T]`` to a waveform batch ``[B, 1, 64 * T]``."""
        x = self.mel_conv(mel)
        return self.upsample(x)

In [None]:
class MelAudioDataset(Dataset):
    """Random (mel, waveform) segment pairs for vocoder training.

    Every item is produced by loading one randomly chosen file in full and
    cutting a random window of ``segment_duration`` seconds out of it; the
    index passed to ``__getitem__`` is ignored. The reported length is 20 items
    per file.

    Args:
        wav_paths: sequence of audio file paths.
        segment_duration: window length in seconds.
        sr: sample rate used for loading and for the mel computation.
    """

    def __init__(self, wav_paths, segment_duration=2.0, sr=22050):
        self.sr = sr
        self.segment_samples = int(segment_duration * sr)
        self.wav_paths = wav_paths

    def __len__(self):
        return 20 * len(self.wav_paths)

    def __getitem__(self, idx):
        wav_path = random.choice(self.wav_paths)
        waveform, _ = librosa.load(wav_path, sr=self.sr)

        # Zero-pad files shorter than one segment.
        shortfall = self.segment_samples - len(waveform)
        if shortfall > 0:
            waveform = np.pad(waveform, (0, shortfall))

        offset = random.randint(0, len(waveform) - self.segment_samples)
        segment = waveform[offset:offset + self.segment_samples]

        mel_power = librosa.feature.melspectrogram(
            y=segment, sr=self.sr, n_fft=1024, hop_length=256, n_mels=80
        )
        mel_db = librosa.power_to_db(mel_power).astype(np.float32)

        # mel: [80, frames]; audio: [1, T]
        return torch.tensor(mel_db), torch.tensor(segment).unsqueeze(0)

In [None]:
def train_hifigan(generator, dataloader, epochs=20, lr=2e-4, step_size=5, gamma=0.5):
    """Train ``generator`` to regress waveforms from mels with an MSE loss.

    Batches are moved to whatever device the generator's parameters live on, so
    the function works for both CUDA and CPU generators.

    Args:
        generator: module mapping mel ``[B, n_mels, T]`` to audio ``[B, 1, samples]``.
        dataloader: yields ``(mel, audio)`` batches.
        epochs: number of passes over ``dataloader``.
        lr: initial Adam learning rate.
        step_size: the learning rate is decayed every ``step_size`` epochs.
        gamma: multiplicative decay factor.

    Returns:
        The same ``generator`` object, trained in place.
    """
    gen_opt = torch.optim.Adam(generator.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(gen_opt, step_size=step_size, gamma=gamma)
    loss_fn = nn.MSELoss()
    generator.train()

    # Follow the generator's device instead of assuming CUDA.
    device = next(generator.parameters()).device

    for epoch in range(epochs):
        pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}")
        running_loss = 0.0

        for step, (mel, audio) in enumerate(pbar):
            mel = mel.to(device)           # [B, 80, T]
            audio = audio.to(device)       # [B, 1, samples]
            gen_out = generator(mel)       # [B, 1, samples]

            # Output and target may differ in length; compare the common prefix.
            min_len = min(gen_out.shape[-1], audio.shape[-1])
            loss = loss_fn(gen_out[..., :min_len], audio[..., :min_len])

            gen_opt.zero_grad()
            loss.backward()
            gen_opt.step()

            running_loss += loss.item()
            avg_loss = running_loss / (step + 1)
            pbar.set_postfix({"avg_loss": f"{avg_loss:.4f}", "lr": gen_opt.param_groups[0]['lr']})

        # Learning-rate schedule advances once per epoch.
        scheduler.step()

    return generator



In [None]:
def plot_concat_mel(original_audio, remix_audio, sr=22050):
    """Plot one mel-spectrogram of ``original_audio`` followed by ``remix_audio``.

    The two waveforms are joined end to end, converted to an 80-band mel
    spectrogram in dB relative to its maximum, and shown with a colour bar.
    """
    hop = 512
    joined = np.concatenate([original_audio, remix_audio])

    mel_power = librosa.feature.melspectrogram(y=joined, sr=sr, n_fft=2048, hop_length=hop, n_mels=80)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    plt.figure(figsize=(12, 4))
    mesh = librosa.display.specshow(mel_db, sr=sr, hop_length=hop, y_axis='mel', x_axis='time', cmap='magma')
    plt.title("Concatenated (Original + Remix) Mel-Spectrogram")
    plt.colorbar(mesh, format='%+2.0f dB')
    plt.tight_layout()
    plt.show()


In [None]:
def griffin_lim_vocoder(mel_spec, sr=22050, n_iter=32):
    """Invert a dB-scaled mel spectrogram tensor to a waveform via librosa.

    The leading axis of ``mel_spec`` is squeezed away (if it has size 1), the
    values are converted from dB back to power, and librosa's mel-to-audio
    inversion is run for ``n_iter`` iterations.
    """
    mel_db = mel_spec.squeeze(0).detach().cpu().numpy()
    mel_power = librosa.db_to_power(mel_db)
    return librosa.feature.inverse.mel_to_audio(
        mel_power, sr=sr, n_fft=2048, hop_length=512, n_iter=n_iter
    )

def extract_pitch_classes(audio, sr=22050, hop_length=512):
    """Return the dominant chroma bin per frame as a 1-D int64 tensor.

    A CQT chromagram is computed, smoothed with a median nearest-neighbour
    filter, and reduced to the index of its largest bin in every frame.
    """
    chroma_raw = librosa.feature.chroma_cqt(y=audio, sr=sr, hop_length=hop_length)
    chroma_smooth = librosa.decompose.nn_filter(chroma_raw, aggregate=np.median)
    dominant_bin = chroma_smooth.argmax(axis=0)
    return torch.tensor(dominant_bin, dtype=torch.long)

def ancestral_sample(model, x_T, steps=50, num_train_timesteps=1000):
    """Denoise ``x_T`` with the time-conditioned noise predictor ``model``.

    The model is called as ``model(x, t)`` (matching ``StrongUNet.forward`` and
    the training loop), using ``steps`` time-steps spread evenly from
    ``num_train_timesteps - 1`` down to 0 on the same linear beta schedule as
    training. Each step predicts the noise, estimates the clean sample from it
    and re-noises that estimate to the next (lower) time-step.

    NOTE: despite the function name the update is deterministic (no fresh noise
    is injected), which keeps the result a differentiable function of ``x_T``.

    Args:
        model: noise predictor taking ``(x [B, C, H, W], t [B])``.
        x_T: starting noise, ``[B, C, H, W]`` or ``[C, H, W]``; not modified.
        steps: number of denoising steps; ``<= 0`` returns the input copy.
        num_train_timesteps: length of the diffusion schedule used in training.

    Returns:
        The denoised tensor, always 4-D.
    """
    x = x_T.clone()
    if x.dim() == 3:
        x = x.unsqueeze(0)  # [C, H, W] -> [1, C, H, W]
    if steps <= 0:
        return x

    betas = make_beta_schedule(num_train_timesteps).to(x.device)
    alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)
    # Plain Python ints, highest (noisiest) time-step first.
    timesteps = torch.linspace(num_train_timesteps - 1, 0, steps).round().long().tolist()
    batch = x.size(0)

    for i in trange(steps, desc="Sampling"):
        t = timesteps[i]
        a_t = alphas_cumprod[t]
        # After the last step the target is the clean sample (cumulative alpha = 1).
        a_prev = alphas_cumprod[timesteps[i + 1]] if i + 1 < steps else torch.ones_like(a_t)

        t_batch = torch.full((batch,), t, device=x.device, dtype=torch.long)
        eps = model(x, t_batch)

        # Clean-sample estimate implied by the predicted noise ...
        x0_pred = (x - torch.sqrt(1.0 - a_t) * eps) / torch.sqrt(a_t)
        # ... moved to the noise level of the next time-step.
        x = torch.sqrt(a_prev) * x0_pred + torch.sqrt(1.0 - a_prev) * eps
    return x


def _mel_matrix(x):
    """Drop leading singleton axes: [1, 1, M, T] or [1, M, T] -> [M, T]."""
    if x.dim() == 4:
        return x.squeeze(0).squeeze(0)
    if x.dim() == 3:
        return x.squeeze(0)
    return x


def ditto_optimize_latent(model, vocoder, y_target, target_len_samples, sr=22050, steps=70, latent_shape=(1, 80, 512)):
    """Optimise an initial noise latent so the sampled mel matches ``y_target``, then vocode it.

    A latent of ``latent_shape`` is optimised with Adam: each step runs a short
    (10-step) sampling pass, projects every mel frame to 12 logits with a fixed
    linear layer and minimises the cross-entropy against ``y_target``. The
    final latent is then sampled with 30 steps, passed through ``vocoder`` and
    cut or zero-padded to ``target_len_samples``.

    NOTE(review): the 12-way projection is randomly initialised and never
    trained (only the latent is handed to the optimiser), so the loss is taken
    against an arbitrary linear read-out - confirm this is intended.

    Args:
        model: diffusion model passed to ``ancestral_sample``.
        vocoder: callable mapping mel ``[1, M, T]`` to a waveform tensor.
        y_target: 1-D int64 tensor of per-frame class targets.
        target_len_samples: exact length of the returned waveform.
        sr: unused; kept for call compatibility.
        steps: number of latent optimisation steps.
        latent_shape: shape of the latent; index 1 is taken as the mel-bin count.

    Returns:
        1-D numpy array with ``target_len_samples`` samples.
    """
    # Work on the model's device (falls back to CUDA for parameter-less models).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    x_T = torch.randn(latent_shape, device=device, requires_grad=True)
    optimizer = torch.optim.Adam([x_T], lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.7)  # decay every 30 steps

    y_target = y_target.to(device)
    project = torch.nn.Linear(latent_shape[1], 12).to(device)  # Only ONCE before the loop

    # Sample in eval mode so normalisation layers use their running statistics
    # and are not updated by single-item batches; the previous mode is restored.
    was_training = model.training
    model.eval()
    try:
        for step in range(steps):
            x_0 = ancestral_sample(model, x_T, steps=10)
            mel = _mel_matrix(x_0)  # [80, T]
            # Normalise over the mel-bin axis of each frame. (Normalising axis 1
            # of the 4-D tensor would act on the size-1 channel axis instead.)
            frames = torch.nn.functional.normalize(mel, dim=0).permute(1, 0)  # [T, 80]
            y_hat = project(frames)  # [T, 12]

            min_len = min(len(y_target), y_hat.shape[0])
            loss = torch.nn.functional.cross_entropy(y_hat[:min_len], y_target[:min_len])

            optimizer.zero_grad()
            # Only the latent needs a gradient; skip accumulating into model weights.
            loss.backward(inputs=[x_T])
            optimizer.step()
            scheduler.step()
            if device.type == 'cuda':
                torch.cuda.empty_cache()  # optional, but helps avoid CUDA memory issues

        # No gradients are needed from here on; without no_grad the 30-step
        # sampling graph is kept alive through the model weights.
        with torch.no_grad():
            x_final = ancestral_sample(model, x_T.detach(), steps=30)
            mel_final = _mel_matrix(x_final).unsqueeze(0)  # [1, 80, T]
            audio = vocoder(mel_final).squeeze().cpu().numpy()  # [T]
    finally:
        model.train(was_training)

    # Adjust output length to match original
    if len(audio) > target_len_samples:
        audio = audio[:target_len_samples]
    elif len(audio) < target_len_samples:
        audio = np.pad(audio, (0, target_len_samples - len(audio)))

    return audio

In [None]:
def extract_random_snippet(wav_path, duration_sec=5, sr=22050):
    """Load ``wav_path`` and return a random window of exactly ``duration_sec`` seconds.

    Files shorter than the window are zero-padded at the end first, so the
    result always has ``int(sr * duration_sec)`` samples.

    Args:
        wav_path: path of the audio file.
        duration_sec: window length in seconds (int or float).
        sr: sample rate used for loading.

    Returns:
        1-D numpy array of length ``int(sr * duration_sec)``.
    """
    y, _ = librosa.load(wav_path, sr=sr)
    snippet_len = int(sr * duration_sec)  # int() keeps slicing valid for float durations
    if len(y) < snippet_len:
        y = np.pad(y, (0, snippet_len - len(y)))
    # Upper bound uses the (possibly padded) length, so start + snippet_len
    # never runs past the end of y.
    start = random.randint(0, len(y) - snippet_len)
    return y[start:start + snippet_len]

In [None]:
# Collect every entry of `wav_dir` whose name ends with ".wav".
wav_dir = "data3"
wav_files = [os.path.join(wav_dir, f) for f in os.listdir(wav_dir) if f.endswith(".wav")]

# Random (mel, waveform) segments for the vocoder.
dataset = MelAudioDataset(wav_files)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=16)

generator = HiFiGANGenerator().cuda()
trained_gen = train_hifigan(generator, dataloader, epochs=20)

# Create the checkpoint directory here as well: this cell must not depend on
# the DDPM cell having run to completion.
os.makedirs("checkpoints2", exist_ok=True)
torch.save(trained_gen.state_dict(), "checkpoints2/trained_hifigan.pt")

Epoch 1: 100%|██████████| 10/10 [00:01<00:00,  5.82it/s, avg_loss=0.2127, lr=0.0002]
Epoch 2: 100%|██████████| 10/10 [00:01<00:00,  6.03it/s, avg_loss=0.0910, lr=0.0002]
Epoch 3: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s, avg_loss=0.0875, lr=0.0002]
Epoch 4: 100%|██████████| 10/10 [00:01<00:00,  5.82it/s, avg_loss=0.0885, lr=0.0002]
Epoch 5: 100%|██████████| 10/10 [00:01<00:00,  6.16it/s, avg_loss=0.0824, lr=0.0002]
Epoch 6: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s, avg_loss=0.0833, lr=0.0001]
Epoch 7: 100%|██████████| 10/10 [00:01<00:00,  5.75it/s, avg_loss=0.0823, lr=0.0001]
Epoch 8: 100%|██████████| 10/10 [00:01<00:00,  5.56it/s, avg_loss=0.0795, lr=0.0001]
Epoch 9: 100%|██████████| 10/10 [00:01<00:00,  6.03it/s, avg_loss=0.0813, lr=0.0001]
Epoch 10: 100%|██████████| 10/10 [00:01<00:00,  5.67it/s, avg_loss=0.0799, lr=0.0001]
Epoch 11: 100%|██████████| 10/10 [00:01<00:00,  6.11it/s, avg_loss=0.0908, lr=5e-5]
Epoch 12: 100%|██████████| 10/10 [00:01<00:00,  6.08it/s, avg_los

In [None]:
# Generate two remixed snippets and write them back to back into one WAV file.
# Relies on `model` (DDPM cell), `generator` (vocoder cell) and `wav_files`
# already being defined by earlier cells.
combined_audio = []

for i in range(2):
    # Pick a random source file and cut a random 2-second snippet from it.
    path = random.choice(wav_files)
    snippet = extract_random_snippet(path, duration_sec=2, sr=SR)

    # Per-frame dominant chroma bins of the snippet are the optimisation target.
    y_target = extract_pitch_classes(snippet, sr=SR)
    # ditto_optimize_latent trims or zero-pads its output to len(snippet) samples
    remix_audio = ditto_optimize_latent(model, generator, y_target, len(snippet), sr=SR, steps=50)

    # Visualize the original and the remix in one concatenated mel-spectrogram
    plot_concat_mel(snippet, remix_audio, sr=SR)

    combined_audio.append(remix_audio)

# Concatenate and save the final audio
final_audio = np.concatenate(combined_audio)
sf.write("remixed_combined.wav", final_audio, SR)
print("Saved connected remix to remixed_combined.wav")


Sampling: 100%|██████████| 10/10 [00:00<00:00, 617.15it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 391.03it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 393.73it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 389.79it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 402.86it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 405.40it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 402.64it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 387.06it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 393.92it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 390.73it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 390.22it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 394.85it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 395.68it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 404.31it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 403.76it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 409.95it/s]
Sampling: 100%|██████████| 10/10 [00:00<00:00, 398.80it/

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.62 GiB of which 13.06 MiB is free. Process 4015399 has 52.00 MiB memory in use. Process 4015398 has 52.00 MiB memory in use. Process 4015428 has 52.00 MiB memory in use. Process 4015364 has 52.00 MiB memory in use. Process 4015464 has 52.00 MiB memory in use. Process 4015456 has 52.00 MiB memory in use. Process 4015383 has 52.00 MiB memory in use. Process 4015405 has 52.00 MiB memory in use. Process 4015403 has 52.00 MiB memory in use. Process 4015440 has 52.00 MiB memory in use. Process 4015373 has 52.00 MiB memory in use. Process 4015259 has 52.00 MiB memory in use. Process 4015475 has 52.00 MiB memory in use. Process 4015202 has 52.00 MiB memory in use. Process 4015471 has 52.00 MiB memory in use. Process 4015427 has 52.00 MiB memory in use. Process 4015378 has 52.00 MiB memory in use. Process 4015451 has 52.00 MiB memory in use. Process 4015460 has 52.00 MiB memory in use. Process 4015248 has 52.00 MiB memory in use. Process 4015457 has 52.00 MiB memory in use. Process 4015363 has 52.00 MiB memory in use. Process 4015397 has 52.00 MiB memory in use. Process 4015458 has 52.00 MiB memory in use. Process 4015472 has 52.00 MiB memory in use. Process 4015241 has 52.00 MiB memory in use. Process 4015237 has 52.00 MiB memory in use. Process 4015454 has 52.00 MiB memory in use. Process 4015366 has 52.00 MiB memory in use. Process 4015453 has 52.00 MiB memory in use. Process 4015465 has 52.00 MiB memory in use. Process 4015367 has 52.00 MiB memory in use. Process 4015244 has 52.00 MiB memory in use. Process 4015400 has 52.00 MiB memory in use. Process 4015239 has 52.00 MiB memory in use. Process 4015402 has 52.00 MiB memory in use. Process 4015371 has 52.00 MiB memory in use. Process 4015242 has 52.00 MiB memory in use. Process 4015201 has 52.00 MiB memory in use. Process 4015474 has 52.00 MiB memory in use. Process 4015265 has 52.00 MiB memory in use. 
Process 4015455 has 52.00 MiB memory in use. Process 4015382 has 52.00 MiB memory in use. Process 4015525 has 52.00 MiB memory in use. Process 4015384 has 52.00 MiB memory in use. Process 4015249 has 52.00 MiB memory in use. Process 4015240 has 52.00 MiB memory in use. Process 4015243 has 52.00 MiB memory in use. Process 4015432 has 52.00 MiB memory in use. Process 4015395 has 52.00 MiB memory in use. Process 4015329 has 52.00 MiB memory in use. Process 4015365 has 52.00 MiB memory in use. Process 4015433 has 52.00 MiB memory in use. Process 4015404 has 52.00 MiB memory in use. Process 4015206 has 52.00 MiB memory in use. Process 4015266 has 52.00 MiB memory in use. Process 4015396 has 52.00 MiB memory in use. Process 4015469 has 52.00 MiB memory in use. Process 4015459 has 52.00 MiB memory in use. Process 4015444 has 52.00 MiB memory in use. Process 4015256 has 52.00 MiB memory in use. Process 4015359 has 52.00 MiB memory in use. Process 4015213 has 52.00 MiB memory in use. Process 2307265 has 1.65 GiB memory in use. Process 3375915 has 924.00 MiB memory in use. Process 3528805 has 738.00 MiB memory in use. Including non-PyTorch memory, this process has 7.78 GiB memory in use. Of the allocated memory 7.67 GiB is allocated by PyTorch, and 21.96 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)