In [1]:
import os
import sys
import warnings
import subprocess
from subprocess import call
from functools import partial

import librosa

from tqdm import tqdm

import numpy as np

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn.functional import pad

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [5]:
path_folder_in = '/mnt/d/musdb18/test'
path_folder_out = '/mnt/d/createdmusdb18/test'

# Streams 0..n_streams-1 of every input container are written out as
# separate mp3 files named "<input name without extension>.<stream>.mp3".
n_streams = 5

# ffmpeg does not create missing folders, and its console output is discarded
# below, so a missing output folder would otherwise fail without any trace.
os.makedirs(path_folder_out, exist_ok=True)

# Resume marker: every file listed before the first name starting with
# "Nerve" is skipped (it was handled by an earlier, interrupted run).
# The listing is intentionally NOT sorted so that "before" keeps the same
# meaning as in that earlier run.
flag = False

# Output files whose ffmpeg call returned a non-zero exit code.
failed = []

files = os.listdir(path_folder_in)
for file in tqdm(files, position=0, leave=True):
    if not flag:
        if not file.startswith("Nerve"):
            continue
        flag = True
    file_in = os.path.join(path_folder_in, file)
    stem_name = os.path.splitext(file)[0]
    for i in range(n_streams):
        file_out = os.path.join(path_folder_out, f"{stem_name}.{i}.mp3")
        returncode = call(('ffmpeg', '-y', '-i', file_in, '-map', f'0:{i}', '-vn', file_out),
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.STDOUT)
        if returncode != 0:
            # Surface the failure instead of silently leaving a gap that would
            # later break the mixture/stem pairing of the dataset.
            failed.append(file_out)
            tqdm.write(f"ffmpeg failed (exit code {returncode}): {file_in} -> {file_out}")

if failed:
    print(f"{len(failed)} stream(s) could not be extracted")

100%|██████████| 50/50 [14:07<00:00, 16.96s/it]


In [3]:
class MUSDBDataset(Dataset):
    """Pairs of (mixture, separated sources) waveform crops cached as ``.npy``.

    ``data_dir`` holds audio files named ``<song>.<k>.<ext>``. They are
    converted once into ``<data_dir>/data_numpy/<song>.<k>.<ext>.npy``.
    Files whose stream field ``k`` is ``'0'`` are treated as mixtures; the
    ``n_sources`` remaining files of the same song are its separated sources.

    Args:
        data_dir: Folder containing the audio files.
        crop_size: Number of samples every returned waveform has. Longer
            files are cropped, shorter ones are zero-padded.

    Raises:
        ValueError: If the cache does not contain exactly ``n_sources``
            source files per mixture file.
    """

    # Number of separated-source files that belong to each mixture file.
    n_sources = 4

    def __init__(self, data_dir: str, crop_size: int = 284672):
        self.crop_size = crop_size
        self.data_dir = os.path.join(data_dir, 'data_numpy')
        os.makedirs(self.data_dir, exist_ok=True)

        # Convert whenever the cache holds fewer files than there are audio
        # files, instead of comparing against a fixed count that only fits
        # one particular split.
        n_audio = len(self.get_filenames(data_dir))
        n_cached = len(os.listdir(self.data_dir))
        if n_cached < n_audio:
            print("Data has not been saved as numpy object. Converting...")
            self.convert_to_numpy(data_dir, self.data_dir)

        self.music_fulllist = self.get_filenames(self.data_dir)
        self.music_list, self.sep_list = self.separate_source(self.music_fulllist)

        # __getitem__ addresses sources purely by position, so a missing or
        # extra file would silently pair mixtures with the wrong sources.
        if len(self.sep_list) != self.n_sources * len(self.music_list):
            raise ValueError(
                f"expected {self.n_sources} source files per mixture in "
                f"{self.data_dir}, found {len(self.sep_list)} source files "
                f"for {len(self.music_list)} mixtures"
            )

    def __len__(self):
        """Return the number of mixtures."""
        return len(self.music_list)

    def __getitem__(self, idx):
        """Return ``(mixture, sources)`` for the ``idx``-th mixture.

        ``mixture`` is the loaded array stacked to one leading row and
        ``sources`` stacks the ``n_sources`` matching source arrays; each
        waveform has exactly ``crop_size`` samples along its first axis.
        """
        if idx < 0:
            # A negative index would otherwise produce an empty source slice.
            idx += len(self)
        base_music = np.stack([self._load_crop(self.music_list[idx])])

        first = idx * self.n_sources
        sep_paths = self.sep_list[first:first + self.n_sources]
        sep_music = np.stack([self._load_crop(path) for path in sep_paths])
        return base_music, sep_music

    def _load_crop(self, path):
        """Load one cached array and crop / zero-pad it to ``crop_size`` samples."""
        wave = np.load(path)[:self.crop_size]
        missing = self.crop_size - wave.shape[0]
        if missing > 0:
            # Pad only the sample axis so all items can be stacked and batched.
            pad_width = [(0, missing)] + [(0, 0)] * (wave.ndim - 1)
            wave = np.pad(wave, pad_width)
        return wave

    def get_filenames(self, path):
        """Return the sorted full paths of all entries in ``path`` except ``data_numpy``.

        Sorting makes the result independent of the arbitrary order in which
        ``os.listdir`` reports directory entries.
        """
        return sorted(
            os.path.join(path, filename)
            for filename in os.listdir(path)
            if filename != "data_numpy"
        )

    def convert_to_numpy(self, music_dir, target_dir):
        """Decode every file of ``music_dir`` with ``librosa.load`` (default
        arguments) and save the samples as ``<target_dir>/<file name>.npy``.
        """
        music_list = self.get_filenames(music_dir)
        # Silence decoder warnings only while converting; the previous global
        # filter stayed active for the rest of the session.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            for music in tqdm(music_list):
                outfile_name = os.path.join(target_dir, os.path.basename(music))
                arr, _ = librosa.load(music)
                np.save(outfile_name, arr)

    def separate_source(self, mus_list):
        """Split cached file paths into ``(mixtures, sources)``.

        The third-from-last dot-separated field of a path is its stream
        index; ``'0'`` marks a mixture. Paths are ordered by (song prefix,
        stream index) first, so the sources of the i-th mixture occupy
        positions ``i * n_sources`` onwards of the returned source list.
        """
        def stem_key(path):
            parts = path.split(".")
            return ".".join(parts[:-3]), parts[-3]

        music_list = list()
        sep_list = list()
        for music in tqdm(sorted(mus_list, key=stem_key)):
            mus_type = music.split(".")[-3]
            if mus_type == '0':
                music_list.append(music)
            else:
                sep_list.append(music)

        return music_list, sep_list

In [50]:
# Folder with the extracted per-stream training audio files.
train_root = '/mnt/d/createdmusdb18/train'
train_ds = MUSDBDataset(data_dir=train_root)
# test_ds = MUSDBDataset('/mnt/d/musdb18/test')

100%|██████████| 500/500 [00:00<00:00, 1192919.23it/s]


Data has not been saved as numpy object. Converting...


  0%|          | 0/50 [00:05<?, ?it/s]


KeyboardInterrupt: 

In [56]:
# Shuffle the training set so every epoch visits the songs in a new order;
# without it the optimizer sees the identical sequence each epoch.
train_dataloader = DataLoader(train_ds, batch_size=1, shuffle=True)
# test_dataloader = DataLoader(test_ds, batch_size=4)

In [57]:

class DownSampling(nn.Module):
    """Two length-preserving ``Conv1d`` + ``LeakyReLU`` stages.

    The block itself does not decimate; it only changes the channel count
    from ``in_ch`` to ``out_ch``.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels of both convolutions.
        kernel_size: Convolution kernel width. Use an odd value: the padding
            is ``kernel_size // 2``, which keeps the length unchanged only
            for odd kernels.
    """

    def __init__(self, in_ch=1, out_ch=24, kernel_size=15):
        super().__init__()
        # Derive the padding from the kernel width (7 for the default 15)
        # so other kernel sizes keep the sequence length as well.
        padding = kernel_size // 2
        self.net = nn.Sequential(
            nn.Conv1d(in_ch, out_ch, kernel_size=kernel_size, padding=padding),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(out_ch, out_ch, kernel_size=kernel_size, padding=padding),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x: Tensor):
        """Apply both convolution stages to ``x`` of shape (batch, in_ch, length)."""
        return self.net(x)

In [58]:
class UpSampling(nn.Module):
    """Upsample by 2, merge with a skip connection, then convolve twice.

    Args:
        in_ch: Channel count after concatenating the upsampled input with
            the skip tensor.
        out_ch: Number of output channels of both convolutions.
        kernel_size: Convolution kernel width. Use an odd value: the padding
            is ``kernel_size // 2``, which keeps the length unchanged only
            for odd kernels.
    """

    def __init__(self, in_ch, out_ch, kernel_size):
        super().__init__()
        self.upsample = nn.Upsample(scale_factor=2, mode="linear", align_corners=True)
        # Derive the padding from the kernel width (2 for a kernel of 5)
        # so other kernel sizes keep the sequence length as well.
        padding = kernel_size // 2
        self.conv = nn.Sequential(
            nn.Conv1d(in_ch, out_ch, kernel_size=kernel_size, padding=padding),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(out_ch, out_ch, kernel_size=kernel_size, padding=padding),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x, x_back):
        """Upsample ``x``, align it with ``x_back`` and convolve the concatenation.

        ``x`` is padded with zeros on the right (or cropped, when the
        difference is negative) until its last dimension matches ``x_back``;
        both are then concatenated along the channel dimension.
        """
        x = self.upsample(x)
        diff = x_back.shape[-1] - x.shape[-1]
        x = pad(x, (0, diff))
        x = torch.cat([x, x_back], dim=1)
        return self.conv(x)

In [59]:
class WaveUNet(nn.Module):
    """1-D U-Net on raw waveforms built from DownSampling / UpSampling blocks.

    Encoder level ``k`` (1-based) outputs ``n_filters * k`` channels and is
    followed by a decimation that keeps every second sample. A bottleneck
    block outputs ``n_filters * (n_level + 1)`` channels. The decoder mirrors
    the encoder and consumes the encoder outputs as skip connections, deepest
    first. Finally the network input is concatenated with the last decoder
    output and mapped to ``n_source`` channels by 1x1 convolutions.

    Args:
        n_level: Number of encoder/decoder levels (must be >= 1).
        n_source: Number of output channels (separated sources).
        in_ch: Number of channels of the input waveform.
        n_filters: Channel increment per level.

    Raises:
        ValueError: If ``n_level`` is smaller than 1.
    """

    def __init__(self, n_level=12, n_source=4, in_ch=1, n_filters=24):
        super().__init__()
        if n_level < 1:
            raise ValueError(f"n_level must be at least 1, got {n_level}")
        self.level = n_level

        # Encoder: in_ch -> n_filters -> 2 * n_filters -> ... -> n_level * n_filters.
        layers = [DownSampling(in_ch=in_ch, out_ch=n_filters, kernel_size=15)]
        for k in range(1, n_level):
            layers.append(DownSampling(in_ch=n_filters * k, out_ch=n_filters * (k + 1), kernel_size=15))

        # Bottleneck (applied after the last decimation, not decimated itself).
        layers.append(DownSampling(in_ch=n_filters * n_level, out_ch=n_filters * (n_level + 1), kernel_size=15))

        # Decoder: level k receives (k + 1) * n_filters upsampled channels
        # plus the k * n_filters channels of the matching encoder output.
        for k in range(n_level, 0, -1):
            layers.append(UpSampling(in_ch=n_filters * (k + 1) + n_filters * k, out_ch=n_filters * k, kernel_size=5))

        self.net = nn.ModuleList(layers)
        # The last decoder output (n_filters channels) is concatenated with
        # the network input (in_ch channels) before the 1x1 convolutions.
        self.separation = nn.Sequential(
            nn.Conv1d(n_filters + in_ch, n_source, kernel_size=1),
            nn.LeakyReLU(inplace=True),
            nn.Conv1d(n_source, n_source, kernel_size=1),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x: Tensor):
        """Map ``x`` of shape (batch, in_ch, length) to (batch, n_source, length)."""
        network_input = x
        skips = []
        for layer in self.net[0: self.level]:
            x = layer(x)
            skips.append(x)
            # Decimate: keep every second sample, starting with the second.
            x = x[:, :, 1::2]
        x = self.net[self.level](x)

        # Each decoder block consumes the most recent unused encoder output.
        for layer in self.net[self.level + 1:]:
            x = layer(x, skips.pop())

        x = torch.cat([network_input, x], dim=1)
        x = self.separation(x)

        return x

In [60]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [61]:
# Build the network with its default configuration and move it to `device`.
model = WaveUNet()
model = model.to(device)
print(model)

WaveUNet(
  (net): ModuleList(
    (0): DownSampling(
      (net): Sequential(
        (0): Conv1d(1, 24, kernel_size=(15,), stride=(1,), padding=(7,))
        (1): LeakyReLU(negative_slope=0.01, inplace=True)
        (2): Conv1d(24, 24, kernel_size=(15,), stride=(1,), padding=(7,))
        (3): LeakyReLU(negative_slope=0.01, inplace=True)
      )
    )
    (1): DownSampling(
      (net): Sequential(
        (0): Conv1d(24, 48, kernel_size=(15,), stride=(1,), padding=(7,))
        (1): LeakyReLU(negative_slope=0.01, inplace=True)
        (2): Conv1d(48, 48, kernel_size=(15,), stride=(1,), padding=(7,))
        (3): LeakyReLU(negative_slope=0.01, inplace=True)
      )
    )
    (2): DownSampling(
      (net): Sequential(
        (0): Conv1d(48, 72, kernel_size=(15,), stride=(1,), padding=(7,))
        (1): LeakyReLU(negative_slope=0.01, inplace=True)
        (2): Conv1d(72, 72, kernel_size=(15,), stride=(1,), padding=(7,))
        (3): LeakyReLU(negative_slope=0.01, inplace=True)
      

In [62]:
# Mean squared error between predicted and reference waveforms.
loss_fn = nn.MSELoss()
# Adam over all model parameters.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-4,
    betas=[0.9, 0.999],
)

In [63]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Yields ``(X, y)`` tensor batches; ``dataloader.dataset``
            must support ``len``.
        model: Module being trained. Batches are moved to the device of its
            first parameter (CPU when it has no parameters).
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer updating the parameters of ``model``.

    Returns:
        The mean of the per-batch losses as a float, or ``nan`` when the
        dataloader yields no batch.
    """
    size = len(dataloader.dataset)

    # Use the model's own device rather than a notebook-level global so the
    # function works regardless of which cells were executed before.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Make sure the model is in training mode even if model.eval() ran earlier.
    model.train()

    total_loss = 0.0
    n_batches = 0
    seen = 0
    for X, y in dataloader:
        X, y = X.to(target_device), y.to(target_device)
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        n_batches += 1
        # Count the samples actually processed, which stays correct when the
        # final batch is smaller than the others.
        seen += len(X)
        print(f"loss : {batch_loss} ({seen}/{size})")

        # Release this batch's tensors before the next forward pass.
        del pred
        del loss
        del X, y

    return total_loss / n_batches if n_batches else float("nan")

In [64]:
# Number of full passes over the training data.
epochs = 10
for epoch_number in range(1, epochs + 1):
    print(f"epoch : {epoch_number}\n---------------------------")
    train(
        dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    # Hand cached GPU memory blocks back between epochs.
    torch.cuda.empty_cache()

RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
# First mixture of the training set as a float32 tensor on the model's device.
# A leading batch axis is added because the model indexes its input with three
# dimensions (batch, channel, sample); without it the forward pass fails and
# splittest[0][0] would be a single sample instead of a waveform.
splittest = torch.as_tensor(train_ds[0][0], dtype=torch.float32).unsqueeze(0).to(device)

In [None]:
model.eval()
with torch.no_grad():
    # The model indexes its input with three dimensions and lives on `device`:
    # add a batch axis when it is missing and move the input next to the weights.
    eval_input = splittest if splittest.dim() == 3 else splittest.unsqueeze(0)
    pred = model(eval_input.to(device))

In [None]:
import pydub 

def write(f, sr, x, normalized=False):
    """numpy array to MP3

    Args:
        f: Output path or file object handed to ``AudioSegment.export``.
        sr: Frame rate in Hz.
        x: Samples; an array of shape ``(n, 2)`` is written as two channels,
            anything else as a single channel.
        normalized: If True, ``x`` holds floats in [-1, 1) that are scaled by
            ``2 ** 15`` first; otherwise ``x`` is taken as 16-bit sample values.
    """
    x = np.asarray(x)
    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
    scaled = x * 2 ** 15 if normalized else x
    # Clip before converting: a sample of exactly 1.0 scales to 32768, which
    # does not fit into int16 and would wrap around to -32768.
    limits = np.iinfo(np.int16)
    y = np.clip(scaled, limits.min, limits.max).astype(np.int16)
    song = pydub.AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
    song.export(f, format="mp3", bitrate="320k")

# Flatten to the full mono waveform: indexing with [0][0] picks a single
# sample when the tensor has no batch axis. reshape(-1) yields the waveform
# for both a (1, n) and a (1, 1, n) tensor.
write('/content/gdrive/MyDrive/test.mp3', 22050,
      splittest.detach().cpu().numpy().reshape(-1), normalized=True)