In [1]:
import os
import warnings

import librosa

from tqdm import tqdm

import numpy as np

from torch.utils.data import Dataset

class MUSDBDataset(Dataset):
    """Dataset of cached ``.npy`` waveforms pairing each mixture with its separated sources.

    On construction the audio files found in ``data_dir`` are converted to NumPy
    arrays under ``<data_dir>/data_numpy`` unless that cache already holds enough
    files. A cached file whose third-from-last dot-separated name component is
    ``'0'`` is treated as a mixture; every other file is treated as a separated
    source. Item ``idx`` pairs mixture ``idx`` with the sources at positions
    ``idx * 4 .. idx * 4 + 3``, which is why file listings are sorted: the pairing
    must not depend on the arbitrary order returned by ``os.listdir``.

    Args:
        data_dir: Directory containing the audio files (and the ``data_numpy`` cache).
        crop_size: Number of leading samples kept from every loaded array.
        min_files: The conversion is (re)run when the cache directory is missing
            or contains fewer entries than this.
    """

    # Number of separated-source files that follow each mixture in ``sep_list``.
    STEMS_PER_TRACK = 4

    def __init__(self, data_dir: str, crop_size: int = 284672, min_files: int = 500):
        self.crop_size = crop_size
        self.data_dir = os.path.join(data_dir, 'data_numpy')
        cache_ready = (
            os.path.exists(self.data_dir)
            and len(os.listdir(self.data_dir)) >= min_files
        )
        if not cache_ready:
            print("Data has not been saved as numpy object. Converting...")
            os.makedirs(self.data_dir, exist_ok=True)
            self.convert_to_numpy(data_dir, self.data_dir)
        self.music_fulllist = self.get_filenames(self.data_dir)
        self.music_list, self.sep_list = self.separate_source(self.music_fulllist)

    def __len__(self) -> int:
        """Return the number of mixtures."""
        return len(self.music_list)

    def __getitem__(self, idx: int):
        """Load one mixture and its separated sources, cropped to ``crop_size`` samples.

        Returns:
            ``(base_music, sep_music)`` where ``base_music`` is the cropped mixture
            with a leading axis of length 1 and ``sep_music`` stacks the cropped
            sources along a leading axis.
        """
        if idx < 0:
            # A negative index would otherwise yield an empty source slice.
            idx += len(self)

        base_music = np.load(self.music_list[idx])
        base_music = np.stack([base_music[:self.crop_size]])

        first = idx * self.STEMS_PER_TRACK
        sep_paths = self.sep_list[first:first + self.STEMS_PER_TRACK]
        sep_music = np.stack([np.load(path)[:self.crop_size] for path in sep_paths])
        return base_music, sep_music

    def get_filenames(self, path):
        """Return the sorted full paths of the entries in ``path``, skipping ``data_numpy``."""
        return [
            os.path.join(path, filename)
            for filename in sorted(os.listdir(path))
            if filename != "data_numpy"
        ]

    def convert_to_numpy(self, music_dir, target_dir):
        """Load every file in ``music_dir`` with librosa and save it to ``target_dir`` via ``np.save``."""
        # Silence warnings only while converting instead of installing a
        # permanent process-wide "ignore" filter.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            for music in tqdm(self.get_filenames(music_dir)):
                outfile_name = os.path.join(target_dir, os.path.basename(music))
                arr, _ = librosa.load(music)
                np.save(outfile_name, arr)

    def separate_source(self, mus_list):
        """Split ``mus_list`` into ``(mixtures, separated_sources)``, preserving input order."""
        music_list = []
        sep_list = []
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            for music in tqdm(mus_list):
                # Third-from-last dot-separated component is the source id; '0' marks the mixture.
                mus_type = music.split(".")[-3]
                if mus_type == '0':
                    music_list.append(music)
                else:
                    sep_list.append(music)

        return music_list, sep_list

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root of the training data. Set the MUSDB_TRAIN_DIR environment variable to
# point elsewhere instead of editing the machine-specific default below.
ds = MUSDBDataset(os.environ.get('MUSDB_TRAIN_DIR', '/mnt/d/createdmusdb18/train'))

100%|██████████| 500/500 [00:00<00:00, 1150385.08it/s]


In [118]:
# from scipy.io.wavfile import write
# data = sample
# scaled = np.int16(data/np.max(np.abs(data)) * 32767)
# write('/mnt/c/Users/lijm1/Desktop/test.wav', 22050, scaled)

In [50]:
from torch.utils.data import DataLoader

In [51]:
# Wrap the dataset in a loader that yields batches of four items; no shuffle
# argument is passed here.
train_dataloader = DataLoader(ds, batch_size=4)

In [5]:
from model.waveunet import Waveunet

In [6]:
# Per-level feature counts: 32 doubled at each of six levels (32, 64, ..., 1024).
num_features = [32 << level for level in range(6)]

In [7]:
# Scratch expression: converts the literal 22000 to int; the value is only
# displayed as the cell result and is not stored anywhere.
int(22000)

22000

In [35]:
import os
import time
from functools import partial

import torch
import pickle
import numpy as np

import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torch.optim import Adam
from tqdm import tqdm

import model.utils as model_utils
import utils
from data.dataset import SeparationDataset
# from data.musdb import get_musdb_folds
# from data.utils import crop_targets, random_amplify
# from test import evaluate, validate
from model.waveunet import Waveunet

In [36]:
# Build the Wave-U-Net for the four listed instruments.
# NOTE(review): the positional arguments are presumably (input channels,
# per-level feature counts, output channels, instrument names) — confirm
# against model.waveunet.Waveunet.
model = Waveunet(1, num_features, 1, ["bass", "drums", "other", "vocals"], kernel_size=5,
                     target_output_size=32425, depth=1, strides=4,
                     conv_type="gn", res="fixed", separate=1)

model = model_utils.DataParallel(model)
# Only move to the GPU when one is present, so the notebook does not crash on
# CPU-only machines.
if torch.cuda.is_available():
    print("move model to gpu")
    model.cuda()

print('model: ', model)
print('parameter count: ', str(sum(p.numel() for p in model.parameters())))

Using valid convolutions with 42665 inputs and 33113 outputs
move model to gpu
model:  DataParallel(
  (module): Waveunet(
    (waveunets): ModuleDict(
      (bass): Module(
        (downsampling_blocks): ModuleList(
          (0): DownsamplingBlock(
            (pre_shortcut_convs): ModuleList(
              (0): ConvLayer(
                (filter): Conv1d(1, 32, kernel_size=(5,), stride=(1,))
                (norm): GroupNorm(4, 32, eps=1e-05, affine=True)
              )
            )
            (post_shortcut_convs): ModuleList(
              (0): ConvLayer(
                (filter): Conv1d(32, 64, kernel_size=(5,), stride=(1,))
                (norm): GroupNorm(8, 64, eps=1e-05, affine=True)
              )
            )
            (downconv): Resample1d()
          )
          (1): DownsamplingBlock(
            (pre_shortcut_convs): ModuleList(
              (0): ConvLayer(
                (filter): Conv1d(64, 64, kernel_size=(5,), stride=(1,))
                (norm): GroupNor

In [25]:
# Show the shape of the first element (base_music) of the first dataset item.
# NOTE(review): MUSDBDataset.__getitem__ stacks the cropped mixture into a
# leading axis, so the recorded 1-D shape below looks like it came from an
# earlier version of the dataset code — re-run to confirm.
train_dataloader.dataset[0][0].shape

(284672,)

In [26]:
import torch

In [29]:
# Random 1-D test tensor with 42665 elements (the input length reported when
# the model was built).
random = torch.rand((42665,))

In [30]:
# NOTE(review): the recorded run of this call failed with `KeyError: None`.
# Presumably the model needs an extra argument selecting one of its
# instruments, and an input shaped (batch, channels, samples) rather than a
# 1-D tensor — confirm against model.waveunet before relying on this cell.
model(random)

KeyError: None

In [38]:
from torch import Tensor

In [39]:
class UNet(nn.Module):
    """U-Net assembled from ``DoubleConv``, ``Down`` and ``Up`` blocks with a 1-D output head.

    The layer list is: one ``DoubleConv``, ``num_layers - 1`` ``Down`` blocks
    (each doubling the channel count), ``num_layers - 1`` ``Up`` blocks (each
    halving it), and a final kernel-size-1 convolution mapping to
    ``num_classes`` channels. The head is a ``Conv1d`` so that it accepts the
    ``(batch, channels, length)`` tensors produced by the ``Conv1d``-based
    ``DoubleConv``.

    Args:
        num_classes: Number of output channels.
        input_channels: Number of channels of the input tensor.
        num_layers: Number of resolution levels; must be at least 1.
        features_start: Channel count produced by the first block.
        bilinear: Forwarded to every ``Up`` block to choose its upsampling variant.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(
        self,
        num_classes: int,
        input_channels: int = 3,
        num_layers: int = 5,
        features_start: int = 64,
        bilinear: bool = False,
    ):

        if num_layers < 1:
            raise ValueError(f"num_layers = {num_layers}, expected: num_layers > 0")

        super().__init__()
        self.num_layers = num_layers

        layers = [DoubleConv(input_channels, features_start)]

        feats = features_start
        # Contracting path: channels double at every level.
        for _ in range(num_layers - 1):
            layers.append(Down(feats, feats * 2))
            feats *= 2

        # Expanding path: channels halve at every level.
        for _ in range(num_layers - 1):
            layers.append(Up(feats, feats // 2, bilinear))
            feats //= 2

        # 1-D head (was Conv2d, which rejects (batch, channels, length) inputs).
        layers.append(nn.Conv1d(feats, num_classes, kernel_size=1))

        self.layers = nn.ModuleList(layers)

    def forward(self, x: Tensor) -> Tensor:
        """Run the contracting path, then the expanding path with skip connections, then the head."""
        xi = [self.layers[0](x)]
        # Down path: keep every intermediate output for the skip connections.
        for layer in self.layers[1 : self.num_layers]:
            xi.append(layer(xi[-1]))
        # Up path: the i-th Up block consumes the running result and the skip
        # tensor i + 2 positions from the end.
        for i, layer in enumerate(self.layers[self.num_layers : -1]):
            xi[-1] = layer(xi[-1], xi[-2 - i])
        return self.layers[-1](xi[-1])


class DoubleConv(nn.Module):
    """[ Conv1d (kernel 3, padding 1, no bias) => ReLU ] x 2.

    The first convolution maps ``in_ch`` to ``out_ch`` channels and the second
    keeps ``out_ch``. No normalisation layer is applied, and the padding keeps
    the length of the last axis unchanged.
    """

    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        stages = []
        # Same order as writing the four layers out by hand: conv, relu, conv, relu.
        for stage_in in (in_ch, out_ch):
            stages.append(nn.Conv1d(stage_in, out_ch, kernel_size=3, padding=1, bias=False))
            stages.append(nn.ReLU(inplace=True))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv/ReLU stages to ``x``."""
        return self.net(x)


class Down(nn.Module):
    """Halve the length with a stride-2 ``MaxPool1d``, then apply a ``DoubleConv``."""

    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        pool = nn.MaxPool1d(kernel_size=2, stride=2)
        convs = DoubleConv(in_ch, out_ch)
        self.net = nn.Sequential(pool, convs)

    def forward(self, x: Tensor) -> Tensor:
        """Pool ``x`` and run the convolution block on the result."""
        return self.net(x)


class Up(nn.Module):
    """Upsample a 1-D feature map, concatenate the skip feature map, then apply ``DoubleConv``.

    The upsampling doubles the length and halves the channel count, using
    either linear interpolation followed by a kernel-size-1 ``Conv1d`` or a
    stride-2 ``ConvTranspose1d``. All layers are 1-D so that they match the
    ``Conv1d``-based ``DoubleConv`` that consumes the concatenated result.

    Args:
        in_ch: Channel count of the tensor to upsample; also the channel count
            of the concatenated tensor fed to ``DoubleConv``.
        out_ch: Channel count produced by ``DoubleConv``.
        bilinear: If True, use interpolation + ``Conv1d``; otherwise use
            ``ConvTranspose1d``. The name is kept for interface compatibility;
            on 1-D data the interpolation mode is ``"linear"``.
    """

    def __init__(self, in_ch: int, out_ch: int, bilinear: bool = False):
        super().__init__()
        if bilinear:
            self.upsample = nn.Sequential(
                nn.Upsample(scale_factor=2, mode="linear", align_corners=True),
                nn.Conv1d(in_ch, in_ch // 2, kernel_size=1),
            )
        else:
            self.upsample = nn.ConvTranspose1d(in_ch, in_ch // 2, kernel_size=2, stride=2)

        self.conv = DoubleConv(in_ch, out_ch)

    def forward(self, x1: Tensor, x2: Tensor) -> Tensor:
        """Upsample ``x1``, pad it to the length of ``x2``, concatenate on channels and convolve.

        Args:
            x1: Tensor from the deeper level, shaped ``(batch, in_ch, length)``.
            x2: Skip tensor from the contracting path, shaped ``(batch, in_ch // 2, length2)``.
        """
        x1 = self.upsample(x1)

        # Pad x1 along the length axis to the size of x2, splitting the
        # difference between both ends.
        diff = x2.shape[-1] - x1.shape[-1]
        x1 = nn.functional.pad(x1, [diff // 2, diff - diff // 2])

        # Concatenate along the channels axis
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


In [66]:
# First block lifts the single-channel waveform to 64 feature channels.
layer = DoubleConv(in_ch = 1, out_ch = 64)
# The Down block must accept the 64 channels produced by `layer` (it was
# declared with 32 input channels, which cannot consume that output); it
# reduces them to 32 channels.
layer2 = Down(64, 32)

In [67]:
# Smoke test: push the first batch through `layer` and then `layer2`, printing
# the shape after each step. `y` (the second element of each batch) is unused.
for x, y in train_dataloader:
    out = layer(x)
    print(out.shape)
    out = layer2(out)
    print(out.shape)
    break  # only the first batch is needed

torch.Size([4, 64, 284672])
torch.Size([4, 32, 142336])
