In [1]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import torch as th
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Variable
from dataset_lmdb import *
# Use cuDNN kernels for convolutions.
torch.backends.cudnn.enabled = True
# NOTE(review): benchmark mode lets cuDNN choose algorithms by timing them,
# which may make results differ between runs even with the fixed seeds set
# in the "Reproducibility" section below -- confirm this trade-off is intended.
torch.backends.cudnn.benchmark = True
# Apply seaborn's default plotting theme to matplotlib figures.
sns.set()

### Reproducibility :)

In [2]:
th.__version__

'1.0.0.dev20181014'

In [3]:
import hashlib

# Derive a fixed seed from the SHA-1 digest of a constant phrase, reduced
# modulo 10**8, and use the same value for the CPU and every CUDA generator.
SEED = int(hashlib.sha1(b'lucent').hexdigest(), 16) % (10 ** 8)
th.manual_seed(SEED)
th.cuda.manual_seed_all(SEED)

### Utility Functions / Modules

In [4]:
def initialize_weights(modules, initializer):
    """Initialise convolution/linear weights and reset batch-norm layers.

    Args:
        modules: iterable of ``nn.Module`` objects (e.g. ``model.modules()``).
        initializer: callable invoked as ``initializer(tensor=weight)`` that
            fills the given weight tensor in place.

    Behaviour per module type:
        * ``Conv1d`` / ``Conv2d`` / ``Linear``: weight is passed to
          ``initializer``; bias (if any) is zeroed.
        * ``BatchNorm1d`` / ``BatchNorm2d``: weight (if any) is filled with 1;
          bias (if any) is zeroed.
        * anything else is left untouched.
    """
    weighted_types = (nn.Conv1d, nn.Conv2d, nn.Linear)
    norm_types = (nn.BatchNorm1d, nn.BatchNorm2d)

    for m in modules:
        if isinstance(m, weighted_types):
            initializer(tensor=m.weight.data)
        elif isinstance(m, norm_types):
            # Batch-norm built with affine=False has no weight/bias at all.
            if m.weight is not None:
                m.weight.data.fill_(1)
        else:
            continue

        # Layers built with bias=False expose ``bias`` as None. Check for that
        # explicitly instead of swallowing every exception.
        if getattr(m, 'bias', None) is not None:
            m.bias.data.zero_()

In [5]:
class DenseConv1d(nn.Module):
    """Conv1d -> BatchNorm1d -> activation, optionally followed by MaxPool1d.

    Args:
        input_size: number of input channels.
        output_size: number of output channels.
        activation: activation class; it is instantiated with ``inplace=True``.
        kernel_size, stride, padding: forwarded to ``nn.Conv1d``.
        initializer: weight initialiser handed to ``initialize_weights``.
        pool: when true, append a max-pool whose window and stride are
            ``kernel_size`` rounded down to an even number
            (``kernel_size - kernel_size % 2``). Note that ``kernel_size=1``
            gives a window of 0; this file only uses that size with
            ``pool=False``.
    """
    def __init__(self, input_size, output_size, activation, kernel_size, stride, padding, initializer, pool=True):
        super(DenseConv1d, self).__init__()
        self.conv = nn.Conv1d(input_size, output_size,
                              kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn = nn.BatchNorm1d(output_size)
        self.act = activation(inplace=True)
        if pool:
            window = kernel_size - kernel_size % 2
            self.pool = nn.MaxPool1d(kernel_size=window, stride=window)
        else:
            self.pool = None
        initialize_weights(self.modules(), initializer)

    def forward(self, x):
        """Apply conv, batch-norm and activation, then the pool if one exists."""
        activated = self.act(self.bn(self.conv(x)))
        if self.pool is None:
            return activated
        return self.pool(activated)

In [6]:
class DenseConv2d(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, optionally followed by MaxPool2d.

    Args:
        input_size: number of input channels.
        output_size: number of output channels.
        activation: activation class; it is instantiated with ``inplace=True``.
        kernel_size, stride, padding: forwarded to ``nn.Conv2d``.
        initializer: weight initialiser handed to ``initialize_weights``.
        pool: when true, append a max-pool whose window and stride are
            ``kernel_size`` rounded down to an even number
            (``kernel_size - kernel_size % 2``). Note that ``kernel_size=1``
            gives a window of 0; this file only uses that size with
            ``pool=False``.
    """
    def __init__(self, input_size, output_size, activation, kernel_size, stride, padding, initializer, pool=True):
        super(DenseConv2d, self).__init__()
        self.conv = nn.Conv2d(input_size, output_size,
                              kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn = nn.BatchNorm2d(output_size)
        self.act = activation(inplace=True)
        if pool:
            window = kernel_size - kernel_size % 2
            self.pool = nn.MaxPool2d(kernel_size=window, stride=window)
        else:
            self.pool = None
        initialize_weights(self.modules(), initializer)

    def forward(self, x):
        """Apply conv, batch-norm and activation, then the pool if one exists."""
        activated = self.act(self.bn(self.conv(x)))
        if self.pool is None:
            return activated
        return self.pool(activated)

### 2D + 1D Convolution Architecture

In [7]:
class SubNet1D(nn.Module):
    """Densely connected 1D CNN that reduces a sequence to one vector.

    Each layer receives the channel-wise concatenation of the network input
    and all earlier layer outputs, each max-pooled to the current length.
    The first four layers add 32 channels each; the fifth is a kernel-size-1
    convolution producing ``out_dim`` channels, which are then averaged over
    the remaining length.

    Args:
        in_dim: feature size of the input (size of dim 2 of ``x``). The input
            widths of all layers are derived from it.
        out_dim: size of the returned vector.
        activation: name of an activation class in ``torch.nn``.
        initialization: name of an initialiser function in ``torch.nn.init``.
    """
    num_layers = 5

    def __init__(self, in_dim=128, out_dim=128, activation='ReLU', initialization='kaiming_uniform'):
        super(SubNet1D, self).__init__()
        self.activation = getattr(nn, activation)
        self.initialization = getattr(init, initialization)

        # Channels contributed by each of the first four layers.
        growth = 32

        # 1D CNN. Layer i sees the input plus the i previous 32-channel outputs,
        # so its input width is in_dim + i * growth (160/192/224/256 for the
        # default in_dim=128).
        self.conv0 = DenseConv1d(in_dim, growth, activation=self.activation, kernel_size=4, stride=1,
                                 padding=2, initializer=self.initialization)
        self.conv1 = DenseConv1d(in_dim + 1 * growth, growth, activation=self.activation, kernel_size=4,
                                 stride=1, padding=2, initializer=self.initialization)
        self.conv2 = DenseConv1d(in_dim + 2 * growth, growth, activation=self.activation, kernel_size=4,
                                 stride=1, padding=2, initializer=self.initialization)
        self.conv3 = DenseConv1d(in_dim + 3 * growth, growth, activation=self.activation, kernel_size=3,
                                 stride=1, padding=1, initializer=self.initialization)
        self.conv4 = DenseConv1d(in_dim + 4 * growth, out_dim, activation=self.activation, kernel_size=1,
                                 stride=1, padding=0, pool=False, initializer=self.initialization)

        initialize_weights(self.modules(), self.initialization)

    def forward(self, x):
        """Map ``x`` of shape (batch, length, in_dim) to (batch, out_dim).

        NOTE(review): the concatenation needs the pooled skip tensors and the
        layer output to have the same length. That holds for the length 600
        used in this notebook, but presumably not for every length -- confirm
        before changing the window size.
        """
        # Conv1d expects channels on dim 1.
        out = x.transpose(1, 2)

        outs = [out]
        # Pool window applied inside each layer (1 means no pooling). The skip
        # tensors are pooled by the same amount so they can still be
        # concatenated with that layer's output.
        pools = [4, 4, 4, 2, 1]
        for i in range(SubNet1D.num_layers):
            out = getattr(self, f'conv{i}')(torch.cat(outs, 1))
            if pools[i] > 1:
                outs = [F.max_pool1d(o, kernel_size=pools[i], stride=pools[i]) for o in outs]
            outs.append(out)

        # Global average pooling over the remaining length.
        mean = F.adaptive_avg_pool1d(out, 1).squeeze(-1)
        return mean

In [8]:
class SubNet2D(nn.Module):
    """Densely connected 2D CNN that treats the input as a 1-channel image.

    Each layer receives the channel-wise concatenation of the single-channel
    input and all earlier layer outputs, each max-pooled to the current
    resolution. The first four layers add 32 channels each; the fifth is a
    kernel-size-1 convolution producing ``out_dim`` channels, which are then
    averaged over both spatial dimensions.

    Args:
        in_dim: accepted for signature parity but never read; the first
            layer always has exactly one input channel.
        out_dim: size of the returned vector.
        activation: name of an activation class in ``torch.nn``.
        initialization: name of an initialiser function in ``torch.nn.init``.
    """
    num_layers = 5

    def __init__(self, in_dim=1, out_dim=128, activation='ReLU', initialization='kaiming_uniform'):
        super(SubNet2D, self).__init__()
        self.activation = getattr(nn, activation)
        self.initialization = getattr(init, initialization)

        # 2D CNN. Shared keyword arguments for every layer.
        common = dict(activation=self.activation, stride=1, initializer=self.initialization)

        # Input widths are 1 (image channel) plus 32 per preceding layer.
        self.conv0 = DenseConv2d(1, 32, kernel_size=4, padding=2, **common)
        self.conv1 = DenseConv2d(1 + 32, 32, kernel_size=4, padding=2, **common)
        self.conv2 = DenseConv2d(1 + 64, 32, kernel_size=4, padding=2, **common)
        self.conv3 = DenseConv2d(1 + 96, 32, kernel_size=3, padding=1, **common)
        self.conv4 = DenseConv2d(1 + 128, out_dim, kernel_size=1, padding=0, pool=False, **common)

        initialize_weights(self.modules(), self.initialization)

    def forward(self, x):
        """Swap dims 1 and 2 of ``x``, add a channel dim, and return (batch, out_dim)."""
        image = x.transpose(1, 2).unsqueeze(1)

        features = [image]
        # Pool window applied inside each layer (1 means no pooling).
        pools = [4, 4, 4, 2, 1]
        for idx in range(SubNet2D.num_layers):
            layer = getattr(self, f'conv{idx}')
            image = layer(torch.cat(features, 1))
            window = pools[idx]
            if window > 1:
                # Shrink the skip tensors by the same factor as the layer did.
                features = [F.max_pool2d(f, kernel_size=window, stride=window) for f in features]
            features.append(image)

        # Global average pooling, then drop the two singleton spatial dims.
        pooled = F.adaptive_avg_pool2d(image, 1)
        return pooled.squeeze(-1).squeeze(-1)

In [9]:
class MultiScaleCNN(nn.Module):
    """Combine a ``SubNet1D`` and a ``SubNet2D`` view of the same input.

    Both sub-networks produce an ``out_dim`` vector; the two are concatenated
    and projected back to ``out_dim`` by a bias-free linear layer, followed by
    a ReLU (the final ReLU is fixed and does not depend on ``activation``).

    Args:
        in_dim: forwarded to both sub-networks.
        out_dim: size of each sub-network output and of the final output.
        activation: name of an activation class in ``torch.nn``.
        initialization: name of an initialiser function in ``torch.nn.init``.
    """
    def __init__(self, in_dim=128, out_dim=128, activation='ReLU', initialization='kaiming_uniform'):
        super(MultiScaleCNN, self).__init__()
        self.activation = getattr(nn, activation)
        self.initialization = getattr(init, initialization)

        # 1D + 2D CNN branches, then the fusion layer.
        self.net1 = SubNet1D(in_dim, out_dim, activation, initialization)
        self.net2 = SubNet2D(in_dim, out_dim, activation, initialization)
        self.regression = nn.Linear(2 * out_dim, out_dim, bias=False)

        initialize_weights(self.modules(), self.initialization)

    def forward(self, x):
        """Return the ReLU of the fused projection of both branch outputs."""
        branch_1d = self.net1(x)
        branch_2d = self.net2(x)
        fused = torch.cat([branch_1d, branch_2d], 1)
        return F.relu(self.regression(fused))

### Train

In [10]:
def eval(model, dataset, return_outputs=False):
    """Compute the mean MSE of ``model`` over ``dataset`` without tracking gradients.

    NOTE: this function shadows the built-in ``eval``; the name is kept
    because other cells call it.

    Args:
        model: ``nn.Module`` to evaluate; it is switched to eval mode and
            left in that mode.
        dataset: iterable of batches, each a mapping with key ``'x'`` (a
            sequence of input tensors whose model outputs are averaged) and
            key ``'y'`` (the target tensor).
        return_outputs: when true, also return the averaged outputs of every
            batch concatenated along axis 0.

    Returns:
        The mean per-batch loss, or ``(mean_loss, outputs)`` when
        ``return_outputs`` is true. Batches with no inputs are excluded from
        the loss and contribute zeros of ``y``'s shape to ``outputs``.
    """
    torch.cuda.empty_cache()
    model = model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    avg_loss, rets = [], []
    criterion = torch.nn.MSELoss()

    # Without no_grad every forward pass keeps its autograd graph alive
    # through the running sum below, so memory grows with the number of
    # inputs per batch.
    with torch.no_grad():
        for batch in dataset:
            xs, y = batch['x'], batch['y']
            if len(xs) == 0 or len(xs[0]) == 0:
                if return_outputs:
                    rets.append(np.zeros(y.shape))
                continue

            y = y.to(device=device, dtype=torch.float32)
            outputs = torch.zeros_like(y)
            for x in xs:
                outputs += model(x.to(device=device, dtype=torch.float32))
            outputs /= len(xs)

            if return_outputs:
                rets.append(outputs.cpu().numpy())
            avg_loss.append(criterion(outputs, y).item())
    torch.cuda.empty_cache()

    if return_outputs:
        rets = np.concatenate(rets, axis=0)
        return np.mean(avg_loss), rets
    return np.mean(avg_loss)

In [11]:
def train(model, train_dataset, valid_dataset, optimizer, max_epoch, early_stop, scheduler,
          ckpt_name='model.bin'):
    """Train ``model`` with an MSE loss, validating after every epoch.

    Args:
        model: ``nn.Module`` to optimise.
        train_dataset: iterable of batches, each a mapping with key ``'x'``
            (a sequence of input tensors whose model outputs are averaged)
            and key ``'y'`` (the target tensor).
        valid_dataset: same structure; passed to ``eval`` after each epoch.
        optimizer: optimiser over the model parameters.
        max_epoch: maximum number of epochs.
        early_stop: training stops once the first parameter group's learning
            rate is less than or equal to this value.
        scheduler: optional scheduler stepped with the validation loss
            (skipped when falsy).
        ckpt_name: path the final ``state_dict`` is saved to.

    Returns:
        The trained model.
    """
    criterion = nn.MSELoss()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    for epoch in range(max_epoch):
        model = model.train()
        avg_loss = []
        for batch in train_dataset:
            xs, y = batch['x'], batch['y']
            # Same guard as eval(): a batch with no inputs would divide by
            # zero and then fail in backward(), so skip it.
            if len(xs) == 0 or len(xs[0]) == 0:
                continue

            optimizer.zero_grad()
            model.zero_grad()
            y = y.to(device=device, dtype=torch.float32)
            outputs = torch.zeros_like(y)
            for x in xs:
                outputs += model(x.to(device=device, dtype=torch.float32))
            outputs /= len(xs)

            loss = criterion(outputs, y)
            avg_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        tr_loss = np.mean(avg_loss)
        val_loss = eval(model, valid_dataset)
        print(f'[Epoch {epoch+1:3d}/{max_epoch}] Train Loss: ' +\
              f'{tr_loss:.4f}, Valid Loss: {val_loss:.4f}.')

        if scheduler: scheduler.step(val_loss)
        # The scheduler shrinks the learning rate on plateaus; once it has
        # decayed to the threshold, further epochs are not attempted.
        if optimizer.param_groups[0]['lr'] <= early_stop:
            print('Early Stopping!')
            break

    torch.save(f=ckpt_name, obj=model.state_dict())
    return model


"Create a model, train the model, and finally test the model."
# Hyper-parameters.
max_epoch = 100
lr = 0.01
early_stop = 3.2e-6  # train() stops once the learning rate is <= this value

model = MultiScaleCNN(out_dim=100, activation='ReLU', initialization='kaiming_uniform')
model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6, eps=1e-6)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.2, patience=3, cooldown=0, verbose=True)

data = get_msd_songs(window_size=600, batch_size=50)
model = train(model, data.train, data.valid, optimizer, max_epoch, early_stop, scheduler)

# Create embeddings for every song, and then save it.
t_data = get_msd_songs_all(window_size=600, batch_size=1)
total_loss, encodings = eval(model, t_data, return_outputs=True)
emb_path = './kor_embedding.npy'
np.save(emb_path, encodings)
print(f'Total Loss: {total_loss}, embedding is saved to {emb_path}.')


  after removing the cwd from sys.path.
  


RuntimeError: CUDA error: out of memory

^^