# Configuration

In [87]:
config = [
    
"--experimentdir", "/home/schindlera/experiments/ismir2020_reviews/",
"--modeldir"     , "/home/schindlera/experiments/ismir2020_reviews/",
    
"--relcontent"   , "rel_content_emb_tag_lsi", 
"--audio"        , "melspec_128_10seconds_2ch_norm", 
    
"--model"        , "model.m1_3",     
"--gpu"          , "0",
    
"--loss"         , "original", 
"--lossagg"      , "max", 
"--margin"       , "1.0", 
"--uppersim"     , "0.90", 
    
"--finaldim"     , "128", 
"--epochs"       , "10", 
"--learnrate"    , "0.0001",
"--batchsize"    , "1000"
    
]

# Data Loading & Preprocessing

In [88]:
import sys
import argparse
import logging

In [89]:

parser = argparse.ArgumentParser()

parser.add_argument('--relcontent',    type=str)
parser.add_argument('--model',         type=str)
parser.add_argument('--audio',         type=str)
parser.add_argument('--experimentdir', type=str)
parser.add_argument('--modeldir',      type=str)
parser.add_argument('--gpu',           type=int)
parser.add_argument('--finaldim',      type=int)
parser.add_argument('--lossagg',       type=str, default="min")
parser.add_argument('--loss',          type=str, default="original")
parser.add_argument('--batchsize',     type=int, default=1000)
parser.add_argument('--margin',        type=float)
parser.add_argument('--learnrate',     type=float, default=0.0001)
parser.add_argument('--uppersim',      type=float)
parser.add_argument('--epochs',        type=int, default=100)
parser.add_argument("--log-level", default=logging.DEBUG, type=lambda x: getattr(logger, x), help="Configure the logger level.")

if sys.argv[0].find("ipykernel_launcher") != -1:
    args = parser.parse_args(config)
else:
    args = parser.parse_args()

# Imports

In [4]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

import json

import numpy as np
import pandas as pd

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torchsummary import summary

from tqdm.auto import tqdm

In [5]:
# control random processes
np.random.seed(1)

# Functions

In [6]:
def prepare_model_dir(model_path):
    if not os.path.exists(model_path):
        os.makedirs(model_path)

# Initialize Experiment

## Init Logger

In [7]:
logger = logging.getLogger("experiment.py")
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
logger.addHandler(ch)

logger.info("+------------------------------------------------------------------+")
logger.info("| STARTING EXPERIMENT                                              |")
logger.info("+------------------------------------------------------------------+")
logger.info("Logger initialized")

logger.info("Initializing model experiment directory")
model_storage_path = prepare_model_dir(args.modeldir)

logger.info("Initializing logger filehandler")
fh = logging.FileHandler("%s/experiment.log" % args.modeldir)
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)

logger.addHandler(fh)

2020-04-22 21:39:32 - experiment.py - INFO - +------------------------------------------------------------------+
2020-04-22 21:39:32 - experiment.py - INFO - | STARTING EXPERIMENT                                              |
2020-04-22 21:39:32 - experiment.py - INFO - +------------------------------------------------------------------+
2020-04-22 21:39:32 - experiment.py - INFO - Logger initialized
2020-04-22 21:39:32 - experiment.py - INFO - Initializing model experiment directory
2020-04-22 21:39:32 - experiment.py - INFO - Initializing logger filehandler


Print Experiment Summary

In [8]:
logger.info("+------------------------------------------------------------------+")
logger.info("| EXPERIMENT:                                                      |")
logger.info("+------------------------------------------------------------------+")
logger.info("| Experiment directory              : %s"           % args.experimentdir)
logger.info("| Model-Directory                   : %s"           % args.modeldir)
logger.info("| Related content filename          : %s"           % args.relcontent)
logger.info("| Audio-Features filename           : %s"           % args.audio)
logger.info("| GPU                               : %d"           % args.gpu)
logger.info("+------------------------------------------------------------------+")
logger.info("| Model                             : %s"           % args.model)
logger.info("| Dimensions Final Music-Embeddings : %d"           % args.finaldim)
logger.info("+------------------------------------------------------------------+")
logger.info("| Loss                              : %s"           % args.loss)
logger.info("| Loss-Aggregation                  : %s"           % args.lossagg)
logger.info("| Upper Sim                         : %f"           % args.uppersim)
logger.info("| Margin                            : %f"           % args.margin)
logger.info("+------------------------------------------------------------------+")
logger.info("| Learn Rate                        : %f"           % args.learnrate)
logger.info("| Batch Size                        : %d"           % args.batchsize)
logger.info("| Num Epochs                        : %d"           % args.epochs)
logger.info("+------------------------------------------------------------------+")

2020-04-22 21:39:36 - experiment.py - INFO - +------------------------------------------------------------------+
2020-04-22 21:39:36 - experiment.py - INFO - | EXPERIMENT:                                                      |
2020-04-22 21:39:36 - experiment.py - INFO - +------------------------------------------------------------------+
2020-04-22 21:39:36 - experiment.py - INFO - | Experiment directory              : /home/schindlera/experiments/ismir2020_reviews/
2020-04-22 21:39:36 - experiment.py - INFO - | Model-Directory                   : /home/schindlera/experiments/ismir2020_reviews/
2020-04-22 21:39:36 - experiment.py - INFO - | Related content filename          : rel_content_emb_tag_lsi
2020-04-22 21:39:36 - experiment.py - INFO - | Audio-Features filename           : melspec_128_10seconds_2ch_norm
2020-04-22 21:39:36 - experiment.py - INFO - | GPU                               : 0
2020-04-22 21:39:36 - experiment.py - INFO - +--------------------------------------------

Store configuration for reproducability

In [9]:
with open("%s/experiment_arguments.json" % args.modeldir, 'w') as json_file:
    args_json = json.dump(vars(args), json_file)

## Load Audio Data

In [10]:
logger.info("* Load Audio Data")

2020-04-22 21:39:40 - experiment.py - INFO - * Load Audio Data


Load Audio Data - Train Partition

In [11]:
%%time
logger.info("* Load Audio Data - Train Partition")

# load partition trackid file
par_file           = "%s/eval_partition_trackids_train.csv" % (args.experimentdir)
par_trackids_train = pd.read_csv(par_file, header=None, index_col=0)

# load audio content
par_filename_audio_train = "%s/%s_train.npz" % (args.experimentdir, args.audio)

with np.load(par_filename_audio_train, allow_pickle=True) as npz:
    data_audio_train      = npz["data"]
    track_ids_audio_train = npz["track_ids"].astype(str)
    
lookup_audio_train = pd.DataFrame(np.arange(track_ids_audio_train.shape[0], dtype=int), 
                                  index   = track_ids_audio_train, 
                                  columns = ["feature_line_nr"])

2020-04-22 21:39:48 - experiment.py - INFO - * Load Audio Data - Train Partition
CPU times: user 4min 13s, sys: 4min 58s, total: 9min 11s
Wall time: 10min 36s


In [12]:
# CHECK: ids and data have same length
assert(data_audio_train.shape[0] == track_ids_audio_train.shape[0])

In [13]:
#par_trackids_train.index.values == track_ids_audio_train

Load Audio Data - Validation Partition

In [14]:
logger.info("* Load Audio Data - Validation Partition")

# load partition trackid file
par_file         = "%s/eval_partition_trackids_val.csv" % (args.experimentdir)
par_trackids_val = pd.read_csv(par_file, header=None, index_col=0)

# load audio content
par_filename_audio_val = "%s/%s_val.npz" % (args.experimentdir, args.audio)

with np.load(par_filename_audio_val, allow_pickle=True) as npz:
    data_audio_val      = npz["data"]
    track_ids_audio_val = npz["track_ids"].astype(str)
    
lookup_audio_val = pd.DataFrame(np.arange(track_ids_audio_val.shape[0], dtype=int), 
                                  index   = track_ids_audio_val, 
                                  columns = ["feature_line_nr"])

2020-04-22 21:50:58 - experiment.py - INFO - * Load Audio Data - Validation Partition


In [15]:
# CHECK: ids and data have same length
assert(data_audio_val.shape[0] == track_ids_audio_val.shape[0])

In [16]:
logger.info("Num instances - audio data train : %d" % data_audio_train.shape[0])
logger.info("Num instances - audio data val   : %d" % data_audio_val.shape[0])
logger.info("data_audio dimensions            : %s" % str(data_audio_train.shape))

2020-04-22 21:51:05 - experiment.py - INFO - Num instances - audio data train : 247480
2020-04-22 21:51:05 - experiment.py - INFO - Num instances - audio data val   : 2500
2020-04-22 21:51:05 - experiment.py - INFO - data_audio dimensions            : (247480, 128, 880, 2)


## Load Related Content

In [17]:
logger.info("* Load Related Content Embeddings")

2020-04-22 21:51:05 - experiment.py - INFO - * Load Related Content Embeddings


Train Partition

In [18]:
par_filename_relcontent_train = "%s/%s_train.npz" % (args.experimentdir, args.relcontent)

with np.load(par_filename_relcontent_train, allow_pickle=True) as npz:
    data_relcontent_train     = npz["data"]
    trackids_relcontent_train = npz["trackids"].astype(str)

lookup_relcontent_train = pd.DataFrame(np.arange(trackids_relcontent_train.shape[0], dtype=int), 
                                       index   = trackids_relcontent_train, 
                                       columns = ["feature_line_nr"])

# CHECK: ids and data have same length
assert(data_relcontent_train.shape[0] == trackids_relcontent_train.shape[0])

# CHECK: ids of text and audio are aligned
#assert((track_ids_text == track_ids_audio).sum() == track_ids_audio.shape[0])

Validation Partition

In [19]:
par_filename_relcontent_val = "%s/%s_val.npz" % (args.experimentdir, args.relcontent)

with np.load(par_filename_relcontent_val, allow_pickle=True) as npz:
    data_relcontent_val     = npz["data"]
    trackids_relcontent_val = npz["trackids"].astype(str)

lookup_relcontent_val = pd.DataFrame(np.arange(trackids_relcontent_val.shape[0], dtype=int), 
                                       index   = trackids_relcontent_val, 
                                       columns = ["feature_line_nr"])

# CHECK: ids and data have same length
assert(data_relcontent_val.shape[0] == trackids_relcontent_val.shape[0])

# CHECK: ids of text and audio are aligned
#assert((track_ids_text == track_ids_audio).sum() == track_ids_audio.shape[0])

In [20]:
logger.info("Num instances - related content train : %d" % data_relcontent_train.shape[0])
logger.info("Num instances - related content val   : %d" % data_relcontent_val.shape[0])
logger.info("data_relcontent dimensions            : %s" % str(data_relcontent_train.shape))
logger.info("TEXT_EMBEDDINGS_DIMENSIONS            : %d" % data_relcontent_train.shape[1])
logger.info("+------------------------------------------------------------------+")

2020-04-22 21:51:07 - experiment.py - INFO - Num instances - related content train : 247480
2020-04-22 21:51:07 - experiment.py - INFO - Num instances - related content val   : 2500
2020-04-22 21:51:07 - experiment.py - INFO - data_relcontent dimensions            : (247480, 340)
2020-04-22 21:51:07 - experiment.py - INFO - TEXT_EMBEDDINGS_DIMENSIONS            : 340
2020-04-22 21:51:07 - experiment.py - INFO - +------------------------------------------------------------------+


In [21]:
TEXT_EMBEDDINGS_DIMENSIONS = data_relcontent_train.shape[1]

logger.info("Partitions overview - train/val")

logger.info("data_audio_train      dimensions    : %s" % str(data_audio_train.shape))
logger.info("data_relcontent_train dimensions    : %s" % str(data_relcontent_train.shape))
logger.info("data_audio_val        dimensions    : %s" % str(data_audio_val.shape))
logger.info("data_relcontent_val   dimensions    : %s" % str(data_relcontent_val.shape))
logger.info("+------------------------------------------------------------------+")

2020-04-22 21:51:07 - experiment.py - INFO - Partitions overview - train/val
2020-04-22 21:51:07 - experiment.py - INFO - data_audio_train      dimensions    : (247480, 128, 880, 2)
2020-04-22 21:51:07 - experiment.py - INFO - data_relcontent_train dimensions    : (247480, 340)
2020-04-22 21:51:07 - experiment.py - INFO - data_audio_val        dimensions    : (2500, 128, 880, 2)
2020-04-22 21:51:07 - experiment.py - INFO - data_relcontent_val   dimensions    : (2500, 340)
2020-04-22 21:51:07 - experiment.py - INFO - +------------------------------------------------------------------+


# Model

## Loss

In [132]:
class OnlineTripletLoss(nn.Module):
    
    def __init__(self, margin, upper_limit):
        super(OnlineTripletLoss, self).__init__()
        
        self.margin      = margin
        self.upper_limit = upper_limit

    def cosine_similarity(self, x1, x2=None, eps=1e-8):
        x2 = x1 if x2 is None else x2
        w1 = x1.norm(p=2, dim=1, keepdim=True)
        w2 = w1 if x2 is x1 else x2.norm(p=2, dim=1, keepdim=True)
        return torch.mm(x1, x2.t()) / (w1 * w2.t()).clamp(min=eps)

    def forward(self, audio_embeddings, text_embdeeings):

        # Get the pairwise distance matrix
        pairwise_dists_text  = self.cosine_similarity(text_embdeeings)
        pairwise_dists_audio = torch.cdist(audio_embeddings,audio_embeddings, p=2)

        # create filter masks
        max_pairwise_dist_audio = pairwise_dists_audio.max()

        # positive bool mask
        mask_positive = (pairwise_dists_text.fill_diagonal_(0) > self.upper_limit)

        # negative bool mask
        mask_negative = (1 - mask_positive.float()).fill_diagonal_(0)

        if args.lossagg == "max":

            audio_positive_dist       = pairwise_dists_audio * mask_positive.float()
            hardest_positive_dist, _  = audio_positive_dist.max(dim=1, keepdims=True)

        #elif args.lossagg == "min":
        # 
        #    audio_positive_dist       = pairwise_dists_audio * mask_positive_bool
        #    hardest_positive_dist, _  = audio_positive_dist.min(dim=1, keepdims=True)

        # negative
        max_audio_negative_dist, _ = pairwise_dists_audio.max(1, keepdim=True)
        audio_negative_dist        = pairwise_dists_audio + max_audio_negative_dist * (1.0 - mask_negative)

        hardest_negative_dist, _   = audio_negative_dist.min(dim=1, keepdims=True)
        
        # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
        delta = hardest_positive_dist - hardest_negative_dist
        
        #print(mask_positive)
        
        
        delta = delta[mask_positive.any(axis=0),:]
        #print(hardest_positive_dist.shape[0], delta.shape[0])

        if   args.loss == "original"     : 
            triplet_loss = (delta + self.margin)
            triplet_loss[triplet_loss < 0] = 0
            triplet_loss = triplet_loss.mean()


        #elif args.loss == "logistic_sum" : triplet_loss = tf.reduce_sum (tf.log1p(tf.reduce_sum(tf.exp(delta), axis=2)))
        #elif args.loss == "logistic_mean": triplet_loss = tf.reduce_mean(tf.log1p(tf.reduce_sum(tf.exp(delta), axis=2)))
        #elif args.loss == "hinge_sum"    : triplet_loss = tf.reduce_sum (tf.reduce_sum(tf.nn.relu(margin + delta)))
        #elif args.loss == "hinge_mean"   : triplet_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.relu(margin + delta)))
        #elif args.loss == "exp_sum"      : triplet_loss = tf.reduce_sum (tf.reduce_sum(tf.exp(delta), axis=2))
        #elif args.loss == "exp_mean"     : triplet_loss = tf.reduce_mean(tf.reduce_sum(tf.exp(delta), axis=2))
        else: raise NotImplementedError

        return triplet_loss

# Model

In [23]:
class NetKim2019(nn.Module):
    
    def __init__(self):
        super(NetKim2019, self).__init__()
        
        self.conv1      = nn.Conv2d(2,    16, kernel_size=5, stride=(2,1), padding=2)
        self.conv2      = nn.Conv2d(16,   32, kernel_size=3, padding=1)
        self.conv3      = nn.Conv2d(32,   64, kernel_size=3, padding=1)
        self.conv4      = nn.Conv2d(64,   64, kernel_size=3, padding=1)
        self.conv5      = nn.Conv2d(64,  128, kernel_size=3, padding=1)
        self.conv6a     = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv6b     = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        
        self.maxpool1   = nn.MaxPool2d((2,2))
        self.maxpool2   = nn.MaxPool2d((2,2))
        self.maxpool3   = nn.MaxPool2d((2,2))
        self.maxpool4   = nn.MaxPool2d((2,2))
        self.maxpool5   = nn.MaxPool2d((2,2))
        
        self.gap        = nn.AdaptiveMaxPool2d(1)
                
        self.dropout    = nn.Dropout()
        self.fc_feature = nn.Linear(256, 256)
        self.fc_output  = nn.Linear(256, 128)

    def forward(self, x):

        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool2(x)
        
        x = self.conv3(x)
        x = F.relu(x)
        x = self.maxpool3(x)
        
        x = self.conv4(x)
        x = F.relu(x)
        x = self.maxpool4(x)
        
        x = self.conv5(x)
        x = F.relu(x)
        x = self.maxpool5(x)
        
        x = self.conv6a(x)
        x = F.relu(x)
        x = self.conv6b(x)
        x = F.relu(x)
        
        x = self.gap(x)
        x = x.view(x.shape[:2])

        x = self.fc_feature(x)
        x = F.relu(x)
        x = self.dropout(x)
        
        x = self.fc_output(x)
        x = F.sigmoid(x)
        
        return x

#summary(model.cuda(), (2, 216, 128))

## DataSet Loader

In [24]:
class MelSpecDataset(Dataset):
    
    def __init__(self, audio_data, text_data, random_cropping=False):
        self.audio_data      = audio_data
        self.text_data       = text_data
        self.random_cropping = random_cropping

    def __len__(self):
        return self.audio_data.shape[0]

    def __getitem__(self, idx):
        
        if torch.is_tensor(idx):
            idx = idx.tolist()

        result_text  = self.text_data[idx]
            
        result_audio = self.audio_data[idx]
        #print(result_audio.shape)
        result_audio = np.swapaxes(result_audio, 0,2)
        
        if self.random_cropping:
            start = np.random.randint(0, 880 - 216 + 1)
            stop  = start + 216
            result_audio = result_audio[:,start:stop,:]
        else:
            result_audio = result_audio[:,100:316,:]
            
        result_audio = torch.from_numpy(result_audio).float()
        result_text  = torch.from_numpy(result_text).float()
        
        return result_audio, result_text

# Train Model

In [25]:
# CUDA for PyTorch
#device1 = torch.device("cuda:0")
#device1 = torch.device("cpu")
device1 = torch.device("cuda:0")
#cudnn.benchmark = True

In [26]:
device1

device(type='cuda', index=0)

Create Model

In [133]:
# model
model_single = NetKim2019()

model = nn.DataParallel(model_single, device_ids = [0,1,2,3])
model = model.to(device1)

# loss
criterion = OnlineTripletLoss(args.margin, args.uppersim).to(device1)

In [99]:
# model
model = NetKim2019()
model = model.to(device1)

# loss
criterion = OnlineTripletLoss(args.margin, args.uppersim).to(device1)

In [91]:
# optimizer
optimizer = optim.Adam(model.parameters(), lr=args.learnrate, weight_decay=0.00001)

Create DataLoaders

In [29]:
# this approach uses an intermediate batchsize. forward passes will be made using this batchsize. 
# loss is calculated for each intermediate batch. When the number of precessed instances reaches the
# globally supplied batchsize, losses are accumulated and backpropagated.

INTERMEDIATE_BATCH_SIZE = 100

In [83]:
# Train Data Loader
dataset_train    = MelSpecDataset(data_audio_train, 
                                  data_relcontent_train, 
                                  random_cropping=True)

dataloader_train = DataLoader(dataset_train, 
                              batch_size  = INTERMEDIATE_BATCH_SIZE,
                              shuffle     = True, 
                              num_workers = 8)

# Validation Data Loader
dataset_val      = MelSpecDataset(data_audio_val, 
                                  data_relcontent_val, 
                                  random_cropping=False)

dataloader_val   = DataLoader(dataset_val, 
                              batch_size  = INTERMEDIATE_BATCH_SIZE,
                              shuffle     = False, 
                              num_workers = 8)

In [84]:
pbar_outer  = tqdm(total=args.epochs, desc='Epoch', position=0)
pbar_status = tqdm(total=0, position=1, bar_format='{desc}')

current_val_loss = float('nan')
running_loss     = 0.0

for epoch in range(args.epochs):
    
    pbar_inner = tqdm(total    = np.ceil(dataset_train.__len__() / args.batchsize).astype(int), 
                      desc     = f'[train_loss: {running_loss}, val_loss: {current_val_loss}]', 
                      position = 1)
    
    running_loss                   = 0.0
    current_batch_num              = 0
    current_batch_size             = 0
    current_batch_audio_embeddings = []
    current_batch_text_embeddings  = []
   

    for local_audio, local_text in dataloader_train:
        
        local_audio = local_audio.to(device1)
        local_text  = local_text.to(device1)
    
        # zero the parameter gradients
        optimizer.zero_grad()

        outputs = model(local_audio)
        
        #outputs = outputs.to(device2)
        
        current_batch_audio_embeddings.append(outputs)
        current_batch_text_embeddings.append(local_text)
        
        current_batch_size += outputs.shape[0]
        
        
        
        if current_batch_size >= args.batchsize:
            
            audio_embeddings = torch.cat(current_batch_audio_embeddings, dim=0)
            text_embeddings  = torch.cat(current_batch_text_embeddings,  dim=0)
            
            print(audio_embeddings.shape, text_embeddings.shape)
            
            loss    = criterion.forward(audio_embeddings, text_embeddings)
            
            #loss    = loss.to(device1)
            
            loss.backward()
            optimizer.step()
            
            current_batch_audio_embeddings.clear()
            current_batch_text_embeddings.clear()
        
            # print statistics
            running_loss += loss.item()
            pbar_status.set_description_str(f'[train_loss: {loss.item()}, val_loss: {current_val_loss}]')
            
            pbar_inner.update(1)
            
            #print('[%d, %5d] loss: %.3f' % (epoch + 1, current_batch_num + 1, running_loss / 1))
            
            current_batch_num += 1
            current_batch_size = 0
            

            #break
    
    pbar_outer.update(1)
    #break

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 0.0, val_loss: nan]', max=248.0, style=Progr…

torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000, 340])
torch.Size([1000, 128]) torch.Size([1000

KeyboardInterrupt: 

In [42]:
INTERMEDIATE_BATCH_SIZE = 1000

In [134]:
# Train Data Loader
dataset_train    = MelSpecDataset(data_audio_train, 
                                  data_relcontent_train, 
                                  random_cropping=True)

dataloader_train = DataLoader(dataset_train, 
                              batch_size  = INTERMEDIATE_BATCH_SIZE,
                              shuffle     = True, 
                              num_workers = 8)

# Validation Data Loader
dataset_val      = MelSpecDataset(data_audio_val, 
                                  data_relcontent_val, 
                                  random_cropping=False)

dataloader_val   = DataLoader(dataset_val, 
                              batch_size  = INTERMEDIATE_BATCH_SIZE,
                              shuffle     = False, 
                              num_workers = 8)

In [None]:
pbar_outer  = tqdm(total=args.epochs, desc='Epoch', position=0)
pbar_status = tqdm(total=0, position=1, bar_format='{desc}')

current_val_loss = float('nan')
running_loss     = 0.0

for epoch in range(args.epochs):
    
    pbar_inner = tqdm(total    = np.ceil(dataset_train.__len__() / args.batchsize).astype(int), 
                      desc     = f'[train_loss: {running_loss}, val_loss: {current_val_loss}]', 
                      position = 1)
    
    running_loss                   = 0.0
    current_batch_num              = 0
    current_batch_size             = 0
    current_batch_audio_embeddings = []
    current_batch_text_embeddings  = []
   
    model.train()

    for local_audio, local_text in dataloader_train:
        
        local_audio = local_audio.to(device1)
        local_text  = local_text.to(device1)
    
        # zero the parameter gradients
        optimizer.zero_grad()

        outputs = model(local_audio)
                    
        loss    = criterion.forward(outputs, local_text)
            
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        pbar_status.set_description_str(f'[train_loss: {loss.item()}, val_loss: {current_val_loss}]')
            
        pbar_inner.update(1)
            
        current_batch_num += 1
        current_batch_size = 0
            
            
    model.eval()

    for local_audio, local_text in dataloader_val:
        
        local_audio = local_audio.to(device1)
        local_text  = local_text.to(device1)
    
        # zero the parameter gradients
        optimizer.zero_grad()

        outputs = model(local_audio)
                    
        loss    = criterion.forward(outputs, local_text)
        
        current_val_loss = loss.item()

    pbar_outer.update(1)
    #break

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 0.0, val_loss: nan]', max=248.0, style=Progr…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packag

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.88746333122253, val_loss: 1.002861976623…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packag

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.89887011051178, val_loss: 1.002861976623…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packag

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.89372265338898, val_loss: 1.002861976623…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packag

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.88127291202545, val_loss: 1.002861976623…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataload

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.89334642887115, val_loss: 1.002861976623…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdo

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.90145683288574, val_loss: 1.002861976623…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _shutdown_workers
    w.join()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packag

HBox(children=(FloatProgress(value=0.0, description='[train_loss: 252.8718148469925, val_loss: 1.0028619766235…

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
Traceback (most recent call last):
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f88733c6898>>
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
Traceback (most recent call last):
    self._shutdown_workers()
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 961, in __del__
Traceback (most recent call last):
  File "/home/schindlera/.conda/envs/base36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 941, in _

In [32]:
loss.item()

0.1710646003484726

In [33]:
torch.cuda.empty_cache()

In [34]:
outputs.shape

torch.Size([80, 128])

In [35]:
        
# ===============================================================================
# # Train Model
# ===============================================================================

logger.info("* Prepare Evaluation")

# ===============================================================================
# ### Build and Train Model
# ===============================================================================

# define the model
model = model_def.get_model(args.finaldim)
logger.info("* Model created")

# define the optimizer
opt = Adam(lr=args.learnrate)
logger.info("* Optimizer: %s" % (str(opt)))


#from keras_radam import RAdam

#opt = RAdam(total_steps=10000, warmup_proportion=0.1, learning_rate=1e-4, min_lr=1e-5)

# compile the model
model.compile(loss      = triplet_loss,
              optimizer = opt)
logger.info("* Model compiled")
                    
# ===============================================================================
# Callbacks
# ===============================================================================

cb_modelcheckpoint = ModelCheckpoint(args.modeldir + "/model.h5", 
                                    monitor           = 'val_loss', 
                                    verbose           = 1, 
                                    save_best_only    = True, 
                                    save_weights_only = True, 
                                    mode              = 'auto')
    
cb_tensorboard =  TensorBoard(log_dir=args.modeldir, 
                                histogram_freq=0, 
                                write_graph=False, 
                                write_grads=False, 
                                write_images=False, 
                                embeddings_freq=0, 
                                embeddings_layer_names=None, 
                                embeddings_metadata=None, 
                                embeddings_data=None, 
                                update_freq='epoch')

cb_csv_logger = CSVLogger(args.modeldir + "/model_training_log.csv", separator=';', append=False)

cb_logger = LoggerCallback()
    
callbacks = [cb_tensorboard, cb_modelcheckpoint, cb_csv_logger, cb_logger]
logger.info("* Callbacks created")


logger.info("* Model Training: starting")
# first test - only to debug code
history = model.fit(audio_train,
                    text_train, 
                    batch_size       = args.batchsize, 
                    verbose          = 1, 
                    epochs           = args.epochs,
                    validation_data  = (audio_val, text_val),
                    callbacks        = callbacks,
                    shuffle          = True);

logger.info("* Model Training: completed")

model_path = args.modeldir + "/model.h5"
logger.info("* Loading best model: %s" % model_path)
model.load_weights(model_path)

logger.info("* Inference: Embedding audio data into learned representation")
embeddings = model.predict(data_audio, batch_size=100, verbose=1)

logger.info("* storing embeddings")
np.savez(args.modeldir + "/final_embeddings.npz", data=embeddings, track_ids=lookup_audio.index.values)

logger.info("* Experiment finished!")

2020-04-22 11:02:52 - experiment.py - INFO - * Prepare Evaluation


NameError: name 'model_def' is not defined