In [4]:
#prepare data 
#%load_ext autoreload
%autoreload 2
import torch
from datasets import TripletAudio


K, MAX_CLOSE_NEG, MAX_FAR_NEG, P_STRONG_NEG = 5, 15, 15, 0.9
BATCH_SIZE = 128

triplet_train_dataset = TripletAudio(True, K, MAX_CLOSE_NEG, MAX_FAR_NEG, P_STRONG_NEG)
triplet_test_dataset = TripletAudio(False, K, MAX_CLOSE_NEG, MAX_FAR_NEG, P_STRONG_NEG)
triplet_train_loader = torch.utils.data.DataLoader(triplet_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
triplet_test_loader = torch.utils.data.DataLoader(triplet_test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [69]:
# Set up the network and training parameters
from networks import AnchorNet, EmbeddingNet, TripletNet
from losses import TripletLoss
import torch.optim as optim
from recall import Recall
from torch.optim import lr_scheduler
from recall import Recall
from trainer import fit

INPUT_D, OUTPUT_D = 192, 128
MARGIN, LEARNING_RATE, N_EPOCHS, LOG_INT, N_RECALL_CAND = 0.5, 1e-3, 10, 100, 25

#define model
anchor_net = AnchorNet(triplet_train_dataset.get_dataset(), INPUT_D, OUTPUT_D)
embedding_net = EmbeddingNet(anchor_net)
model = TripletNet(embedding_net)

loss_fn = TripletLoss(MARGIN)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1, last_epoch=-1)

recall = Recall(N_RECALL_CAND, K)

initialising model biases
done


In [70]:
#run the model
train_loss, val_loss = fit(triplet_train_loader, triplet_test_loader, model, loss_fn, optimizer, scheduler, N_EPOCHS, {}, LOG_INT)

Epoch: 1/10. Train set: Average loss: 14.9202
Epoch: 1/10. Validation set: Average loss: 17.9979
Epoch: 2/10. Train set: Average loss: 7.9613
Epoch: 2/10. Validation set: Average loss: 9.2268
Epoch: 3/10. Train set: Average loss: 4.6526
Epoch: 3/10. Validation set: Average loss: 5.5178
Epoch: 4/10. Train set: Average loss: 3.2106
Epoch: 4/10. Validation set: Average loss: 3.7023
Epoch: 5/10. Train set: Average loss: 2.3511
Epoch: 5/10. Validation set: Average loss: 2.8918
Epoch: 6/10. Train set: Average loss: 1.9684
Epoch: 6/10. Validation set: Average loss: 2.3934
Epoch: 7/10. Train set: Average loss: 1.6270
Epoch: 7/10. Validation set: Average loss: 2.0061
Epoch: 8/10. Train set: Average loss: 1.4176
Epoch: 8/10. Validation set: Average loss: 1.7758
Epoch: 9/10. Train set: Average loss: 1.2998
Epoch: 9/10. Validation set: Average loss: 1.5654
Epoch: 10/10. Train set: Average loss: 1.1474
Epoch: 10/10. Validation set: Average loss: 1.3609


In [83]:
%%time
# perform recall on random batch
outputs = []
for d in (triplet_train_dataset, triplet_test_dataset):
    batch_indicies = np.random.choice(list(range(0, d.get_dataset().shape[0])), N_RECALL_SAMPLES, False)
    queries = d.get_dataset()[batch_indicies]
    true_knns = d.get_knn().iloc[batch_indicies, :]
    outputs.append(recall.calculate(d.get_dataset(), model.embedding_net, queries, true_knns, False))
train_recall, val_recall = np.mean(outputs[0]), np.mean(outputs[1])

In [72]:
with SummaryWriter() as w:
    w.add_hparams(
        {'LR': LEARNING_RATE, 'BSIZE': BATCH_SIZE, 'N_RECALL_S': N_RECALL_SAMPLES, 'LEARNING RATE': LEARNING_RATE, 'STEP_SIZE': STEP_SIZE},
        {'TRAIN_L': train_loss, 'VAL_L': val_loss, 'TRAIN_RECALL': train_recall, 'VAL_RECALL': val_recall})

In [3]:
%autoreload 2
import torch
from datasets import TripletAudio
from networks import AnchorNet, EmbeddingNet, TripletNet
from losses import TripletLoss
import torch.optim as optim
from recall import Recall
from torch.optim import lr_scheduler
from trainer import fit
from recall import Recall
import itertools

In [4]:
#define hyperparams
K, MAX_CLOSE_NEG, MAX_FAR_NEG, P_STRONG_NEG = 5, 5, 0, 1
BATCH_SIZE = 128
INPUT_D, OUTPUT_D = 192, 128
MARGIN, LEARNING_RATE, N_EPOCHS, LOG_INT, N_RECALL_CAND = 0.5, 1e-3, 25, 100, 25
N_RECALL_SAMPLES = 1000
# hyperparams = [MAX_CLOSE_NEGS, MAX_FAR_NEGS]

In [5]:
#computes the recall on datasets using global variables such as model and recall
def measure_recall(datasets):
    outputs = []
    for d in datasets:
        batch_indicies = np.random.choice(list(range(0, d.get_dataset().shape[0])), N_RECALL_SAMPLES, False)
        queries = d.get_dataset()[batch_indicies]
        true_knns = d.get_knn().iloc[batch_indicies, :]
        outputs.append(recall.calculate(d.get_dataset(), model.embedding_net, queries, true_knns, False))
    return [np.mean(x) for x in outputs]

In [6]:
for i in range(0,1): #itertools.product(*hyperparams):
    #setup datasets
    triplet_train_dataset = TripletAudio(True, K, MAX_CLOSE_NEG, MAX_FAR_NEG, P_STRONG_NEG)
    triplet_test_dataset = TripletAudio(False, K, MAX_CLOSE_NEG, MAX_FAR_NEG, P_STRONG_NEG)
    triplet_train_loader = torch.utils.data.DataLoader(triplet_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    triplet_test_loader = torch.utils.data.DataLoader(triplet_test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    #define model 
    anchor_net = AnchorNet(triplet_train_dataset.get_dataset(), INPUT_D, OUTPUT_D)
    embedding_net = EmbeddingNet(anchor_net)
    model = TripletNet(embedding_net)
    loss_fn = TripletLoss(MARGIN)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1, last_epoch=-1)
    recall = Recall(N_RECALL_CAND, K)
    #run the model
    train_loss, val_loss = fit(triplet_train_loader, triplet_test_loader, model, loss_fn, optimizer, scheduler, N_EPOCHS, {}, LOG_INT)
    #measure recall
    train_recall, val_recall = measure_recall([triplet_train_dataset, triplet_test_dataset])
    #write to tensorboard
    with SummaryWriter() as w:
        w.add_hparams(
            {'LR': LEARNING_RATE, 'BSIZE': BATCH_SIZE, 'N_RECALL_S': N_RECALL_SAMPLES, 'N_RECALL_CAND': N_RECALL_CAND,
                 'CLOSE_NEG': MAX_CLOSE_NEG, 'FAR_NEG': MAX_FAR_NEG, 'P_STRONG_NEG': P_STRONG_NEG, 'OUTPUT_D': OUTPUT_D},
            {'TRAIN_L': train_loss, 'VAL_L': val_loss, 'TRAIN_RECALL': train_recall, 'VAL_RECALL': val_recall})

initialising model biases
done
index 38857, pos 44064, neg 14605
Epoch: 1/25. Train set: Average loss: 19.4634
Epoch: 1/25. Validation set: Average loss: 17.9263
index 38857, pos 44064, neg 37043
Epoch: 2/25. Train set: Average loss: 14.3974
Epoch: 2/25. Validation set: Average loss: 12.2914
index 38857, pos 44064, neg 37043
Epoch: 3/25. Train set: Average loss: 9.6451
Epoch: 3/25. Validation set: Average loss: 7.8716
index 38857, pos 44064, neg 37043
Epoch: 4/25. Train set: Average loss: 6.3651
Epoch: 4/25. Validation set: Average loss: 5.5207
index 38857, pos 44064, neg 20483
Epoch: 5/25. Train set: Average loss: 4.7106
Epoch: 5/25. Validation set: Average loss: 4.1835
index 38857, pos 44064, neg 33278
Epoch: 6/25. Train set: Average loss: 3.7087
Epoch: 6/25. Validation set: Average loss: 3.2636
index 38857, pos 44064, neg 29456
Epoch: 7/25. Train set: Average loss: 2.9282
Epoch: 7/25. Validation set: Average loss: 2.6450
index 38857, pos 44064, neg 29456
Epoch: 8/25. Train set: Aver

NameError: name 'SummaryWriter' is not defined

### Online Selection

In [None]:
#prepare data 
# %load_ext autoreload
%autoreload 2
import torch
from datasets import AudioTrainDataset, AudioTestDataset
from datasets import BalancedBatchSampler

K = 5
train_dataset = AudioTrainDataset(K)
test_dataset = AudioTestDataset(K)

train_batch_sampler = BalancedBatchSampler(train_dataset)
test_batch_sampler = BalancedBatchSampler(test_dataset)

online_train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_batch_sampler)
online_test_loader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_batch_sampler)

In [None]:
# Set up the network and training parameters
from networks import EmbeddingNet, AnchorNet
from losses import OnlineTripletLoss
from utils import SemihardNegativeTripletSelector, HardestNegativeTripletSelector
from metrics import AverageNonzeroTripletsMetric
import torch.optim as optim
from torch.optim import lr_scheduler

INPUT_D, OUTPUT_D = 192, 128
MARGIN, LEARNING_RATE, N_EPOCHS, LOG_INT = 0.5, 1e-3, 5, 50

#define model
anchor_net = AnchorNet(train_dataset.data, INPUT_D, OUTPUT_D)
model = EmbeddingNet(anchor_net)
loss_fn = OnlineTripletLoss(MARGIN, SemihardNegativeTripletSelector(MARGIN, train_dataset.KNN))
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)

In [None]:
#run model
from trainer import fit
fit(online_train_loader, online_test_loader, model, loss_fn, optimizer, scheduler, N_EPOCHS, {}, LOG_INT, metrics=[AverageNonzeroTripletsMetric()])