In [1]:
# Capture the output of the `nvidia-smi` shell command using IPython's `!` syntax
# (this line is IPython-only; it is not valid plain Python).
gpu_info = !nvidia-smi
# Join the captured output lines into one newline-separated string and display it.
gpu_info = '\n'.join(gpu_info)
print(gpu_info)

Thu Jul  1 10:37:20 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.80       Driver Version: 460.80       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN V             Off  | 00000000:3B:00.0 Off |                  N/A |
| 28%   39C    P2    51W / 250W |   6319MiB / 12066MiB |     25%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN V             Off  | 00000000:5E:00.0 Off |                  N/A |
| 28%   40C    P2    43W / 250W |   2975MiB / 12066MiB |     22%      Default |
|       

In [2]:
import os
# Choose the visible GPUs (device 1 listed first) before torch is imported in a later cell.
# NOTE(review): the value contains a space after the comma — confirm the CUDA runtime
# parses it as two devices rather than stopping at the first entry.
os.environ["CUDA_VISIBLE_DEVICES"] = "1, 0"

# Work from the local pytorch-metric-learning checkout; presumably this is what makes the
# `pytorch_metric_learning` imports in the next cell resolve — TODO confirm.
os.chdir('/home/l/liny/ruofan/pytorch-metric-learning/src')

In [3]:
from pytorch_metric_learning import losses, miners, distances, reducers, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator
from torchvision import datasets
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np

from pytorch_metric_learning.models import bninception
from pytorch_metric_learning import samplers

In [4]:
# All tensors and modules are moved to the CUDA device.
device = torch.device("cuda")


def _make_transform(augment):
    """Build an image pipeline: optional random horizontal flip, then
    resize to 224x224, convert to tensor, and normalize every channel
    with mean 0.5 and std 0.5."""
    steps = [transforms.RandomHorizontalFlip(p=0.5)] if augment else []
    steps += [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
    return transforms.Compose(steps)


# Only the training pipeline applies the random flip.
train_transform = _make_transform(augment=True)
test_transform = _make_transform(augment=False)

# Run settings: batch size, number of epochs, and where checkpoints are written.
batch_size, num_epochs = 64, 200
result_dir, exp_name = './log', 'targetlist'
os.makedirs(result_dir, exist_ok=True)

In [6]:
# NOTE(review): GetLoader is neither defined nor imported in the cells shown here
# (execution counts jump from In [4] to In [6]) — confirm where it comes from so the
# notebook survives Restart & Run All.

# Both splits use the same label dictionary; they differ in image root, file list and transform.
label_dict_path = '/home/l/liny/ruofan/PhishIntention/src/siamese_retrain/target_dict.json'

train_data = GetLoader(
    data_root='/home/l/liny/ruofan/lightly/datasets/targetlist/train/',
    data_list='/home/l/liny/ruofan/PhishIntention/src/siamese_retrain/train_targets.txt',
    label_dict=label_dict_path,
    transform=train_transform,
)

test_data = GetLoader(
    data_root='/home/l/liny/ruofan/lightly/datasets/targetlist/test/',
    data_list='/home/l/liny/ruofan/PhishIntention/src/siamese_retrain/test_targets.txt',
    label_dict=label_dict_path,
    transform=test_transform,
)

In [7]:
# Sampler built from the training labels.
# Presumably it yields m=5 samples per class and 100000 indices per pass — TODO confirm
# against the MPerClassSampler implementation in the local checkout.
sampler = samplers.MPerClassSampler(
    train_data.labels,
    length_before_new_iter=100000,
    m=5,
)

In [8]:
# Training batches are drawn through the custom sampler defined above.
train_loader = torch.utils.data.DataLoader(
    train_data,
    sampler=sampler,
    batch_size=batch_size,
)

# Test batches are read in dataset order (no shuffling).
test_loader = torch.utils.data.DataLoader(
    test_data,
    shuffle=False,
    batch_size=batch_size,
)

In [9]:
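# Debugging aid (left disabled): uncomment to print the label batch of every
# iteration of train_loader.
# for data, target in train_loader:
#     print(target)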

In [14]:
# Build the bninception network with a 512-dimensional output and no pretrained weights
# requested, wrap it in DataParallel, and move the wrapper to the configured device.
model = torch.nn.DataParallel(bninception(dim=512, pretrained=None)).to(device)

In [15]:
### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ### 
def train(model, loss_func, mining_func, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training epoch over ``train_loader``.

    For every batch: move the data and labels to ``device``, compute the
    embeddings, let ``mining_func`` pick index tuples, evaluate ``loss_func``
    on them, back-propagate and take one ``optimizer`` step.

    Args:
        model: callable module producing embeddings; put into train mode.
        loss_func: callable ``(embeddings, labels, indices_tuple) -> loss``.
        mining_func: callable ``(embeddings, labels) -> indices_tuple`` that
            also exposes a ``num_triplets`` attribute (read only for logging).
        device: target device for ``data`` and ``labels``.
        train_loader: iterable of ``(data, labels)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the log message.
        log_interval: print a progress line every this many batches
            (batch 0 included). ``0`` or ``None`` disables logging.
            Defaults to 100, the previously hard-coded value.

    Returns:
        None.
    """
    model.train()
    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        embeddings = model(data)
        indices_tuple = mining_func(embeddings, labels)
        loss = loss_func(embeddings, labels, indices_tuple)
        loss.backward()
        optimizer.step()
        # A falsy interval short-circuits, which avoids a modulo-by-zero.
        if log_interval and batch_idx % log_interval == 0:
            print("Epoch {} Iteration {}: Loss = {}, Number of mined triplets = {}".format(epoch, batch_idx, loss, mining_func.num_triplets))

### convenient function from pytorch-metric-learning ###
def get_all_embeddings(dataset, model):
    """Embed every item of ``dataset`` with ``model``.

    Delegates to a freshly created ``testers.BaseTester`` and returns its
    ``get_all_embeddings(dataset, model)`` result unchanged; the caller in
    this notebook unpacks that result as ``(embeddings, labels)``.
    """
    return testers.BaseTester().get_all_embeddings(dataset, model)

### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def test(train_set, test_set, model, accuracy_calculator):
    """Score ``model`` with Precision@1, using the test set as queries and the
    train set as the reference collection.

    Args:
        train_set: dataset embedded as the reference collection.
        test_set: dataset embedded as the queries.
        model: network handed to ``get_all_embeddings``.
        accuracy_calculator: object whose ``get_accuracy`` returns a mapping
            containing the key ``"precision_at_1"``.

    Returns:
        The ``"precision_at_1"`` entry of the accuracy mapping.
    """
    reference_emb, reference_lbl = get_all_embeddings(train_set, model)
    query_emb, query_lbl = get_all_embeddings(test_set, model)
    print("Computing accuracy")
    # Positional order: query, reference, query labels, reference labels, flag.
    # NOTE(review): the trailing False presumably says queries and references come
    # from different sources — confirm against the installed get_accuracy signature.
    scores = accuracy_calculator.get_accuracy(
        query_emb, reference_emb, query_lbl, reference_lbl, False
    )
    precision_at_1 = scores["precision_at_1"]
    print("Test set accuracy (Precision@1) = {}".format(precision_at_1))
    return precision_at_1

In [16]:
### pytorch-metric-learning stuff ###
# Similarity measure used by the miner below.
distance = distances.CosineSimilarity()
# NOTE(review): `reducer` is not passed to anything in the cells shown here — confirm
# whether it is still needed.
reducer = reducers.MeanReducer()

# The miner and the accuracy calculator do not depend on the loss, so build them first.
mining_func = miners.TripletMarginMiner(
    margin=0.2,
    distance=distance,
    type_of_triplets="semi-hard",
)
accuracy_calculator = AccuracyCalculator(include=("precision_at_1",), k=1)

# SoftTriple loss settings; the loss object is moved to the same device as the model.
soft_triple_kwargs = dict(
    num_classes=277,
    embedding_size=512,
    centers_per_class=5,
    la=20,
    gamma=0.1,
    margin=0.01,
)
loss_func = losses.SoftTripleLoss(**soft_triple_kwargs).to(device)

# A single Adam optimizer with two parameter groups: the network at lr 1e-4 and the
# loss function's own parameters at lr 1e-2.
optimizer_param_groups = [
    {"params": model.parameters(), "lr": 1e-4},
    {"params": loss_func.parameters(), "lr": 1e-2},
]
loss_optimizer = torch.optim.Adam(optimizer_param_groups)

In [None]:
### training loop: train one epoch, evaluate Precision@1, checkpoint periodically ###

for epoch in range(1, num_epochs + 1):
    # One pass over the training loader, then Precision@1 of the test set against the train set.
    train(model, loss_func, mining_func, device, train_loader, loss_optimizer, epoch)
    knn_acc = test(train_data, test_data, model, accuracy_calculator)

    # Checkpoint only after the first epoch and after every 20th epoch.
    if epoch != 1 and epoch % 20 != 0:
        continue

    # NOTE(review): `model` is the DataParallel wrapper, so the saved keys are presumably
    # prefixed with `module.` — confirm the loading code expects that.
    checkpoint_name = '{}_epoch{}_knnAcc{:.4f}.pt'.format(exp_name, epoch, knn_acc)
    torch.save(model.state_dict(), os.path.join(result_dir, checkpoint_name))


Epoch 1 Iteration 0: Loss = 5.140124320983887, Number of mined triplets = 13326
Epoch 1 Iteration 100: Loss = 3.8162100315093994, Number of mined triplets = 7420
Epoch 1 Iteration 200: Loss = 2.860548496246338, Number of mined triplets = 7212
Epoch 1 Iteration 300: Loss = 2.2279305458068848, Number of mined triplets = 6073
Epoch 1 Iteration 400: Loss = 1.239741563796997, Number of mined triplets = 4349
Epoch 1 Iteration 500: Loss = 1.0321711301803589, Number of mined triplets = 3183
Epoch 1 Iteration 600: Loss = 1.0425498485565186, Number of mined triplets = 4776
Epoch 1 Iteration 700: Loss = 0.6735363602638245, Number of mined triplets = 3626
Epoch 1 Iteration 800: Loss = 0.4943087697029114, Number of mined triplets = 4728
Epoch 1 Iteration 900: Loss = 0.14444376528263092, Number of mined triplets = 4051
Epoch 1 Iteration 1000: Loss = 0.13914911448955536, Number of mined triplets = 6326
Epoch 1 Iteration 1100: Loss = 0.08922342956066132, Number of mined triplets = 5048
Epoch 1 Iterati

100%|██████████| 75/75 [00:10<00:00,  7.21it/s]
100%|██████████| 19/19 [00:08<00:00,  2.33it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.5810810951516032
Epoch 2 Iteration 0: Loss = 0.032736364752054214, Number of mined triplets = 6161
Epoch 2 Iteration 100: Loss = 0.02631489746272564, Number of mined triplets = 4329
Epoch 2 Iteration 200: Loss = 0.033279165625572205, Number of mined triplets = 5718
Epoch 2 Iteration 300: Loss = 0.031628288328647614, Number of mined triplets = 4197
Epoch 2 Iteration 400: Loss = 0.010961171239614487, Number of mined triplets = 5924
Epoch 2 Iteration 500: Loss = 0.012311478145420551, Number of mined triplets = 5192
Epoch 2 Iteration 600: Loss = 0.0054053086787462234, Number of mined triplets = 4318
Epoch 2 Iteration 700: Loss = 0.004264707677066326, Number of mined triplets = 5287
Epoch 2 Iteration 800: Loss = 0.00423540361225605, Number of mined triplets = 4576
Epoch 2 Iteration 900: Loss = 0.0050986148416996, Number of mined triplets = 7239
Epoch 2 Iteration 1000: Loss = 0.01993974670767784, Number of mined triplets = 6851
