## Siamese

In [10]:
import os
os.chdir('../phishpedia/')
from siamese.model_resnetv2 import *
import torch.utils.data as data
from PIL import Image, ImageOps
import pickle
import numpy as np
from siamese.utils import *
# Restrict this process to GPUs 0 and 1. The variable name must match exactly
# (the previous key had a trailing space, so the setting was silently ignored),
# and it has to be set before CUDA is first initialised in this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

- Create cropped logos for the 1000 sampled phishing pages

In [9]:
data_folder = 'benchmark/Sample_phish1000/'
annot_file = 'benchmark/phish1000_coord.txt'
crop_folder = 'benchmark/Sample_phish1000_crop'
os.makedirs(crop_folder, exist_ok=True)

# Parse the annotation file once instead of re-reading it for every site.
# Each line is "<site>,<x_min>,<y_min>,<x_max>,<y_max>"; the site name may
# itself contain commas, so only the last four fields are coordinates.
gt_boxes = {}
with open(annot_file, 'r') as annotation_file:
    for line in annotation_file:
        annotation = line.strip().split(',')
        if len(annotation) < 5:
            # Blank or malformed line: no site name in front of the coordinates.
            continue
        site = ','.join(annotation[:-4])
        # Keep the first box listed for a site (the first match wins).
        gt_boxes.setdefault(site, annotation[-4:])

for brand in os.listdir(data_folder):
    if brand not in gt_boxes:
        # Without this guard the crop would silently reuse the previous
        # site's coordinates and write a wrong logo image.
        print('No ground-truth box for {}, skipping'.format(brand))
        continue

    x_min_gt, y_min_gt, x_max_gt, y_max_gt = map(float, gt_boxes[brand])

    # The context manager closes the screenshot file once the crop is saved.
    with Image.open(os.path.join(data_folder, brand, 'shot.png')) as img:
        cropped = img.crop((x_min_gt, y_min_gt, x_max_gt, y_max_gt))
        cropped.save(os.path.join(crop_folder, brand + '.png'))


- Load data

In [121]:
## dataloader 
class GetLoader(data.Dataset):
    """Dataset of logo images whose brand label is encoded in the file name.

    Every entry of ``data_root`` is treated as an image file. Its raw label is
    the part of the file name before ``'.png'`` and before the first ``'+'``.
    The raw label is normalised with ``brand_converter`` (with a few special
    cases, see ``_resolve_label``) and files labelled ``'Instagram'`` are
    skipped.

    Args:
        data_root: Directory containing the image files.
        label_dict: Path to a pickle file holding a mapping that is indexed by
            label in ``__getitem__``; its keys are exposed as ``self.classes``.
        transform: Optional callable applied to the resized PIL image.
        grayscale: If True, the image is converted to grayscale and then back
            to a 3-channel RGB image.

    ``__getitem__`` returns ``(img, label)`` where ``img`` is a 128x128 RGB
    image (after ``transform``, if given) and ``label`` is
    ``self.label_dict[<label>]``. A label missing from the mapping raises
    ``KeyError`` at access time.
    """

    def __init__(self, data_root, label_dict, transform=None, grayscale=False):
        self.transform = transform
        self.data_root = data_root
        self.grayscale = grayscale

        # NOTE: pickle.load can execute arbitrary code; only point this at a
        # label file from a trusted source.
        with open(label_dict, 'rb') as handle:
            self.label_dict = pickle.load(handle)

        self.classes = list(self.label_dict.keys())

        self.img_paths = []
        self.labels = []

        # os.listdir returns entries in arbitrary order; sorting makes sample
        # indices reproducible across runs and machines.
        for file_name in sorted(os.listdir(data_root)):
            raw_label = file_name.split('.png')[0].split('+')[0]
            label = self._resolve_label(raw_label)
            if label is None:
                continue
            self.labels.append(label)
            self.img_paths.append(file_name)

        self.n_data = len(self.img_paths)

    @staticmethod
    def _resolve_label(raw_label):
        """Map a raw file-name label to the key used in the label dict.

        Returns None for files that must be left out of the dataset.
        """
        brand = brand_converter(raw_label)
        if brand == 'Microsoft':
            # Labels that normalise to 'Microsoft' keep their raw name.
            return raw_label
        if brand == 'DHL Airways':
            return 'DHL'
        if brand == 'DGI French Tax Authority':
            return 'DGI (French Tax Authority)'
        if raw_label == 'Instagram':
            return None
        return brand

    def __getitem__(self, item):
        img_path, label = self.img_paths[item], self.labels[item]
        img_path_full = os.path.join(self.data_root, img_path)

        # convert() returns an independent, fully loaded copy, so the file can
        # be closed as soon as the conversion is done (no leaked handles).
        with Image.open(img_path_full) as img_file:
            if self.grayscale:
                img = img_file.convert('L').convert('RGB')
            else:
                img = img_file.convert('RGB')

        # Pad the shorter side with white so the logo is roughly centred on a
        # square canvas before resizing. Integer halving is applied to both
        # borders, so an odd size difference leaves the canvas one pixel short
        # of square; this is kept unchanged so preprocessing stays the same.
        side = max(img.size)
        pad_w = (side - img.size[0]) // 2
        pad_h = (side - img.size[1]) // 2
        img = ImageOps.expand(img, (pad_w, pad_h, pad_w, pad_h), fill=(255, 255, 255))
        img = img.resize((128, 128))

        label = self.label_dict[label]
        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        return self.n_data

In [122]:
# Per-channel normalisation constants passed to Normalize below.
mean = [0.5] * 3
std = [0.5] * 3

# NOTE(review): `transforms` is not imported explicitly in the visible cells;
# presumably it arrives through one of the star imports - confirm.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=mean, std=std)
img_transforms = transforms.Compose([to_tensor, normalize])

# Evaluation dataset built from the cropped logos of the sampled phishing pages.
valid_set = GetLoader(
    data_root='/home/l/liny/ruofan/phishpedia/benchmark/Sample_phish1000_crop/',
    label_dict='/home/l/liny/ruofan/phishpedia/benchmark/targetlist_labeldict.pkl',
    transform=img_transforms,
)

# train_set = GetLoader(data_root='/home/l/liny/ruofan/phishpedia/benchmark/Sample_phish1000_crop/', 
#                     label_dict='/home/l/liny/ruofan/phishpedia/benchmark/targetlist_labeldict.pkl',
#                      transform=img_transforms)

In [123]:
# Load the label mapping from the same pickle file that GetLoader reads.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
# NOTE(review): `label_dict` does not appear to be used by the later cells
# visible here - confirm whether this cell is still needed.
with open('/home/l/liny/ruofan/phishpedia/benchmark/targetlist_labeldict.pkl', 'rb') as handle:
    label_dict = pickle.load(handle)

In [124]:
# train_loader = torch.utils.data.DataLoader(
#   train_set, batch_size=64, shuffle=False, pin_memory=True, drop_last=False)

# Evaluation loader: one sample per batch, fixed order (shuffle=False), and the
# final batch is kept (drop_last=False).
# NOTE(review): `torch` is only imported explicitly in a later cell; presumably
# it is available here through the star imports at the top - confirm.
valid_loader = torch.utils.data.DataLoader(
  valid_set, batch_size=1, shuffle=False, pin_memory=True, drop_last=False)


In [125]:
# Number of batches in the loader (displayed as the cell output).
len(valid_loader)

993

- Accuracy function

In [26]:
def compute_acc(dataloader, model, device):
    """Compute and print the top-1 classification accuracy of ``model``.

    Args:
        dataloader: Iterable yielding ``(x, y)`` batches, where ``x`` is the
            model input and ``y`` holds the integer class index per sample.
        model: Callable returning per-class scores with classes on dim 1.
            It is not switched to eval mode here; the caller must do that.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The fraction of correctly classified samples as a float, or ``nan``
        when the dataloader yields no samples.
    """
    correct = 0
    total = 0

    # Gradients are never needed for evaluation; enter no_grad once.
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            logits = model(x)
            pred_cls = torch.argmax(logits, dim=1)

            correct += torch.sum(torch.eq(pred_cls, y)).item()
            total += y.shape[0]

    if total == 0:
        # Accuracy over zero samples is undefined; avoid ZeroDivisionError.
        print('Accuracy: undefined (no samples)')
        return float('nan')

    acc = correct / total
    # Neutral message: this function is used for the baseline model as well as
    # for the model with the replaced activation.
    print('Accuracy: {:.2f}'.format(acc))
    return acc

- Load model (original)

In [135]:
# Prefer the GPU when one is available, otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

checkpoint_path = '/home/l/liny/ruofan/phishpedia/siamese/checkpoints/resnetv2_rgb.pth'

# Build the network, then restore the weights mapped onto the selected device.
model = KNOWN_MODELS["BiT-M-R50x1"](head_size=180, zero_head=True)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model = model.to(device)

# Switch to inference mode; as the last expression this also displays the model.
model.eval()

ResNetV2(
  (root): Sequential(
    (conv): StdConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (pad): ConstantPad2d(padding=(1, 1, 1, 1), value=0)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (body): Sequential(
    (block1): Sequential(
      (unit01): PreActBottleneck(
        (gn1): GroupNorm(32, 64, eps=1e-05, affine=True)
        (conv1): StdConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (gn2): GroupNorm(32, 64, eps=1e-05, affine=True)
        (conv2): StdConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (gn3): GroupNorm(32, 64, eps=1e-05, affine=True)
        (conv3): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (relu): ReLU(inplace=True)
        (downsample): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (unit02): PreActBottleneck(
        (gn1): GroupNorm(32, 256, eps=1e-05, aff

In [17]:
# compute_acc(train_loader, model, device)

In [126]:
compute_acc(valid_loader, model, device)

Accuracy after changing relu function: 0.93


0.9345417925478349

- Load model (change activation function)

In [127]:
import torch
import torch.nn.functional as F
from torch import nn

In [128]:
class QuantizeRelu(nn.Module):
    """ReLU whose positive outputs are rounded down to a multiple of ``step_size``.

    Negative inputs (including ``-inf``) map to 0; non-negative inputs ``x``
    map to ``floor(x / step_size) * step_size``. NaN inputs stay NaN.

    Args:
        step_size: Width of one quantisation bin; must be a positive number.

    Raises:
        ValueError: If ``step_size`` is not strictly positive.
    """

    def __init__(self, step_size = 0.01):
        super().__init__()
        # `not > 0` also rejects NaN; a zero or negative step would turn
        # every activation into inf/NaN or flip its sign.
        if not step_size > 0:
            raise ValueError('step_size must be positive, got {}'.format(step_size))
        self.step_size = step_size

    def extra_repr(self):
        # Show the step size when the containing model is printed.
        return 'step_size={}'.format(self.step_size)

    def forward(self, x):
        # Tensor-valued divisor, so the division is done element-wise against
        # a tensor exactly as before.
        quantize = torch.ones_like(x) * self.step_size
        # Clip negatives first: masking afterwards would compute -inf * 0 = NaN
        # for -inf inputs.
        clipped = torch.clamp(x, min=0)
        out = torch.mul(torch.floor(torch.div(clipped, quantize)), self.step_size)
        # abs() normalises a possible -0.0 to +0.0.
        return torch.abs(out)

In [129]:
# Prefer the GPU when one is available, otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

checkpoint_path = '/home/l/liny/ruofan/phishpedia/siamese/checkpoints/resnetv2_rgb.pth'

# Build the network and restore the same weights as the baseline model.
model = KNOWN_MODELS["BiT-M-R50x1"](head_size=180, zero_head=True)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Replace the activation of the three units in the last block with the
# quantised ReLU; each unit gets its own module instance.
block4 = model.body.block4
for unit in (block4.unit01, block4.unit02, block4.unit03):
    unit.relu = QuantizeRelu()

model = model.to(device)

# Switch to inference mode; as the last expression this also displays the model.
model.eval()

ResNetV2(
  (root): Sequential(
    (conv): StdConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (pad): ConstantPad2d(padding=(1, 1, 1, 1), value=0)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (body): Sequential(
    (block1): Sequential(
      (unit01): PreActBottleneck(
        (gn1): GroupNorm(32, 64, eps=1e-05, affine=True)
        (conv1): StdConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (gn2): GroupNorm(32, 64, eps=1e-05, affine=True)
        (conv2): StdConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (gn3): GroupNorm(32, 64, eps=1e-05, affine=True)
        (conv3): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (relu): ReLU(inplace=True)
        (downsample): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (unit02): PreActBottleneck(
        (gn1): GroupNorm(32, 256, eps=1e-05, aff

In [130]:
# compute_acc(train_loader, model, device)

In [131]:
compute_acc(valid_loader, model, device)

Accuracy after changing relu function: 0.93


0.9345417925478349

- Attack

In [None]:
# Change the working directory before the import below; presumably this is
# what makes the `attack` package importable - confirm.
os.chdir('/home/l/liny/ruofan/Network_Signal/')
from attack.Attack import *

# NOTE(review): `criterion` is defined here but not passed to
# adversarial_attack in this cell - confirm whether it is still needed.
criterion = nn.CrossEntropyLoss()
# Run the 'jsma' attack over valid_loader against the current `model`.
# num_classes=180 matches the head_size the model was built with.
check = adversarial_attack(method='jsma', model=model, dataloader=valid_loader, 
                           device=device, num_classes=180, save_data=True)
# batch_attack() returns two values; only the first is kept, as `acc`.
acc, _ = check.batch_attack()

0
Test Accuracy = 1.0
1
Test Accuracy = 1.0
2
Test Accuracy = 1.0
3
Test Accuracy = 1.0
4
5
Test Accuracy = 0.8333333333333334
6
Test Accuracy = 0.8571428571428571
7
Test Accuracy = 0.875
tensor([[27]], device='cuda:0')
tensor([[15]], device='cuda:0')
8
Test Accuracy = 0.7777777777777778
9
Test Accuracy = 0.8
10
Test Accuracy = 0.8181818181818182
11
12
Test Accuracy = 0.7692307692307693
13
14
Test Accuracy = 0.7333333333333333
15
Test Accuracy = 0.75
16
Test Accuracy = 0.7647058823529411
17
Test Accuracy = 0.7777777777777778
18
Test Accuracy = 0.7894736842105263
tensor([[21]], device='cuda:0')
tensor([[15]], device='cuda:0')
19
Test Accuracy = 0.75
20
Test Accuracy = 0.7619047619047619
21
Test Accuracy = 0.7727272727272727
22
Test Accuracy = 0.782608695652174
23
Test Accuracy = 0.7916666666666666
24
Test Accuracy = 0.8
25
Test Accuracy = 0.8076923076923077
26
Test Accuracy = 0.8148148148148148
27
Test Accuracy = 0.8214285714285714
28
Test Accuracy = 0.8275862068965517
29
Test Accuracy 

In [None]:
# Display the first value returned by check.batch_attack() in the previous cell.
acc