In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch._C import Size
from torch.nn.modules.module import T
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
import copy


# Single seed shared by every random number generator seeded below.
SEED = 1234

import random
# Seed Python's, NumPy's and PyTorch's (CPU and CUDA) generators with the same value.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN to select deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Root folder handed to torchvision's ImageFolder (one sub-folder per class).
data_dir = 'TestSet_cropped'

batch_size = 512
epochs = 8
workers = 0 if os.name == 'nt' else 8 

# Prefer GPU 3 (the original setting), but fall back to GPU 0 when the host has
# fewer GPUs, and to the CPU when CUDA is not available at all.
_preferred_gpu = 3
if torch.cuda.is_available():
    _gpu_index = _preferred_gpu if torch.cuda.device_count() > _preferred_gpu else 0
    device = torch.device('cuda:{}'.format(_gpu_index))
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# NOTE: this untransformed dataset and `loader` are rebound further down in this
# cell; they are kept only so the module-level names stay defined as before.
dataset = datasets.ImageFolder(data_dir)


loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization # [0,255] -> [-1,1]
])

# Transformed dataset that feeds `test_loader`, one image per batch.
dataset = datasets.ImageFolder(data_dir, transform=trans)
img_inds = np.arange(len(dataset))

test_loader = DataLoader( 
    dataset,
    num_workers=workers,
    batch_size=1,
)
resnet = InceptionResnetV1(
    classify=True,
#    pretrained='vggface2',
    pretrained='casia-webface', #'vggface2'
    num_classes=len(dataset.class_to_idx)
).to(device)

# map_location remaps the checkpoint's tensors onto `device`, so a state dict saved
# on a different GPU index (or on a GPU at all) still loads on this machine.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
resnet.load_state_dict(
    torch.load('./Rushi_GeorgiaTech-TestDataset_StateDict.pth', map_location=device)
)

loss_fn = torch.nn.CrossEntropyLoss()
metrics = { 
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

import cv2
import mediapipe as mp
import numpy as np
from math import hypot
from imutils import paths
import os


# Face Mesh
mp_face_mesh = mp.solutions.face_mesh
# static_image_mode=True makes MediaPipe run face detection on every call instead of
# tracking landmarks from the previously processed image. The notebook processes
# unrelated still images, so video-style tracking across them is not appropriate.
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True)

# Face detector/aligner; pass_epoch uses it to re-crop the patched frame before
# the frame is classified again.
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

def collate_fn(x):
    """Collate a batch by returning only its first element, unwrapped."""
    first_sample = x[0]
    return first_sample

# Rebind `dataset` to an ImageFolder created without a transform. `test_loader`
# was built earlier from the transformed dataset object and keeps using that one.
dataset = datasets.ImageFolder(data_dir)
# Inverse lookup: class index -> class (folder) name.
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
# Loader over the untransformed dataset; collate_fn returns the first sample of each batch.
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)

def get_landmarks(location, frame_patch_width, frame_patch_height, global_patch_width, facial_landmarks):
    """Return a square patch centred on the face-mesh landmark for ``location``.

    Args:
        location: One of ``'Nose'``, ``'Chin'``, ``'Cheek'`` or ``'Forehead'``.
        frame_patch_width: Frame width in pixels; scales the landmark's ``x``.
        frame_patch_height: Frame height in pixels; scales the landmark's ``y``.
        global_patch_width: Side length of the square patch, in pixels.
        facial_landmarks: Object whose ``landmark`` sequence holds points with
            ``x`` / ``y`` attributes (multiplied by the frame size here).

    Returns:
        ``(patch_width, patch_height, top_left)`` where ``top_left`` is an
        ``(x, y)`` tuple of ints. The coordinates are not clipped to the frame.

    Raises:
        ValueError: If ``location`` is not a supported name. Previously this
            surfaced as an ``UnboundLocalError`` at the return statement.
    """
    # Landmark index used as the patch centre for each supported location.
    center_landmark_index = {
        'Nose': 195,
        'Chin': 199,
        'Cheek': 205,
        'Forehead': 151,
    }
    if location not in center_landmark_index:
        raise ValueError(
            "Unsupported location {!r}; expected one of {}".format(
                location, sorted(center_landmark_index)))

    anchor = facial_landmarks.landmark[center_landmark_index[location]]
    center = (anchor.x * frame_patch_width, anchor.y * frame_patch_height)

    patch_width = global_patch_width
    patch_height = int(patch_width)

    top_left = (int(center[0] - patch_width / 2),
                int(center[1] - patch_height / 2))

    return patch_width, patch_height, top_left


def get_multi_landmarks(location, frame_width, frame_height, target_patch_size_w, target_patch_size_h, facial_landmarks):
    """Return nose, forehead and cheek patches for a multi-region attack.

    Args:
        location: ``'N-F-C'`` (nose, forehead, cheek) or ``'N-F'`` (nose and
            forehead only).
        frame_width: Frame width in pixels; scales each landmark's ``x``.
        frame_height: Frame height in pixels; scales each landmark's ``y``.
        target_patch_size_w: Patch width in pixels.
        target_patch_size_h: Patch height in pixels.
        facial_landmarks: Object whose ``landmark`` sequence holds points with
            ``x`` / ``y`` attributes (multiplied by the frame size here).

    Returns:
        A flat 9-tuple ``(nose_w, nose_h, nose_top_left, forehead_w, forehead_h,
        forehead_top_left, cheek_w, cheek_h, cheek_top_left)``; each
        ``*_top_left`` is an ``(x, y)`` tuple of ints. For ``'N-F'`` the cheek
        patch has width and height 0, so callers that slice with it copy
        nothing while the 9-value return shape stays the same.

    Raises:
        ValueError: If ``location`` is neither ``'N-F-C'`` nor ``'N-F'``.
            Previously any value other than ``'N-F-C'`` (including ``'N-F'``)
            failed with ``UnboundLocalError`` at the return statement.
    """
    if location not in ('N-F-C', 'N-F'):
        raise ValueError(
            "Unsupported location {!r}; expected 'N-F-C' or 'N-F'".format(location))

    def centered_patch(landmark_index, patch_width, patch_height):
        # Patch of the requested size centred on one landmark.
        anchor = facial_landmarks.landmark[landmark_index]
        center = (anchor.x * frame_width, anchor.y * frame_height)
        top_left = (int(center[0] - patch_width / 2),
                    int(center[1] - patch_height / 2))
        return patch_width, patch_height, top_left

    nose = centered_patch(195, target_patch_size_w, target_patch_size_h)
    forehead = centered_patch(151, target_patch_size_w, target_patch_size_h)
    if location == 'N-F-C':
        cheek = centered_patch(205, target_patch_size_w, target_patch_size_h)
    else:
        # 'N-F': zero-sized cheek patch so the cheek region is left untouched.
        cheek = centered_patch(205, 0, 0)

    return nose + forehead + cheek


def fgsm_attack(image, epsilon, data_grad):
    """Apply one Fast Gradient Sign Method step to ``image``.

    Args:
        image: Input tensor to perturb.
        epsilon: Step size multiplied with the sign of the gradient.
        data_grad: Gradient of the loss with respect to ``image``.

    Returns:
        ``(perturbed_image, noise_matrix)``: the perturbed input clamped to
        [-1, 1], and the unclamped perturbation ``epsilon * sign(data_grad)``.
    """
    # The step direction is the element-wise sign of the gradient, scaled by epsilon.
    noise_matrix = epsilon * data_grad.sign()
    # Clamp so the result stays inside the [-1, 1] range.
    perturbed_image = torch.clamp(image + noise_matrix, -1, 1)
    return perturbed_image, noise_matrix

# Module-level accumulators, one per epsilon value (0.05, 0.1, 0.2, 0.4, 0.8, 1).
# pass_epoch appends to these and also returns them.
loop_005, loop_01, loop_02, loop_04, loop_08, loop_1 = ([] for _ in range(6))

def _to_uint8_image(chw_tensor):
    """Convert a CHW tensor in the standardized [-1, 1] range to an HWC uint8 array."""
    img = chw_tensor.detach().cpu().numpy()
    img = np.transpose(img, (1, 2, 0))
    # Undo the [0,255] -> [-1,1] standardization used by the input pipeline.
    img = (img * 128) + 127.5
    return img.astype(np.uint8)


def _paste_adversarial_patches(frame, adv_img, location, size, facial_landmarks):
    """Copy the attacked region(s) of ``adv_img`` into ``frame`` (in place)."""
    frame_height, frame_width, _ = frame.shape

    if location in ('N-F-C', 'N-F'):
        (nose_w, nose_h, nose_tl,
         forehead_w, forehead_h, forehead_tl,
         cheek_w, cheek_h, cheek_tl) = get_multi_landmarks(
            location, frame_width, frame_height, size, size, facial_landmarks)
        patches = [
            (nose_w, nose_h, nose_tl),
            (forehead_w, forehead_h, forehead_tl),
            (cheek_w, cheek_h, cheek_tl),
        ]
    else:
        patches = [get_landmarks(location, frame_width, frame_height, size, facial_landmarks)]

    for patch_w, patch_h, (left, top) in patches:
        # Slice assignment copies the pixels, so no explicit deepcopy is needed.
        frame[top: top + patch_h, left: left + patch_w] = \
            adv_img[top: top + patch_h, left: left + patch_w]


def _detect_aligned_face(frame, device):
    """Run MTCNN on ``frame``; return a 1-image batch on ``device`` or None if no face."""
    x_aligned, _prob = mtcnn(frame, return_prob=True)
    if x_aligned is None:
        return None
    return torch.stack([x_aligned]).to(device)


def _record_loop_count(epsilon_value, count):
    """Append ``count`` to the module-level list tracking ``epsilon_value`` (if any)."""
    buckets = {
        0.05: loop_005,
        0.1: loop_01,
        0.2: loop_02,
        0.4: loop_04,
        0.8: loop_08,
        1: loop_1,
    }
    bucket = buckets.get(epsilon_value)
    # Compare against None: an empty list is falsy but is still a valid bucket.
    if bucket is not None:
        bucket.append(count)


def pass_epoch(
    model, eps, patch, LOCATION, SIZE, loss_fn, loader,
    device='cuda', max_iters=200
):
    """Run a localized, iterative FGSM attack over every image in ``loader``.

    For each correctly classified image, one FGSM step is computed on the input
    and only the patch region(s) selected by ``LOCATION`` are copied into the
    image. The patched frame is re-aligned with MTCNN and classified again. If
    the prediction is unchanged, the step is repeated on the aligned face, up
    to ``max_iters`` times, until the prediction differs from the label.

    Args:
        model: Classifier under attack.
        eps: FGSM step size (epsilon).
        patch: Unused; kept so existing callers keep working.
        LOCATION: ``'Nose'``, ``'Chin'``, ``'Cheek'``, ``'Forehead'`` (handled by
            ``get_landmarks``) or ``'N-F-C'`` / ``'N-F'`` (handled by
            ``get_multi_landmarks``).
        SIZE: Side length in pixels of each square patch.
        loss_fn: Loss used to obtain the input gradient.
        loader: Iterable of ``(X, y)`` batches. Each batch must hold exactly one
            image, because predictions and labels are read with ``.item()``.
        device: Device that inputs are moved to.
        max_iters: Maximum number of iterative steps after the first one.

    Returns:
        ``(num_fooled, epsilon_value, loop_005, loop_01, loop_02, loop_04,
        loop_08, loop_1, fool_rate)``. The ``loop_*`` values are the
        module-level lists of per-image step counts, appended to in place.
        ``fool_rate`` is ``num_fooled`` divided by the number of images actually
        attacked, or 0.0 when none were.

    Changes from the previous implementation:
        * eps 0.8 now records the real step count instead of a constant 1.
        * Single-location attacks now write the new patch into the frame on
          every iteration (previously only the multi-location branch did).
        * Images where FaceMesh or MTCNN finds no face are skipped and
          excluded from the fool rate, instead of crashing or silently
          re-using a stale ``faces`` tensor.
        * Success is checked before the iteration limit, so a success on the
          last iteration is recorded rather than reported as a failure.
        * The unused read of ``BD.jpg`` (which crashed when the file was
          missing) and other unused locals were removed.
    """
    epsilon_value = eps
    location = LOCATION

    num_fooled = 0
    num_initial_error = 0
    num_skipped = 0

    for i_batch, (X, y) in enumerate(loader):
        X = X.to(device)
        y = y.to(device)
        label = y.item()

        X.requires_grad = True

        y_pred = model(X)
        loss_batch = loss_fn(y_pred, y)

        init_pred = y_pred.max(1, keepdim=True)[1] # get the index of the max log-probability

        # If the initial prediction is wrong, don't bother attacking, just move on
        if init_pred.item() != label:
            num_initial_error += 1
            print("INSIDE")
            print(init_pred, y)
            continue

        model.zero_grad()
        loss_batch.backward()
        perturbed_data, _ = fgsm_attack(X, epsilon_value, X.grad.data)

        ####### FRAME OPERATIONS NOW ###########

        frame = _to_uint8_image(X[0])
        result = face_mesh.process(frame)

        if not result.multi_face_landmarks:
            # No landmarks means there is nowhere to place the patch.
            num_skipped += 1
            print("No face landmarks found, skipping IMAGE - ", i_batch)
            continue

        adv_img = _to_uint8_image(perturbed_data[0])
        for facial_landmarks in result.multi_face_landmarks:
            _paste_adversarial_patches(frame, adv_img, location, SIZE, facial_landmarks)

        faces = _detect_aligned_face(frame, device)
        if faces is None:
            num_skipped += 1
            print("MTCNN found no face in the patched frame, skipping IMAGE - ", i_batch)
            continue

        # Prediction check only; no gradient is needed here.
        with torch.no_grad():
            output = model(faces)
        final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

        if final_pred.item() != label:
            print("Fooled in the first iteration itself, IMAGE - ", i_batch)
            _record_loop_count(epsilon_value, 1)
        else:
            for i in range(max_iters):
                # Refresh the gradient with respect to the current aligned face.
                faces.requires_grad = True
                y_pred = model(faces)
                loss_batch = loss_fn(y_pred, y)

                model.zero_grad()
                loss_batch.backward()

                perturbed_data, _ = fgsm_attack(faces, epsilon_value, faces.grad.data)
                adv_img = _to_uint8_image(perturbed_data[0])

                # Uses the landmarks of the last face found on the original frame.
                _paste_adversarial_patches(frame, adv_img, location, SIZE, facial_landmarks)

                next_faces = _detect_aligned_face(frame, device)
                if next_faces is None:
                    # Keep the last valid prediction and stop attacking this image.
                    print("MTCNN lost the face for Image - {0}, stopping attack".format(i_batch))
                    break
                faces = next_faces

                with torch.no_grad():
                    output = model(faces)

                # Check for success
                final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability

                if final_pred.item() != label:
                    # +2: one initial step before the loop plus the zero-based index.
                    print("For epsilon - {0} and Image - {1} it took {2} loops".format(epsilon_value, i_batch, i+2))
                    _record_loop_count(epsilon_value, i + 2)
                    break
            else:
                print("Breaking out of loop for Image - {0} didn't fool".format(i_batch))

        if final_pred.item() != label:
            num_fooled += 1

    num_attacked = len(loader) - num_initial_error - num_skipped
    # Guard against a run in which no image was attacked.
    fool_rate = num_fooled / float(num_attacked) if num_attacked > 0 else 0.0
    print("Epsilon: {}\tFool Rate = {} / {} = {}".format(epsilon_value, num_fooled, num_attacked, fool_rate))

    return num_fooled, epsilon_value, loop_005, loop_01, loop_02, loop_04, loop_08, loop_1, fool_rate






Running on device: cuda:3


In [5]:
import time

fool_rates = []
examples = []

# ==== Parameter settings ====

#epsilons = [0.1]
epsilons = [0.1, 0.2, 0.4, 0.8, 1]
#epsilons = [0.05, 0.1, 0.2, 0.4, 1]
#LOCATIONS = ['Nose','Forehead','Cheek', 'Chin']
LOCATION = 'N-F-C'
#TARGET_PATCH_SIZES = [30]
TARGET_PATCH_SIZES = [30] # 15, 20, 35, 40

# ============================

resnet.eval()

print('Calcuate non_adversarial accuracy')
moderu = resnet
missNumber = 0
photoNumber = 0

# Clean-accuracy pass, currently disabled:
# for j_batch, (A, b) in enumerate(test_loader):
#         A = A.to(device)
#         b = b.to(device)
#         A.requires_grad = True
#         b_pred = moderu(A)
#         loss_batch = loss_fn(b_pred, b)

#         first_pred = b_pred.max(1, keepdim=True)[1] # get the index of the max log-probability
        
#         #If the initial prediction is wrong, dont bother attacking, just move on
#         #print('pred: {}  : label: {}'.format(first_pred.item(),b.item()))
#         photoNumber =  photoNumber+1
#         if first_pred.item() != b.item():
#             print("Can'tPredict")
#             print(first_pred, b)
#             missNumber = missNumber+1
#             continue
             
# seitouritu = (missNumber/photoNumber)*100
# print('{}: {} : {}'.format(missNumber, photoNumber, seitouritu))

print('LOCATION TEST')

result = {}
eps_dict = {0.05 : "loop_005", 0.1 : "loop_01", 0.2 : "loop_02", 0.4 : "loop_04", 0.8 : "loop_08", 1 : "loop_1"}
             
for TARGET_PATCH_SIZE in TARGET_PATCH_SIZES:

    patch = False

    # makedirs also creates the missing parent folder and tolerates existing ones.
    pic_dir = f'./Pic_Base250/{LOCATION}_{TARGET_PATCH_SIZE}'
    array_dir = f'./Array_Base250/{LOCATION}_{TARGET_PATCH_SIZE}'
    os.makedirs(pic_dir, exist_ok=True)
    os.makedirs(array_dir, exist_ok=True)

    # Start every patch size from empty accumulators so counts left behind by an
    # interrupted or earlier run cannot leak into this run's CSV files.
    loop_005 = []
    loop_01 = []
    loop_02 = []
    loop_04 = []
    loop_08 = []
    loop_1 = []

    start = time.time()
    print(LOCATION)
    fool_nums = []
    shift_fool_rates = np.zeros((4,2))

    for eps in epsilons:
        num_fooled, eps_value, loop_005, loop_01, loop_02, loop_04, loop_08, loop_1, fool_rate = pass_epoch(
            resnet, eps, patch, LOCATION, TARGET_PATCH_SIZE, loss_fn, test_loader, device=device
        )
        # Keep the per-epsilon results; fool_nums is written to fool_nums.csv below.
        fool_nums.append(num_fooled)
        fool_rates.append(fool_rate)

    end = time.time() - start

    print("Time taken for this run - ", end)

    # File-name label -> step counts recorded for that epsilon.
    loop_counts_by_eps = {
        '0.05': loop_005,
        '0.10': loop_01,
        '0.20': loop_02,
        '0.40': loop_04,
        '0.80': loop_08,
        '1.00': loop_1,
    }

    for loop_counts in loop_counts_by_eps.values():
        print(len(loop_counts))

    for eps_label, loop_counts in loop_counts_by_eps.items():
        np.savetxt(f'{array_dir}/Loop_{eps_label}.csv', loop_counts, delimiter=',')

    np.savetxt(f'{array_dir}/fool_nums.csv', fool_nums, delimiter=',')
    loop_005 = []
    loop_01 = []
    loop_02 = []
    loop_04 = []
    loop_08 = []
    loop_1 = []

Calcuate non_adversarial accuracy
LOCATION TEST
N-F-C
For epsilon - 0.1 and Image - 0 it took 3 loops
For epsilon - 0.1 and Image - 1 it took 8 loops
For epsilon - 0.1 and Image - 2 it took 10 loops
For epsilon - 0.1 and Image - 3 it took 5 loops
For epsilon - 0.1 and Image - 4 it took 8 loops
For epsilon - 0.1 and Image - 5 it took 4 loops
For epsilon - 0.1 and Image - 6 it took 3 loops
For epsilon - 0.1 and Image - 7 it took 2 loops
For epsilon - 0.1 and Image - 8 it took 3 loops
For epsilon - 0.1 and Image - 9 it took 6 loops
For epsilon - 0.1 and Image - 10 it took 2 loops
For epsilon - 0.1 and Image - 11 it took 2 loops
For epsilon - 0.1 and Image - 12 it took 4 loops
For epsilon - 0.1 and Image - 13 it took 2 loops
For epsilon - 0.1 and Image - 14 it took 2 loops
For epsilon - 0.1 and Image - 15 it took 7 loops
For epsilon - 0.1 and Image - 16 it took 9 loops
For epsilon - 0.1 and Image - 17 it took 7 loops
For epsilon - 0.1 and Image - 18 it took 3 loops
For epsilon - 0.1 and Im

In [24]:
# Save the first image of the `faces` batch as a PNG for visual inspection.
# NOTE(review): no visible cell defines `faces` at module level (it is only a local
# variable inside pass_epoch), so this cell appears to rely on leftover kernel state
# and would presumably raise NameError after Restart & Run All - confirm.
frame = faces[0].detach().cpu().numpy()
frame = np.transpose(frame, (1,2,0))  # CHW -> HWC
frame = (frame*128)+127.5  # map the standardized values back to a 0-255 scale
frame = frame.astype(np.uint8)

print(faces.shape)
# Swap the first and third channels before writing with OpenCV.
# NOTE(review): if `frame` is RGB here, the intent is presumably RGB -> BGR for
# cv2.imwrite; the constant is named the other way round - confirm.
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

cv2.imwrite("Multiple-N-F-C.png", frame)

torch.Size([1, 3, 160, 160])


True