In [1]:
import os
from utils import *
import utils
from agents import *
import time
import torch
import torch.nn as nn
from copy import deepcopy
import argparse

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument('--seeds',          type=int,       default=[2023, 2024, 2025])
parser.add_argument('--dataset',        type=str,       default='svhn')
parser.add_argument('--batch_size',     type=int,       default=256)
parser.add_argument('--model_name',     type=str,       default='vgg11')
parser.add_argument('--retrain',        type=bool,      default=False)
parser.add_argument('--unlearn_class',  type=list,      default=3)
args = parser.parse_args("")
args.time_str = time.strftime("%m-%d-%H-%M", time.localtime())
if args.dataset.lower() == 'fmnist':
    args.n_channels = 1
else:
    args.n_channels = 3

if args.dataset.lower() == 'cifar100':
    args.num_classes = 100
else:
    args.num_classes = 10
criterion = nn.CrossEntropyLoss()

In [5]:
def get_unlearn_dataloader(data_loader):
    dataset = data_loader.dataset
    _indices = data_loader.sampler.indices

    if args.dataset.lower() == 'svhn':
        train_targets = np.array(dataset.labels)[_indices]
    else:
        train_targets = np.array(dataset.labels)[_indices]
    unlearn_indices, remain_indices = [], []
    for i, target in enumerate(train_targets):
        if target in args.unlearn_class:
            unlearn_indices.append(i)
        else:
            remain_indices.append(i)

    unlearn_indices = np.array(_indices)[unlearn_indices]
    remain_indices = np.array(_indices)[remain_indices]

    unlearn_sampler = torch.utils.data.SubsetRandomSampler(unlearn_indices)
    unlearn_loader = torch.utils.data.DataLoader(dataset,
                                                batch_size=args.batch_size,
                                                sampler = unlearn_sampler,)

    remain_sampler = torch.utils.data.SubsetRandomSampler(remain_indices)
    remain_loader = torch.utils.data.DataLoader(dataset,
                                                batch_size=args.batch_size,
                                                sampler = remain_sampler)
    return remain_loader, unlearn_loader

def get_dataloader(args):
    train_loader, test_loader = utils._get_dataloader(args)

    indices = np.arange(len(train_loader.dataset))
    a = np.split(indices,[int(len(indices)*0.9), int(len(indices))])
    idx_train = a[0]
    idx_val = a[1]
    train_sampler = torch.utils.data.SubsetRandomSampler(idx_train)
    val_sampler = torch.utils.data.SubsetRandomSampler(idx_val)

    train_loader = torch.utils.data.DataLoader(train_loader.dataset,
                                                batch_size=args.batch_size,
                                                sampler=train_sampler)
    
    val_loader = torch.utils.data.DataLoader(train_loader.dataset,
                                                batch_size=args.batch_size,
                                                sampler=val_sampler)
    
    test_loader = torch.utils.data.DataLoader(test_loader.dataset,
                                            batch_size=args.batch_size,
                                            shuffle=False)
    
    if args.retrain:
        train_loader, _ = get_unlearn_dataloader(train_loader)
        val_loader, _ = get_unlearn_dataloader(val_loader)
        
    return train_loader, val_loader, test_loader

In [8]:
arxiv_name = 'original_model_12-15-02-49'
train_loader, val_loader, test_loader = get_dataloader(args)
train_targets_list = np.array(train_loader.dataset.labels)[train_loader.sampler.indices]
unlearn_indices = np.where(np.isin(train_targets_list, args.unlearn_class))[0]

# conver to the original indices
unlearn_indices = train_loader.sampler.indices[unlearn_indices]

unlearn_sampler = torch.utils.data.SubsetRandomSampler(unlearn_indices)
unlearn_subset_loader = torch.utils.data.DataLoader(train_loader.dataset, 
                                                    batch_size=args.batch_size, 
                                                    sampler=unlearn_sampler)
remain_class = np.setdiff1d(np.arange(args.num_classes), args.unlearn_class)

remain_indices = np.where(~np.isin(train_targets_list, args.unlearn_class))[0]
remain_indices = train_loader.sampler.indices[remain_indices]

remain_sampler = torch.utils.data.SubsetRandomSampler(remain_indices)
remain_loader = torch.utils.data.DataLoader(train_loader.dataset, 
                                            batch_size=args.batch_size, 
                                            sampler=remain_sampler)

Using downloaded and verified file: ../data/svhn/train_32x32.mat
Using downloaded and verified file: ../data/svhn/test_32x32.mat


In [10]:
for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    test_by_class(model, test_loader, i=args.unlearn_class)

print('------------ Retrained model ------------')
for i in range(3):
    model_r = get_model(args)
    try:
        model_r.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/retrain_model_{args.seeds[i]}.pth'))
        test_by_class(model_r, test_loader, i=args.unlearn_class)
    except:
        print('No retrained model')
        break

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
0.9203, 0.8537, 0.8166, 0.5749, 0.9076, 0.7718, 0.8655, 0.9029, 0.5687, 0.8458, Acc_f: 0.5749, Acc_r: 0.8281
0.8481, 0.8837, 0.9371, 0.6888, 0.8914, 0.7966, 0.7446, 0.8286, 0.7175, 0.6871, Acc_f: 0.6888, Acc_r: 0.8150
------------ Retrained model ------------
0.8739, 0.0000, 0.7756, 0.6971, 0.9738, 0.5168, 0.3804, 0.7855, 0.8873, 0.7386, Acc_f: 0.6971, Acc_r: 0.6591
0.8882, 0.0000, 0.8855, 0.4441, 0.8736, 0.7286, 0.7678, 0.8465, 0.6892, 0.8251, Acc_f: 0.4441, Acc_r: 0.7227
0.8331, 0.0000, 0.7308, 0.7366, 0.9164, 0.6976, 0.4148, 0.9584, 0.3898, 0.4069, Acc_f: 0.7366, Acc_r: 0.5942


In [11]:
Acc_f = 100*np.array([0.9362, 0.9198, 0.9410])
Acc_r = 100*np.array([0.9549, 0.9574, 0.9493])
print(f'Original model Acc_f: {Acc_f.mean():.2f} $\pm$ {Acc_f.std():.2f}')
print(f'Original model Acc_r: {Acc_r.mean():.2f} $\pm$ {Acc_r.std():.2f}')

Acc_r = 100*np.array([0.9174, 0.9585, 0.9302])
print(f'Retrained model: {Acc_r.mean():.2f} $\pm$ {Acc_r.std():.2f}')


Original model Acc_f: 93.23 $\pm$ 0.91
Original model Acc_r: 95.39 $\pm$ 0.34
Retrained model: 93.54 $\pm$ 1.72


## Ours

## Random labeling

In [12]:
Acc_r, Acc_f = np.zeros(3), np.zeros(3)
for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    sgd_mr_model = deepcopy(model)
    test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.0001)
    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()

    for ep in range(20):
        for batch, (x, y) in enumerate(unlearn_subset_loader):
            x = x.cuda()
            y = torch.from_numpy(np.random.choice(remain_class, size=x.shape[0])).cuda()
            pred_y = sgd_mr_model(x)
            loss = criterion(pred_y, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        Acc_r[i], Acc_f[i] = test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
[train] epoch 0, batch 29, loss 4.340445041656494
0.9753, 0.8763, 0.9250, 0.4476, 0.8906, 0.7118, 0.7142, 0.8975, 0.5958, 0.7273, Acc_f: 0.4476, Acc_r: 0.8126
[train] epoch 1, batch 29, loss 3.316979169845581
0.9868, 0.8313, 0.9147, 0.3102, 0.8748, 0.6023, 0.6009, 0.8737, 0.4325, 0.5875, Acc_f: 0.3102, Acc_r: 0.7449
[train] epoch 2, batch 29, loss 2.9311957359313965
0.9903, 0.7988, 0.9019, 0.2425, 0.8637, 0.4987, 0.4729, 0.8470, 0.3235, 0.4865, Acc_f: 0.2425, Acc_r: 0.6870
[train] epoch 3, batch 29, loss 2.763293504714966
0.9908, 0.7803, 0.8915, 0.1957, 0.8510, 0.4362, 0.3728, 0.8257, 0.2614, 0.4176, Acc_f: 0.1957, Acc_r: 0.6475
[train] epoch 4, batch 29, loss 2.7665843963623047
0.9914, 0.7762, 0.8812, 0.1728, 0.8470, 0.3863, 0.3197, 0.8123, 0.2151, 0.3561, Acc_f: 0.1728, Acc_r: 0.6206
[train] epoch 5, batch 29, loss 2.5609397888183594
0.9920, 0.7737, 0.8752, 0.1554, 0.8438, 0.3

In [13]:
Acc_f = 100*np.array([0.0139, 0.0111, 0.0101])
Acc_r = 100*np.array([0.8372, 0.8010, 0.6379])
print(f'Random label Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'Random label Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

Random label Acc_f: 1.17 \pm 0.16
Random label Acc_r: 75.87 \pm 8.67


In [14]:
Proj_mat_lst =[]
for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    test_by_class(model, test_loader, i=args.unlearn_class)
    
    feature_list = []
    merged_feat_mat = []
    for batch, (x, y) in enumerate(remain_loader):
        x = x.cuda()
        y = y.cuda()
        mat_list = get_representation_matrix(model, x, batch_list=[256]*30)
        break
    threshold = 0.99
    merged_feat_mat = update_GPM(mat_list, threshold, merged_feat_mat)
    proj_mat = [torch.Tensor(np.dot(layer_basis, layer_basis.transpose())) for layer_basis in merged_feat_mat]
    Proj_mat_lst.append(proj_mat)

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
Threshold:  0.99
----------------------------------------
Gradient Constraints Summary
----------------------------------------
Layer 1 : 5/27
Layer 2 : 67/576
Layer 3 : 372/1152
Layer 4 : 1318/2304
Layer 5 : 575/2304
Layer 6 : 710/4608
Layer 7 : 0/4608
Layer 8 : 0/4608
Layer 9 : 14/512
Layer 10 : 34/4096
Layer 11 : 20/4096
----------------------------------------
0.9203, 0.8537, 0.8166, 0.5749, 0.9076, 0.7718, 0.8655, 0.9029, 0.5687, 0.8458, Acc_f: 0.5749, Acc_r: 0.8281
Threshold:  0.99
----------------------------------------
Gradient Constraints Summary
----------------------------------------
Layer 1 : 5/27
Layer 2 : 65/576
Layer 3 : 378/1152
Layer 4 : 1293/2304
Layer 5 : 572/2304
Layer 6 : 703/4608
Layer 7 : 0/4608
Layer 8 : 0/4608
Layer 9 : 15/512
Layer 10 : 35/4096
Layer 11 : 22/4096
----------------------------------------
0.8481, 0.8837, 0.9371, 0.6888, 0.8914, 0.7966, 

In [15]:
for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    sgd_mr_model = deepcopy(model)
    test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)
    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.001)
    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
            m.eval()

    for ep in range(10):
        for batch, (x, y) in enumerate(unlearn_subset_loader):
            x = x.cuda()
            y = torch.from_numpy(np.random.choice(remain_class, size=x.shape[0])).cuda()
            pred_y = sgd_mr_model(x)
            loss = criterion(pred_y, y)
            optimizer.zero_grad()
            loss.backward()
            kk = 0 
            for k, (m,params) in enumerate(sgd_mr_model.named_parameters()):
                if len(params.size())!=1:
                    sz =  params.grad.data.size(0)
                    params.grad.data = params.grad.data - torch.mm(params.grad.data.view(sz,-1),\
                                            Proj_mat_lst[i][kk].cuda()).view(params.size())
                    kk +=1
                elif len(params.size())==1:
                    params.grad.data.fill_(0)
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
[train] epoch 0, batch 29, loss 3.4053196907043457
0.9604, 0.9298, 0.9241, 0.4011, 0.8676, 0.7022, 0.7334, 0.8940, 0.5361, 0.7887, Acc_f: 0.4011, Acc_r: 0.8152
[train] epoch 1, batch 29, loss 2.8472275733947754
0.9708, 0.9300, 0.9113, 0.2991, 0.8514, 0.5680, 0.6323, 0.8861, 0.3536, 0.7335, Acc_f: 0.2991, Acc_r: 0.7597
[train] epoch 2, batch 29, loss 2.791172742843628
0.9759, 0.9294, 0.9012, 0.2398, 0.8426, 0.4958, 0.5210, 0.8752, 0.2452, 0.6978, Acc_f: 0.2398, Acc_r: 0.7205
[train] epoch 3, batch 29, loss 2.5219521522521973
0.9771, 0.9302, 0.8927, 0.2065, 0.8419, 0.4518, 0.4734, 0.8692, 0.1880, 0.6796, Acc_f: 0.2065, Acc_r: 0.7004
[train] epoch 4, batch 29, loss 2.573842763900757
0.9788, 0.9276, 0.8872, 0.1811, 0.8419, 0.4253, 0.4380, 0.8653, 0.1536, 0.6639, Acc_f: 0.1811, Acc_r: 0.6869
[train] epoch 5, batch 29, loss 2.4584286212921143
0.9788, 0.9272, 0.8850, 0.1582, 0.8450, 0.

In [16]:
Acc_r = 100*np.array([0.9521, 0.9499, 0.9363])
Acc_f = 100*np.array([0.0160, 0.0139, 0.0566])

print(f'Random label + Subspace Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'Random label + Subspace Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

Random label + Subspace Acc_f: 2.88 \pm 1.97
Random label + Subspace Acc_r: 94.61 \pm 0.70


In [17]:
def get_2nd_score(model, x, y):
    indices = torch.topk(model(x), k=2, dim=1).indices
    top1_matches = indices[:, 0] == y
    selected_labels = torch.where(top1_matches, indices[:, 1], indices[:, 0])
    return selected_labels

for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    sgd_mr_model = deepcopy(model)
    test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)
    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.03)
    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
            m.eval()

    model.eval()
    for ep in range(10):
        for batch, (x, y) in enumerate(unlearn_subset_loader):
            x = x.cuda()
            y = get_2nd_score(model, x, y.cuda())
            pred_y = sgd_mr_model(x)
            loss = criterion(pred_y, y)
            optimizer.zero_grad()
            loss.backward()
            kk = 0 
            for k, (m,params) in enumerate(sgd_mr_model.named_parameters()):
                if len(params.size())!=1:
                    sz =  params.grad.data.size(0)
                    params.grad.data = params.grad.data - torch.mm(params.grad.data.view(sz,-1),\
                                            Proj_mat_lst[i][kk].cuda()).view(params.size())
                    kk +=1
                elif len(params.size())==1:
                    params.grad.data.fill_(0)
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
[train] epoch 0, batch 29, loss 0.5064550042152405
0.8234, 0.9339, 0.9332, 0.0014, 0.8573, 0.7626, 0.4057, 0.8182, 0.9295, 0.8420, Acc_f: 0.0014, Acc_r: 0.8118
[train] epoch 1, batch 29, loss 0.4426722228527069
0.8698, 0.9139, 0.9460, 0.0000, 0.8672, 0.9065, 0.5286, 0.8697, 0.8530, 0.7655, Acc_f: 0.0000, Acc_r: 0.8356
[train] epoch 2, batch 29, loss 0.21191827952861786
0.8790, 0.9208, 0.9487, 0.0000, 0.8648, 0.8796, 0.5468, 0.8643, 0.8717, 0.7455, Acc_f: 0.0000, Acc_r: 0.8357
[train] epoch 3, batch 29, loss 0.19525010883808136
0.8888, 0.9361, 0.9405, 0.0000, 0.8688, 0.8586, 0.5362, 0.8529, 0.8892, 0.7624, Acc_f: 0.0000, Acc_r: 0.8370
[train] epoch 4, batch 29, loss 0.38105636835098267
0.8905, 0.9082, 0.9409, 0.0000, 0.8664, 0.9207, 0.5938, 0.8985, 0.8187, 0.7241, Acc_f: 0.0000, Acc_r: 0.8402
[train] epoch 5, batch 29, loss 0.24008722603321075
0.9008, 0.9390, 0.9361, 0.0000, 0.87

In [18]:
Acc_r = 100*np.array([0.9597, 0.9613, 0.9520])
Acc_f = 100*np.array([0.0000, 0.0000, 0.0000])
print(f'UNSC Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'UNSC Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

UNSC Acc_f: 0.00 \pm 0.00
UNSC Acc_r: 95.77 \pm 0.41


# Boundary Unlearning

In [19]:
from agents.adv import FGSM

def find_adjacent_cls(adv_agent, x, y):
    x_adv = adv_agent.perturb(x, y)
    adv_logits = model(x_adv)
    adv_pred = torch.argmax(adv_logits.data, 1)
    return adv_pred, x_adv

for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))  

    adv_agent = FGSM(deepcopy(model), bound=0.5, norm=False, random_start=True, device='cuda')
    sgd_mr_model = deepcopy(model)
    print('==='*60)
    test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)
    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.001)

    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
            m.eval()
            
    model.eval()
    for ep in range(15):
        for batch, (x, y) in enumerate(unlearn_subset_loader):
            x = x.cuda()
            adv_pred, x_adv = find_adjacent_cls(adv_agent, x, y)
            adv_y = torch.argmax(model(x_adv), dim=1).detach().cuda()
            pred_y = sgd_mr_model(x)
            loss = criterion(pred_y, adv_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
[train] epoch 0, batch 29, loss 1.9348245859146118
0.9719, 0.9396, 0.8479, 0.0965, 0.7753, 0.2433, 0.2944, 0.8103, 0.3446, 0.5762, Acc_f: 0.0965, Acc_r: 0.6448
[train] epoch 1, batch 29, loss 1.7836272716522217
0.9237, 0.9718, 0.7756, 0.0468, 0.6492, 0.1418, 0.2777, 0.7578, 0.3922, 0.6690, Acc_f: 0.0468, Acc_r: 0.6176
[train] epoch 2, batch 29, loss 1.8350627422332764
0.8979, 0.9733, 0.7510, 0.0291, 0.5549, 0.1007, 0.2949, 0.7395, 0.4012, 0.7279, Acc_f: 0.0291, Acc_r: 0.6046
[train] epoch 3, batch 29, loss 1.7259252071380615
0.8658, 0.9723, 0.7332, 0.0184, 0.5069, 0.0881, 0.3237, 0.7360, 0.4102, 0.7674, Acc_f: 0.0184, Acc_r: 0.6004
[train] epoch 4, batch 29, loss 1.634499430656433
0.8440, 0.9686, 0.7110, 0.0139, 0.4665, 0.0784, 0.3101, 0.7286, 0.3994, 0.7843, Acc_f: 0.0139, Acc_r: 0.5879
[train] epoch 5, batch 29, loss 1.5350629091262817
0.8280, 0.9671, 0.7004, 0.0118, 0.4522, 0

In [20]:
from agents.adv import FGSM

def find_adjacent_cls(adv_agent, x, y):
    x_adv = adv_agent.perturb(x, y)
    adv_logits = model(x_adv)
    adv_pred = torch.argmax(adv_logits.data, 1)
    return adv_pred, x_adv

for i in range(2, 3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))  

    adv_agent = FGSM(deepcopy(model), bound=0.5, norm=False, random_start=True, device='cuda')
    sgd_mr_model = deepcopy(model)
    print('==='*60)
    test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)
    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.001)

    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
            m.eval()
            
    model.eval()
    for ep in range(15):
        for batch, (x, y) in enumerate(unlearn_subset_loader):
            x = x.cuda()
            adv_pred, x_adv = find_adjacent_cls(adv_agent, x, y)
            adv_y = torch.argmax(model(x_adv), dim=1).detach().cuda()
            pred_y = sgd_mr_model(x)
            loss = criterion(pred_y, adv_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

0.9513, 0.9723, 0.9687, 0.9410, 0.9568, 0.9379, 0.9449, 0.9297, 0.9434, 0.9386, Acc_f: 0.9410, Acc_r: 0.9493
[train] epoch 0, batch 29, loss 2.134913921356201
0.6325, 0.9645, 0.5924, 0.2065, 0.7063, 0.9723, 0.7274, 0.3725, 0.6199, 0.1498, Acc_f: 0.2065, Acc_r: 0.6375
[train] epoch 1, batch 29, loss 2.041485071182251
0.5476, 0.9873, 0.5577, 0.2738, 0.6595, 0.9354, 0.6935, 0.2744, 0.6614, 0.1135, Acc_f: 0.2738, Acc_r: 0.6034
[train] epoch 2, batch 29, loss 1.991804599761963
0.5126, 0.9890, 0.5421, 0.3123, 0.6377, 0.9299, 0.6748, 0.2382, 0.6849, 0.0903, Acc_f: 0.3123, Acc_r: 0.5888
[train] epoch 3, batch 29, loss 1.968560814857483
0.5178, 0.9910, 0.5401, 0.2890, 0.6457, 0.9195, 0.6884, 0.2402, 0.7120, 0.1034, Acc_f: 0.2890, Acc_r: 0.5953
[train] epoch 4, batch 29, loss 1.9913945198059082
0.5487, 0.9914, 0.6035, 0.2856, 0.6829, 0.9123, 0.6970, 0.2759, 0.7337, 0.1417, Acc_f: 0.2856, Acc_r: 0.6208
[train] epoch 5, batch 29, loss 1.9811981916427612
0.5677, 0.9918, 0.6226, 0.2720, 0.6932, 0.91

In [20]:
Acc_r = 100*np.array([0.8015, 0.8627,  0.7656])
Acc_f = 100*np.array([0.0219, 0.0659,  0.0330])
print(f'Boundary Unlearning Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'Boundary Unlearning Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

Boundary Unlearning Acc_f: 4.03 \pm 1.87
Boundary Unlearning Acc_r: 80.99 \pm 4.01


# Gradient Ascent

In [21]:
for i in range(3):
    print('\n\n')
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))  

    sgd_mr_model = deepcopy(model)
    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.00008)
    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()

    for ep in range(30):
        for batch, (x, y) in enumerate(unlearn_subset_loader):
            x = x.cuda()
            y = y.cuda()
            pred_y = sgd_mr_model(x)
            loss = -criterion(pred_y, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)




[train] epoch 0, batch 29, loss -0.8680832982063293
0.9375, 0.9123, 0.9185, 0.5958, 0.8831, 0.7106, 0.7360, 0.8980, 0.7572, 0.8270, Acc_f: 0.5958, Acc_r: 0.8422
[train] epoch 1, batch 29, loss -1.5237183570861816
0.9650, 0.8847, 0.8997, 0.3838, 0.8744, 0.6460, 0.7562, 0.8970, 0.7102, 0.8232, Acc_f: 0.3838, Acc_r: 0.8285
[train] epoch 2, batch 29, loss -2.780498743057251
0.9845, 0.7896, 0.8094, 0.1367, 0.8109, 0.3272, 0.6864, 0.8544, 0.5849, 0.7812, Acc_f: 0.1367, Acc_r: 0.7365
[train] epoch 3, batch 29, loss -7.0883965492248535
0.9925, 0.2646, 0.3476, 0.0007, 0.4923, 0.0042, 0.3652, 0.6246, 0.3458, 0.5674, Acc_f: 0.0007, Acc_r: 0.4449
[train] epoch 4, batch 29, loss -56.50541687011719
1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, Acc_f: 0.0000, Acc_r: 0.1111
[train] epoch 5, batch 29, loss nan
1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, Acc_f: 0.0000, Acc_r: 0.1111
[train] epoch 6, batch 29, loss nan
1.0000, 0.000

In [22]:
Acc_r = 100*np.array([0.7419,  0.6733,  0.8381])
Acc_f = 100*np.array([0.0070,  0.0000,  0.0861])

print(f'GA Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'GA Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

GA Acc_f: 3.10 \pm 3.90
GA Acc_r: 75.11 \pm 6.76


## Fisher unlearning

In [23]:
import copy
import torch.nn.functional as F

def hessian(dataset, model):
    model.eval()
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=512, shuffle=False)
    loss_fn = torch.nn.CrossEntropyLoss(reduction="mean")
    device = torch.device("cuda")

    for p in model.parameters():
        p.grad2_acc = 0
    
    for data, orig_target in tqdm(train_loader):
        data, orig_target = data.to(device), orig_target.to(device)
        output = model(data)
        prob = F.softmax(output, dim=-1).data

        for y in range(output.shape[1]):
            target = torch.empty_like(orig_target).fill_(y)
            loss = loss_fn(output, target)
            model.zero_grad()
            loss.backward(retain_graph=True)
            for p in model.parameters():
                if p.requires_grad:
                    p.grad2_acc += torch.mean(prob[:, y]) * p.grad.data.pow(2) 

    for p in model.parameters():
        p.grad2_acc /= len(train_loader)
    
def get_mean_var(args, p, alpha=1e-7):
    var = copy.deepcopy(1./(p.grad2_acc+1e-8))
    var = var.clamp(max=1e3) 
    if p.size(0) == args.num_classes:
        var = var.clamp(max=1e2)
    var = alpha * var 
    
    if p.ndim > 1:
        var = var.mean(dim=1, keepdim=True).expand_as(p).clone()
    mu = copy.deepcopy(p.data0.clone())

    if p.size(0) == args.num_classes:
        mu[args.unlearn_class] = 0
        var[args.unlearn_class] = 0.0001
        var *= 10
    elif p.ndim == 1:
        var *= 10 
    return mu, var

def fisher_new(dataset, model):
    for p in model.parameters():
        p.data0 = copy.deepcopy(p.data.clone())
    hessian(dataset, model)
    for i, p in enumerate(model.parameters()):
        mu, var = get_mean_var(args, p)
        p.data = mu + var.sqrt() * torch.empty_like(p.data).normal_()
    return model

In [24]:

remain_dataset = torch.utils.data.Subset(train_loader.dataset, remain_indices)

for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))  
    test_by_class(model, test_loader, i=args.unlearn_class)
    fisher_model = copy.deepcopy(model)
    fisher_new(remain_dataset, fisher_model)
    test_by_class(fisher_model, test_loader, i=args.unlearn_class)

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252


100%|██████████| 114/114 [00:41<00:00,  2.75it/s]


0.8010, 0.9774, 0.9255, 0.0000, 0.7380, 0.7898, 0.6459, 0.8539, 0.7916, 0.8094, Acc_f: 0.0000, Acc_r: 0.8147
0.9203, 0.8537, 0.8166, 0.5749, 0.9076, 0.7718, 0.8655, 0.9029, 0.5687, 0.8458, Acc_f: 0.5749, Acc_r: 0.8281


100%|██████████| 114/114 [00:41<00:00,  2.73it/s]


0.8922, 0.9196, 0.7007, 0.0000, 0.8359, 0.8054, 0.8690, 0.9009, 0.4500, 0.8439, Acc_f: 0.0000, Acc_r: 0.8019
0.8481, 0.8837, 0.9371, 0.6888, 0.8914, 0.7966, 0.7446, 0.8286, 0.7175, 0.6871, Acc_f: 0.6888, Acc_r: 0.8150


100%|██████████| 114/114 [00:40<00:00,  2.78it/s]


0.8091, 0.9149, 0.9679, 0.0000, 0.8419, 0.8859, 0.7790, 0.7954, 0.4807, 0.5937, Acc_f: 0.0000, Acc_r: 0.7854


In [25]:
Acc_r = 100*np.array([0.9596,  0.9590, 0.9530])
Acc_f = 100*np.array([0.0378,  0.0163, 0.0219])

print(f'Fisher Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'Fisher Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

Fisher Acc_f: 2.53 \pm 0.91
Fisher Acc_r: 95.72 \pm 0.30


In [26]:
import copy
import torch.nn.functional as F

def hessian(dataset, model):
    model.eval()
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False)
    loss_fn = torch.nn.CrossEntropyLoss(reduction="mean")
    device = torch.device("cuda")

    for p in model.parameters():
        p.grad2_acc = 0
    
    for data, orig_target in tqdm(train_loader):
        data, orig_target = data.to(device), orig_target.to(device)
        output = model(data)
        prob = F.softmax(output, dim=-1).data

        for y in range(output.shape[1]):
            target = torch.empty_like(orig_target).fill_(y)
            loss = loss_fn(output, target)
            model.zero_grad()
            loss.backward(retain_graph=True)
            for p in model.parameters():
                if p.requires_grad:
                    p.grad2_acc += torch.mean(prob[:, y]) * p.grad.data.pow(2) 

    for p in model.parameters():
        p.grad2_acc /= len(train_loader)
    
def get_mean_var(args, p, alpha=1.25e-7):
    var = copy.deepcopy(1./(p.grad2_acc+1e-8))
    var = var.clamp(max=1e3) 
    if p.size(0) == args.num_classes:
        var = var.clamp(max=1e2)
    var = alpha * var 
    
    if p.ndim > 1:
        var = var.mean(dim=1, keepdim=True).expand_as(p).clone()
    mu = copy.deepcopy(p.data0.clone())

    if p.size(0) == args.num_classes:
        mu[unlearn_class] = 0
        var[unlearn_class] = 0.0001
        var *= 10
    elif p.ndim == 1:
        var *= 10 
    return mu, var

def fisher_new(dataset, model):
    for p in model.parameters():
        p.data0 = copy.deepcopy(p.data.clone())
    hessian(dataset, model)
    for i, p in enumerate(model.parameters()):
        mu, var = get_mean_var(args, p)
        p.data = mu + var.sqrt() * torch.empty_like(p.data).normal_()
    return model

In [27]:
unlearn_class = 6
remain_class = list(set(list(range(10))) -set([unlearn_class]))
train_targets_list = np.array(train_loader.dataset.labels)
remain_cls_indices = np.where(~np.isin(train_targets_list, unlearn_class))[0]
cls_sampler = torch.utils.data.SubsetRandomSampler(remain_cls_indices)
remain_loader = torch.utils.data.DataLoader(train_loader.dataset, 
                                            batch_size=args.batch_size, 
                                            sampler=cls_sampler)

In [28]:
fisher_model = copy.deepcopy(model)
criterion = torch.nn.CrossEntropyLoss()
remain_dataset = torch.utils.data.Subset(train_loader.dataset, remain_cls_indices)
fisher_new(remain_dataset, fisher_model)
test_by_class(fisher_model, test_loader, i=unlearn_class)

100%|██████████| 2111/2111 [03:04<00:00, 11.47it/s]


0.8245, 0.9335, 0.8952, 0.6325, 0.9041, 0.8192, 0.0000, 0.6196, 0.7084, 0.7937, Acc_f: 0.0000, Acc_r: 0.7923


(0.0, 0.7923146250346373)

In [33]:
train_loader, test_loader = get_dataloader(args)
remain_dataset = torch.utils.data.Subset(train_loader.dataset, remain_cls_indices)

for i in range(1,4):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}_{i}.pth'))
    test_by_class(model, test_loader, i=6)
    fisher_model = copy.deepcopy(model)
    fisher_new(remain_dataset, fisher_model)
    test_by_class(fisher_model, test_loader, i=unlearn_class)

Using downloaded and verified file: ../data/svhn/train_32x32.mat
Using downloaded and verified file: ../data/svhn/test_32x32.mat


ValueError: too many values to unpack (expected 2)

In [34]:
Acc_r = 100*np.array([0.8307, 0.8237, 0.7079])
Acc_f = 100*np.array([0.0020, 0.0290, 0.0000])

print(f'Remain {np.mean(Acc_r):.4f}-{np.std(Acc_r):.4f}')
print(f'Forget {np.mean(Acc_f):.4f}-{np.std(Acc_f):.4f}')

Remain 78.7433-5.6311
Forget 1.0333-1.3225


# SalUn

In [35]:
arxiv_name = 'original_model_12-15-02-49'
train_loader, val_loader, test_loader = get_dataloader(args)
remain_train_loader, unlearn_train_loader = split_2_remain_unlearn(args, train_loader)
remain_val_loader, unlearn_val_loader = split_2_remain_unlearn(args, val_loader)
remain_test_loader, unlearn_test_loader = split_2_remain_unlearn(args, test_loader)

remain_class = np.setdiff1d(np.arange(args.num_classes), args.unlearn_class)

Using downloaded and verified file: ../data/svhn/train_32x32.mat
Using downloaded and verified file: ../data/svhn/test_32x32.mat


In [36]:
# create saliency map
def save_gradient_ratio(unlearn_train_loader, model, criterion, args, seed):
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.unlearn_lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
    gradients = {}
    model.eval()
    for name, param in model.named_parameters():
        gradients[name] = 0

    for i, (image, target) in enumerate(unlearn_train_loader):
        image = image.cuda()
        target = target.cuda()

        # compute output
        output_clean = model(image)
        loss = - criterion(output_clean, target)

        optimizer.zero_grad()
        loss.backward()

        with torch.no_grad():
            for name, param in model.named_parameters():
                if param.grad is not None:
                    gradients[name] += param.grad.data

    with torch.no_grad():
        for name in gradients:
            gradients[name] = torch.abs_(gradients[name])

    threshold_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    for i in threshold_list:
        print(i)
        sorted_dict_positions = {}
        hard_dict = {}

        # Concatenate all tensors into a single tensor
        all_elements = - torch.cat([tensor.flatten() for tensor in gradients.values()])

        # Calculate the threshold index for the top 10% elements
        threshold_index = int(len(all_elements) * i)

        # Calculate positions of all elements
        positions = torch.argsort(all_elements)
        ranks = torch.argsort(positions)

        start_index = 0
        for key, tensor in gradients.items():
            num_elements = tensor.numel()
            # tensor_positions = positions[start_index: start_index + num_elements]
            tensor_ranks = ranks[start_index : start_index + num_elements]

            sorted_positions = tensor_ranks.reshape(tensor.shape)
            sorted_dict_positions[key] = sorted_positions

            # Set the corresponding elements to 1
            threshold_tensor = torch.zeros_like(tensor_ranks)
            threshold_tensor[tensor_ranks < threshold_index] = 1
            threshold_tensor = threshold_tensor.reshape(tensor.shape)
            hard_dict[key] = threshold_tensor
            start_index += num_elements

        torch.save(hard_dict, f'./save/{args.dataset}/{args.model_name}/mask_threshold_{seed}_{i}.pt')


args.unlearn_lr=0.01
args.momentum=0.9
args.weight_decay=5e-4
criterion = nn.CrossEntropyLoss()

for i in range(3):
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    save_gradient_ratio(unlearn_train_loader, model, criterion, args, args.seeds[i])

0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1.0


In [37]:
from agents.svc_mia import SVC_MIA
indice = remain_train_loader.sampler.indices
neg_size = len(test_loader.sampler) + len(val_loader.sampler.indices)
balanced_indice = np.random.choice(indice, size=neg_size, replace=False)
balanced_sampler = torch.utils.data.SubsetRandomSampler(balanced_indice)
balanced_train_loader = torch.utils.data.DataLoader(remain_train_loader.dataset,
                                                    batch_size=args.batch_size,
                                                    sampler=balanced_sampler)

threshold = 0.8
MIA_acc = np.zeros(3)
for i in range(3):
    print("======="*50)
    mask = torch.load(f'./save/{args.dataset}/{args.model_name}/mask_threshold_{args.seeds[i]}_{threshold}.pt')
    model = get_model(args)
    model.load_state_dict(torch.load(f'./save/{args.dataset}/{args.model_name}/{arxiv_name}_{args.seeds[i]}.pth'))
    sgd_mr_model = deepcopy(model)
    test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)
    optimizer = torch.optim.SGD(sgd_mr_model.parameters(), lr=0.0001)
    sgd_mr_model.train()
    for m in sgd_mr_model.modules():
        if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
            m.eval()

    for ep in range(5):
        for batch, (x, y) in enumerate(unlearn_train_loader):
            x = x.cuda()
            y = torch.from_numpy(np.random.choice(remain_class, size=x.shape[0])).cuda()
            pred_y = sgd_mr_model(x)
            loss = criterion(pred_y, y)
            optimizer.zero_grad()
            loss.backward()
            for name, param in sgd_mr_model.named_parameters():
                if param.grad is not None:
                    param.grad *= mask[name]
            optimizer.step()
        print('[train] epoch {}, batch {}, loss {}'.format(ep, batch, loss))
        test_by_class(sgd_mr_model, test_loader, i=args.unlearn_class)

    MIA_acc[i] =  SVC_MIA(shadow_train=balanced_train_loader, 
            target_train=None, 
            target_test=unlearn_train_loader,
            shadow_test=test_loader, 
            model=sgd_mr_model)
print(f'MIA acc: {MIA_acc.mean():.2f} \pm {MIA_acc.std():.2f}')

0.9151, 0.9202, 0.9140, 0.7359, 0.8803, 0.6774, 0.6606, 0.8841, 0.7578, 0.8176, Acc_f: 0.7359, Acc_r: 0.8252
[train] epoch 0, batch 29, loss 4.167675971984863
0.9753, 0.8763, 0.9253, 0.4511, 0.8910, 0.7110, 0.7137, 0.8945, 0.6048, 0.7298, Acc_f: 0.4511, Acc_r: 0.8135
[train] epoch 1, batch 29, loss 3.2399892807006836
0.9868, 0.8325, 0.9142, 0.3137, 0.8751, 0.6032, 0.6019, 0.8707, 0.4373, 0.5868, Acc_f: 0.3137, Acc_r: 0.7454
[train] epoch 2, batch 29, loss 2.9124772548675537
0.9903, 0.8007, 0.9021, 0.2450, 0.8644, 0.4975, 0.4669, 0.8455, 0.3247, 0.4809, Acc_f: 0.2450, Acc_r: 0.6859
[train] epoch 3, batch 29, loss 2.7284464836120605
0.9908, 0.7847, 0.8908, 0.1978, 0.8533, 0.4308, 0.3698, 0.8222, 0.2633, 0.4119, Acc_f: 0.1978, Acc_r: 0.6464
[train] epoch 4, batch 29, loss 2.646198272705078
0.9914, 0.7829, 0.8817, 0.1759, 0.8498, 0.3872, 0.3217, 0.8093, 0.2223, 0.3599, Acc_f: 0.1759, Acc_r: 0.6229
0.9203, 0.8537, 0.8166, 0.5749, 0.9076, 0.7718, 0.8655, 0.9029, 0.5687, 0.8458, Acc_f: 0.5749

In [38]:
Acc_r = 100*np.array([0.7825,  0.8501,  0.7317])
Acc_f = 100*np.array([0.0416,  0.0281,  0.0371])

print(f'SalUn Acc_f: {Acc_f.mean():.2f} \pm {Acc_f.std():.2f}')
print(f'SalUn Acc_r: {Acc_r.mean():.2f} \pm {Acc_r.std():.2f}')

SalUn Acc_f: 3.56 \pm 0.56
SalUn Acc_r: 78.81 \pm 4.85
