<a href="https://colab.research.google.com/github/nem-42098/SL_Proj_Unlearning/blob/main/Unlearn_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Optional bootstrap, left disabled. The '/content/...' path suggests it is meant for Google Colab:
# uncomment to clone the project and make the clone the working directory.
# NOTE(review): presumably needed so that the later `from tools.Unlearner_FM import ...` resolves - confirm.
# !git clone https://github.com/nem-42098/SL_Proj_Unlearning.git
# import os
# os.chdir('/content/SL_Proj_Unlearning')

In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import models
import numpy as np


### Load Pre-Trained VGG network
> #### Source: https://github.com/chenyaofo/pytorch-cifar-models
> ### Note: Applying Batch Norm before ReLU is reported to introduce a bias in the network, so some people swap the order of the two layers to mitigate it.

In [3]:
### First run: fetches the repository and the pretrained weights through torch.hub
### (later runs reuse the local hub cache).

# Fall back to the CPU when no CUDA device is present, so that `device` is
# always usable instead of pointing at a GPU that does not exist.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_vgg11_bn", pretrained=True)
model
# model=model.to(device)

### For future use: loading from the local hub cache (no network access needed).
### The cache location is taken from torch.hub.get_dir() rather than a user-specific absolute path,
### and the second argument is the entry-point name (not 'hubconf.py').
# import os
# local_repo = os.path.join(torch.hub.get_dir(), "chenyaofo_pytorch-cifar-models_master")
# model_1 = torch.hub.load(local_repo, "cifar10_vgg11_bn", source='local', pretrained=True)



Using cache found in C:\Users\nmura/.cache\torch\hub\chenyaofo_pytorch-cifar-models_master


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

### Check which pre-trained model are available:

In [4]:
# Enumerate the entry points published by the hub repository.
# force_reload=True discards the cached copy and downloads the repository again.
torch.hub.list(
    github="chenyaofo/pytorch-cifar-models",
    force_reload=True,
)

Downloading: "https://github.com/chenyaofo/pytorch-cifar-models/zipball/master" to C:\Users\nmura/.cache\torch\hub\master.zip


['cifar100_mobilenetv2_x0_5',
 'cifar100_mobilenetv2_x0_75',
 'cifar100_mobilenetv2_x1_0',
 'cifar100_mobilenetv2_x1_4',
 'cifar100_repvgg_a0',
 'cifar100_repvgg_a1',
 'cifar100_repvgg_a2',
 'cifar100_resnet20',
 'cifar100_resnet32',
 'cifar100_resnet44',
 'cifar100_resnet56',
 'cifar100_shufflenetv2_x0_5',
 'cifar100_shufflenetv2_x1_0',
 'cifar100_shufflenetv2_x1_5',
 'cifar100_shufflenetv2_x2_0',
 'cifar100_vgg11_bn',
 'cifar100_vgg13_bn',
 'cifar100_vgg16_bn',
 'cifar100_vgg19_bn',
 'cifar100_vit_b16',
 'cifar100_vit_b32',
 'cifar100_vit_h14',
 'cifar100_vit_l16',
 'cifar100_vit_l32',
 'cifar10_mobilenetv2_x0_5',
 'cifar10_mobilenetv2_x0_75',
 'cifar10_mobilenetv2_x1_0',
 'cifar10_mobilenetv2_x1_4',
 'cifar10_repvgg_a0',
 'cifar10_repvgg_a1',
 'cifar10_repvgg_a2',
 'cifar10_resnet20',
 'cifar10_resnet32',
 'cifar10_resnet44',
 'cifar10_resnet56',
 'cifar10_shufflenetv2_x0_5',
 'cifar10_shufflenetv2_x1_0',
 'cifar10_shufflenetv2_x1_5',
 'cifar10_shufflenetv2_x2_0',
 'cifar10_vgg11_bn

### Downloading the Dataset and Creating the Dataloaders

In [5]:
### Preprocessing: convert images to tensors, then normalise each channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

### CIFAR-10 train / test datasets (downloaded to ./data when missing)
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(root='./data', train=use_train_split, download=True, transform=transform)
    for use_train_split in (True, False)
)

### Batch iterators: the training set is reshuffled every epoch, the test set keeps its order
trainloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Sanity check: number of samples in the training split.
len(train_dataset)

50000

### Create the Forget Dataset

In [7]:
# Define the classes
classes = ['forget', 'retain']

# CIFAR-10 label whose samples form the forget set; every other label is retained.
FORGET_LABEL = 1

# Partition the sample *indices* using the labels stored on the dataset.
# This avoids decoding and transforming all 50k images up front and keeping
# them in Python lists (slow, memory hungry, and copied to DataLoader workers).
split_indices = {class_name: [] for class_name in classes}
for sample_index, label in enumerate(train_dataset.targets):
    split_name = 'forget' if label == FORGET_LABEL else 'retain'
    split_indices[split_name].append(sample_index)

# Lazy views over train_dataset: indexing a Subset yields the same
# (transformed image, label) pairs as indexing the underlying dataset.
class_datasets = {
    class_name: torch.utils.data.Subset(train_dataset, indices)
    for class_name, indices in split_indices.items()
}

# Every training sample must land in exactly one of the two subsets.
assert len(class_datasets['forget']) + len(class_datasets['retain']) == len(train_dataset)

# You now have class-specific subsets in the class_datasets dict


#### Forget and Retain

In [8]:
# Class split: one shuffled loader per subset of class_datasets
retain_dataloader, forget_dataloader = (
    torch.utils.data.DataLoader(
        class_datasets[split_name],
        batch_size=128,
        shuffle=True,
        num_workers=2,
    )
    for split_name in ('retain', 'forget')
)

# Random split (alternative, disabled): forget 5000 randomly chosen samples instead of a whole class
# train_split_dataset,forget_split_dataset=torch.utils.data.random_split(train_dataset,lengths=[45000,5000])
# retain_dataloader = torch.utils.data.DataLoader(train_split_dataset,  batch_size=128, shuffle=True, num_workers=2)
# forget_dataloader = torch.utils.data.DataLoader(forget_split_dataset, batch_size=128, shuffle=True, num_workers=2)

# Unlearner class


## Check that the dumb model outputs random values
We expect these values to be uniformly distributed over the class labels 0–9, and thus the accuracy to be about 10%.

In [9]:
# Project-local unlearning helper; requires the repository root to be importable.
from tools.Unlearner_FM import Unlearner_FM
# Ask PyTorch to release cached, currently unused GPU memory before the unlearning run.
torch.cuda.empty_cache()
# NOTE(review): the meaning of the first positional argument (0.02) is not visible from this
# notebook - confirm against tools/Unlearner_FM.py. `model` is the pretrained VGG loaded above;
# `lr` is presumably the learning rate used by the unlearner - TODO confirm.
unlearner = Unlearner_FM(0.02,model, lr = 1e-6)


## Run the unlearning


In [10]:
### Run Fisher masking on the retain / forget loaders to obtain the masked model.
# NOTE(review): the three returned values are named after what they presumably hold (masked model,
# indices of the masked parameters, parameter count) - confirm against Unlearner_FM.Fisher_Masking.
# NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory error on a 6 GiB GPU.

new_model, mask_index,num_of_param=unlearner.Fisher_Masking(retain_dataloader,forget_dataloader)

OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 6.00 GiB total capacity; 5.24 GiB already allocated; 0 bytes free; 5.27 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

We can see that the unlearning process may need many epochs in the erasure phase to converge. We could also try to increase the learning rate for faster convergence.

In [None]:
import pandas as pd
import seaborn as sns

# One row per logged batch.
# NOTE(review): assumes `unlearner.log` holds 6-field records matching the column names below - confirm.
log_df = pd.DataFrame.from_records(unlearner.log, columns=['phase', 'epoch', 'batch', 'tp', 'n', 'loss'])

# Collapse to one row per (phase, epoch). Aggregations are given by name because
# passing the builtin `sum` to groupby.agg is deprecated in recent pandas releases.
unlearn_history = (
    log_df
    .groupby(['phase', 'epoch'])
    .agg({'tp': 'sum', 'n': 'sum', 'loss': 'mean'})
    .reset_index()
)
unlearn_history['accuracy'] = unlearn_history.tp / unlearn_history.n

# Scale the loss of each phase by that phase's own maximum so both curves share a [0, 1] axis.
# Rows belonging to any other phase keep NaN in `loss_scale`.
for phase_name in ('erasure', 'retrain'):
    in_phase = unlearn_history.phase == phase_name
    phase_loss = unlearn_history.loc[in_phase, 'loss']
    unlearn_history.loc[in_phase, 'loss_scale'] = phase_loss / phase_loss.max()

ax = sns.lineplot(data=unlearn_history, x='epoch', y='loss_scale', hue='phase')
ax.set(title='Unlearning loss per epoch (scaled by phase maximum)', xlabel='epoch', ylabel='scaled loss');

## Check model performance
We now check the model performance on the retain and forget sets, as well as on the held-out test set.

In [None]:
def evaluate_accuracy(net, dataloader):
    """Return the fraction of samples in `dataloader` that `net` classifies correctly.

    Args:
        net: a torch.nn.Module mapping a batch of inputs to per-class scores.
        dataloader: iterable yielding (inputs, targets) batches.

    Returns:
        Accuracy as a float in [0, 1]; NaN when the dataloader yields no samples.
    """
    # Run every batch on whatever device the network's parameters live on.
    first_param = next(net.parameters(), None)
    net_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = net.training
    net.eval()  # use running BatchNorm statistics while scoring
    n_correct, n_seen = 0, 0
    try:
        with torch.no_grad():
            for inputs, targets in dataloader:
                inputs, targets = inputs.to(net_device), targets.to(net_device)
                predictions = net(inputs).argmax(dim=1)
                n_correct += (predictions == targets).sum().item()
                n_seen += targets.numel()
    finally:
        net.train(was_training)  # leave the network in the mode it was found in
    return n_correct / n_seen if n_seen else float('nan')


# Named `models_to_evaluate` (not `models`) so the `from torchvision import models` import is not shadowed.
# NOTE(review): assumes `unlearner` exposes dumb_model / erased_model / retrained_model - confirm
# against tools/Unlearner_FM.py.
models_to_evaluate = {
    'original': model,
    'dumb': unlearner.dumb_model,
    'erased': unlearner.erased_model,
    'retrained': unlearner.retrained_model,
}
dataloaders = {'retain': retain_dataloader, 'forget': forget_dataloader, 'test': testloader}

# One (model, data partition, accuracy) record per combination.
performances = []
for model_name, m in models_to_evaluate.items():
    for dl_name, dl in dataloaders.items():
        acc = evaluate_accuracy(m, dl)
        performances.append((model_name, dl_name, acc))

Strangely, the dumb network has 0% accuracy on the forget set, which may have a negative impact. Note that this is lower than expected: a network producing uniformly random outputs should reach about 10%, not less.

Apart from that, the procedure works well on the retain and test sets, which are fitted very nicely, and there is even a big leap in performance from the erased model to the retrained model.

In [None]:
# Long-format table: one row per (model, data partition) accuracy measurement
perf_df = pd.DataFrame.from_records(
    data=performances,
    columns=['model', 'data_partition', 'accuracy'],
)

# Wide format: data partitions as rows, models as columns
tb = perf_df.pivot_table(values='accuracy', index='data_partition', columns='model')

# Annotated heatmap with accuracies rendered as percentages
sns.heatmap(data=tb, annot=True, fmt='.2%')