In [1]:
import os
import copy
import time
import random
import subprocess

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torch.utils.data import Dataset, DataLoader

In [2]:
def set_seed(seed):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch with ``seed`` and stores the
    value in the ``PYTHONHASHSEED`` environment variable. When CUDA is
    available, ``torch.cuda.manual_seed`` is called as well and cuDNN is put
    into deterministic, non-benchmarking mode.

    Args:
        seed: value forwarded to every seeding call.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

def parameter_count(model):
    """Count the scalar parameters of a model.

    Args:
        model: object whose ``parameters()`` yields tensors (for example an
            ``nn.Module``).

    Returns:
        tuple[int, int]: ``(total_count, trainable_count)`` where the second
        entry only includes parameters with ``requires_grad`` set.
    """
    total_count = 0
    trainable_count = 0
    for p in model.parameters():
        # numel() is always an exact Python int, including 1 for 0-dim
        # parameters, and avoids building a temporary tensor from the shape.
        n_elements = p.numel()
        total_count += n_elements
        if p.requires_grad:
            trainable_count += n_elements

    return total_count, trainable_count

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Device = {device}")

Device = cuda


In [3]:
class MovingAverage:
    """Running arithmetic mean of every value passed to ``update``.

    Attributes:
        name: label given at construction time.
        rd: number of digits the mean is rounded to.
        sum: running total of the observed values.
        count: number of observed values.
        val: most recent rounded mean (0.0 before the first update).
    """

    def __init__(self, name, rd=4):
        self.name = name
        self.val, self.sum, self.count = 0.0, 0.0, 0
        self.rd = rd

    def update(self, x):
        """Add one observation and refresh the rounded mean."""
        running_total = self.sum + x
        self.sum = running_total
        self.count = self.count + 1

        mean = self.sum / self.count
        self.val = round(mean, self.rd)

    def value(self) -> float:
        """Return the current rounded mean."""
        return self.val

In [4]:
class HiddenDataset(Dataset):
    """Image dataset that loads every image into memory at construction time.

    Each row of ``df`` must provide an ``image_id`` of the form
    ``"<folder>-<stem>"``, which is resolved to
    ``<base_dir>/images/<folder>/<stem>.png``, and an ``age_group`` label.

    Args:
        df: DataFrame with ``image_id`` and ``age_group`` columns. It is not
            modified; the dataset keeps its own copy with an extra
            ``image_path`` column.
        base_dir: directory that contains the ``images`` folder.
    """

    def __init__(self, df, base_dir):
        super().__init__()

        def to_path(image_id):
            # Split once and reuse both halves of "<folder>-<stem>".
            parts = image_id.split('-')
            return os.path.join(base_dir, 'images', parts[0], parts[1] + '.png')

        # assign() returns a new frame, so the caller's DataFrame is left
        # untouched (no hidden extra column, no SettingWithCopyWarning when a
        # slice is passed in).
        self.df = df.assign(image_path=df['image_id'].apply(to_path))

        # Read all images once, up front, so __getitem__ is a plain list lookup.
        self.images = [torchvision.io.read_image(x) for x in self.df['image_path'].tolist()]

    def __len__(self):
        """Number of rows (and therefore images) in the dataset."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, age_group)`` for the row at position ``index``.

        ``image`` is whatever ``torchvision.io.read_image`` returned for that row.
        """
        image = self.images[index]
        age = self.df['age_group'].iloc[index]
        return image, age

In [5]:
# The run is treated as a real submission unless the marker file `empty.txt`
# is present in the competition input folder.
real_run = not os.path.exists('/kaggle/input/neurips-2023-machine-unlearning/empty.txt')

if real_run:
    # this part will run when we submit to kaggle.
    base_dir = "/kaggle/input/neurips-2023-machine-unlearning/"
    num_checkpoints = 512
else:
    # marker file found: use the mock data and only a handful of checkpoints.
    base_dir = "/kaggle/input/mock-cifar10-data"
    num_checkpoints = 10

In [6]:
# Folder that receives the unlearned checkpoints.
os.makedirs('/kaggle/tmp', exist_ok=True)


print("Initializing the model")
model = resnet18(weights=None, num_classes=10)
original_path = os.path.join(base_dir, 'original_model.pth')
print(f"Loading the model from checkpoint = {original_path}")
# map_location remaps the stored tensors onto the selected device, so the
# checkpoint also loads when the notebook falls back to the CPU.
model.load_state_dict(torch.load(original_path, map_location=device))
model.to(device)

retain_df = pd.read_csv(os.path.join(base_dir, "retain.csv"))
forget_df = pd.read_csv(os.path.join(base_dir, "forget.csv"))
validation_df = pd.read_csv(os.path.join(base_dir, "validation.csv"))

# Each HiddenDataset reads all of its images into memory when it is built.
print("Initializing the retain dataset")
retain_dataset = HiddenDataset(retain_df, base_dir)

print("Initializing the forget dataset")
forget_dataset = HiddenDataset(forget_df, base_dir)

print("Initializing the validation dataset")
validation_dataset = HiddenDataset(validation_df, base_dir)

print(f"length of retain dataset = {len(retain_dataset)}")
print(f"length of forget dataset = {len(forget_dataset)}")
print(f"length of validation dataset = {len(validation_dataset)}")

Initializing the model
Loading the model from checkpoint = /kaggle/input/mock-cifar10-data/original_model.pth
Initializing the retain dataset
Initializing the forget dataset
Initializing the validation dataset
length of retain dataset = 27440
length of forget dataset = 560
length of validation dataset = 3500


In [7]:
def calculate_accuracy(model_init, dataloader, device):
    """Compute the top-1 accuracy of a model over a dataloader.

    The model is switched to eval mode (and left there) and gradients are
    disabled while predicting.

    Args:
        model_init: model to evaluate; called as ``model_init(X)`` and expected
            to return one score per class along dimension 1.
        dataloader: iterable of ``(X, y)`` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        float: fraction of correct predictions rounded to 6 digits, or ``nan``
        when the dataloader yields no samples.
    """
    model_init.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in dataloader:
            X = X.float().to(device)
            y = y.long().to(device)

            y_pred = torch.argmax(model_init(X), dim=1)

            # Keep two running counters instead of re-copying ever-growing
            # arrays with np.append on every batch.
            correct += (y_pred.reshape(-1) == y.reshape(-1)).sum().item()
            total += y.numel()

    if total == 0:
        # Mean over zero samples is undefined.
        return float('nan')

    return round(correct / total, 6)


def unlearning(
    model,
    retain_loader,
    forget_loader,
    validation_loader,
    device,
    epochs=1,
    lr=1e-3,
    noise_std=1e-3
):
    """Fine-tune ``model`` on the retain set, then perturb its weights with noise.

    The model is modified in place. Accuracy on the retain, forget and
    validation loaders is printed before and after the update; the forget and
    validation loaders are only used for these evaluations.

    Args:
        model: network to update in place.
        retain_loader: batches ``(X, y)`` used for the fine-tuning passes.
        forget_loader: batches used for evaluation only.
        validation_loader: batches used for evaluation only.
        device: device the batches are moved to.
        epochs: number of passes over ``retain_loader`` (default 1).
        lr: SGD learning rate (default 1e-3).
        noise_std: standard deviation of the Gaussian noise added to every
            parameter after fine-tuning (default 1e-3).

    Returns:
        None.
    """
    # evaluate first
    retain_acc = calculate_accuracy(model, retain_loader, device)
    forget_acc = calculate_accuracy(model, forget_loader, device)
    validation_acc = calculate_accuracy(model, validation_loader, device)

    print(f"Initial retain acc = {retain_acc}, forget acc = {forget_acc}, validation acc = {validation_acc}")

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    # calculate_accuracy left the model in eval mode; switch back for training
    model.train()

    for _ in range(epochs):
        for X_retain, y_retain in retain_loader:
            X_retain = X_retain.float().to(device)
            y_retain = y_retain.long().to(device)

            optimizer.zero_grad()
            out_retain = model(X_retain)
            loss = loss_fn(out_retain, y_retain)
            loss.backward()
            optimizer.step()

    # add zero-mean Gaussian noise with standard deviation `noise_std` to every
    # parameter; no_grad keeps the in-place update out of autograd
    with torch.no_grad():
        for p in model.parameters():
            p.add_(noise_std * torch.randn_like(p))

    # evaluate now
    retain_acc_update = calculate_accuracy(model, retain_loader, device)
    forget_acc_update = calculate_accuracy(model, forget_loader, device)
    validation_acc_update = calculate_accuracy(model, validation_loader, device)

    print(f"After scrub retain acc = {retain_acc_update}, forget acc = {forget_acc_update}, validation acc = {validation_acc_update}")


In [8]:
T1 = time.time()

# The same three loaders are reused for every checkpoint.
batch_size = 64
retain_loader = DataLoader(dataset=retain_dataset, shuffle=True, batch_size=batch_size)
forget_loader = DataLoader(dataset=forget_dataset, shuffle=True, batch_size=batch_size)
validation_loader = DataLoader(dataset=validation_dataset, shuffle=False, batch_size=batch_size)

for sd in range(num_checkpoints):
    TS1 = time.time()
    print(f"Running for checkpoint = {sd}")

    # Work on a fresh copy so every checkpoint starts from the original weights.
    final_model = copy.deepcopy(model)
    unlearning(
        model=final_model,
        retain_loader=retain_loader,
        forget_loader=forget_loader,
        validation_loader=validation_loader,
        device=device,
    )

    # Store the weights in half precision because disk space can run out:
    # https://www.kaggle.com/competitions/neurips-2023-machine-unlearning/discussion/441758
    # (a 32-bit checkpoint is 43MB, a 16-bit one is 22MB).
    state = final_model.half().state_dict()
    torch.save(state, f'/kaggle/tmp/unlearned_checkpoint_{sd}.pth')

    TS2 = time.time()
    print(f"Time taken = {round(TS2 - TS1, 3)} seconds")

T2 = time.time()
timetaken_models = round(T2 - T1, 3)
print(f"Total timetaken to run the {num_checkpoints} models is = {timetaken_models} seconds")

Running for checkpoint = 0
Initial retain acc = 0.979701, forget acc = 0.985714, validation acc = 0.725714
After scrub retain acc = 0.995809, forget acc = 0.996429, validation acc = 0.744571
Time taken = 18.838 seconds
Running for checkpoint = 1
Initial retain acc = 0.979701, forget acc = 0.985714, validation acc = 0.725714
After scrub retain acc = 0.997413, forget acc = 0.996429, validation acc = 0.750571
Time taken = 13.749 seconds
Running for checkpoint = 2
Initial retain acc = 0.979701, forget acc = 0.985714, validation acc = 0.725714
After scrub retain acc = 0.997668, forget acc = 0.996429, validation acc = 0.749714
Time taken = 13.715 seconds
Running for checkpoint = 3
Initial retain acc = 0.979701, forget acc = 0.985714, validation acc = 0.725714
After scrub retain acc = 0.997303, forget acc = 1.0, validation acc = 0.752
Time taken = 13.635 seconds
Running for checkpoint = 4
Initial retain acc = 0.979701, forget acc = 0.985714, validation acc = 0.725714
After scrub retain acc = 

In [9]:
T3 = time.time()
# Ensure that submission.zip will contain exactly num_checkpoints
# (if this is not the case, an exception will be thrown).
# Only `.pth` files are counted because that is exactly what the zip glob
# below picks up; other files in the folder must not influence the check.
unlearned_ckpts = [entry for entry in os.listdir('/kaggle/tmp') if entry.endswith('.pth')]
if len(unlearned_ckpts) != num_checkpoints:
    raise RuntimeError(f'Expected exactly {num_checkpoints} checkpoints. The submission will throw an exception otherwise.')

# shell=True is needed for the `*.pth` glob; check=True turns a failing zip
# command into an exception instead of silently leaving a broken archive.
subprocess.run('zip submission.zip /kaggle/tmp/*.pth', shell=True, check=True)
T4 = time.time()
zip_time_taken = round(T4 - T3, 3)
print(f"Total time taken to zip the {num_checkpoints} models is = {zip_time_taken} seconds")


  adding: kaggle/tmp/unlearned_checkpoint_0.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_1.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_2.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_3.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_4.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_5.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_6.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_7.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_8.pth (deflated 7%)
  adding: kaggle/tmp/unlearned_checkpoint_9.pth (deflated 7%)
Total time taken to zip the 10 models is = 10.032 seconds
