In [1]:
import torch
import torch.nn.functional as F
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

In [2]:
#!pip install matplotlib
#!pip install scikit-learn
#!pip install pytorch-lightning
#!pip install wandb
#!pip install pandas

In [3]:
# Reproducibility: push the same seed into every RNG this notebook relies on
# (PyTorch CPU, PyTorch CUDA, NumPy).
seed = 2
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    _seed_fn(seed)

# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

Using cuda


In [4]:
# SECURITY: a personal access token used to be pasted into this cell. Anything
# committed inside a notebook must be treated as leaked -- revoke that token and
# supply a fresh one through the GITHUB_TOKEN environment variable instead.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
USER = "mkraehenmann"
CLONE_URL = f"https://{USER}:{GITHUB_TOKEN}@github.com/{USER}/machine-unlearning.git"

_repo_dir = "machine-unlearning"
if os.path.isdir(_repo_dir) and os.listdir(_repo_dir):
    # Re-running the cell: git would abort with "destination path ... already
    # exists and is not an empty directory", so skip the clone entirely.
    print(f"'{_repo_dir}' already exists, skipping git clone")
elif not GITHUB_TOKEN:
    raise RuntimeError(
        "GITHUB_TOKEN is not set; export a personal access token before running this cell"
    )
else:
    get_ipython().system(f"git clone {CLONE_URL}")

fatal: destination path 'machine-unlearning' already exists and is not an empty directory.


In [5]:
import sys

# Make the cloned repository importable. Guard against duplicates so that
# re-running this cell does not keep growing sys.path.
_repo_path = "machine-unlearning"
if _repo_path not in sys.path:
    sys.path.append(_repo_path)

# Load Datasets

In [6]:
from machine_unlearning.datasets import get_datasets

# Name of the dataset; reused by later cells when resolving model/loss/classes.
DATASET = "cifar10"
# NOTE(review): this path refers to UTKFace although DATASET is "cifar10" --
# presumably it is ignored for CIFAR-10; confirm against get_datasets.
path = "/kaggle/input/utkface-new/UTKFace"

# Options forwarded to get_datasets alongside the dataset name and path.
_dataset_options = {"test_size": 0.1, "augment": True}

train_dataset, val_dataset = get_datasets(DATASET, path, **_dataset_options)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
from machine_unlearning.datasets import get_forget_retain_sets

# forget_set.txt holds the indices to forget as comma-separated integers.
# Blank tokens (trailing comma, trailing newline after a comma) are skipped so
# they do not surface as an opaque "invalid literal for int()" error.
with open('forget_set.txt', 'r') as fp:
    forget_indices = [int(token) for token in fp.read().split(',') if token.strip()]

if not forget_indices:
    raise ValueError("forget_set.txt does not contain any indices")

# NOTE: train_dataset is rebound to the second returned dataset (presumably the
# retain split -- confirm against get_forget_retain_sets). Re-running this cell
# without re-running the dataset-loading cell would therefore split an
# already-reduced dataset.
forget_set, train_dataset = get_forget_retain_sets(DATASET, train_dataset, forget_indices=forget_indices)

In [8]:
from torch.utils.data import DataLoader

BATCH_SIZE = 128

# Settings shared by both loaders; only the shuffling policy differs
# (shuffle the training data, keep validation order fixed).
_loader_kwargs = {"batch_size": BATCH_SIZE, "num_workers": 4}

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

# PyTorch Lightning Model

In [9]:
from machine_unlearning.models.resnet import ResNet18
from machine_unlearning.setups import get_cfg

# Which experiment setup to run; also selects the loader used for fitting.
SETUP = "train"

cfg = get_cfg(SETUP, DATASET)

# Explicit lookup instead of eval(f"{SETUP}_loader"): same result for the
# loaders defined in this notebook, but no string is executed as code and an
# unknown setup fails with a KeyError naming the bad key. Extend the mapping
# when a new "<setup>_loader" is introduced.
_loaders_by_setup = {"train": train_loader, "val": val_loader}
cfg["data"] = _loaders_by_setup[SETUP]
cfg["val_data"] = val_loader

In [10]:
from machine_unlearning.utils import resolve_model, resolve_classes, resolve_loss

# Explicit lookup instead of eval(f"{SETUP}_loader"): no string is executed as
# code, and an unknown setup fails with a KeyError naming the bad key.
_loaders_by_setup = {"train": train_loader, "val": val_loader}

# Custom cfg -- replaces whatever cfg was built before this cell.
cfg = {
    "model": resolve_model(DATASET),
    "dataset": DATASET,
    # Use SETUP rather than a second hard-coded "train" so the recorded setup
    # can never disagree with the loader selected below.
    "setup": SETUP,
    "loss": resolve_loss(DATASET),
    "num_classes": resolve_classes(DATASET),
    "only_train_fc": False,
    # Optimiser settings.
    "optimizer": "sgd",
    "optimizer_lr": 0.1,
    "optimizer_momentum": 0.9,
    "optimizer_weight_decay": 5e-4,
    # Learning-rate schedule; milestones are presumably epoch indices -- confirm
    # against the Experiment implementation.
    "lr_scheduler": "multistep",
    "milestones": [35, 70, 90],
    "epochs": 100,
    # Data used for fitting and for validation.
    "data": _loaders_by_setup[SETUP],
    "val_data": val_loader,
}

In [11]:
from machine_unlearning.experiments import Experiment

# Build the experiment from the cfg dict assembled above. The resulting object
# is what gets handed to trainer.fit later on (presumably a LightningModule --
# confirm in machine_unlearning.experiments).
model = Experiment(cfg)

In [12]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, Callback, EarlyStopping

# Checkpoint callback that monitors 'val_loss' and writes files named after the
# epoch and validation loss into /kaggle/working/.
# NOTE(review): this object is never passed to the pl.Trainer created further
# down (its callbacks list only contains PrintCallback()), so it currently has
# no effect. The dirpath is also Kaggle-specific.
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath='/kaggle/working/',
    filename='{epoch:02d}-{val_loss:.2f}'
)

class PrintCallback(pl.Callback):
    """Print the most recent loss and accuracy values held by the trainer.

    Both hooks read from ``trainer.callback_metrics`` and raise ``KeyError``
    if the expected metric has not been logged.
    """

    def on_train_epoch_end(self, trainer, pl_module):
        """Report the training loss/accuracy once a training epoch finishes."""
        metrics = trainer.callback_metrics
        loss = metrics['train_loss']
        accuracy = metrics['train_accuracy']
        print(f"Training loss: {loss}, Accuracy: {accuracy}")

    def on_validation_end(self, trainer, pl_module):
        """Report the validation loss/accuracy once a validation run finishes."""
        metrics = trainer.callback_metrics
        loss = metrics['val_loss']
        accuracy = metrics['val_accuracy']
        print(f"Validation loss: {loss}, Accuracy: {accuracy}")

# Setup Logger

In [13]:
import wandb

# SECURITY: an API key used to be hard-coded in this cell. Anything committed
# inside a notebook must be treated as leaked -- revoke that key and provide a
# new one outside the notebook (export WANDB_API_KEY, or run `wandb login` once
# in a terminal).
# wandb.login() uses WANDB_API_KEY or a previously stored login when available
# and otherwise prompts for the key, so no secret has to live in the notebook.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmax-kraehenmann[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [14]:
import random
from pytorch_lightning.loggers import WandbLogger

# A random numeric prefix keeps runs with identical settings distinguishable
# in the wandb UI.
_run_name = f"{random.randint(1000, 10000)}_{cfg['epochs']}epochs_{cfg['setup']}setup_{DATASET}"

wandb_logger = WandbLogger(
    log_model=False,
    project="machine-unlearning",
    name=_run_name,
)
# Record the run configuration alongside the logged metrics.
wandb_logger.experiment.config.update(cfg)

trainer = pl.Trainer(
    # Follow the device chosen at the top of the notebook instead of
    # hard-coding "gpu", which raises on machines without CUDA.
    accelerator="gpu" if device == "cuda" else "cpu",
    max_epochs=cfg["epochs"],
    logger=wandb_logger,
    # EarlyStopping(monitor="val_loss", mode="min") can be appended here.
    callbacks=[PrintCallback()],
)

[34m[1mwandb[0m: Currently logged in as: [33mmax-kraehenmann[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


# Training

In [None]:
# Run the training loop: fit `model` on cfg["data"], validating on cfg["val_data"].
trainer.fit(model, cfg["data"], cfg["val_data"])

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | model | ResNet           | 11.2 M
1 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.696    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Validation loss: 2.3007607460021973, Accuracy: 0.11328125


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation loss: 1.6205717325210571, Accuracy: 0.38670000433921814
Training loss: 1.698501467704773, Accuracy: 0.25606122612953186


In [None]:
# Close the wandb run so all logged data is flushed.
wandb.finish()

# torch.save does not create missing parent directories; make sure "models/"
# exists so a finished training run is not lost to a failed save.
os.makedirs("models", exist_ok=True)
torch.save(model.state_dict(), f'models/resnet18_seed{seed}.pt')