## SISA MODEL DEMO CODE

In [1]:
import os
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import time
from copy import deepcopy
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import warnings
warnings.filterwarnings("ignore")

np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7d93b2f64610>

## Dataset link : https://www.kaggle.com/datasets/dhruvpanchal1/cat-dog-classification


# Load training data (50 cat, 50 dog)

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
train_dir = "/home/dhruv/Documents/AML_/cat_dog_images/train"
test_dir = "/home/dhruv/Documents/AML_/cat_dog_images/test"
cat_train_dir = os.path.join(train_dir, "cat")
dog_train_dir = os.path.join(train_dir, "dog")
cat_test_dir = os.path.join(test_dir, "cat")
dog_test_dir = os.path.join(test_dir, "dog")

# Image preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def load_images(directory, label, is_train=True, max_images=50):
    X, y, image_paths = [], [], []
    folder = os.path.join(directory, "cats" if label == 1 else "dogs")
    for img_name in sorted(os.listdir(folder))[:max_images if is_train else 10]:
        img_path = os.path.join(folder, img_name)
        img = Image.open(img_path).convert("RGB")
        img_tensor = transform(img).unsqueeze(0)
        X.append(img_tensor)
        y.append(label)
        image_paths.append(img_path)
    return X, y, image_paths

cat_X, cat_y, cat_paths = load_images(train_dir, 1)
dog_X, dog_y, dog_paths = load_images(train_dir, 0)
X_train_tensors = torch.cat(cat_X + dog_X)
y_train = np.array(cat_y + dog_y)
train_paths = cat_paths + dog_paths

# Shuffle training data to ensure mixed classes
indices = np.arange(len(y_train))
np.random.shuffle(indices)
X_train_tensors = X_train_tensors[indices]
y_train = y_train[indices]
train_paths = [train_paths[i] for i in indices]

# Load test data (10 cat, 10 dog)
cat_X_test, cat_y_test, cat_test_paths = load_images(test_dir, 1, is_train=False)
dog_X_test, dog_y_test, dog_test_paths = load_images(test_dir, 0, is_train=False)
X_test_tensors = torch.cat(cat_X_test + dog_X_test)
y_test = np.array(cat_y_test + dog_y_test)
test_paths = cat_test_paths + dog_test_paths



# Feature extraction with ResNet18

In [5]:
resnet = models.resnet18(pretrained=True)
resnet.eval()
feature_extractor = torch.nn.Sequential(*list(resnet.children())[:-1])
feature_extractor.to(device)

def extract_features(X):
    with torch.no_grad():
        X = X.to(device)
        features = feature_extractor(X).squeeze(-1).squeeze(-1).cpu().numpy()
    return features


X_train = extract_features(X_train_tensors)
X_test = extract_features(X_test_tensors)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model

In [6]:
def train_model(X, y, model=None):
    if model is None:
        model = LogisticRegression(random_state=42)
    start_time = time.time()

    # Skip fitting if only one class
    if len(np.unique(y)) < 2:
        return model, time.time() - start_time
    model.fit(X, y)
    
    return model, time.time() - start_time

# Baseline model

In [7]:
baseline_model, baseline_time = train_model(X_train, y_train)
baseline_acc = accuracy_score(y_test, baseline_model.predict(X_test))
print(f"Baseline training time: {baseline_time:.3f} seconds")
print(f"Baseline test accuracy: {baseline_acc:.3f}")

Baseline training time: 0.003 seconds
Baseline test accuracy: 0.950


# Naive Unlearning Demo

In [8]:
# Remove image #5 (index 4)
remove_idx = 4
print(f"Removing image: {train_paths[remove_idx]}")
X_naive = np.delete(X_train, remove_idx, axis=0)
y_naive = np.delete(y_train, remove_idx)
naive_model, naive_time = train_model(X_naive, y_naive)
naive_acc = accuracy_score(y_test, naive_model.predict(X_test))
print(f"Naive unlearning time (retrain all): {naive_time:.3f} seconds")
print(f"Naive unlearning test accuracy: {naive_acc:.3f}")

Removing image: /home/dhruv/Documents/AML_/cat_dog_images/train/cats/cats_5.jpg
Naive unlearning time (retrain all): 0.001 seconds
Naive unlearning test accuracy: 0.950


# SISA Unlearning

In [9]:
## Defining the parameter
n_shards = 5
n_slices_per_shard = 4
samples_per_shard = len(X_train) // n_shards  # 20 images
samples_per_slice = samples_per_shard // n_slices_per_shard  # 5 images

## Aggregation step
def aggregate_predictions(models, X):
    predictions = np.array([model.predict(X) for model in models])
    return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)


# Sharding
shards = []
for i in range(n_shards):
    start = i * samples_per_shard
    end = start + samples_per_shard
    shards.append((X_train[start:end], y_train[start:end]))


# Isolation & Slicing
sub_models = []
checkpoints = []
for shard_X, shard_y in shards:
    model = LogisticRegression(random_state=42)
    shard_checkpoints = []
    for slice_idx in range(n_slices_per_shard):
        slice_start = slice_idx * samples_per_slice
        slice_end = slice_start + samples_per_slice
        slice_X = shard_X[slice_start:slice_end]
        slice_y = shard_y[slice_start:slice_end]
        model, _ = train_model(slice_X, slice_y, model)
        shard_checkpoints.append(deepcopy(model))
    sub_models.append(model)
    checkpoints.append(shard_checkpoints)


# Baseline SISA evaluation
sisa_predictions = aggregate_predictions(sub_models, X_test)
sisa_acc = accuracy_score(y_test, sisa_predictions)
print(f"SISA baseline test accuracy: {sisa_acc:.3f}")

SISA baseline test accuracy: 0.750


# SISA Unlearning Demo

In [10]:
# SISA Unlearning: Remove image #5
start_time = time.time()
photo_shard = remove_idx // samples_per_shard  # Shard 0
photo_slice = (remove_idx % samples_per_shard) // samples_per_slice  # Slice 0
photo_local_idx = remove_idx % samples_per_slice  # Local index 4

# Retrain affected shard
shard_X, shard_y = shards[photo_shard]
slice_start = photo_slice * samples_per_slice
slice_end = slice_start + samples_per_slice
slice_X = np.delete(shard_X[slice_start:slice_end], photo_local_idx, axis=0)
slice_y = np.delete(shard_y[slice_start:slice_end], photo_local_idx)

# Start from checkpoint or fresh model
model = deepcopy(checkpoints[photo_shard][photo_slice - 1]) if photo_slice > 0 else LogisticRegression(random_state=42)
model, _ = train_model(slice_X, slice_y, model)

# Continue remaining slices
for slice_idx in range(photo_slice + 1, n_slices_per_shard):
    slice_start = slice_idx * samples_per_slice
    slice_end = slice_start + samples_per_slice
    slice_X = shard_X[slice_start:slice_end]
    slice_y = shard_y[slice_start:slice_end]
    model, _ = train_model(slice_X, slice_y, model)

# Update sub-models

In [11]:
sub_models[photo_shard] = model
sisa_unlearn_time = time.time() - start_time
sisa_unlearn_predictions = aggregate_predictions(sub_models, X_test)
sisa_unlearn_acc = accuracy_score(y_test, sisa_unlearn_predictions)
print(f"SISA unlearning time (retrain shard {photo_shard}, slice {photo_slice}+): {sisa_unlearn_time:.3f} seconds")
print(f"SISA unlearning test accuracy: {sisa_unlearn_acc:.3f}")

SISA unlearning time (retrain shard 0, slice 0+): 0.013 seconds
SISA unlearning test accuracy: 0.750


# Sample predictions

In [12]:
print("\nSample predictions on test images ([Dog, Cat]):")
sample_test_idx = [0, 10]  # First dog, first cat
sample_X_test = X_test[sample_test_idx]
sample_y_test = y_test[sample_test_idx]
sample_paths = [test_paths[i] for i in sample_test_idx]
naive_preds = naive_model.predict(sample_X_test)
sisa_preds = aggregate_predictions(sub_models, sample_X_test)
print(f"Test images: {sample_paths}")
print(f"Naive Unlearning: {naive_preds} (0=dog, 1=cat)")
print(f"SISA Unlearning: {sisa_preds} (0=dog, 1=cat)")


Sample predictions on test images ([Dog, Cat]):
Test images: ['/home/dhruv/Documents/AML_/cat_dog_images/test/cats/cat_1.jpg', '/home/dhruv/Documents/AML_/cat_dog_images/test/dogs/dog_1.jpg']
Naive Unlearning: [1 0] (0=dog, 1=cat)
SISA Unlearning: [1 0] (0=dog, 1=cat)
