In [1]:
pip install tensorflow

[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
pip install keras

[0mNote: you may need to restart the kernel to use updated packages.


# Data Import and Split

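Importing the CIFAR-10 data (10 classes, ~60k images). I want an 80-10-10 train-test-val split so that there is enough training data, since this dataset isn't particularly large, while keeping the proportion of each class the same within each set. In practice, the code below keeps CIFAR-10's predefined 50k training set and splits its 10k test set evenly (stratified by class) into validation and test sets, i.e. roughly 83-8-8.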

In [41]:
import numpy as np

from keras.datasets import cifar10
from keras.utils import to_categorical   
from sklearn.model_selection import train_test_split

def load_cifar10(random_state=42):
    """Load CIFAR-10 and carve validation and test sets out of its test split.

    The training split returned by ``cifar10.load_data()`` is passed through
    unchanged. The other split is divided 50/50 into validation and test
    sets, stratified on the labels so both halves keep the same class
    proportions.

    Parameters
    ----------
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split``. A fixed seed makes the
        validation/test membership identical across kernel restarts, so
        results from separate runs can be compared. Pass ``None`` to get a
        different shuffle on every call.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, X_test, y_val, y_test)``. Note the order:
        both validation/test image arrays come before both label arrays.
    """
    (X_train, y_train), (X_test_val, y_test_val) = cifar10.load_data()

    # Stratify on the labels so that validation and test stay class-balanced.
    X_val, X_test, y_val, y_test = train_test_split(
        X_test_val,
        y_test_val,
        test_size=0.5,
        shuffle=True,
        stratify=y_test_val,
        random_state=random_state,
    )

    return X_train, y_train, X_val, X_test, y_val, y_test


In [42]:
X_train, y_train, X_val, X_test, y_val, y_test = load_cifar10()

# Report image and label array shapes per split: train, then test, then validation.
for split_X, split_y in ((X_train, y_train), (X_test, y_test), (X_val, y_val)):
    print(split_X.shape)
    print(split_y.shape)

(50000, 32, 32, 3)
(50000, 1)
(5000, 32, 32, 3)
(5000, 1)
(5000, 32, 32, 3)
(5000, 1)


In [43]:
# Per-class sample counts for the train, test and validation labels (in that order).
for split_labels in (y_train, y_test, y_val):
    print(np.unique(split_labels, return_counts=True))

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]))
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([500, 500, 500, 500, 500, 500, 500, 500, 500, 500]))
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8), array([500, 500, 500, 500, 500, 500, 500, 500, 500, 500]))


Now we want to split our dataset into the retain and forget sets. For now, the single class that we want to "unlearn" is set through a constant variable, but in future experiments it can be chosen at random (e.g. with `random`).

Then we double-check the set sizes and labels again.

In [44]:
def split_to_retain_and_forget(X, y, unlearn_class_label):
    """Partition a labelled dataset into a retain set and a forget set.

    Samples whose label equals ``unlearn_class_label`` form the forget set;
    all other samples form the retain set. The relative order of samples is
    preserved within each set.

    Parameters
    ----------
    X : array
        Samples, indexed along the first axis with a boolean mask.
    y : array-like
        Labels, either a column vector of shape ``(N, 1)`` or a flat vector
        of shape ``(N,)``.
    unlearn_class_label : scalar
        Label of the class to place in the forget set.

    Returns
    -------
    tuple
        ``(X_retain, y_retain, X_forget, y_forget)``. The label arrays keep
        the dimensionality of ``y`` (``(M, 1)`` or ``(M,)``).

    Raises
    ------
    ValueError
        If ``y`` is neither ``(N, 1)`` nor ``(N,)``.
    """
    labels = np.asarray(y)

    # Build a flat per-sample label view; accept both (N, 1) and (N,) layouts.
    if labels.ndim == 2 and labels.shape[1] == 1:
        flat_labels = labels[:, 0]
    elif labels.ndim == 1:
        flat_labels = labels
    else:
        raise ValueError(
            f"y must have shape (N, 1) or (N,), got {labels.shape}"
        )

    # The two masks are exact complements, so compute the comparison once.
    forget_mask = flat_labels == unlearn_class_label
    retain_mask = ~forget_mask

    X_retain = X[retain_mask]
    y_retain = labels[retain_mask]

    X_forget = X[forget_mask]
    y_forget = labels[forget_mask]

    return X_retain, y_retain, X_forget, y_forget

In [45]:
# Label of the single class to "unlearn" (the forget set); a fixed constant for now.
unlearn_class = 6

In [46]:
X_train_retain, y_train_retain, X_train_forget, y_train_forget = split_to_retain_and_forget(X_train, y_train, unlearn_class)

# Sanity-check sizes and label distribution: retain set first, then forget set.
for subset_X, subset_y in (
    (X_train_retain, y_train_retain),
    (X_train_forget, y_train_forget),
):
    print(subset_X.shape)
    print(subset_y.shape)
    print(np.unique(subset_y, return_counts=True))


(45000, 32, 32, 3)
(45000, 1)
(array([0, 1, 2, 3, 4, 5, 7, 8, 9], dtype=uint8), array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]))
(5000, 32, 32, 3)
(5000, 1)
(array([6], dtype=uint8), array([5000]))


Now let's begin to train our original model

In [60]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

# Hyperparameters of the single convolution / pooling stage.
num_filters = 8
filter_size = 3
pool_size = 2

# Baseline CNN, assembled layer by layer:
# convolution -> max-pooling -> flatten -> 10-way softmax classifier.
model = Sequential()
model.add(Conv2D(num_filters, filter_size, input_shape=(32, 32, 3)))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))

In [61]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [65]:
from tensorflow.keras.utils import to_categorical

# One-hot encode with an explicit width so the targets always match the
# model's 10-unit softmax output. Without num_classes, to_categorical infers
# the width from the largest label present, which would produce too few
# columns for any split that happens to lack the highest class.
num_classes = 10

model.fit(
    X_train,
    to_categorical(y_train, num_classes=num_classes),
    epochs = 3,
    validation_data = (X_val, to_categorical(y_val, num_classes=num_classes))
)

Epoch 1/3

ValueError: Unexpected result of `train_function` (Empty logs). This could be due to issues in input pipeline that resulted in an empty dataset. Otherwise, please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [None]:
# Debug aid: show the first training sample stored in X_train.
print(X_train[0])