# Shift maliciousness (dSprites)

**Author**: Maleakhi A. Wijaya  
**Description**: Inspect the maliciousness of different types and intensities of shift.

In [15]:
# Load utilities functions
%run ../../scripts/constants.py
%run ../../scripts/dsprites_utils.py
%run ../../scripts/shift_applicator.py
%run ../../scripts/shift_dimensionality_reductor.py
%run ../../scripts/experiment_utils.py
%run ../../scripts/shift_statistical_test.py
from sklearn.metrics import accuracy_score

In [2]:
# Seed NumPy and TensorFlow with one shared value for repeatable runs.
SEED = 20
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

## Load dataset

In [3]:
# Load dSprites and split it into train / test sets. The captured output of
# this cell reports 85000 training and 15000 testing samples.
# NOTE(review): 100000 looks like the number of sampled instances and
# class_index=1 like the "shape" latent -- confirm in dsprites_utils.py.
path = "../../data/dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz"
X_train, X_test, y_train, y_test, c_train, c_test = load_dsprites(path, 100000, DatasetTask.Task1, train_size=0.85, class_index=1)
n_classes_shape = 3
concept_names = ["color", "shape", "scale", "rotation", "x", "y"]
concept_values = get_latent_sizes()

# Carve a validation set off the tail of the training split.
# The split point is named once so the three slices cannot drift apart.
n_train_split = 70000
assert len(X_train) > n_train_split, (
    f"Expected more than {n_train_split} training samples, got {len(X_train)}; "
    "the validation split would be empty."
)
X_train, X_valid = X_train[:n_train_split], X_train[n_train_split:]
y_train, y_valid = y_train[:n_train_split], y_train[n_train_split:]
c_train, c_valid = c_train[:n_train_split], c_train[n_train_split:]

Training samples: 85000
Testing samples: 15000


In [4]:
# Adversarial samples stored on disk; they are consumed further down
# when the adversarial shift is applied.
adv_samples_path = "../../data/adversarial_samples/X_adversarial_dsprites.npy"
adv_samples = np.load(adv_samples_path)

In [5]:
# The shift functions below receive the images as 2-D arrays of shape
# (number of instances, flattened image size). Remember the per-image shape
# so shifted data can be reshaped back before prediction / visualisation.
ORIGINAL_SHAPE = X_test.shape[1:]  # shape of a single image

# Flatten each split into an independent copy (order: test, train, valid).
X_test_flatten, X_train_flatten, X_valid_flatten = (
    deepcopy(images.reshape(images.shape[0], -1))
    for images in (X_test, X_train, X_valid)
)

## Load models & original prediction results

In [10]:
# Restore the two saved Keras models used throughout this notebook:
#   - end_to_end_model: end-to-end (FFNN) model
#   - itc_model:        input-to-concept model
ffnn_path = "../../models/end_to_end_dsprites_task1"
itc_path = "../../models/multitask_dsprites"

end_to_end_model = tf.keras.models.load_model(ffnn_path)
itc_model = tf.keras.models.load_model(itc_path)

In [17]:
# Accuracy of the end-to-end model (FFNN) on the unshifted test set.
# The predicted class is the argmax over the model's output scores.
y_pred = np.argmax(end_to_end_model.predict(X_test), axis=1)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.9998666666666667


In [18]:
# Per-concept accuracy of the input-to-concept model on the unshifted test set.
# Output i of the model is scored against column i of c_test.
banner = "*" * 20
for concept_idx, concept_scores in enumerate(itc_model.predict(X_test)):
    print(banner, f"Model: {DSPRITES_CONCEPT_NAMES[concept_idx]}", banner)
    concept_acc = accuracy_score(c_test[:, concept_idx], np.argmax(concept_scores, axis=1))

    print(f"Accuracy: {concept_acc}")

******************** Model: color ********************
Accuracy: 1.0
******************** Model: shape ********************
Accuracy: 1.0
******************** Model: scale ********************
Accuracy: 0.9997333333333334
******************** Model: rotation ********************
Accuracy: 0.5332666666666667
******************** Model: x ********************
Accuracy: 0.8658666666666667
******************** Model: y ********************
Accuracy: 0.8815333333333333


## Gaussian shift

In [19]:
# Build one Gaussian-shifted copy of the test set per intensity level.
shift_intensities = [ShiftIntensity.Small, ShiftIntensity.Medium, ShiftIntensity.Large]
image_dims = (ORIGINAL_SHAPE[0], ORIGINAL_SHAPE[1], ORIGINAL_SHAPE[2])

X_gaussian = []
for shift_intensity in shift_intensities:
    # Work on a fresh copy so the flattened test set itself is never modified.
    # NOTE(review): the trailing 1.0 is presumably the fraction of samples
    # to shift -- confirm in shift_applicator.py.
    X_shifted, _ = apply_gaussian_shift(deepcopy(X_test_flatten), y_test, shift_intensity, 1.0)
    # Back to image shape so the models can consume it.
    X_shifted = X_shifted.reshape(-1, *image_dims)
    X_gaussian.append(X_shifted)

In [20]:
# End-to-end model accuracy on each Gaussian-shifted test set.
banner = "*" * 20
for shifted_images, intensity_str in zip(X_gaussian, ["small", "medium", "large"]):
    print(banner, intensity_str, banner)
    y_pred = np.argmax(end_to_end_model.predict(shifted_images), axis=1)
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

******************** small ********************
Accuracy: 0.9999333333333333
******************** medium ********************
Accuracy: 0.9847333333333333
******************** large ********************
Accuracy: 0.3394


In [22]:
# Per-concept accuracy of the input-to-concept model on each
# Gaussian-shifted test set (labels are the unchanged c_test columns).
outer_banner, inner_banner = "*" * 20, "*" * 10
for shifted_images, intensity_str in zip(X_gaussian, ["small", "medium", "large"]):
    print(outer_banner, intensity_str, outer_banner)
    for concept_idx, concept_scores in enumerate(itc_model.predict(shifted_images)):
        print(inner_banner, f"Model: {DSPRITES_CONCEPT_NAMES[concept_idx]}", inner_banner)
        concept_acc = accuracy_score(c_test[:, concept_idx], np.argmax(concept_scores, axis=1))

        print(f"Accuracy: {concept_acc}")
    print()

******************** small ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.9999333333333333
********** Model: scale **********
Accuracy: 0.9993333333333333
********** Model: rotation **********
Accuracy: 0.5326
********** Model: x **********
Accuracy: 0.8659333333333333
********** Model: y **********
Accuracy: 0.8813333333333333

******************** medium ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.9963333333333333
********** Model: scale **********
Accuracy: 0.6630666666666667
********** Model: rotation **********
Accuracy: 0.49246666666666666
********** Model: x **********
Accuracy: 0.8636666666666667
********** Model: y **********
Accuracy: 0.8741333333333333

******************** large ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.3536666666666667
********** Model: scale

## Knockout shift

In [23]:
# Build one knockout-shifted version of the test set per intensity level.
# apply_ko_shift returns images, labels and concepts, so all three are kept
# per intensity and used as the ground truth when scoring below.
shift_intensities = [ShiftIntensity.Small, ShiftIntensity.Medium, ShiftIntensity.Large]
image_dims = (ORIGINAL_SHAPE[0], ORIGINAL_SHAPE[1], ORIGINAL_SHAPE[2])

X_ko, y_ko, c_ko = [], [], []
for shift_intensity in shift_intensities:
    # Fresh copies on every iteration: the originals stay untouched.
    X_shifted, y_shifted, c_shifted = apply_ko_shift(
        deepcopy(X_test_flatten), deepcopy(y_test), deepcopy(c_test),
        shift_intensity, cl=MAJORITY,
    )
    # Back to image shape so the models can consume it.
    X_shifted = X_shifted.reshape(-1, *image_dims)
    X_ko.append(X_shifted)
    y_ko.append(y_shifted)
    c_ko.append(c_shifted)

In [24]:
# End-to-end model accuracy on each knockout-shifted set, scored against
# the labels returned by the shift (the concepts are not needed here).
banner = "*" * 20
for shifted_images, shifted_labels, _shifted_concepts, intensity_str in zip(X_ko, y_ko, c_ko, ["small", "medium", "large"]):
    print(banner, intensity_str, banner)
    y_pred = np.argmax(end_to_end_model.predict(shifted_images), axis=1)
    print(f"Accuracy: {accuracy_score(shifted_labels, y_pred)}")

******************** small ********************
Accuracy: 0.9999309868875086
******************** medium ********************
Accuracy: 0.9999197045126064
******************** large ********************
Accuracy: 0.9998990816429508


In [27]:
# Per-concept accuracy of the input-to-concept model on each knockout-shifted
# set, scored against the concepts returned by the shift (the task labels are
# not needed here).
outer_banner, inner_banner = "*" * 20, "*" * 10
for shifted_images, _shifted_labels, shifted_concepts, intensity_str in zip(X_ko, y_ko, c_ko, ["small", "medium", "large"]):
    print(outer_banner, intensity_str, outer_banner)
    for concept_idx, concept_scores in enumerate(itc_model.predict(shifted_images)):
        print(inner_banner, f"Model: {DSPRITES_CONCEPT_NAMES[concept_idx]}", inner_banner)
        concept_acc = accuracy_score(shifted_concepts[:, concept_idx], np.argmax(concept_scores, axis=1))

        print(f"Accuracy: {concept_acc}")
    print()

******************** small ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 1.0
********** Model: scale **********
Accuracy: 0.9997239475500345
********** Model: rotation **********
Accuracy: 0.5318150448585232
********** Model: x **********
Accuracy: 0.8612146307798482
********** Model: y **********
Accuracy: 0.8773636991028295

******************** medium ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 1.0
********** Model: scale **********
Accuracy: 0.9996788180504256
********** Model: rotation **********
Accuracy: 0.5203147583105829
********** Model: x **********
Accuracy: 0.8385257748514533
********** Model: y **********
Accuracy: 0.8573149189015578

******************** large ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 1.0
********** Model: scale **********
Accuracy: 0.9996972449

## Concept shift

In [28]:
# Build one concept-shifted version of the test set per intensity level.
# apply_concept_shift returns images, labels and concepts, so all three are
# kept per intensity and used as the ground truth when scoring below.
shift_intensities = [ShiftIntensity.Small, ShiftIntensity.Medium, ShiftIntensity.Large]
image_dims = (ORIGINAL_SHAPE[0], ORIGINAL_SHAPE[1], ORIGINAL_SHAPE[2])

X_cs, y_cs, c_cs = [], [], []
for shift_intensity in shift_intensities:
    # Fresh copies on every iteration: the originals stay untouched.
    # NOTE(review): the literal 2 is presumably the index of the concept
    # being shifted -- confirm in shift_applicator.py.
    X_shifted, y_shifted, c_shifted = apply_concept_shift(
        deepcopy(X_test_flatten), deepcopy(y_test), deepcopy(c_test),
        2, shift_intensity, cl=MAJORITY,
    )
    # Back to image shape so the models can consume it.
    X_shifted = X_shifted.reshape(-1, *image_dims)
    X_cs.append(X_shifted)
    y_cs.append(y_shifted)
    c_cs.append(c_shifted)

In [29]:
# End-to-end model accuracy on each concept-shifted set, scored against
# the labels returned by the shift (the concepts are not needed here).
banner = "*" * 20
for shifted_images, shifted_labels, _shifted_concepts, intensity_str in zip(X_cs, y_cs, c_cs, ["small", "medium", "large"]):
    print(banner, intensity_str, banner)
    y_pred = np.argmax(end_to_end_model.predict(shifted_images), axis=1)
    print(f"Accuracy: {accuracy_score(shifted_labels, y_pred)}")

******************** small ********************
Accuracy: 0.9998643699986437
******************** medium ********************
Accuracy: 0.9998543441846915
******************** large ********************
Accuracy: 0.9998395121168352


In [30]:
# Per-concept accuracy of the input-to-concept model on each concept-shifted
# set, scored against the concepts returned by the shift (the task labels are
# not needed here).
outer_banner, inner_banner = "*" * 20, "*" * 10
for shifted_images, _shifted_labels, shifted_concepts, intensity_str in zip(X_cs, y_cs, c_cs, ["small", "medium", "large"]):
    print(outer_banner, intensity_str, outer_banner)
    for concept_idx, concept_scores in enumerate(itc_model.predict(shifted_images)):
        print(inner_banner, f"Model: {DSPRITES_CONCEPT_NAMES[concept_idx]}", inner_banner)
        concept_acc = accuracy_score(shifted_concepts[:, concept_idx], np.argmax(concept_scores, axis=1))

        print(f"Accuracy: {concept_acc}")
    print()

******************** small ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 1.0
********** Model: scale **********
Accuracy: 0.9997287399972874
********** Model: rotation **********
Accuracy: 0.5323477553234776
********** Model: x **********
Accuracy: 0.8653194086531941
********** Model: y **********
Accuracy: 0.8813237488132375

******************** medium ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 1.0
********** Model: scale **********
Accuracy: 0.9997086883693832
********** Model: rotation **********
Accuracy: 0.5285121258466244
********** Model: x **********
Accuracy: 0.8641759522248926
********** Model: y **********
Accuracy: 0.8802709198164737

******************** large ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 1.0
********** Model: scale **********
Accuracy: 0.9999197560

## Image shift

In [31]:
# Build one image-shifted copy of the test set per intensity level,
# using ShiftType.All.
shift_intensities = [ShiftIntensity.Small, ShiftIntensity.Medium, ShiftIntensity.Large]
image_dims = (ORIGINAL_SHAPE[0], ORIGINAL_SHAPE[1], ORIGINAL_SHAPE[2])

X_img = []
for shift_intensity in shift_intensities:
    # Work on a fresh copy so the flattened test set itself is never modified.
    # NOTE(review): the 1.0 is presumably the fraction of samples to shift --
    # confirm in shift_applicator.py.
    X_shifted, _ = apply_img_shift(
        deepcopy(X_test_flatten), y_test, ORIGINAL_SHAPE,
        shift_intensity, 1.0, ShiftType.All,
    )
    # Back to image shape so the models can consume it.
    X_shifted = X_shifted.reshape(-1, *image_dims)
    X_img.append(X_shifted)

In [32]:
# End-to-end model accuracy on each image-shifted test set.
banner = "*" * 20
for shifted_images, intensity_str in zip(X_img, ["small", "medium", "large"]):
    print(banner, intensity_str, banner)
    y_pred = np.argmax(end_to_end_model.predict(shifted_images), axis=1)
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

******************** small ********************
Accuracy: 0.989
******************** medium ********************
Accuracy: 0.8778666666666667
******************** large ********************
Accuracy: 0.6262666666666666


In [33]:
# Per-concept accuracy of the input-to-concept model on each image-shifted
# test set (labels are the unchanged c_test columns).
outer_banner, inner_banner = "*" * 20, "*" * 10
for shifted_images, intensity_str in zip(X_img, ["small", "medium", "large"]):
    print(outer_banner, intensity_str, outer_banner)
    for concept_idx, concept_scores in enumerate(itc_model.predict(shifted_images)):
        print(inner_banner, f"Model: {DSPRITES_CONCEPT_NAMES[concept_idx]}", inner_banner)
        concept_acc = accuracy_score(c_test[:, concept_idx], np.argmax(concept_scores, axis=1))

        print(f"Accuracy: {concept_acc}")
    print()

******************** small ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.9872666666666666
********** Model: scale **********
Accuracy: 0.8312666666666667
********** Model: rotation **********
Accuracy: 0.15386666666666668
********** Model: x **********
Accuracy: 0.1884
********** Model: y **********
Accuracy: 0.18406666666666666

******************** medium ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.8495333333333334
********** Model: scale **********
Accuracy: 0.5410666666666667
********** Model: rotation **********
Accuracy: 0.0336
********** Model: x **********
Accuracy: 0.03286666666666667
********** Model: y **********
Accuracy: 0.05413333333333333

******************** large ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.5819333333333333
********** Model: scale ********

## Adversarial shift

In [35]:
# Build one adversarially-shifted copy of the test set per intensity level,
# drawing on the adversarial samples loaded earlier.
shift_intensities = [ShiftIntensity.Small, ShiftIntensity.Medium, ShiftIntensity.Large]
image_dims = (ORIGINAL_SHAPE[0], ORIGINAL_SHAPE[1], ORIGINAL_SHAPE[2])

X_adv = []
for shift_intensity in shift_intensities:
    # Work on a fresh copy so the flattened test set itself is never modified.
    X_shifted = deepcopy(X_test_flatten)
    # Every row index of the copy is passed as a candidate index.
    candidate_indices = range(len(X_shifted))
    X_shifted, _ = apply_adversarial_shift(X_shifted, y_test, adv_samples,
                                           candidate_indices, shift_intensity)
    # Back to image shape so the models can consume it.
    X_shifted = X_shifted.reshape(-1, *image_dims)
    X_adv.append(X_shifted)

In [37]:
# End-to-end model accuracy on each adversarially-shifted test set.
banner = "*" * 20
for shifted_images, intensity_str in zip(X_adv, ["small", "medium", "large"]):
    print(banner, intensity_str, banner)
    y_pred = np.argmax(end_to_end_model.predict(shifted_images), axis=1)
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

******************** small ********************
Accuracy: 0.9582
******************** medium ********************
Accuracy: 0.7922
******************** large ********************
Accuracy: 0.5861333333333333


In [38]:
# Per-concept accuracy of the input-to-concept model on each
# adversarially-shifted test set (labels are the unchanged c_test columns).
outer_banner, inner_banner = "*" * 20, "*" * 10
for shifted_images, intensity_str in zip(X_adv, ["small", "medium", "large"]):
    print(outer_banner, intensity_str, outer_banner)
    for concept_idx, concept_scores in enumerate(itc_model.predict(shifted_images)):
        print(inner_banner, f"Model: {DSPRITES_CONCEPT_NAMES[concept_idx]}", inner_banner)
        concept_acc = accuracy_score(c_test[:, concept_idx], np.argmax(concept_scores, axis=1))

        print(f"Accuracy: {concept_acc}")
    print()

******************** small ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.9618666666666666
********** Model: scale **********
Accuracy: 0.9358666666666666
********** Model: rotation **********
Accuracy: 0.49493333333333334
********** Model: x **********
Accuracy: 0.842
********** Model: y **********
Accuracy: 0.8533333333333334

******************** medium ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.8110666666666667
********** Model: scale **********
Accuracy: 0.692
********** Model: rotation **********
Accuracy: 0.3346
********** Model: x **********
Accuracy: 0.7335333333333334
********** Model: y **********
Accuracy: 0.7420666666666667

******************** large ********************
********** Model: color **********
Accuracy: 1.0
********** Model: shape **********
Accuracy: 0.6236
********** Model: scale **********
Accuracy: 0.37653333333333