# Task 2 - Testing the impact of obfuscating data by randomly permuting all pixels

In [None]:
# Packages
import tensorflow 
import numpy as np
import pandas as pd
from tensorflow import keras
from functools import partial
from tensorflow.keras import layers
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from keras.utils.np_utils import to_categorical
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [None]:
# Function to create an MLP with permuted input data
def mlp_permutation(dataset, save=False):
  """Train and evaluate an MLP on pixel-permuted MNIST or Fashion-MNIST.

  Every image (train and test) has its pixels rearranged by the same fixed
  random permutation, is scaled to [0, 1], and is then fed to a small
  fully-connected network. The first 5000 training images are held out as a
  validation set.

  Args:
    dataset: 'digits' for MNIST or 'fashion' for Fashion-MNIST.
    save: when true, write the training history (CSV) and the test metrics
      (.npy) to the dataset's save folder; the metrics are additionally
      written to the current working directory.

  Returns:
    The trained Keras model.

  Raises:
    ValueError: if `dataset` is neither 'digits' nor 'fashion'.
  """
  print('\n', dataset)

  # Dataset will be either the MNIST or fashion MNIST
  if dataset == 'digits':
    data = keras.datasets.mnist
    class_names = ['Zero','One','Two','Three','Four','Five','Six','Seven',
                   'Eight','Nine']

  elif dataset == 'fashion':
    data = keras.datasets.fashion_mnist
    class_names = ['T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal',
                   'Shirt','Sneaker','Bag','Ankle boot']

  else:
    # Fail early with a clear message instead of an unbound `data` later on.
    raise ValueError(
        "Unknown dataset {!r}; expected 'digits' or 'fashion'.".format(dataset))

  # Collecting the data
  (x_train, y_train), (x_test, y_test) = data.load_data()

  # One fixed permutation of ALL pixel positions, drawn once from a constant
  # seed. np.random.permutation applied to a 2-D image would only shuffle its
  # rows, so the permutation is applied to the flattened image instead.
  n_pixels = x_train[0].size
  pixel_order = np.random.RandomState(44).permutation(n_pixels)

  def permute_and_scale(images):
    # Flatten each image, reorder its pixels, restore the image shape and
    # scale the intensities from [0, 255] to [0, 1].
    flat = images.reshape(len(images), -1)
    return flat[:, pixel_order].reshape(images.shape) / 255

  # Train and test go through the same transformation so that the test
  # inputs match what the network saw during training.
  X_train = permute_and_scale(x_train)
  X_test = permute_and_scale(x_test)

  # Creating validation set
  X_valid, X_train = X_train[:5000], X_train[5000:]
  y_valid, y_train = y_train[:5000], y_train[5000:]

  # Creating the MLP. Dropout sits before the output layer: applied after the
  # softmax it would zero out class probabilities during training.
  model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(10, activation='softmax')])

  # Compiling our MLP
  model.compile(loss='sparse_categorical_crossentropy',
                optimizer='RMSprop',
                metrics=['accuracy'])

  # Training our MLP
  history = model.fit(X_train, y_train, epochs=40,
                      validation_data=(X_valid, y_valid))

  # Evaluating once; the result is reused for saving and for printing
  test_metrics = model.evaluate(X_test, y_test)

  # Saving
  if save:
    savepath_d = '/content/drive/MyDrive/Colab Notebooks/savefiles_mnist_digits/'
    savepath_f = '/content/drive/MyDrive/Colab Notebooks/savefiles_mnist_fashion/'
    savepath = savepath_d if dataset == 'digits' else savepath_f

    filename = 'mlp_permutation'
    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(savepath + filename + '.csv')

    test_case = np.array(test_metrics)
    np.save(savepath + filename + '.npy', test_case)
    # Second copy in the current working directory
    np.save(filename + '.npy', test_case)

  # Testing
  print('\n', test_metrics)

  X_new = X_test[:3]
  y_prob = model.predict(X_new)
  print(y_prob.round(2))

  y_pred = np.argmax(y_prob, axis=1)
  print(y_pred)

  print('\nPredictions:', np.array(class_names)[y_pred])

  y_new = y_test[:3]
  print('True labels:', np.array(class_names)[y_new])

  return model

In [None]:
# Train, evaluate and save the permuted-input MLP on both datasets
# (MNIST digits first, then Fashion-MNIST), keeping each trained model.
mlp_digits_model = mlp_permutation(dataset='digits', save=True)
mlp_fashion_model = mlp_permutation(dataset='fashion', save=True)

In [None]:
# Function to create a CNN with permuted input data
def cnn_permutation(dataset, save=False):
  """Train and evaluate a CNN on pixel-permuted MNIST or Fashion-MNIST.

  Every image (train and test) has its pixels rearranged by the same fixed
  random permutation, is scaled to [0, 1], and is given a trailing channel
  axis before being fed to a convolutional network. The first 5000 training
  images are held out as a validation set.

  Args:
    dataset: 'digits' for MNIST or 'fashion' for Fashion-MNIST.
    save: when true, write the training history (CSV) and the test metrics
      (.npy) to the dataset's save folder; the metrics are additionally
      written to the current working directory.

  Returns:
    The trained Keras model.

  Raises:
    ValueError: if `dataset` is neither 'digits' nor 'fashion'.
  """
  print('\n', dataset)

  # Dataset will be either the MNIST or fashion MNIST. The class names of the
  # original were never used in this function, so only the loader is selected.
  if dataset == 'digits':
    data = keras.datasets.mnist

  elif dataset == 'fashion':
    data = keras.datasets.fashion_mnist

  else:
    # Fail early with a clear message instead of an unbound `data` later on.
    raise ValueError(
        "Unknown dataset {!r}; expected 'digits' or 'fashion'.".format(dataset))

  # Collecting the data
  (x_train, y_train), (x_test, y_test) = data.load_data()

  # One fixed permutation of ALL pixel positions, drawn once from a constant
  # seed. np.random.permutation applied to a 2-D image would only shuffle its
  # rows, so the permutation is applied to the flattened image instead.
  n_pixels = x_train[0].size
  pixel_order = np.random.RandomState(44).permutation(n_pixels)

  def prepare(images):
    # Flatten each image, reorder its pixels, restore the image shape with a
    # trailing channel axis (the network below declares a 28x28x1 input), and
    # scale the intensities from [0, 255] to [0, 1].
    flat = images.reshape(len(images), -1)
    return flat[:, pixel_order].reshape(images.shape + (1,)) / 255

  # Train and test go through the same transformation so that the test
  # inputs match what the network saw during training.
  X_train = prepare(x_train)
  X_test = prepare(x_test)

  # Creating validation set
  X_valid, X_train = X_train[:5000], X_train[5000:]
  y_valid, y_train = y_train[:5000], y_train[5000:]

  # Creating our CNN
  DefaultConv2D = partial(layers.Conv2D, kernel_size=3, activation='relu',
                          padding="SAME")
  model = Sequential([
      DefaultConv2D(filters=64, kernel_size=7, input_shape=(28,28,1)),
      layers.MaxPooling2D(pool_size=2),
      DefaultConv2D(filters=128),
      DefaultConv2D(filters=128),
      layers.MaxPooling2D(pool_size=2),
      DefaultConv2D(filters=256),
      DefaultConv2D(filters=256),
      layers.MaxPooling2D(pool_size=2),
      layers.Flatten(),
      layers.Dense(units=128, activation='relu'),
      layers.Dropout(0.5),
      layers.Dense(units=64, activation='relu'),
      layers.Dropout(0.5),
      layers.Dense(units=10, activation='softmax')])

  # Compiling our CNN
  model.compile(loss='sparse_categorical_crossentropy',
                optimizer='sgd',
                metrics=['accuracy'])

  # Training our CNN on the 4-D (channel-last) arrays, matching the shape
  # used for validation and testing.
  history = model.fit(X_train, y_train, epochs=40,
                      validation_data=(X_valid, y_valid))

  # Evaluating once; the result is reused for saving and for printing
  test_metrics = model.evaluate(X_test, y_test)

  # Saving
  if save:
    savepath_d = '/content/drive/MyDrive/Colab Notebooks/cnn_mnist_digits/'
    savepath_f = '/content/drive/MyDrive/Colab Notebooks/cnn_mnist_fashion/'
    savepath = savepath_d if dataset == 'digits' else savepath_f

    filename = 'cnn_permutation'
    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(savepath + filename + '.csv')

    test_case = np.array(test_metrics)
    np.save(savepath + filename + '.npy', test_case)
    # Second copy in the current working directory
    np.save(filename + '.npy', test_case)

  # Testing
  print('\n', test_metrics)

  return model

In [None]:
# Train, evaluate and save the permuted-input CNN on both datasets
# (MNIST digits first, then Fashion-MNIST), keeping each trained model.
cnn_digits_model = cnn_permutation(dataset='digits', save=True)
cnn_fashion_model = cnn_permutation(dataset='fashion', save=True)