In [17]:
import tensorflow_datasets as tfds
import tensorflow as tf

# Fetch the CIFAR-10 train and test splits plus the dataset metadata.
# `as_supervised=True` makes every example an (image, label) pair.
ds, ds_info = tfds.load(
    'cifar10',
    split=['train', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)

# `ds` holds the splits in the order they were requested above.
ds_train, ds_test = ds



In [18]:
def normalize_img(image, label):
  """Cast `image` to `float32` and divide it by 255; `label` is returned unchanged."""
  scaled_image = tf.cast(image, tf.float32) / 255.
  return scaled_image, label

def pipeline(ds):
  """Return `ds` with `normalize_img` mapped over it, then cached and prefetched."""
  return (
      ds.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
      .cache()
      .prefetch(tf.data.AUTOTUNE)
  )

# Build the normalized splits from the raw datasets in `ds` rather than from
# `ds_train` / `ds_test` themselves, so that re-running this cell does not
# apply `normalize_img` (the division by 255) a second time.
ds_train = pipeline(ds[0])
ds_test = pipeline(ds[1])


In [19]:
# Wrap both pipelines with `tfds.as_numpy` so they can be iterated as NumPy values.
ds_train_np, ds_test_np = (tfds.as_numpy(split) for split in (ds_train, ds_test))

In [20]:
import numpy as np
# Walk the training split once and collect images and labels into parallel lists.
ds_train_img, ds_train_label = [], []
for image, label in ds_train_np:
  ds_train_img.append(image)
  ds_train_label.append(label)

# Same for the test split.
ds_test_img, ds_test_label = [], []
for image, label in ds_test_np:
  ds_test_img.append(image)
  ds_test_label.append(label)

In [21]:
# Stack the collected training images into a single array.
ds_train_img = np.array(ds_train_img)
# Flatten every image into one feature row. The row count comes from the data
# and -1 lets NumPy infer the row length, so the cell no longer depends on the
# split having exactly 50000 images of 3072 values each.
reshaped_train = ds_train_img.reshape(len(ds_train_img), -1)

In [22]:
# Stack the collected test images into a single array.
ds_test_img = np.array(ds_test_img)
# Flatten every image into one feature row. The row count comes from the data
# and -1 lets NumPy infer the row length, so the cell no longer depends on the
# split having exactly 10000 images of 3072 values each.
reshaped_test = ds_test_img.reshape(len(ds_test_img), -1)

In [23]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans 
from sklearn.pipeline import Pipeline

In [24]:
n_categories = 4
# Fixed seed so the cluster assignment (and therefore the shards and the
# pickled predictor) is the same on every Restart & Run All.
CLUSTER_SEED = 42

# Reduce each flattened image to 10 principal components, then cluster those
# components into `n_categories` groups.
pca = PCA(n_components=10, random_state=CLUSTER_SEED)
kmeans = KMeans(n_clusters=n_categories, max_iter=200, random_state=CLUSTER_SEED)
predictor = Pipeline([('pca', pca), ('kmeans', kmeans)])

# Cluster id of every training image, in the same order as `reshaped_train`.
predict = predictor.fit(reshaped_train).predict(reshaped_train)


In [25]:
import pickle

# Persist the fitted PCA + KMeans pipeline next to the notebook.
with open("kmeans_predictor.pkl", "wb") as predictor_file:
    predictor_file.write(pickle.dumps(predictor))

In [26]:
# `ds_train_img` holds the training images and `predict[i]` is the cluster id
# assigned to `ds_train_img[i]`. Route every image (and its label) to the
# shard of its cluster; one shard per cluster, `n_categories` in total.
shards = [list() for _ in range(n_categories)]
labels_shards = [list() for _ in range(n_categories)]

for cluster, image, label in zip(predict, ds_train_img, ds_train_label):
  shards[cluster].append(image)
  labels_shards[cluster].append(label)

In [27]:
def _conv3x3(filters, **extra):
    """Build a 3x3, same-padded, ReLU Conv2D layer with He-uniform initialization."""
    return tf.keras.layers.Conv2D(
        filters,
        (3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
        padding='same',
        **extra,
    )


# Three conv-conv-pool-dropout stages (32, 64, 128 filters) followed by a
# dense classification head with a 10-way softmax output.
vgg_benchmark_model = tf.keras.models.Sequential([
    # stage 1
    _conv3x3(32, input_shape=(32, 32, 3)),
    _conv3x3(32),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),
    # stage 2
    _conv3x3(64),
    _conv3x3(64),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),
    # stage 3
    _conv3x3(128),
    _conv3x3(128),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),
    # classification head
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu', kernel_initializer='he_uniform'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [28]:
from math import ceil

# Clone the benchmark architecture and keep the first half of its layers
# (rounded up) as the root model.
temp_model = tf.keras.models.clone_model(vgg_benchmark_model)
layer_count = ceil(0.5 * len(temp_model.layers))
print('the root layer count is : ', layer_count)

root_layers = temp_model.layers[:layer_count]
root_model = tf.keras.models.Sequential(root_layers)

the root layer count is :  8


In [29]:
# Build one model per cluster: the single `root_model` followed by a freshly
# cloned copy of the second half of the benchmark architecture.
# NOTE(review): the same `root_model` object is placed in every constituent
# model, so each model's training also updates the root used by the others;
# confirm that this sharing is intended.
constituent_models = []
for i in range(n_categories):
    print('building model', i)

    temp_model = tf.keras.models.clone_model(vgg_benchmark_model)
    # Same split point as the root: the first half (rounded up) is skipped.
    layer_count = ceil(len(temp_model.layers) * 0.5)
    print('the second half layer count is : ', len(temp_model.layers) - layer_count)
    part_model = tf.keras.models.Sequential(temp_model.layers[layer_count:])

    model = tf.keras.models.Sequential([root_model, part_model])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        # The final layer already applies softmax, hence from_logits=False.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
    )
    constituent_models.append(model)


building model 0
the second half layer count is :  8
building model 1
the second half layer count is :  8
building model 2
the second half layer count is :  8
building model 3
the second half layer count is :  8


In [31]:
# NOTE(review): running tf.functions eagerly is normally a debugging aid and
# slows training down; confirm it is still needed before keeping it.
tf.config.run_functions_eagerly(True)

TRAIN_BATCH_SIZE = 250

# Full test set, used as validation data for every constituent model.
# The Python lists are converted to arrays first so that TensorFlow receives
# one dense array instead of converting a list element by element.
ds_test = tf.data.Dataset.from_tensor_slices(
    (np.asarray(ds_test_img), np.asarray(ds_test_label))
)
ds_test = ds_test.batch(TRAIN_BATCH_SIZE)

# train each constituent model with a separate shard
for i, model in enumerate(constituent_models):
    print('training model[', i, ']')
    # convert this shard's images and labels to a batched dataset
    data_set = tf.data.Dataset.from_tensor_slices(
        (np.asarray(shards[i]), np.asarray(labels_shards[i]))
    )
    data_set = data_set.batch(TRAIN_BATCH_SIZE)
    model.fit(
        data_set,
        epochs=20,
        validation_data=ds_test,
        verbose=1
    )

training model[ 0 ]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
training model[ 1 ]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
training model[ 2 ]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
training model[ 3 ]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20

In [32]:
# Score every constituent model on the whole test set, ignoring cluster membership.
for i, model in enumerate(constituent_models):
  loss, acc = model.evaluate(ds_test)
  print('model', i, " accuracy ", acc)

model 0  accuracy  0.43340003490448
model 1  accuracy  0.5347000360488892
model 2  accuracy  0.48910000920295715
model 3  accuracy  0.6117000579833984


In [None]:
!mkdir -p cluster_trained_models
constituent_models[0].save('cluster_trained_models/constituent_model_0') 
constituent_models[1].save('cluster_trained_models/constituent_model_1')  
constituent_models[2].save('cluster_trained_models/constituent_model_2') 
constituent_models[3].save('cluster_trained_models/constituent_model_3')



In [None]:
# Mount Google Drive at /content/drive. `google.colab` is imported here, so
# this cell presumably only runs inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
cifar  cifar-10-python.tar.gz  cluster_trained_models  drive  sample_data


In [None]:
# Predict the cluster of every test image with the fitted `predictor` pipeline.
# NOTE(review): this rebinds `predict`, which until now held the cluster ids of
# the training images; re-running the training sharding cell after this one
# would read the test assignments instead.
predict = predictor.predict(reshaped_test)

In [None]:
# Split the test set into one shard per cluster so that each shard can be
# scored by the constituent model trained on that cluster.
# `predict` was computed from `reshaped_test`, which is a reshape of
# `ds_test_img`, so `predict[i]` is the cluster id of `ds_test_img[i]`.
# (The previous version read `test_data` / `test_labels`, which are not
# defined anywhere in this notebook.)
assert len(predict) == len(ds_test_img) == len(ds_test_label), \
    "predict must hold the test-set cluster ids; re-run the test prediction cell"

shards_test = [list() for _ in range(n_categories)]
labels_shards_test = [list() for _ in range(n_categories)]

for cluster, image, label in zip(predict, ds_test_img, ds_test_label):
  shards_test[cluster].append(image)
  labels_shards_test[cluster].append(label)

In [None]:
# evaluate the accuracy of each constituent model on the test images of its own cluster
for i, model in enumerate(constituent_models):
  if not shards_test[i]:
    # No test image was assigned to this cluster, so there is nothing to score.
    print('model', i, " has no test images in its cluster; skipping")
    continue
  # Use a loop-local dataset name so the full-test-set `ds_test` is not
  # overwritten by the last shard.
  shard_ds = tf.data.Dataset.from_tensor_slices(
      (np.asarray(shards_test[i]), np.asarray(labels_shards_test[i]))
  )
  shard_ds = shard_ds.batch(128)
  loss, acc = model.evaluate(shard_ds)
  print('model', i, " accuracy ", acc)

model 0  accuracy  0.12645012140274048
model 1  accuracy  0.35145196318626404
model 2  accuracy  0.38756614923477173
model 3  accuracy  0.7065471410751343
