In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# following cells can change into the project folder and read files from it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd "/content/drive/MyDrive/ClassAug Work"

/content/drive/.shortcut-targets-by-id/12PFZZk88jXqRuPPPvsCgycke0gd7aQBI/ClassAug Work


In [None]:
import tensorflow as tf
from tensorflow import keras
import pickle
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, average_precision_score, precision_recall_curve, auc, confusion_matrix
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA

In [None]:
# Read the pickled embedding bundle (CIFAR-10 / ResNet-50, going by the file
# name). It is a mapping with at least the keys 'embs' and 'labels'.
# NOTE: pickle.load can execute arbitrary code while deserialising -- only open
# files that come from a trusted source.
data = None
with open('./embeddings/cifar10_resnet50_embeddings.pkl', mode='rb') as f:
    data = pickle.load(f)

# Unpack the two arrays used by the rest of the notebook.
embs, labels = data['embs'], data['labels']

In [None]:
# Class id -> class name for the ten base classes; the id is the position of
# the name in this tuple.
label_mapping_reverse = dict(enumerate((
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)))

# Inverse lookup: class name -> class id.
label_mapping = {name: class_id for class_id, name in label_mapping_reverse.items()}

In [None]:
# Base class names, in id order. Names containing '|' are skipped so that this
# cell stays idempotent: label_mapping is extended in place further down, so on
# a re-run it already contains the pair classes, and building combinations from
# those would create bogus "a|b|c|d" classes.
classes = [name for name in label_mapping if '|' not in name]

# One ad hoc class per unordered pair of base classes, named "<first>|<second>".
adhoc_classes = ['|'.join(pair) for pair in combinations(classes, 2)]

# NOTE(review): all_classes is the SAME list object as classes, so after the
# extend() below `classes` also holds the pair names. This aliasing is kept
# as-is because later cells may rely on it -- confirm whether it is intended.
all_classes = classes
all_classes.extend(adhoc_classes)

# Give every new (pair) class the id equal to its position in all_classes and
# register it in both lookup tables; existing entries are left untouched.
for i, c in enumerate(all_classes):
    label_mapping.setdefault(c, i)
    label_mapping_reverse.setdefault(i, c)

In [None]:
# One row per ad hoc class: the integer ids of the base classes named on
# either side of the '|' separator.
crs = np.array([
    tuple(int(label_mapping[name]) for name in adhoc_class.split("|"))
    for adhoc_class in adhoc_classes
])

In [None]:
# Stratified 70/30 split of the embeddings; the fixed random_state makes the
# partition repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    embs,
    labels,
    test_size=0.30,
    stratify=labels,
    random_state=43,
)

# Show the shape of each split, one per line.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')

(42000, 2048)
(18000, 2048)
(42000, 1)
(18000, 1)


In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving mini-batches of embeddings with one-hot labels.

    Args:
        embs: Array of embeddings, indexed along its first axis.
        labels: Integer class ids, one per embedding; they are one-hot encoded
            to ``n_classes`` columns (dtype ``int16``) in the constructor.
        batch_size: Number of examples per batch.
        dim: Embedding dimensionality. Stored on the instance but not
            otherwise used by this class.
        n_classes: Width of the one-hot label vectors.
        shuffle: When true, the example order is reshuffled at construction
            and after every epoch, and the rows inside each batch are permuted.
            When false, examples are served in their original order.
    """

    def __init__(self, embs, labels, batch_size=64, dim=2048, n_classes=55, shuffle=True):
        # Newer Keras versions expect the base-class constructor to run; on
        # older tf.keras it is a harmless no-op.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.embs = embs
        self.n_classes = n_classes
        self.labels = tf.keras.utils.to_categorical(labels, num_classes=self.n_classes, dtype=np.int16)
        self.shuffle = shuffle
        # Build the initial index order.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        A trailing partial batch is dropped, so up to ``batch_size - 1``
        examples are not served in a given epoch.
        """
        return self.embs.shape[0] // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(indexes)

    def on_epoch_end(self):
        """Reset the example order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(self.embs.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        """Gather the embeddings and one-hot labels for ``indexes``."""
        X = self.embs[indexes]
        y = self.labels[indexes]

        # Permute rows inside the batch only when shuffling was requested, so
        # that shuffle=False really yields a deterministic, in-order stream
        # (previously the batch was permuted unconditionally).
        if self.shuffle:
            p = np.random.permutation(X.shape[0])
            X = X[p]
            y = y[p]

        return X, y

In [None]:
class AdhocDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` mixing real examples with synthetic "confused" ones.

    Every batch contains ``batch_size`` real embeddings plus
    ``adhoc_batch_size`` synthetic embeddings. A synthetic embedding is a
    weighted blend of two noisy real embeddings, one from each class of a
    randomly chosen class pair, and is labelled with that pair's ad hoc class
    (the ``"<name1>|<name2>"`` entry of ``label_mapping``). The synthetic pool
    is built once in the constructor (one synthetic example per real example)
    and is sampled with replacement for every batch.

    Args:
        embs: Array of real embeddings, indexed along its first axis.
        labels: Integer class ids of the real embeddings; squeezed to 1-D for
            class lookups and one-hot encoded for training targets.
        label_mapping: Mapping from class name (including ``"a|b"`` pair
            names) to class id.
        label_mapping_reverse: Mapping from class id to class name.
        batch_size: Number of real examples per batch.
        adhoc_batch_size: Number of synthetic examples per batch.
        dim: Embedding dimensionality.
        n_classes: Number of base classes.
        n_adhoc_classes: Number of ad hoc (pair) classes.
        crs: Non-empty 2-D array-like of class-id pairs, one row per ad hoc
            class; only the first two columns are used. It is only read, never
            mutated, so the shared default object is harmless -- but the
            default (empty) value is not usable and raises ``ValueError``.
        shuffle: When true, the order of the real examples is reshuffled at
            construction and after every epoch.

    Raises:
        ValueError: If ``crs`` is empty or is not a 2-D collection of pairs.
    """

    def __init__(self, embs, labels, label_mapping, label_mapping_reverse, batch_size=64, adhoc_batch_size=192, dim=2048, n_classes=10, n_adhoc_classes=45, crs=[], shuffle=True):
        # Newer Keras versions expect the base-class constructor to run; on
        # older tf.keras it is a harmless no-op.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.adhoc_batch_size = adhoc_batch_size
        self.total_batch_size = batch_size + adhoc_batch_size
        self.label_mapping = label_mapping
        self.label_mapping_reverse = label_mapping_reverse
        self.embs = embs
        self.n_classes = n_classes
        self.n_adhoc_classes = n_adhoc_classes
        # The pairs are fancy-indexed below, which needs an ndarray; converting
        # here lets callers pass a plain list of tuples as well.
        self.crs = np.asarray(crs)
        if self.crs.ndim != 2 or self.crs.shape[0] == 0 or self.crs.shape[1] < 2:
            raise ValueError("crs must be a non-empty sequence of (class1, class2) pairs")
        self.total_classes = n_classes + n_adhoc_classes
        # Flat integer labels, used to select all embeddings of a given class.
        self.one_cold_labels = np.squeeze(labels)
        self.labels = keras.utils.to_categorical(labels, num_classes=self.total_classes, dtype=np.int16)
        self.shuffle = shuffle
        self.on_epoch_end()
        self.__generate_confused_examples()

    def __generate_confused_examples(self):
        """Build the pool of synthetic examples and their one-hot labels.

        Results are stored in ``self.confused_examples`` and
        ``self.confused_examples_labels``.
        """
        n_examples = self.embs.shape[0]

        # Pick one class pair per synthetic example.
        idx = np.random.choice(len(self.crs), size=n_examples)
        class1, class2 = self.crs[idx, 0], self.crs[idx, 1]

        # Resolve the ad hoc label id of every pair once, instead of rebuilding
        # the "a|b" name and a one-hot vector for each synthetic example.
        pair_label_ids = np.array([
            self.label_mapping[
                self.label_mapping_reverse[c1] + '|' + self.label_mapping_reverse[c2]
            ]
            for c1, c2 in self.crs[:, :2]
        ])
        confused_examples_labels = np.zeros((n_examples, self.total_classes))
        confused_examples_labels[np.arange(n_examples), pair_label_ids[idx]] = 1

        confused_examples = np.zeros((n_examples, 2, self.dim))
        class_examples = {}  # class id -> all real embeddings of that class
        for i, (c1, c2) in enumerate(zip(class1, class2)):
            if i % 10000 == 0:
                print(f'Examples #{i}/{class1.shape[0]}')
            for c in (c1, c2):
                if c not in class_examples:
                    class_examples[c] = self.embs[self.one_cold_labels == c]

            # One random real embedding from each class of the pair, each
            # perturbed with Gaussian noise (mean 0.5, standard deviation 1).
            confused_examples[i, 0] = self.__get_random_choice(class_examples[c1]) + np.random.normal(0.5, 1, self.dim)
            confused_examples[i, 1] = self.__get_random_choice(class_examples[c2]) + np.random.normal(0.5, 1, self.dim)

        # Release the per-class copies of the embeddings.
        class_examples = {}

        # Blend the two noisy embeddings with a per-example weight drawn
        # uniformly from [0.4, 0.6).
        lamda = np.random.uniform(0.4, 0.6, size=n_examples).reshape(-1, 1)

        self.confused_examples = (lamda*confused_examples[:,0]) + ((1-lamda)*confused_examples[:,1])
        self.confused_examples_labels = confused_examples_labels

    def __len__(self):
        """Return the number of batches per epoch.

        Counts full batches of real examples only; a trailing partial batch is
        dropped (the fixed-size buffers in ``__data_generation`` require
        exactly ``batch_size`` real examples).
        """
        return self.embs.shape[0] // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(indexes)

    def on_epoch_end(self):
        """Reset the real-example order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(self.embs.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __get_random_choice(self, arr):
        """Return one uniformly chosen element (row) of ``arr``."""
        return arr[np.random.choice(len(arr))]

    def __data_generation(self, indexes):
        """Assemble one batch of real plus synthetic examples.

        Returns:
            ``X`` of shape ``(batch_size + adhoc_batch_size, dim)`` (float32)
            and ``y`` of shape ``(batch_size + adhoc_batch_size, total_classes)``
            (int16), with rows in random order.
        """
        n_rows = self.batch_size + self.adhoc_batch_size
        X = np.zeros((n_rows, self.dim), dtype=np.float32)
        y = np.zeros((n_rows, self.total_classes), dtype=np.int16)

        # Real examples fill the first batch_size rows.
        X[:self.batch_size, :] = self.embs[indexes]
        y[:self.batch_size, :] = self.labels[indexes]

        # Synthetic examples, sampled with replacement from the pool, fill the rest.
        idx = np.random.choice(self.confused_examples.shape[0], size=self.adhoc_batch_size)
        X[self.batch_size:, :] = self.confused_examples[idx]
        y[self.batch_size:, :] = self.confused_examples_labels[idx]

        # Interleave real and synthetic rows.
        p = np.random.permutation(np.arange(X.shape[0]))
        X = X[p]
        y = y[p]

        return X, y

In [None]:
# Training batches: real training embeddings plus synthetic pair-class examples.
training_generator = AdhocDataGenerator(
    embs=x_train,
    labels=y_train,
    label_mapping=label_mapping,
    label_mapping_reverse=label_mapping_reverse,
    crs=crs,
)

# Validation batches: held-out embeddings only, no synthetic examples.
validation_generator = DataGenerator(embs=x_test, labels=y_test)

Examples #0/42000
Examples #10000/42000
Examples #20000/42000
Examples #30000/42000
Examples #40000/42000


In [None]:
# Reset Keras' global state before (re)building the model.
keras.backend.clear_session()

# Two dense layers over the 2048-d embeddings, followed by a softmax over the
# 10 base + 45 ad hoc classes.
# NOTE(review): the 55-unit layer applies ReLU (and an L2 activity
# regulariser) right before the softmax, which restricts the logits to be
# non-negative -- confirm that this is intended.
model = keras.models.Sequential([
    keras.layers.Dense(1024, activation='relu', activity_regularizer='l2', input_shape=(2048,)),
    keras.layers.Dense(10 + 45, activation='relu', activity_regularizer='l2'),
    keras.layers.Softmax(),
])

In [None]:
# Optional warm start: uncomment the next line to restore previously saved
# weights before continuing.
# model.load_weights('./models/')
# Print the layer-by-layer architecture together with the parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1024)              2098176   
                                                                 
 dense_1 (Dense)             (None, 55)                56375     
                                                                 
 softmax (Softmax)           (None, 55)                0         
                                                                 
Total params: 2,154,551
Trainable params: 2,154,551
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Weights-only checkpoint written whenever validation accuracy reaches a new
# maximum; the accuracy (two decimals) is embedded in the file name.
checkpoint_filepath = './checkpoints/classaug_cifar10_best_{val_accuracy:.2f}_updated_generator.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

# SGD with its default settings, categorical cross-entropy on the one-hot
# targets, accuracy as the tracked metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.SGD(),
    metrics=['accuracy'],
)

In [None]:
# Train for up to 1000 epochs, checkpointing the best weights by validation
# accuracy. The recorded run below was stopped by hand.
history = model.fit(
    training_generator,
    validation_data=validation_generator,
    epochs=1000,
    callbacks=[model_checkpoint_callback],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

KeyboardInterrupt: ignored