In [None]:
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split
%matplotlib inline

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Iterating over the device list keeps the notebook runnable on CPU-only machines
# (the list is empty there) and applies the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Seed every random number generator used by the notebook.
seed_value = 0
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects child processes - confirm if hash determinism matters.
os.environ['PYTHONHASHSEED'] = str(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

In [None]:
import sys

# Handle on the running IPython shell; `get_ipython` only exists inside IPython/Jupyter.
ipython = get_ipython()


def hide_traceback(exc_tuple=None, filename=None, tb_offset=None,
                   exception_only=False, running_compiled_code=False):
    """
    Replacement for ``ipython.showtraceback`` that prints only the exception summary.

    The parameters mirror the keyword arguments of the hook being replaced; none of them
    is used here. The exception currently being handled is read from ``sys.exc_info()``
    and only its "exception only" lines are forwarded to ``ipython._showtraceback``.
    """
    exc_type, exc_value, _ = sys.exc_info()
    summary_lines = ipython.InteractiveTB.get_exception_only(exc_type, exc_value)
    return ipython._showtraceback(exc_type, exc_value, summary_lines)


# Install the shortened traceback display for the rest of the session.
ipython.showtraceback = hide_traceback

In [None]:
from tensorflow.keras.layers import Layer
import tensorflow.keras.backend as K
from tensorflow.keras.losses import Loss

def class_consistency_loss(y_true, y_pred):
    """
    Element-wise binary cross-entropy between a class-wise confusion matrix and the identity.

    The pairwise scores are averaged over the samples of each class, first along one axis
    and then along the other, which yields one mean score per (class, class) pair. Classes
    without any sample in ``y_true`` are dropped before comparing with the identity matrix.

    Args:
        y_true: label matrix with one row per sample and one column per class
            (assumed one-hot encoded - TODO confirm with callers).
        y_pred: square matrix of pairwise scores between the samples of ``y_true``
            (the two matrix products below require shape (n, n)).

    Returns:
        The un-reduced cross-entropy matrix, one entry per pair of represented classes.
    """
    samples_per_class = tf.reduce_sum(y_true, axis=0)
    # Each column sums to 1 for a represented class and stays all-zero otherwise.
    normalized_labels = tf.math.divide_no_nan(y_true, samples_per_class)
    represented_classes = tf.reduce_sum(normalized_labels, axis=0) > 0

    # labels^T @ (scores @ labels): mean score of every class against every class.
    class_scores = tf.matmul(normalized_labels, tf.matmul(y_pred, normalized_labels), transpose_a=True)
    confusion_matrix = tf.boolean_mask(class_scores[represented_classes], represented_classes, axis=1)

    target = tf.eye(tf.shape(confusion_matrix)[0])
    return K.binary_crossentropy(target, confusion_matrix)

class ClassConsistencyLoss(Loss):
    """Keras ``Loss`` object that delegates to ``class_consistency_loss``."""

    def call(self, y_true, y_pred):
        """Return the un-reduced class consistency loss for ``y_true`` and ``y_pred``."""
        loss_values = class_consistency_loss(y_true, y_pred)
        return loss_values

class Classification(Layer):
    """
    Score a batch of embeddings against a stored support set and return one score per class.

    The support set (embeddings, label names and normalized one-hot labels) lives in
    shape-unconstrained variables so it can be replaced or extended with
    ``set_support_set``. ``call`` applies ``kernel`` to every (input, support sample) pair
    and multiplies the score matrix by the normalized one-hot labels, i.e. it averages the
    scores over the support samples of each class.
    """

    # Shapes/specs shared by the variables below and by the tf.function signatures.
    support_tensors_shape = tf.TensorShape([None, None])
    support_labels_one_hot_shape = tf.TensorShape([None, None])
    support_labels_name_shape = tf.TensorShape([None])
    support_tensors_spec = tf.TensorSpec(support_tensors_shape, tf.float32, name="support_tensors")
    support_labels_one_hot_spec = tf.TensorSpec(support_labels_one_hot_shape, tf.float32, name="support_labels_one_hot")
    support_labels_name_spec = tf.TensorSpec(support_labels_name_shape, tf.string, name="support_labels_name")

    def __init__(self, kernel, **kwargs):
        """
        Args:
            kernel: callable invoked as ``kernel([a, b])`` on two row-aligned batches of
                embeddings; its output is reshaped into a pairwise score matrix.
                ``get_config`` additionally requires it to provide ``to_json()``.
            **kwargs: forwarded to ``Layer.__init__``.
        """
        super().__init__(**kwargs)
        self.kernel = kernel
        # The variables start empty; validate_shape=False lets later assignments change
        # their size within the declared (unknown-dimension) shapes.
        self.support_tensors = tf.Variable(
            [[]], validate_shape=False, shape=self.support_tensors_shape, name="support_tensors"
        )
        self.support_labels_name = tf.Variable(
            [],
            validate_shape=False,
            shape=self.support_labels_name_shape,
            name="support_labels_name",
            dtype=self.support_labels_name_spec.dtype,
        )
        self.support_labels_one_hot = tf.Variable(
            [[]], validate_shape=False, shape=self.support_labels_one_hot_shape, name="support_labels_one_hot"
        )
        self.columns = tf.Variable([], validate_shape=False, shape=[None], dtype=tf.string, name="columns")
        # Scalar: mean class consistency loss of the current support set.
        self.support_set_loss = tf.Variable(0.0, name="support_set_loss")

    def get_config(self):
        """Return the layer config with the kernel serialized through ``kernel.to_json()``."""
        config = super().get_config()
        config.update({"kernel": self.kernel.to_json()})
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from ``get_config`` output, deserializing the kernel from JSON."""
        # Work on a copy so that the caller's dict is not modified.
        config = dict(config)
        config["kernel"] = tf.keras.models.model_from_json(config["kernel"])
        return cls(**config)

    @staticmethod
    def _validate_support_set_shape(support_tensors, support_labels):
        """
        Raise AttributeError when the first dimensions of the two arguments differ.

        NOTE(review): the check uses static shapes; under the ``input_signature`` of
        ``set_support_set`` both first dimensions are unknown, so it presumably never
        fires there - confirm whether a runtime assertion is wanted.
        """
        if support_tensors.shape[0] != support_labels.shape[0]:
            raise AttributeError("Support tensors and support labels shape 0 should match")

    @tf.function(
        input_signature=(support_tensors_spec, support_labels_name_spec, tf.TensorSpec(None, tf.bool, name="overwrite"))
    )
    def set_support_set(self, support_tensors, support_labels_name, overwrite):
        """
        Replace or extend the stored support set and refresh its consistency loss.

        Args:
            support_tensors: float32 matrix of support embeddings, one row per sample.
            support_labels_name: string vector with the label of each support sample.
            overwrite: boolean tensor; True replaces the stored support set, False
                appends the new samples to it.

        Returns:
            The updated ``support_set_loss`` expanded to shape (1,).
        """
        self._validate_support_set_shape(support_tensors, support_labels_name)
        support_tensors = tf.cond(
            overwrite, lambda: support_tensors, lambda: tf.concat([self.support_tensors, support_tensors], axis=0)
        )
        support_labels_name = tf.cond(
            overwrite, lambda: support_labels_name, lambda: tf.concat([self.support_labels_name, support_labels_name], axis=0),
        )
        # One column per distinct label, in order of first appearance.
        columns, codes = tf.unique(support_labels_name)
        support_labels_one_hot = tf.one_hot(codes, depth=tf.size(columns))
        support_set_size = tf.shape(support_tensors)[0]
        # Score every support sample against every other one: repeat pairs row i with
        # each row of the tiled copy, then the flat kernel output is folded back.
        pair_wise_scores = tf.reshape(
            self.kernel(
                [
                    tf.repeat(support_tensors, tf.ones(support_set_size, dtype=tf.int32) * support_set_size, axis=0),
                    tf.tile(support_tensors, [support_set_size, 1]),
                ]
            ),
            [support_set_size, support_set_size],
        )
        # class_consistency_loss returns an element-wise matrix whereas support_set_loss
        # is a scalar variable, so the loss is reduced to its mean before assignment.
        self.support_set_loss.assign(
            tf.reduce_mean(class_consistency_loss(support_labels_one_hot, pair_wise_scores))
        )

        # Column-normalized labels turn the matmul in `call` into a per-class mean.
        normalized_labels = tf.math.divide_no_nan(support_labels_one_hot, tf.reduce_sum(support_labels_one_hot, axis=0))
        self.support_tensors.assign(support_tensors)
        self.support_labels_name.assign(support_labels_name)
        self.support_labels_one_hot.assign(normalized_labels)
        self.columns.assign(columns)
        return tf.expand_dims(self.support_set_loss, axis=0)

    @tf.function(input_signature=())
    def get_support_set(self):
        """Return the stored support embeddings, normalized one-hot labels and loss."""
        return self.support_tensors, self.support_labels_one_hot, self.support_set_loss

    def compute_output_shape(self, input_shape):
        """Return (batch dimension, number of columns of the stored one-hot labels)."""
        return input_shape[0], tf.shape(self.support_labels_one_hot)[1]

    @tf.function
    def call(self, inputs, **kwargs):
        """
        Compute the mean kernel score of each input against each support class.

        Args:
            inputs: a tensor of embeddings, or a list containing exactly one such tensor.

        Returns:
            Tensor with one row per input and one column per support-set class.

        Raises:
            ValueError: if ``inputs`` is a list with more than one element.
        """
        if isinstance(inputs, list):
            if len(inputs) > 1:
                raise ValueError("Layer should be called on a single tensor")
            inputs = inputs[0]
        batch_size = tf.shape(inputs)[0]
        support_set_size = tf.shape(self.support_tensors)[0]
        # Pair every input row with every support row, score the pairs, fold back.
        pair_wise_scores = tf.reshape(
            self.kernel(
                [
                    tf.repeat(inputs, tf.ones(batch_size, dtype=tf.int32) * support_set_size, axis=0),
                    tf.tile(self.support_tensors, [batch_size, 1]),
                ]
            ),
            [batch_size, support_set_size],
        )
        return tf.linalg.matmul(pair_wise_scores, self.support_labels_one_hot)

    
def classification_accuracy(ascending=False):
    """
    Build a metric that labels each sample with the class of its best-scoring other sample.

    "Best" follows the ``ascending`` argument, as in pandas sorting: ``ascending=True``
    picks the smallest score (use it for distances), ``ascending=False`` picks the greatest
    score (use it for similarities). The mean accuracy over the batch is returned.

    Note: a sample with no other sample of the same class always counts as a failure,
    since the right class cannot be found among the other samples.

    Args:
        ascending (bool): whether the best score is the smallest one.

    Returns:
        A metric function ``(y_true, y_pred) -> scalar`` where ``y_pred`` is the square
        matrix of pairwise scores and ``y_true`` holds one label row per sample.
    """

    def top_score_classification_accuracy(y_true, y_pred):
        y_true = tf.dtypes.cast(y_true, y_pred.dtype)
        # A sample must not match itself: shift each diagonal entry by the full value
        # range of its row (plus epsilon) so it can never be the best score, even when
        # scores are negative.
        row_range = tf.reduce_max(y_pred, axis=1) - tf.reduce_min(y_pred, axis=1)
        self_offset = tf.linalg.diag(row_range + K.epsilon())
        if ascending:
            best_match = tf.argmin(y_pred + self_offset, axis=1)
        else:
            best_match = tf.argmax(y_pred - self_offset, axis=1)
        # Look up the label row of the best matching sample for every sample.
        predicted_labels = tf.gather(y_true, best_match)
        return tf.reduce_mean(tf.reduce_sum(y_true * predicted_labels, axis=1))

    return top_score_classification_accuracy

def min_eigenvalue(_, y_pred):
    """
    Compute the minimum eigenvalue of the (symmetrized) y_pred matrix. If this value is
    non-negative (resp. positive) then the similarity or distance learnt is a positive
    semi-definite (resp. positive definite) kernel.
    See Also [Positive-definite kernel](https://en.wikipedia.org/wiki/Positive-definite_kernel)

    Singular values are never negative, so they cannot reveal a negative eigenvalue;
    the eigenvalues of the symmetric part of ``y_pred`` are used instead.

    Args:
        _: unused, present to match the Keras metric signature.
        y_pred: square matrix of pairwise scores.

    Returns:
        Scalar tensor holding the smallest eigenvalue.
    """
    # eigvalsh expects a symmetric matrix; averaging with the transpose removes any
    # numerical asymmetry without changing an already symmetric input.
    symmetric_scores = (y_pred + tf.linalg.matrix_transpose(y_pred)) / 2
    return tf.reduce_min(tf.linalg.eigvalsh(symmetric_scores))


In [None]:
# Input geometry fed to the pretrained Xception network.
img_rows, img_cols, img_channel = 299, 299, 3

# ImageNet-pretrained Xception, kept with its classification head (include_top=True).
base_model = tf.keras.applications.Xception(
    weights='imagenet',
    include_top=True,
    input_shape=(img_rows, img_cols, img_channel),
)

In [None]:
# Small head stacked on the base model output: flatten, heavy dropout, one hidden
# dense layer and an 11-way softmax.
add_model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:]),
    tf.keras.layers.Dropout(rate=0.8),
    tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=11, activation=tf.nn.softmax),
])

# End-to-end network: image input of the base model -> output of the head.
en_model = tf.keras.Model(
    inputs=base_model.input,
    outputs=add_model(base_model.output),
)

In [None]:
def build_rbf_kernel(embedding_dim, gamma=1.0):
    """
    Build a Keras model computing the RBF similarity ``exp(-gamma * ||a - b||^2)``.

    Args:
        embedding_dim (int): size of the embedding vectors being compared.
        gamma (float): width parameter of the RBF kernel.

    Returns:
        A ``tf.keras.Model`` taking ``[a, b]`` (two row-aligned batches of embeddings)
        and returning one score per row with shape (n, 1).
    """
    left = tf.keras.Input(shape=(embedding_dim,))
    right = tf.keras.Input(shape=(embedding_dim,))
    score = tf.keras.layers.Lambda(
        lambda pair: tf.exp(-gamma * tf.reduce_sum(tf.square(pair[0] - pair[1]), axis=-1, keepdims=True))
    )([left, right])
    return tf.keras.Model(inputs=[left, right], outputs=score)


encoder = en_model
# Classification calls its kernel on pairs of embeddings, so it needs a callable
# rather than the string "rbf".
support_layer = Classification(kernel=build_rbf_kernel(encoder.output_shape[-1]))
model = tf.keras.Sequential([encoder, support_layer])
# NOTE(review): ClassConsistencyLoss and both metrics operate on a square matrix of
# pairwise scores, while Classification outputs per-class scores and needs
# set_support_set() to be called first - confirm the intended training head.
model.compile(
    loss=ClassConsistencyLoss(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=[classification_accuracy(), min_eigenvalue],
)

In [None]:
batch_size = 8
epochs = 1000

# All splits get the same preprocessing: pixel values rescaled to [0, 1].
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255.)
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255.)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255.)

train_set = train_datagen.flow_from_directory(
    'image8/train',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
)

val_set = val_datagen.flow_from_directory(
    'image8/val',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
)

# Stop once the validation loss has not improved for 20 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Model.fit accepts the directory iterators directly (fit_generator is deprecated).
# Step counts come from the number of images actually found on disk, so they stay
# correct if the dataset changes; max(1, ...) guards against a zero-step epoch.
history = model.fit(
    train_set,
    steps_per_epoch=max(1, train_set.samples // batch_size),
    epochs=epochs,
    validation_data=val_set,
    validation_steps=max(1, val_set.samples // batch_size),
    callbacks=[callback],
)

# callbacks=[tf.keras.callbacks.ModelCheckpoint('VGG16-transferlearning.model', monitor='val_acc', save_best_only=True)]

In [None]:
def plot_history_metric(history, metric, title, ylabel):
    """
    Plot the training and validation curves of one metric recorded by Keras.

    Args:
        history: the ``History`` object returned by ``model.fit``.
        metric (str): key in ``history.history``; the validation curve is read from
            ``'val_' + metric``.
        title (str): figure title.
        ylabel (str): label of the y axis.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    ax.plot(history.history['val_' + metric], label='validation')
    ax.set(title=title, ylabel=ylabel, xlabel='epoch')
    ax.legend(loc='upper left')
    plt.show()


# Keras names a function metric after the function, so the accuracy compiled into the
# model is recorded as 'top_score_classification_accuracy' (there is no 'accuracy' key).
plot_history_metric(history, 'top_score_classification_accuracy', 'model accuracy', 'acc')
plot_history_metric(history, 'loss', 'model loss', 'loss')

In [None]:
# Evaluate on the held-out images; only complete batches are consumed.
test_set = test_datagen.flow_from_directory(
    'image8/test',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
)
score = model.evaluate(
    test_set,
    steps=28501 // batch_size,
)

In [None]:
# Rebuild the test iterator without shuffling so that the prediction rows line up
# with the order of `test_set.classes`.
test_set = test_datagen.flow_from_directory(
    'image8/test',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)
preds = model.predict(
    test_set,
    steps=28501 // batch_size,
)
# Integer class index of every test image, in iteration order.
a = test_set.classes

In [None]:
# One-hot encode the class indices: row i of the identity matrix encodes class i,
# so indexing the identity with `a` yields one one-hot row per sample.
y_test = np.eye(a.max() + 1)[a]

In [None]:
# Prediction only covered complete batches, so drop the trailing labels that have no
# prediction. Slicing to len(preds) is safe when the sample count is an exact multiple
# of the batch size (a `[:-0]` slice would empty the array) and re-running the cell
# does not trim the labels a second time.
y_test = y_test[:len(preds)]

In [None]:
def LIFT(preds, y_test, cls): # >=2.5
    """
    Lift of class ``cls`` among the 20% of samples with the highest true-class score.

    Samples are ranked by the score predicted for their true class. Within the top 20%
    (at least one sample), the share of samples whose predicted class is ``cls`` is
    divided by the share of samples whose true class is ``cls`` in the whole set.

    Args:
        preds (np.ndarray): predicted scores, one row per sample, one column per class.
        y_test (np.ndarray): one-hot encoded true labels with the same shape as ``preds``.
        cls (int): index of the class of interest.

    Returns:
        float: the lift score, or ``nan`` when no sample of class ``cls`` exists in
        ``y_test`` (the base rate is zero, so the lift is undefined).

    NOTE(review): the trailing ">=2.5" on the signature is presumably a target value
    for this score - confirm.
    """
    condition = y_test.astype(bool)  # keep only the score predicted for the true class
    true_class_scores = np.extract(condition, preds)

    # Top 20% by true-class score; keep at least one sample so tiny inputs do not
    # produce an empty selection (and a division by zero).
    top_k = max(1, len(true_class_scores) // 5)
    top_idx = np.argsort(-true_class_scores)[:top_k]

    top_pred_classes = np.argmax(preds[top_idx], axis=1)  # scores -> predicted class index
    true_classes = np.argmax(y_test, axis=1)

    top_hits = np.count_nonzero(top_pred_classes == cls)  # predicted as `cls` in the top group
    class_count = np.count_nonzero(true_classes == cls)   # truly `cls` in the whole set

    if class_count == 0:
        lift_score = float('nan')
    else:
        lift_score = (top_hits / len(top_pred_classes)) / (class_count / len(true_classes))
    print('LIFT Accuracy: ',  lift_score)
    return lift_score

In [None]:
# Lift for the three classes of interest (3, 4 and 7), then their plain average.
lift_score = [LIFT(preds, y_test, target_class) for target_class in (3, 4, 7)]
avg_lift = sum(lift_score) / 3

In [None]:
# One-vs-rest ROC curve and AUC for each class of interest, keyed by class index.
fpr = {}
tpr = {}
roc_auc = {}
for i in [3, 4, 7]:
    # `preds` holds the predicted scores; column i is the score of class i.
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], preds[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

In [None]:
# AUC over all (sample, class) entries of the one-hot labels and predicted scores.
m = tf.keras.metrics.AUC()
m.update_state(y_true=y_test, y_pred=preds)

auroc_score = m.result().numpy()
print(auroc_score)

In [None]:
# Weighted blend of the average lift (divided by 5) and the AUROC.
# `lift_score` is the list of per-class lifts and cannot be divided by a number;
# the scalar average `avg_lift` is the value to combine.
final_score = (avg_lift / 5) * 0.7 + auroc_score * 0.3
print(final_score)

In [None]:
#model.save('model.h5')

In [None]:
#model = tf.keras.models.load_model('model.h5')