# Initialization

In [1]:
import tensorflow as tf
import numpy as np
import cv2
import glob


2024-07-08 15:42:25.947593: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import configs

# Extractor Implementation

In [None]:
# Name of the layer whose output is saved as the per-image feature vector.
FEATURE_LAYER_NAME = 'global_average_pooling2d_4'

# Load the full trained network (architecture + weights) from the checkpoint.
model = tf.keras.models.load_model(configs.CGUNET_CHECKPOINT)

# Build a sub-model that stops at the feature layer. It is assembled from the
# loaded model's own layer objects, so it already carries the checkpoint's
# weights; a second load_weights() call on the truncated model is unnecessary
# (and, for topology-based weight files, would fail on the layer-count mismatch).
extractor = tf.keras.Model(inputs=model.input,
                           outputs=model.get_layer(FEATURE_LAYER_NAME).output)
extractor.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 conv2d_181 (Conv2D)         (None, 224, 224, 64)      1792      
                                                                 
 conv2d_182 (Conv2D)         (None, 224, 224, 64)      36928     
                                                                 
 max_pooling2d_49 (MaxPooli  (None, 112, 112, 64)      0         
 ng2D)                                                           
                                                                 
 dropout_88 (Dropout)        (None, 112, 112, 64)      0         
                                                                 
 conv2d_183 (Conv2D)         (None, 112, 112, 128)     73856     
                                                           

# Data Loader

In [None]:
class MyDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that serves batches of JPEG images from class sub-folders.

    Files are discovered with the pattern ``<data_dir>/*/*.jpg`` and kept in
    sorted order. The class label of a file is derived from its path: the
    first entry of ``classnames`` that occurs in the path decides the class.

    Each batch is ``(X, {'out_img': X, 'out_class': Y_class})`` where ``X``
    holds the images scaled to ``[0, 1]`` and ``Y_class`` the one-hot labels.
    """

    def __init__(self, data_dir, batch_size=32, shuffle=True):
        """Index the image files under ``data_dir``.

        Args:
            data_dir: Directory containing one sub-directory per class.
            batch_size: Maximum number of images per batch.
            shuffle: If truthy, the sample order is reshuffled every epoch.
        """
        # Newer Keras releases expect Sequence subclasses to call the base
        # constructor; on older releases this is a harmless no-op.
        super().__init__()
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.filenames = sorted(glob.glob(data_dir + '/*/*.jpg'))
        print(len(self.filenames), 'found')
        # Path fragments identifying each class; list position == class index.
        self.classnames = ['/akiec/', '/bcc/', '/bkl/',
                           '/df/', '/mel/', '/nv/', '/vasc/']
        self.indices = None
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be short)."""
        return int(np.ceil(len(self.filenames) / self.batch_size))

    def __getitem__(self, index):
        """Load and return batch number ``index`` as ``(X, Y)``."""
        start = index * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]
        batch_files = [self.filenames[k] for k in batch_indices]
        return self.__data_generation(batch_files)

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indices = np.arange(len(self.filenames))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def filename2onehot(self, fn):
        """Return the one-hot label for path ``fn``, or ``None`` if no class matches.

        The returned vector has length ``configs.NUM_CLASS`` and dtype float64.
        """
        # The class markers are '/'-delimited, so normalise Windows separators
        # before searching for them.
        normalized = fn.replace('\\', '/')
        # Only the first NUM_CLASS markers can be encoded in the label vector.
        for i, marker in enumerate(self.classnames[:configs.NUM_CLASS]):
            if marker in normalized:
                result = np.zeros(configs.NUM_CLASS, dtype=np.float64)
                result[i] = 1
                return result
        return None

    def __data_generation(self, filenames):
        """Read, crop, resize and scale the given files; build their labels.

        Args:
            filenames: Paths of the images that make up one batch.

        Returns:
            ``(X, {'out_img': X, 'out_class': Y_class})``.

        Raises:
            IOError: If an image cannot be read by OpenCV.
            ValueError: If a path does not contain any known class marker.
        """
        num = len(filenames)
        X = np.empty([num, *configs.IMAGE_SIZE], dtype=np.float64)
        Y_class = np.empty([num, configs.NUM_CLASS])
        for i, fn in enumerate(filenames):
            img = cv2.imread(fn)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path.
                raise IOError('Could not read image: ' + fn)
            # NOTE(review): the crop bounds come from configs.IMAGE_SIZE rather
            # than from the loaded image's own shape, and the trailing -1 makes
            # the slice one column narrower than a symmetric crop. Left
            # unchanged so preprocessing stays identical - confirm intent.
            delta = (configs.IMAGE_SIZE[1] - configs.IMAGE_SIZE[0]) // 2
            img = img[:, delta:configs.IMAGE_SIZE[1]-delta-1]
            # NOTE(review): the target size is hard-coded while X is allocated
            # from configs.IMAGE_SIZE; the two must agree for the assignment
            # below to succeed.
            img = cv2.resize(img, (224, 224))
            X[i] = np.array(img, dtype=np.float64) / 255.
            label = self.filename2onehot(fn)
            if label is None:
                # Without this check the None would be written into the float
                # label array instead of a valid one-hot row.
                raise ValueError('No known class folder in path: ' + fn)
            Y_class[i] = label

        return X, {'out_img':X, 'out_class':Y_class}

# Extracting

In [None]:
def encode_features(subset_name, decoder):
    """Run a model over one dataset split and save one feature file per image.

    For every image ``<name>.jpg`` found under ``configs.DATA_DIR/<subset_name>``
    the model output for that image is written next to it as
    ``<name>.unet.npy``. Progress is printed after each batch.

    Args:
        subset_name: Sub-directory of ``configs.DATA_DIR`` to process
            (e.g. ``'train'``).
        decoder: Model exposing ``predict``; despite the parameter name it is
            used here as the feature extractor.
    """
    # shuffle=False keeps batches in the same order as datagen.filenames, which
    # is what lets `counter` map each feature row back to its source file.
    datagen = MyDataGenerator(configs.DATA_DIR+'/'+ subset_name, shuffle=False)
    filenames = datagen.filenames
    counter = 0
    num = len(filenames)

    # Index batches explicitly instead of iterating the generator: if the base
    # class provides no __iter__, Python falls back to calling __getitem__ with
    # 0, 1, 2, ... until IndexError, and the generator returns empty batches
    # rather than raising for out-of-range indices.
    for batch_index in range(len(datagen)):
        X, _ = datagen[batch_index]
        inc = len(X)
        # verbose=0 avoids printing a progress bar for every single batch.
        features = decoder.predict(X, verbose=0)
        for i in range(inc):
            fn = filenames[counter]
            counter += 1
            # Swap only the final extension; str.replace would also rewrite
            # any '.jpg' occurring earlier in the path (e.g. a directory name).
            feat_fn = fn.rsplit('.', 1)[0] + '.unet.npy'
            np.save(feat_fn, features[i])
        print(subset_name, counter, '/', num)

In [None]:
# Extract and store features for every dataset split, in this fixed order.
for subset in ('train', 'val', 'test'):
    encode_features(subset, extractor)

test 3010 / 3010
