In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [2]:
# Print the version string of the imported TensorFlow package.
print(tf.__version__)

2.8.0


In [3]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all
# up front. Memory growth has to be configured identically on every visible
# GPU, so apply it to each device rather than only the first one; an empty
# device list (CPU-only machine) simply skips the loop.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
  try:
    tf.config.experimental.set_memory_growth(gpu_device, True)
  except (ValueError, RuntimeError) as growth_error:
    # Invalid device or cannot modify virtual devices once initialized.
    # Best effort: report the problem and keep going with default allocation.
    print(f"Could not enable memory growth on {gpu_device.name}: {growth_error}")

2023-12-15 13:00:17.420250: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-15 13:00:17.453088: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-15 13:00:17.453248: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [4]:
# Root folder of the saved autoencoder; the encoder/decoder checkpoint paths
# loaded later in the notebook are built by appending to this string.
model_save_folder = "./joint_ae"

In [5]:
import glob

# Folder holding the images; every entry directly inside it is listed.
img_folder = "./val2017/"

# Sort the matches so the path order is deterministic across runs.
img_paths = glob.glob(f"{img_folder}/*")
img_paths.sort()

print("Number of imgs in the folder:", len(img_paths))

Number of imgs in the folder: 50000


In [6]:
# Class names: one label per line of the label file.
label_path = './data/ImageNetLabels.txt'
with open(label_path, "r", encoding="UTF8") as lbfile:
    labels = lbfile.read().splitlines()

# Ground truths: each line is split on single spaces and the second field
# is parsed as the integer class index.
gt_path = './data/caffe_clsloc_validation_ground_truth.txt'
with open(gt_path,"r") as lbfile:
    lines = lbfile.readlines()

# The parsed indices are stored as-is (no +1 offset is applied).
gts = np.array([int(row.split(' ')[1].splitlines()[0]) for row in lines])

## Autoencoder

### Regularizer

In [7]:
import tensorflow_addons as tfa

# Scale factor of the orthogonality penalty.
C = 1e-4


def orthogonal_reg(w):  # 1703.01827
  """Penalty on how far the flattened kernel's Gram matrix is from identity.

  The weight tensor is reshaped to 2-D with its last axis kept as columns,
  the Gram matrix ``W^T W`` is formed, and ``(C / 2)`` times the norm of
  ``W^T W - I`` (as computed by ``tf.linalg.norm`` with default arguments)
  is returned.
  """
  n_units = w.shape[-1]
  flat_w = tf.reshape(w, (-1, n_units))
  gram = tf.matmul(tf.transpose(flat_w), flat_w)
  deviation = gram - tf.eye(n_units)

  return (C/2)*tf.linalg.norm(deviation)

### Prepare Autoencoder

In [8]:
# Square input resolution (in pixels) used for the model input layers.
img_height = img_width = 224

In [9]:
print(img_height, img_width)

encoder_input = layers.Input(shape=(img_height, img_width, 3))

# Encoder: three stride-2 "same"-padded convolutions with ReLU, all sharing
# one orthogonal kernel initializer. Only filter count, kernel size and the
# name of the last layer differ, so the common arguments live in one dict.
initializer = tf.keras.initializers.Orthogonal()
shared_conv_args = dict(
    strides=2,
    activation="relu",
    padding="same",
    kernel_initializer=initializer,
)

encoder_x = layers.Conv2D(32, (5, 5), **shared_conv_args)(encoder_input)
encoder_x = layers.Conv2D(16, (5, 5), **shared_conv_args)(encoder_x)
# The final layer is named so the decoder can look up its output shape.
encoder_x = layers.Conv2D(13, (3, 3), name='encoder_out', **shared_conv_args)(encoder_x)

# NOTE(review): the model name 'enocder' looks like a typo for 'encoder';
# it is kept verbatim because it is a runtime identifier.
encoder_model = keras.Model(encoder_input, encoder_x,  name='enocder')
encoder_model.summary()

224 224
Model: "enocder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 112, 112, 32)      2432      
                                                                 
 conv2d_1 (Conv2D)           (None, 56, 56, 16)        12816     
                                                                 
 encoder_out (Conv2D)        (None, 28, 28, 13)        1885      
                                                                 
Total params: 17,133
Trainable params: 17,133
Non-trainable params: 0
_________________________________________________________________


2023-12-15 13:00:17.620397: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-15 13:00:17.620912: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-15 13:00:17.621044: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-12-15 13:00:17.621123: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [10]:
def dropout_tail(X):
    """Zero out a randomly sized run of trailing channels of ``X``.

    A single integer ``tail_len`` is drawn uniformly from ``[0, channels)``
    and the last ``tail_len`` entries along the final axis are multiplied by
    zero; the leading entries are multiplied by one. The mask is built from
    axes 1, 2 and -1 of ``X`` only (no batch axis), so the same tail length
    is broadcast to every sample in the batch.

    NOTE(review): there is no training/inference switch here, so the mask is
    applied whenever this function is part of the graph.
    """
    total_dim = tf.shape(X)[-1]

    # Number of trailing channels to drop (maxval is exclusive).
    tail_len = tf.random.uniform([1,], minval=0, maxval=total_dim, dtype=tf.int32)
    tail_len = tf.math.minimum(tail_len, total_dim)
    head_len = total_dim - tail_len

    # Ones for the kept leading channels, zeros for the dropped tail.
    keep_part = tf.ones([tf.shape(X)[1], tf.shape(X)[2], head_len[0]])
    drop_part = tf.zeros((tf.shape(X)[1], tf.shape(X)[2],tail_len[0]))
    mask = tf.concat((keep_part, drop_part), axis=-1)

    return X*mask

In [11]:
# Decoder
# The decoder input takes the per-sample output shape of the encoder's
# 'encoder_out' layer (the leading batch dimension is discarded).
_, w, h, c = encoder_model.get_layer('encoder_out').output_shape
decoder_input = layers.Input(shape=(w, h, c))

# Three stride-2 transposed convolutions with ReLU.
# Note: the first layer is *named* "decoder_input", which is distinct from
# the Python variable `decoder_input` holding the Input tensor.
decoder_x = layers.Conv2DTranspose(
    128, (2, 2),
    strides=2,
    activation="relu",
    padding="same",
    name="decoder_input",
)(decoder_input)
decoder_x = layers.Conv2DTranspose(
    64, (3, 3),
    strides=2,
    activation="relu",
    padding="same",
)(decoder_x)
decoder_x = layers.Conv2DTranspose(
    32, (5, 5),
    strides=2,
    activation="relu",
    padding="same",
)(decoder_x)

# Final 3-channel convolution with a sigmoid activation.
decoder_x = layers.Conv2D(3, (3, 3), activation="sigmoid", padding="same")(decoder_x)

# Wrap the stack as a standalone decoder model.
decoder_model = keras.Model(decoder_input, decoder_x,  name='decoder')
decoder_model.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 28, 28, 13)]      0         
                                                                 
 decoder_input (Conv2DTransp  (None, 56, 56, 128)      6784      
 ose)                                                            
                                                                 
 conv2d_transpose (Conv2DTra  (None, 112, 112, 64)     73792     
 nspose)                                                         
                                                                 
 conv2d_transpose_1 (Conv2DT  (None, 224, 224, 32)     51232     
 ranspose)                                                       
                                                                 
 conv2d_2 (Conv2D)           (None, 224, 224, 3)       867       
                                                           

In [12]:
class CustomTrainStep(tf.keras.Model):
    """Keras model whose ``train_step`` accumulates gradients over several batches.

    Gradients from each batch are added into per-variable accumulator
    variables; once ``n_gradients`` batches have been seen, the accumulated
    gradients are handed to the optimizer and the accumulators are reset.

    NOTE(review): the accumulators hold the *sum* of the batch gradients;
    nothing in this class divides by ``n_gradients`` before applying them.
    """

    def __init__(self, n_gradients, *args, **kwargs):
        """Build the model and allocate the accumulation state.

        Args:
            n_gradients: Number of train steps to accumulate before applying
                the gradients.
            *args, **kwargs: Forwarded unchanged to ``tf.keras.Model``.
        """
        super().__init__(*args, **kwargs)
        # Number of batches to accumulate before an optimizer update.
        self.n_gradients = tf.constant(n_gradients, dtype=tf.int32)
        # Counter of batches accumulated since the last optimizer update.
        self.n_acum_step = tf.Variable(0, dtype=tf.int32, trainable=False)
        # One zero-initialised float32 accumulator per trainable variable that
        # exists at construction time.
        self.gradient_accumulation = [tf.Variable(tf.zeros_like(v, dtype=tf.float32), trainable=False) for v in self.trainable_variables]

    def train_step(self, data):
        """Run one batch: compute gradients, accumulate them, maybe apply them.

        Args:
            data: Must unpack into exactly ``(x, y)``.

        Returns:
            Dict mapping each metric's name to its current result.
        """
        self.n_acum_step.assign_add(1)

        x, y = data
        # Forward pass and loss (including the model's regularization losses)
        # recorded on the tape.
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Calculate batch gradients
        gradients = tape.gradient(loss, self.trainable_variables)
        # Accumulate batch gradients
        # NOTE(review): a None gradient (a trainable variable the loss does not
        # depend on) would presumably break assign_add here — confirm every
        # trainable variable contributes to the loss.
        for i in range(len(self.gradient_accumulation)):
            self.gradient_accumulation[i].assign_add(gradients[i])

        # If n_acum_step reach the n_gradients then we apply accumulated gradients to update the variables otherwise do nothing
        tf.cond(tf.equal(self.n_acum_step, self.n_gradients), self.apply_accu_gradients, lambda: None)

        # update metrics
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

    def apply_accu_gradients(self):
        """Apply the accumulated gradients, then reset counter and accumulators."""
        # apply accumulated gradients
        self.optimizer.apply_gradients(zip(self.gradient_accumulation, self.trainable_variables))

        # reset
        self.n_acum_step.assign(0)
        for i in range(len(self.gradient_accumulation)):
            self.gradient_accumulation[i].assign(tf.zeros_like(self.trainable_variables[i], dtype=tf.float32))


In [13]:
# Restore the pretrained weights. Each sub-model must load its own
# checkpoint: the decoder checkpoint goes into decoder_model (it was
# previously loaded into encoder_model, leaving the decoder untrained).
encoder_model.load_weights(model_save_folder + "/best_model_save_encoder/variables/variables")
decoder_model.load_weights(model_save_folder + "/best_model_save_decoder/variables/variables")



<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f9bdc118460>

In [14]:
# End-to-end graph: image -> encoder -> random tail dropout -> decoder.
input_ae = layers.Input(shape=(img_height, img_width, 3))

e_out = encoder_model(input_ae)
# dropout_tail is called directly on the latent tensor, so it becomes part of
# the model graph itself.
e_out = dropout_tail(e_out)
d_out = decoder_model(e_out)

autoencoder_model = keras.Model(
    inputs=[input_ae],
    outputs=[d_out],
    name="ae_model",
)
# Alternative with gradient accumulation (kept disabled):
# autoencoder_model = CustomTrainStep(n_gradients=5, inputs=[input_ae], outputs=[d_out], name="ae_model")

# Reconstruction objective: mean squared error, optimised with Adam.
autoencoder_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='mse',
)
autoencoder_model.summary()

Model: "ae_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 enocder (Functional)           (None, 28, 28, 13)   17133       ['input_3[0][0]']                
                                                                                                  
 tf.compat.v1.shape (TFOpLambda  (4,)                0           ['enocder[0][0]']                
 )                                                                                                
                                                                                           

## Split the Encoder and Decoder

In [15]:
def cut_encoder_decoder(autoencoder_model, layerName = "decoder", verbose=False):
    """Split a composed autoencoder into separate encoder and decoder models.

    Args:
        autoencoder_model: Model exposing a ``layers`` list. Its first two
            layers are taken as the encoder part, and it must contain a layer
            whose ``name`` equals ``layerName``.
        layerName: Name of the layer at which the decoder part starts.
        verbose: If true, print the decoder start index and both summaries.

    Returns:
        Tuple ``(encoder, decoder)`` of ``tf.keras.Sequential`` models:
        ``encoder`` (named ``'encoder1'``) wraps ``layers[:2]``; ``decoder``
        (named ``'decoder1'``) wraps every layer from ``layerName`` onwards.

    Raises:
        ValueError: If no layer named ``layerName`` exists. Without this
            check the slice ``layers[None:]`` would silently put *every*
            layer into the decoder.
    """
    model_layers = autoencoder_model.layers

    # Index of the first layer whose name matches, or None when absent.
    decoder_input_index = next(
        (idx for idx, layer in enumerate(model_layers) if layer.name == layerName),
        None,
    )
    if decoder_input_index is None:
        available = [layer.name for layer in model_layers]
        raise ValueError(
            f"No layer named {layerName!r} in the model; available layers: {available}"
        )

    if verbose: print("Decoder index:", decoder_input_index,"\n---")

    # Encoder part: the first two layers of the composed model.
    encoder = tf.keras.Sequential(list(model_layers[:2]), name='encoder1')
    if verbose: encoder.summary()

    # Decoder part: everything from the matched layer to the end.
    decoder = tf.keras.Sequential(list(model_layers[decoder_input_index:]), name='decoder1')
    if verbose: decoder.summary()

    return encoder, decoder

In [16]:
encoder_pnc, decoder_pnc = cut_encoder_decoder(autoencoder_model, verbose=True)

Decoder index: 21 
---
Model: "encoder1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 enocder (Functional)        (None, 28, 28, 13)        17133     
                                                                 
Total params: 17,133
Trainable params: 17,133
Non-trainable params: 0
_________________________________________________________________
Model: "decoder1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 decoder (Functional)        (None, 224, 224, 3)       132675    
                                                                 
Total params: 132,675
Trainable params: 132,675
Non-trainable params: 0
_________________________________________________________________
