<a href="https://colab.research.google.com/github/toanpt74/COLAB_RD/blob/main/VQ_VAE_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-only step: mount the user's Google Drive under /content/drive.
# NOTE(review): the data/model paths used further down are Windows-style
# (E:\...), so this mount does not appear to be used by the rest of the file
# -- confirm before relying on it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#from distutils.command.install_egg_info import install_egg_info

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from tensorflow import keras
from tensorflow.keras import layers
#import tensorflow_probability as tfp
import tensorflow as tf
from keras.layers import BatchNormalization, Activation, Conv2D, Add, Dropout
import pandas as pd
import os
import random
from sklearn.model_selection import train_test_split
import cv2
from keras.callbacks import ModelCheckpoint

# Restrict CUDA to the device with index 1 for this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# NOTE(review): the names are misleading. COL is the FIRST axis of INPUT_SHAPE
# and of every np.reshape(..., (COL, ROW)) below, i.e. it is used as the number
# of image rows (height); ROW is the second axis (width). loadImages passes
# (input_shape[1], input_shape[0]) to cv2.resize, which presumably follows
# OpenCV's (width, height) dsize convention -- confirm.
COL = 128 # size of axis 0 of INPUT_SHAPE (used as image height / row count)
ROW = 256 # size of axis 1 of INPUT_SHAPE (used as image width / column count)
# Shape of one single-channel model input: (COL, ROW, 1).
INPUT_SHAPE=(COL, ROW, 1,)
# Directory whose entries are all loaded as images at module level.
datapath=r'E:\ToanPT'

def LoadDataToTrain(input_shape=(256,256,1), path='', test_ratio=0.2, Use_Region = False):
    """Load every entry of directory ``path`` as a pre-processed image.

    The directory listing is turned into full paths, shuffled in place with
    ``random.shuffle`` and handed to ``loadImages``.

    Args:
        input_shape: Target shape forwarded unchanged to ``loadImages``.
        path: Directory to list with ``os.listdir``.
        test_ratio: Currently unused by this function.
        Use_Region: Currently unused by this function.

    Returns:
        A ``(X_train, X_path)`` tuple: the array produced by ``loadImages``
        and the shuffled list of paths, in matching order.
    """
    # NOTE(review): every directory entry is forwarded as-is -- there is no
    # extension filter and no recursion into sub-directories.
    shuffled_paths = []
    for entry in os.listdir(path):
        shuffled_paths.append(os.path.join(path, entry))
    random.shuffle(shuffled_paths)

    images = loadImages(shuffled_paths, input_shape=input_shape, islabel=0)
    return images, shuffled_paths
def loadImages(images_path, input_shape, islabel=0):
    """Read image files and return them as a stacked, scaled float32 array.

    Each file is decoded with ``cv2.imread``, converted to a single channel
    when the decoded array has three dimensions, resized with ``cv2.resize``
    using ``(input_shape[1], input_shape[0])`` as the target size, cast to
    float32 and divided by 255.

    Args:
        images_path: Iterable of image file paths (strings).
        input_shape: Sequence whose first two entries give the target size;
            index 0 and index 1 are passed to ``cv2.resize`` in swapped order.
        islabel: When equal to 1, print the shape and pixel sum of every
            processed image (debug output).

    Returns:
        ``np.ndarray`` built from the list of processed images, in the same
        order as ``images_path``.

    Raises:
        OSError: If ``cv2.imread`` cannot read or decode a file (it signals
            this by returning ``None``).
    """
    data = []
    print("**************")
    for file in images_path:
        print("Load file:" + file)
        im = cv2.imread(file)
        if im is None:
            # Previously this surfaced as an opaque AttributeError on
            # ``im.shape``; fail with the offending path instead.
            raise OSError("Could not read image file: " + str(file))
        if im.ndim == 3:
            # cv2.imread returns colour images in B, G, R channel order, so
            # the BGR conversion code is required for correct luminance
            # weights (the RGB code swaps the red and blue weights).
            # NOTE(review): models trained with the old conversion saw
            # slightly different gray levels -- retrain or re-validate.
            im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        im = cv2.resize(im, (input_shape[1], input_shape[0]))
        temp = im.astype('float32') / 255.0
        if islabel == 1:
            print(temp.shape)
            print(np.sum(temp))
        data.append(temp)
    X = np.array(data)
    return X

class VectorQuantizer(layers.Layer):
    """Keras layer that replaces each input vector by its closest codebook vector.

    The codebook is a trainable variable of shape
    ``(embedding_dim, num_embeddings)``; one code per column.

    Args:
        num_embeddings: Number of codebook vectors.
        embedding_dim: Length of each codebook vector; the input is reshaped
            to ``[-1, embedding_dim]`` before the lookup.
        beta: Weight applied to the commitment term of the layer loss.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, num_embeddings, embedding_dim, beta=0.25, **kwargs):
        super().__init__(**kwargs)
        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings
        self.beta = beta

        codebook_shape = (self.embedding_dim, self.num_embeddings)
        initializer = tf.random_uniform_initializer()
        self.embeddings = tf.Variable(
            initial_value=initializer(shape=codebook_shape, dtype="float32"),
            trainable=True,
            name="embeddings_vqvae",
        )

    def call(self, x):
        """Quantize ``x`` and register the codebook + commitment loss.

        Returns a tensor with the same shape as ``x`` whose forward value is
        the quantized tensor, while gradients flow to ``x`` unchanged.
        """
        original_shape = tf.shape(x)
        flat_vectors = tf.reshape(x, [-1, self.embedding_dim])

        # Look up the nearest code for every flattened vector via a one-hot
        # matrix product with the (transposed) codebook.
        nearest = self.get_code_indices(flat_vectors)
        one_hot_codes = tf.one_hot(nearest, self.num_embeddings)
        looked_up = tf.matmul(one_hot_codes, self.embeddings, transpose_b=True)

        # Restore the shape of the incoming tensor.
        quantized = tf.reshape(looked_up, original_shape)

        # stop_gradient decides which side each term updates: the codebook
        # term only moves the codes, the commitment term only moves x.
        codebook_loss = tf.reduce_mean((quantized - tf.stop_gradient(x)) ** 2)
        commitment_loss = tf.reduce_mean((tf.stop_gradient(quantized) - x) ** 2)
        self.add_loss(self.beta * commitment_loss + codebook_loss)

        # Straight-through estimator: value of `quantized`, gradient of `x`.
        return x + tf.stop_gradient(quantized - x)

    def get_code_indices(self, flattened_inputs):
        """Return, per row of ``flattened_inputs``, the index of the closest code.

        Closeness is the squared Euclidean distance, expanded as
        ``|a|^2 + |b|^2 - 2 a.b``.
        """
        input_sq_norms = tf.reduce_sum(flattened_inputs ** 2, axis=1, keepdims=True)
        code_sq_norms = tf.reduce_sum(self.embeddings ** 2, axis=0)
        cross_terms = tf.matmul(flattened_inputs, self.embeddings)
        distances = input_sq_norms + code_sq_norms - 2 * cross_terms
        return tf.argmin(distances, axis=1)

def encoder_conv_block(inputs,filter):
    """Two parallel 3x3 conv branches, concatenated, then a stride-2 3x3 conv.

    Both branches read ``inputs``: one uses dilation 1, the other dilation 2.
    Each branch is batch-normalized, the results are concatenated on the last
    axis, and a final conv with strides (2, 2) produces the output.

    Args:
        inputs: Keras tensor fed to both branches.
        filter: Exponent used to derive the filter counts (see below).

    Returns:
        Output tensor of the final stride-2 convolution.
    """
    narrow_filters = 2 ** filter
    # NOTE(review): `2**filter+1` evaluates as (2**filter) + 1 (e.g. 17 for
    # filter=4), not 2**(filter+1). Kept as-is to preserve the architecture;
    # confirm which was intended.
    wide_filters = 2 ** filter + 1

    plain = layers.Conv2D(
        filters=narrow_filters, kernel_size=3, strides=(1, 1), padding="same",
        dilation_rate=(1, 1), activation='relu',
    )(inputs)
    plain = layers.BatchNormalization()(plain)

    dilated = layers.Conv2D(
        filters=wide_filters, kernel_size=3, strides=(1, 1), padding="same",
        dilation_rate=(2, 2), activation='relu',
    )(inputs)
    dilated = layers.BatchNormalization()(dilated)

    merged = layers.concatenate(inputs=[plain, dilated], axis=-1)
    return layers.Conv2D(
        filters=wide_filters, kernel_size=3, strides=(2, 2), padding="same",
        dilation_rate=(1, 1), activation='relu',
    )(merged)
def get_encoder(latent_dim=32):
    """Build the encoder model for inputs of shape ``INPUT_SHAPE``.

    Five ``encoder_conv_block`` stages (filter exponents 4 through 8), each
    followed by batch normalization, then a 1x1 convolution projecting to
    ``latent_dim`` channels.

    Args:
        latent_dim: Number of filters of the final 1x1 convolution.

    Returns:
        A ``keras.Model`` named ``"encoder"``.
    """
    # An earlier variant (four plain stride-2 convs with 16/32/64/128 filters)
    # was replaced by the block-based stack below.
    encoder_inputs = keras.Input(shape=INPUT_SHAPE)

    features = encoder_inputs
    for exponent in (4, 5, 6, 7, 8):
        features = encoder_conv_block(inputs=features, filter=exponent)
        features = layers.BatchNormalization()(features)

    encoder_outputs = layers.Conv2D(latent_dim, 1, padding="same")(features)
    return keras.Model(encoder_inputs, encoder_outputs, name="encoder")
def get_decoder(latent_dim=32):
    """Build the decoder model that maps latents back to image space.

    The input shape is taken from the output of a freshly built encoder.
    Five stride-2 transposed convolutions (256, 128, 64, 32, 16 filters),
    each followed by batch normalization, are followed by two single-filter
    transposed convolutions.

    Args:
        latent_dim: Passed to ``get_encoder`` to derive the latent input shape.

    Returns:
        A ``keras.Model`` named ``"decoder"``.
    """
    # A throwaway encoder is built only to read its output shape.
    latent_shape = get_encoder(latent_dim).output.shape[1:]
    latent_inputs = keras.Input(shape=latent_shape)

    x = latent_inputs
    for n_filters in (256, 128, 64, 32, 16):
        x = layers.Conv2DTranspose(
            filters=n_filters, kernel_size=3, strides=2, padding='same', activation='relu',
        )(x)
        x = layers.BatchNormalization()(x)

    # NOTE(review): the sigmoid layer is followed by one more transposed conv
    # that is given no activation argument, so the sigmoid is not the last
    # operation of the model -- confirm this is intended.
    x = layers.Conv2DTranspose(
        filters=1, kernel_size=3, strides=1, padding='same', activation='sigmoid',
    )(x)
    decoder_outputs = layers.Conv2DTranspose(1, 3, padding="same")(x)
    return keras.Model(latent_inputs, decoder_outputs, name="decoder")
def get_vqvae(latent_dim=32, num_embeddings=64):
    """Assemble encoder -> vector quantizer -> decoder into one model.

    Args:
        latent_dim: Channel count of the encoder output and length of each
            codebook vector.
        num_embeddings: Number of codebook vectors in the quantizer.

    Returns:
        A ``keras.Model`` named ``"vq_vae"`` taking inputs of ``INPUT_SHAPE``
        and returning the decoder output.
    """
    quantizer = VectorQuantizer(num_embeddings, latent_dim, name="vector_quantizer")
    encoder = get_encoder(latent_dim)
    decoder = get_decoder(latent_dim)

    image_in = keras.Input(shape=INPUT_SHAPE)
    reconstructions = decoder(quantizer(encoder(image_in)))
    return keras.Model(image_in, reconstructions, name="vq_vae")

# Build a throwaway VQ-VAE with the default arguments and print its summary.
# This runs at module level, every time the file is executed.
get_vqvae().summary()

class VQVAETrainer(keras.models.Model):
    """Training wrapper around a VQ-VAE with a custom ``train_step``.

    Args:
        train_variance: Divisor applied to the mean squared reconstruction
            error.
        latent_dim: Forwarded to ``get_vqvae``.
        num_embeddings: Forwarded to ``get_vqvae``.
        **kwargs: Forwarded to ``keras.models.Model``.
    """

    def __init__(self, train_variance, latent_dim=32, num_embeddings=128, **kwargs):
        super().__init__(**kwargs)
        self.train_variance = train_variance
        self.latent_dim = latent_dim
        self.num_embeddings = num_embeddings

        # The wrapped model that is actually trained and saved.
        self.vqvae = get_vqvae(self.latent_dim, self.num_embeddings)

        # Running means reported by train_step.
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.vq_loss_tracker = keras.metrics.Mean(name="vq_loss")

    @property
    def metrics(self):
        """The three loss trackers, in the order total / reconstruction / vq."""
        trackers = [self.total_loss_tracker]
        trackers.append(self.reconstruction_loss_tracker)
        trackers.append(self.vq_loss_tracker)
        return trackers

    def train_step(self, x):
        """Run one optimization step on batch ``x`` and return running losses.

        Returns:
            Dict with keys ``"loss"``, ``"reconstruction_loss"`` and
            ``"vqvae_loss"`` holding the current tracker results.
        """
        model = self.vqvae
        with tf.GradientTape() as tape:
            reconstructions = model(x)
            squared_error = (x - reconstructions) ** 2
            reconstruction_loss = tf.reduce_mean(squared_error) / self.train_variance
            # model.losses holds the terms registered through add_loss.
            total_loss = reconstruction_loss + sum(model.losses)

        # Only the wrapped model's variables are updated.
        grads = tape.gradient(total_loss, model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, model.trainable_variables))

        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.vq_loss_tracker.update_state(sum(model.losses))

        results = {}
        results["loss"] = self.total_loss_tracker.result()
        results["reconstruction_loss"] = self.reconstruction_loss_tracker.result()
        results["vqvae_loss"] = self.vq_loss_tracker.result()
        return results

    def loadWeight(self, path):
        """Load weights from ``path`` into the wrapped VQ-VAE model."""
        self.vqvae.load_weights(path)


# Load (in shuffled order) every entry of `datapath` at module level.
# X_Train and X_Path are read as globals by Predict and CompareImages.
X_Train, X_Path = LoadDataToTrain(input_shape=INPUT_SHAPE,path=datapath, Use_Region=False)

# Same data with a trailing axis of size 1 appended; only referenced by the
# commented-out training code below.
x_train_scaled = np.expand_dims(X_Train, -1)
# Variance over all loaded pixel values; only referenced by the commented-out
# training code below (as the trainer's train_variance).
data_variance = np.var(X_Train)

#Train model
# vqvae_trainer = VQVAETrainer(data_variance, latent_dim=32, num_embeddings=512)
# vqvae_trainer.compile(optimizer=keras.optimizers.Adam())
#
# model_save_path =r'E:\abc'
#
# vqvae_trainer.fit(x_train_scaled, epochs=700, batch_size=16)
# vqvae_trainer.vqvae.save(model_save_path +"\\abc", save_format="tf")

#Reconstruction


def mse(imageA, imageB):
    """Return the mean squared error between two equally sized images.

    Both inputs are cast to float before subtracting. The summed squared
    difference is divided by ``imageA.shape[0] * imageA.shape[1]``; a lower
    value means the images are more similar.
    """
    difference = imageA.astype("float") - imageB.astype("float")
    squared_total = np.sum(difference ** 2)
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return squared_total / pixel_count

def Predict(model_path, file_path):
    """Score every image in the global ``X_Train`` and export the results.

    For each image the saved model's reconstruction is compared with the
    input: the MSE is stored as the score, and the number of contours with
    area above 100 in the thresholded squared-error map is stored as the blob
    count. Scores below 0.0005 are counted and printed as OK, scores above
    0.002 as NG; scores in between are recorded but not counted.

    Args:
        model_path: Path passed to ``tf.keras.models.load_model``.
        file_path: Destination passed to ``DataFrame.to_excel``.
    """
    model = tf.keras.models.load_model(model_path)
    n = len(X_Train)
    scores = np.zeros(n)
    blob_counts = np.zeros(n)
    print(f"Number Images ={n}")
    ng_count = 0
    ok_count = 0

    for i in range(n):
        # Index with a list so the leading batch axis of length 1 is kept.
        test_image = X_Train[[i]]
        reconstructions_test = model.predict(test_image)

        # Per-pixel squared error, binarised at 0.01 into a 0/255 map.
        original = np.reshape(test_image, (COL, ROW))
        rebuilt = np.reshape(reconstructions_test[0], (COL, ROW))
        squared_error = np.power(original - rebuilt, 2)
        outlier_map255 = np.where(squared_error < 0.01, 0, 255).astype('uint8')

        # Count connected outlier regions larger than 100 px of contour area.
        _, thresh = cv2.threshold(outlier_map255, 20, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        blob_counts[i] = sum(1 for cnt in contours if cv2.contourArea(cnt) > 100)

        score = mse(test_image[0], reconstructions_test[0].squeeze())
        scores[i] = score

        # Replace these thresholds with the required spec values.
        if score < 0.0005:
            ok_count += 1
            print(f"OK - SCORE: {i} - {score}")
            print(X_Path[i])
        elif score > 0.002:
            print(f"NG - SCORE: {i} - {score}")
            print(X_Path[i])
            ng_count += 1

    print(f"OK= {ok_count}")
    print(f"NG = {ng_count}")
    print(f"MAX= {np.max(scores)}")
    print(f"MIN= {np.min(scores)}")
    print(f"STD= {np.std(scores)}")
    print(f"AVG={np.average(scores)}")

    report = pd.DataFrame({"Value": scores, "NumBlob": blob_counts, "Path": X_Path})
    report.to_excel(file_path, index=False)

def check_image(images_path,file_path):
    """List the entries of ``images_path``; the listing is currently discarded.

    Args:
        images_path: Directory handed to ``os.listdir``.
        file_path: Currently unused.

    Returns:
        None.
    """
    # NOTE(review): this looks like an unfinished stub -- nothing is done
    # with the listing yet.
    _entries = os.listdir(images_path)

#Predict(r"E:\VideoClassification\model\abc",r'D:\Private_Documents\abc.xlsx')

#Test
def CompareImages(model_path, num_images=10):
    """Plot input, reconstruction and outlier map for the first images.

    For each of the first ``num_images`` entries of the global ``X_Train`` a
    three-panel figure is shown: the input (titled with its MSE score), the
    model's reconstruction, and the 0/255 outlier map (titled with the score
    and the number of contours whose area exceeds 100).

    Args:
        model_path: Path passed to ``tf.keras.models.load_model``.
        num_images: Maximum number of images to display. Defaults to 10, the
            previously hard-coded count; it is clamped to the number of
            loaded images so short datasets no longer raise IndexError.
    """
    model = tf.keras.models.load_model(model_path)
    n = len(X_Train)
    print(f"Number Images ={n}")

    for i in range(min(num_images, n)):
        # Index with a list so the leading batch axis of length 1 is kept.
        test_image = X_Train[[i]]
        reconstructions_test = model.predict(test_image)

        # Per-pixel squared error, binarised at 0.01 into a 0/255 map.
        x = np.reshape(test_image, (COL, ROW))
        y = np.reshape(reconstructions_test[0], (COL, ROW))
        feature_score = np.power(x - y, 2)
        outlier_map = np.where(feature_score < 0.01, 0, 255)

        reconstruction = reconstructions_test[0].squeeze()
        b = mse(test_image[0], reconstruction)

        plt.subplot(1, 3, 1)
        plt.imshow(test_image[0], cmap='gray')
        plt.title(str(b))
        plt.axis("off")

        plt.subplot(1, 3, 2)
        plt.imshow(reconstruction, cmap='gray')
        plt.title("Reconstructed")
        plt.axis("off")

        # Count the blobs (contours with area above 100) in the outlier map.
        outlier_map255 = outlier_map.astype('uint8')
        _, thresh = cv2.threshold(outlier_map255, 20, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        numBlob = sum(1 for cnt in contours if cv2.contourArea(cnt) > 100)

        plt.subplot(1, 3, 3)
        plt.imshow(outlier_map, cmap='gray')
        plt.title(f"Score:{b}, blob :{str(numBlob)}")
        plt.axis("off")
        plt.show()
# Module-level entry point: load the saved model from E:\abc and display the
# comparison figures. Runs every time the file is executed.
CompareImages(r'E:\abc')





