In [7]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import affine_transform
import skimage as ski
import time
import imageio


In [8]:
import sys
import os

# Make the project's source folders importable from the notebook directory.
# The paths are resolved against the current working directory, in this order.
sys.path.extend(
    os.path.abspath(relative_dir)
    for relative_dir in ("../src/", "../src/keras-tf")
)

import configuration_handler as cfh
import image_optimisation as io
import transformation as tr
import tf_inverse_compositional_algorithm as tf_ica

We read the meta-parameters of the algorithm from a configuration file (`config.ini`).

In [9]:
# Load the meta-parameters from the configuration file, then pull out the
# two per-algorithm sections (robust and pyramidal variants) by name.
params = cfh.read_config_file("config.ini")
params_rica, params_pica = (
    params[section_name]
    for section_name in (
        "robust_inverse_compositional_algorithm",
        "pyramidal_inverse_compositional_algorithm",
    )
)

## Performing evaluation tests

In [10]:
from tensorflow.keras.ops.image import affine_transform
from transformation import TransformType
from tf_transformation import pad_params
from tf_inverse_compositional_algorithm import PyramidalInverseCompositional
from image_optimisation import RobustErrorFunctionType

In [11]:
class TFDSEvaluator:
    def __init__(self, img_size=(256, 256)):
        self.img_size = img_size
        self.batch_size = 4
        self.models = {
            'Pyramidal-tr': PyramidalInverseCompositional(
                transform_type=TransformType.TRANSLATION,
                nscales=3,
                nu=0.5,
                TOL=1e-5,
                robust_type=RobustErrorFunctionType.CHARBONNIER,
                lambda_=0.,
                nanifoutside=True,
                delta=10,
                verbose=False
            ),
            'Pyramidal-eu': PyramidalInverseCompositional(
                transform_type=TransformType.EUCLIDEAN,
                nscales=3,
                nu=0.5,
                TOL=1e-5,
                robust_type=RobustErrorFunctionType.CHARBONNIER,
                lambda_=0.,
                nanifoutside=True,
                delta=10,
                verbose=False
            ),
            'Pyramidal-si': PyramidalInverseCompositional(
                transform_type=TransformType.SIMILARITY,
                nscales=3,
                nu=0.5,
                TOL=1e-5,
                robust_type=RobustErrorFunctionType.CHARBONNIER,
                lambda_=0.,
                nanifoutside=True,
                delta=10,
                verbose=False
            ),
            'Pyramidal-af': PyramidalInverseCompositional(
                transform_type=TransformType.AFFINITY,
                nscales=3,
                nu=0.5,
                TOL=1e-5,
                robust_type=RobustErrorFunctionType.CHARBONNIER,
                lambda_=0.,
                nanifoutside=True,
                delta=10,
                verbose=False
            ),
            'Pyramidal-ho': PyramidalInverseCompositional(
                transform_type=TransformType.HOMOGRAPHY,
                nscales=3,
                nu=0.5,
                TOL=1e-5,
                robust_type=RobustErrorFunctionType.CHARBONNIER,
                lambda_=0.,
                nanifoutside=True,
                delta=10,
                verbose=False
            )
        }
        
        # For a local use: Stop magic stuff that eats up all the memory
        # Could be disabled if you have enough memory
        self.data_options = tf.data.Options()
        self.data_options.autotune.enabled = False
        self.data_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
        self.data_options.experimental_optimization.inject_prefetch = False
        # Load and prepare the source dataset
        self.im1_ds, self.im1_ds_info = self.load_prepare_source_dataset()
        print(self.im1_ds_info)
        print("number of images in the dataset:", self.im1_ds.cardinality())

    def load_prepare_source_dataset(self):
        """Load imagenette dataset from TFDS, a small subset of ImageNet"""
        im1_ds, info = tfds.load('imagenette', with_info=True, split="train[:5%]", shuffle_files=True)
        # prepare im1_ds
        # As we don't use the images for classification, we only keep the images
        im1_ds = im1_ds.map(lambda x: x["image"])
        # For a local use: Stop magic stuff that eats up all the memory
        # Could be disabled if you have enough memory
        im1_ds = im1_ds.with_options(self.data_options)

        # Images do not have the same size, so we resize them before batching
        im1_ds = im1_ds.map(lambda x: tf.image.resize(x, self.img_size))
        im1_ds = im1_ds.batch(self.batch_size).map(lambda x: tf.image.convert_image_dtype(x, tf.float32))
        # im1_ds = im1_ds.cache().prefetch(tf.data.AUTOTUNE)
        im1_ds = im1_ds.prefetch(tf.data.AUTOTUNE) # the dataset is huge, so we don't cache it
        return im1_ds, info        

    def prepare_datasets(self, im1_ds, transform_type):
        # duplicate the image dataset to apply transformation and constitute a second ds
        arr = list(im1_ds.unbatch().as_numpy_iterator())
        im2_ds = tf.data.Dataset.from_tensor_slices(arr)
        # For a local use: Stop magic stuff that eats up all the memory
        # Could be disabled if you have enough memory
        im2_ds = im2_ds.with_options(self.data_options)
         

        # prepare the dataset of affine transformations
        p_ds = tf.data.Dataset.from_generator(
            lambda: (self.generate_params(transform_type) for _ in range(len(arr))),
            output_signature=tf.TensorSpec(shape=(8,), dtype=tf.float32)
        )
        # p_ds = p_ds.batch(self.batch_size).cache().prefetch(tf.data.AUTOTUNE)
        p_ds = p_ds.batch(self.batch_size).prefetch(tf.data.AUTOTUNE) # the dataset is huge, so we don't cache it

        # prepare im2_ds
        im2_ds = im2_ds.batch(self.batch_size).map(lambda x: tf.image.resize(x, self.img_size))
        im2_ds = im2_ds.map(lambda x: tf.image.convert_image_dtype(x, tf.float32))
        combined_ds = tf.data.Dataset.zip((im2_ds, p_ds))
        transformed_ds = combined_ds.map(
            lambda im, p: affine_transform(
                im, p, 
                interpolation="bilinear", 
                fill_mode="constant", 
                fill_value=np.nan, 
                data_format="channels_last"
            ),
            num_parallel_calls=tf.data.AUTOTUNE
        )
        # im2_ds = transformed_ds.cache().batch(self.batch_size).prefetch(tf.data.AUTOTUNE)
        im2_ds = transformed_ds.prefetch(tf.data.AUTOTUNE) # the dataset is huge, so we don't cache it

        return im2_ds, p_ds


    def generate_affine_params(self):
        """Generate random affine transformation parameters"""
        return {
            "scale": np.random.uniform(0.1, 0.3, 2),
            "rotation": np.random.uniform(-np.pi/6, np.pi/6),
            "shear": np.random.uniform(-0.3, 0.3, 2),
            "translation": np.random.uniform(-30, 30, 2),
            "homography": np.random.uniform(0., 0.2, 2)
        }
    
    def generate_params(self, transform_type: TransformType):
        aff_params = self.generate_affine_params()
        match transform_type:
            case TransformType.TRANSLATION:
                p = aff_params['translation']
                return pad_params(p, 8)
            case TransformType.EUCLIDEAN:
                p = np.array([aff_params['translation'], aff_params['rotation']])
                return pad_params(p, 8)
            case TransformType.SIMILARITY: # specified as (tx, ty, s, sh)
                p = np.array([aff_params['translation'], aff_params['scale'][0], aff_params['shear'][0]])
                return pad_params(p, 8)
            case TransformType.AFFINITY: # specified as (tx, ty, a00, a01, a10, a11)
                p = np.array([aff_params['translation'], aff_params['scale'][0], aff_params['shear'], aff_params['scale'][1]])
                return pad_params(p, 8)
            case TransformType.HOMOGRAPHY: # specified as (tx, ty, a00, a01, a10, a11, a20, a21)
                p = np.array([aff_params['translation'], aff_params['scale'][0], aff_params['shear'], aff_params['scale'][1], 
                              aff_params['homography']])
                return tf.constant(p)

    def evaluate(self, num_samples=50):
        # results = {name: {'mse': [], 'mae': [], 'epe': [], 'time': []} 
        #           for name in self.models}
        results = {name: {'mse': [], 'mae': [], 'time': []} 
                  for name in self.models}
        nb_batches = self.im1_ds.cardinality().numpy() // self.batch_size
        
        for model_name, model in self.models.items():
            # Prepare datasets
            tf.print("Preparing datasets for model", model_name)
            im2_ds, p_ds = self.prepare_datasets(self.im1_ds, model.transform_type)

            i = 0
            for batch_im1, batch_im2, batch_params in zip(self.im1_ds, im2_ds, p_ds):
                tf.print("Processing batch number", i, " on ", nb_batches)
                start_time = time.time()
                
                # Estimate transformation
                p_pred, error, DI, Iw = model([batch_im1, batch_im2])
                params_pred = p_pred[0]
                true_params = batch_params.numpy()[0]
                
                # Calculate parameter errors
                mse = np.mean((params_pred - true_params)**2)
                mae = np.mean(np.abs(params_pred - true_params))
                
                # # Calculate endpoint error (EPE)
                # pred_flow = self.params_to_flow(params_pred)
                # true_flow = self.params_to_flow(true_params)
                # epe = np.mean(np.sqrt(np.sum((pred_flow - true_flow)**2, axis=-1)))
                
                # Store results
                results[name]['mse'].append(mse)
                results[name]['mae'].append(mae)
                # results[name]['epe'].append(epe)
                results[name]['time'].append(time.time() - start_time)
                i += 1
                
        return results

    def params_to_flow(self, params):
        """Convert affine parameters to flow field"""
        h, w = self.img_size
        matrix = params.reshape(3, 3)
        
        x = np.linspace(0, w-1, w)
        y = np.linspace(0, h-1, h)
        xx, yy = np.meshgrid(x, y)
        ones = np.ones_like(xx)
        coords = np.stack([xx, yy, ones], axis=0)
        
        transformed = np.tensordot(matrix, coords, axes=([1], [0]))
        flow = np.moveaxis(transformed, 0, -1) - np.stack([xx, yy], axis=-1)
        return flow

    def plot_results(self, results):
        metrics = ['mse', 'mae', 'epe', 'time']
        fig, axs = plt.subplots(2, 2, figsize=(15, 10))
        
        for i, metric in enumerate(metrics):
            ax = axs[i//2, i%2]
            for name in self.models:
                ax.plot(results[name][metric], label=name)
            ax.set_title(f'{metric.upper()} Comparison')
            ax.set_xlabel('Sample Index')
            ax.set_ylabel(metric.upper())
            ax.legend()
            ax.grid(True)
            
        plt.tight_layout()
        plt.show()




In [12]:
# Run evaluation
# Constructing the evaluator builds the five pyramidal models and loads the
# source image dataset (it also prints the dataset info).
evaluator = TFDSEvaluator()
# `results` maps each model name to its lists of recorded metrics.
results = evaluator.evaluate(num_samples=50)
# One panel per metric, one curve per model.
evaluator.plot_results(results)


tfds.core.DatasetInfo(
    name='imagenette',
    full_name='imagenette/full-size-v2/1.0.0',
    description="""
    Imagenette is a subset of 10 easily classified classes from the Imagenet
    dataset. It was originally prepared by Jeremy Howard of FastAI. The objective
    behind putting together a small version of the Imagenet dataset was mainly
    because running new ideas/algorithms/experiments on the whole Imagenet take a
    lot of time.
    
    This version of the dataset allows researchers/practitioners to quickly try out
    ideas and share with others. The dataset comes in three variants:
    
    *   Full size
    *   320 px
    *   160 px
    
    Note: The v2 config correspond to the new 70/30 train/valid split (released in
    Dec 6 2019).
    """,
    config_description="""
    full-size variant.
    """,
    homepage='https://github.com/fastai/imagenette',
    data_dir='/home/mike/tensorflow_datasets/imagenette/full-size-v2/1.0.0',
    file_format=tfrecord,
    downl

2025-03-11 17:35:01.841226: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-03-11 17:35:30.193463: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: INVALID_ARGUMENT: paddings must be a matrix with 2 columns: [1]
2025-03-11 17:35:30.206813: W tensorflow/core/framework/op_kernel.cc:1829] UNKNOWN: InvalidArgumentError: {{function_node __wrapped__Pack_N_2_device_/job:localhost/replica:0/task:0/device:GPU:0}} Shapes of all inputs must match: values[0].shape = [2] != values[1].shape = [] [Op:Pack] name: 
Traceback (most recent call last):

  File "/home/mike/miniconda3/envs/tensorflow2/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)

  File "/home/mike/miniconda3/envs/tensorflow2/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)



UnknownError: {{function_node __wrapped__IteratorGetNext_output_types_1_device_/job:localhost/replica:0/task:0/device:CPU:0}} InvalidArgumentError: {{function_node __wrapped__Pad_device_/job:localhost/replica:0/task:0/device:GPU:0}} paddings must be a matrix with 2 columns: [1] [Op:Pad]
Traceback (most recent call last):

  File "/home/mike/miniconda3/envs/tensorflow2/lib/python3.10/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)

  File "/home/mike/miniconda3/envs/tensorflow2/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)

  File "/home/mike/miniconda3/envs/tensorflow2/lib/python3.10/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "/tmp/ipykernel_46229/3981852581.py", line 102, in <genexpr>
    lambda: (self.generate_params(transform_type) for _ in range(len(arr))),

  File "/tmp/ipykernel_46229/3981852581.py", line 143, in generate_params
    return pad_params(p, 8)

  File "/mnt/c/Users/mikef/git/inverse_compositional_algorithm/src/keras-tf/tf_transformation.py", line 31, in pad_params
    return tf.cond(

  File "/home/mike/miniconda3/envs/tensorflow2/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None

  File "/mnt/c/Users/mikef/git/inverse_compositional_algorithm/src/keras-tf/tf_transformation.py", line 33, in <lambda>
    lambda: tf.pad(params, [[0, pad_length]]),

tensorflow.python.framework.errors_impl.InvalidArgumentError: {{function_node __wrapped__Pad_device_/job:localhost/replica:0/task:0/device:GPU:0}} paddings must be a matrix with 2 columns: [1] [Op:Pad]


	 [[{{node PyFunc}}]] [Op:IteratorGetNext] name: 

In [None]:
# Print summary statistics
print("Average Metrics:")
for name in evaluator.models:
    print(f"\n{name}:")
    for metric in ['mse', 'mae', 'epe', 'time']:
        # Report only the metrics that were actually recorded: the endpoint
        # error ('epe') is not computed by evaluate(), and indexing it
        # unconditionally raises a KeyError.
        if metric not in results[name]:
            continue
        values = results[name][metric]
        # An empty list would make np.mean emit a warning; show NaN instead.
        avg = np.mean(values) if len(values) else float("nan")
        print(f"  {metric.upper()}: {avg:.4f}")