## End-to-end test

In [None]:
!pip install -q datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import numpy as np
import keras
from datasets import load_dataset
import datasets
datasets.logging.set_verbosity_warning()

### Choosing the best Cluster Classification *model*

In [None]:
# Compare the candidate cluster classification models: for every test dataset
# (cluster x augmentation level) print each model's Top-1 and Top-K accuracy,
# i.e. how often the dataset's own cluster index is among the model's best guesses.

# Setup
folder  = '/content/drive/MyDrive/Colab Notebooks/Zama'
batch_size = 16
enhanced_model = True   # True: models are fed [image, siamese embedding]; False: image only
k = 3                   # K used for the Top-K metric

# Load cluster classification models
if enhanced_model:
    cluster_cls_handles = ['8000_orig__1_siamclusters_enhanced.keras',
                           '8000_large_4_siamclusters_enhanced.keras']
else:
    cluster_cls_handles = ['8000_orig__1_siamclusters',
                           '8000_large_4_siamclusters_epoch40']

cluster_cls_models = [keras.models.load_model(f'{folder}/{h}') for h in cluster_cls_handles]

# Load embedding model
siamese_embedding_model = keras.models.load_model(f'{folder}/embedding_network_512.h5')     #  USAGE: res = siamese_embedding_model.predict( tf_ds )

# Test only for clusters with trained inter-cluster classification models
for cluster_idx in range(6):

    # Test with various augmentation levels
    for augm in ['small', 'medium', 'large']:

        # Load dataset
        dataset_name = f"cluster{cluster_idx:02d}_{augm}_10"
        ds = load_dataset(f"arieg/{dataset_name}", split='train')
        ds.set_format('tf')

        image_test = ds['image'].numpy()
        if enhanced_model:
            # Embeddings are only needed by the two-input ("enhanced") classifiers
            emb_test = siamese_embedding_model.predict(
                ds.with_format('tf').to_tf_dataset(columns="image", batch_size=batch_size),
                verbose=0)

        print(f"Dataset {dataset_name}")

        # Classify cluster with every candidate model
        for handle, model in zip(cluster_cls_handles, cluster_cls_models):
            print(f"    Model {handle}")

            # Classification inference
            if enhanced_model:
                res = model.predict([image_test, emb_test], verbose=0)
            else:
                res = model.predict(image_test, verbose=0)

            # assumes res is (n_samples, n_clusters) class scores -- TODO confirm
            n_samples = res.shape[0]

            # Top-1: the best-scoring class is the dataset's cluster
            top1_hits = np.argmax(res, axis=1) == cluster_idx
            print(f"        Top-1 {np.count_nonzero(top1_hits) / n_samples:.2f}")

            # Top-K: the dataset's cluster is among the K best-scoring classes.
            # K is clamped so argpartition cannot fail on a model with fewer than K outputs.
            top_k = min(k, res.shape[1])
            topk_classes = np.argpartition(res, -top_k, axis=1)[:, -top_k:]
            topk_hits = (topk_classes == cluster_idx).any(axis=1)
            print(f"        Top-{k} {np.count_nonzero(topk_hits) / n_samples:.2f}")

### End-to-End test with the chosen Cluster Classification model
Test with datasets of large, medium and small augmentations

In [None]:
# End-to-end evaluation: for every test dataset (cluster x augmentation level)
# report how often the cluster classifier picks the right cluster (Top-1 / Top-K)
# and how often, in addition, that cluster's track classifier predicts the right label.

# Setup
folder  = '/content/drive/MyDrive/Colab Notebooks/Zama'
batch_size = 16
k = 3                   # K used for the Top-K metric

# Must match the chosen handle below: when True the cluster model is fed
# [image, siamese embedding], otherwise images only.
enhanced_model = True

# Load the best cluster classification model
cluster_cls_handle = '8000_orig__1_siamclusters_enhanced.keras'
                          # Other candidates:
                          # '8000_large_4_siamclusters_enhanced.keras'
                          # '8000_orig__1_siamclusters'
                          # '8000_large_4_siamclusters_epoch40'
print(f" Model {cluster_cls_handle}")
cluster_cls_model = keras.models.load_model(f'{folder}/{cluster_cls_handle}')

# Load embedding model
siamese_embedding_model = keras.models.load_model(f'{folder}/embedding_network_512.h5')     #  USAGE: res = siamese_embedding_model.predict( tf_ds )

# Saved weights/biases of the per-cluster top classification layers
top_layers_dir = f"{folder}/top_layers"

# Test only for clusters with trained inter-cluster classification models
for cluster_idx in range(6):

    # Create inter-cluster classification model; the number of classes is the
    # length of the bias vector saved for the third (last) top layer.
    # NOTE(review): create_cls_model is not defined in this cell -- it must already
    # exist in the kernel (defined in an earlier cell) before this cell is run.
    num_classes = np.load(f"{top_layers_dir}/b_cluster{cluster_idx:02d}_top3.npy").shape[0]
    track_cls_model = create_cls_model(num_classes)

    # Load top classification layers' weights
    # to make it ready for inference
    for top_idx, lyr in enumerate([-4, -2, -1], start=1):

        weights = np.load(f"{top_layers_dir}/w_cluster{cluster_idx:02d}_top{top_idx}.npy")
        biases = np.load(f"{top_layers_dir}/b_cluster{cluster_idx:02d}_top{top_idx}.npy")

        track_cls_model.layers[lyr].set_weights([weights, biases])

    # Test with various augmentation levels
    for augm in ['small', 'medium', 'large']:

        # Load dataset
        dataset_name = f"cluster{cluster_idx:02d}_{augm}_10"
        ds = load_dataset(f"arieg/{dataset_name}", split='train')
        ds.set_format('tf')

        image_test = ds['image'].numpy()
        label_test = ds['label']
        if enhanced_model:
            # Embeddings are only needed by the two-input ("enhanced") classifier
            emb_test = siamese_embedding_model.predict(
                ds.with_format('tf').to_tf_dataset(columns="image", batch_size=batch_size),
                verbose=0)

        print(f"Dataset {dataset_name}")

        # Cluster classification inference
        if enhanced_model:
            res = cluster_cls_model.predict([image_test, emb_test], verbose=0)
        else:
            res = cluster_cls_model.predict(image_test, verbose=0)

        # Track classification inference
        res_track = track_cls_model.predict(image_test, verbose=0)

        # assumes res / res_track are (n_samples, n_classes) score arrays -- TODO confirm
        n_samples = res.shape[0]

        # Per-sample flag: the track classifier predicted the sample's label.
        # Computed once and shared by the Top-1 and Top-K end-to-end metrics.
        labels = np.asarray(label_test).reshape(-1).astype(int)
        track_hits = np.argmax(res_track, axis=1) == labels

        # Top-1
        top1_hits = np.argmax(res, axis=1) == cluster_idx
        print(f"        Top-1 cluster  {np.count_nonzero(top1_hits) / n_samples:.2f}")

        # End-to-end: right cluster AND right track
        e2e_good = np.count_nonzero(top1_hits & track_hits)
        print(f"        Top-1 end2end  {e2e_good / n_samples:.2f}")

        # Top-K calculation; K is clamped so argpartition cannot fail on a
        # model with fewer than K outputs.
        top_k = min(k, res.shape[1])
        topk_classes = np.argpartition(res, -top_k, axis=1)[:, -top_k:]
        topk_hits = (topk_classes == cluster_idx).any(axis=1)
        print(f"        Top-{k} cluster  {np.count_nonzero(topk_hits) / n_samples:.2f}")

        e2e_good = np.count_nonzero(topk_hits & track_hits)
        print(f"        Top-{k} end2end  {e2e_good / n_samples:.2f}")