In [None]:
# On-Device Training - Best/Worst Sample Selection
# Import helper functions from our organized codebase
import sys
import os

# Make the project code importable from the notebook's working directory.
# '../src' exposes the modules inside src/ by bare name, but the imports below
# use the package-qualified form (`from src.models import ...`), which only
# resolves when the directory that CONTAINS src/ is on the path - so the
# parent directory is added too.
# NOTE(review): assumes the notebook runs one level below the repo root - confirm.
# The membership check keeps sys.path from growing each time the cell is re-run.
for _extra_path in ('../src', '..'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

from src.models import OnDeviceTrainingModel
from src.data import DataPreprocessor
from src.utils import SampleSelector, calculate_bvsb, save_images_to_zip
from src.evaluation import plot_confidence_distribution

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# Build the CIFAR-10 datasets through the project's DataPreprocessor.
preprocessor = DataPreprocessor(batch_size=128, img_size=160)

# Labels 0-8 are handed to the preprocessor as the N classes
# (9 classes for N, leaving 1 class for M).
datasets = preprocessor.prepare_datasets(n_classes=list(range(9)))

print("Available datasets:", [*datasets.keys()])
print("Dataset shapes have been automatically handled by the preprocessor")

In [None]:
# A pre-trained model would normally be loaded here (adjust the path), e.g.:
# model = tf.keras.models.load_model("path/to/your/model.keras")

# Demonstration path: instantiate the project's model class instead.
model = OnDeviceTrainingModel(img_size=160, num_classes=10)
print("Model created successfully!")

# The selector is constructed from the wrapper's `.model` attribute.
sample_selector = SampleSelector(model.model)
print("Sample selector initialized!")

In [None]:
# Pick the best and worst samples of every N class.
output_dir = './sample_outputs/'
os.makedirs(output_dir, exist_ok=True)

# The per-class work is delegated to SampleSelector.select_all_classes.
target_classes = list(range(9))  # N classes
results = sample_selector.select_all_classes(
    datasets['train_n'],
    target_classes,
    n_samples=20,  # 20 best and worst samples per class
    output_dir=output_dir,
)

print("Sample selection completed!")
# One summary block per class: the selection entries and both archive paths.
for class_idx in results:
    result = results[class_idx]
    print(f"Class {class_idx}: {result['worst_samples']} worst, {result['best_samples']} best samples")
    print(f"  Worst samples saved to: {result['worst_zip']}")
    print(f"  Best samples saved to: {result['best_zip']}")

In [6]:
!pip install tensorflow pillow


  pid, fd = os.forkpty()


Collecting keras<2.16,>=2.15.0 (from tensorflow)
  Downloading keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 3.2.1
    Uninstalling keras-3.2.1:
      Successfully uninstalled keras-3.2.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.[0m[31m
[0mSuccessfully installed keras-2.15.0


In [2]:
# `keras` and `layers` are used by this cell and the cells below but were never
# imported in the notebook; bring them in here so a fresh kernel can run it.
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model

# Load the pre-trained model from its `.keras` archive.
# Note: this rebinds `model`, replacing the OnDeviceTrainingModel created earlier.
model = load_model("/kaggle/input/cifar9_95acc/tensorflow2/cifar9_95acc/1/mnv2_cifar9_160_fbn_4.keras")

NUM_CLASSES = 10
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# create n- and m-datasets
def create_datasets(x, y, n_classes):
    """Split samples into an "N" set (label in ``n_classes``) and an "M" set (the rest).

    The split is computed with a single vectorized membership mask instead of
    a Python loop over every sample.

    Args:
        x: Array-like of samples; the first axis indexes samples.
        y: Array-like of labels with one label per sample, shaped
            ``(num_samples,)`` or ``(num_samples, 1)``.
        n_classes: Iterable of the labels that belong to the N set.

    Returns:
        Tuple ``(x_n, y_n, x_m, y_m)`` of NumPy arrays. Sample order is
        preserved and the labels keep the layout of ``y``. An empty split
        keeps the trailing dimensions of its input (e.g. ``(0, 32, 32, 3)``)
        rather than collapsing to shape ``(0,)``.

    Raises:
        ValueError: If ``y`` holds more than one value per sample.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Mirror zip(): if the lengths differ, only the common prefix is used.
    count = min(len(x), len(y))
    x, y = x[:count], y[:count]

    # Flatten (N, 1) label columns to (N,) for the membership test only; the
    # returned label arrays are sliced from `y` and keep their original layout.
    labels = y.reshape(count) if y.ndim > 1 else y

    # list(...) so that sets and other iterables are treated element-wise.
    in_n = np.isin(labels, list(n_classes))
    return x[in_n], y[in_n], x[~in_n], y[~in_n]
    
# Labels 0-8 go to the N split; every other label goes to the M split.
n_classes = list(range(9))
x_train_n, y_train_n, x_train_m, y_train_m = create_datasets(x_train, y_train, n_classes)
x_test_n, y_test_n, x_test_m, y_test_m = create_datasets(x_test, y_test, n_classes)

# One-hot encode all six label arrays over the full 10-class label space.
# The input tuple is built before any name is rebound, so every conversion
# reads the integer labels.
(
    y_train, y_test,
    y_train_n, y_test_n,
    y_train_m, y_test_m,
) = (
    tf.keras.utils.to_categorical(labels, num_classes=NUM_CLASSES)
    for labels in (y_train, y_test, y_train_n, y_test_n, y_train_m, y_test_m)
)

# preprocessing
IMAGE_SHAPE = (160, 160, 3)

# Deterministic input transform: multiply pixel values by 1/255, then resize
# bilinearly to the height and width given by IMAGE_SHAPE.
_preprocessing_layers = [
    layers.Rescaling(1./255.0, offset=0),
    layers.Resizing(IMAGE_SHAPE[0], IMAGE_SHAPE[1], interpolation='bilinear'),
]
preprocessing = keras.Sequential(_preprocessing_layers, name="preprocessing")

# Random augmentations, applied in the order listed.
_augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(factor=0.15),
    layers.RandomContrast(factor=0.1),
    layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
    layers.RandomZoom(height_factor=0.2, width_factor=0.2),
]
data_augmentation = keras.Sequential(_augmentation_layers, name="data_augmentation")

IMG_SIZE = 160
BATCH_SIZE = 128
BUFFER_SIZE = BATCH_SIZE * 10
AUTO = tf.data.AUTOTUNE


def _train_pipeline(images, labels):
    """Training pipeline: full shuffle, batch, preprocess + augment, prefetch."""
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    # The shuffle buffer spans the whole dataset (its cardinality).
    ds = ds.shuffle(ds.cardinality()).batch(BATCH_SIZE)
    ds = ds.map(lambda x, y: (data_augmentation(preprocessing(x)), y))
    return ds.prefetch(AUTO)


def _eval_pipeline(images, labels):
    """Evaluation pipeline: batch, preprocess only (no shuffle, no augmentation), prefetch."""
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    ds = ds.batch(BATCH_SIZE).map(lambda x, y: (preprocessing(x), y))
    return ds.prefetch(AUTO)


# original
ds_train = _train_pipeline(x_train, y_train)
ds_test = _eval_pipeline(x_test, y_test)

# n-dataset
ds_train_n = _train_pipeline(x_train_n, y_train_n)
ds_test_n = _eval_pipeline(x_test_n, y_test_n)

# m-dataset
ds_train_m = _train_pipeline(x_train_m, y_train_m)
ds_test_m = _eval_pipeline(x_test_m, y_test_m)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


  trackable.load_own_variables(weights_store.get(inner_path))


[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 0us/step


In [3]:
# Re-batch both training sets so each batch holds exactly one sample.
ds_train_m, ds_train_n = (
    dataset.unbatch().batch(1) for dataset in (ds_train_m, ds_train_n)
)

In [10]:
def calculate_bvsb(probs):
    """Return the best-versus-second-best margin for each row of ``probs``.

    Each row is sorted along axis 1 and the second-largest value is
    subtracted from the largest one, giving one margin per row.

    Note: this definition replaces the ``calculate_bvsb`` imported from
    ``src.utils`` in the notebook's first cell.
    """
    ascending = np.sort(probs, axis=1)
    top, runner_up = ascending[:, -1], ascending[:, -2]
    return top - runner_up

def save_images_to_zip(images, zip_path, image_format='JPEG'):
    """Encode ``images`` and store them in a new zip archive at ``zip_path``.

    Args:
        images: Iterable of images. Each item may be an object exposing
            ``.numpy()`` (e.g. a TensorFlow tensor), a NumPy array, or a
            nested list. Leading axes of length 1 (such as a batch axis of a
            single-sample batch) are dropped until the array has at most
            three dimensions.
        zip_path: Path of the archive to create; an existing file is overwritten.
        image_format: PIL format name. Its lower-cased form is used as the
            file extension of every archive member.

    Behaviour notes:
        * Float images whose values all lie in ``[0, 1]`` are multiplied by
          255 before conversion; casting them straight to ``uint8`` would
          yield (almost) black images. Other float images are used as-is.
        * Float values are rounded and clipped to ``0..255`` before the cast,
          so out-of-range values cannot wrap around.
        * Images are encoded in memory, so no temporary files are written to
          the current working directory.

    Archive members are named ``image_<index>.<format>``.
    """
    # Local imports keep the function usable regardless of which notebook
    # cells have been executed before it is called.
    import io
    import zipfile

    from PIL import Image

    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for idx, image in enumerate(images):
            # Tensors are converted via .numpy(); arrays and lists via np.asarray.
            raw = image.numpy() if hasattr(image, 'numpy') else image
            image_array = np.asarray(raw)

            # Strip leading singleton axes, e.g. (1, H, W, C) -> (H, W, C).
            while image_array.ndim > 3 and image_array.shape[0] == 1:
                image_array = image_array[0]

            if np.issubdtype(image_array.dtype, np.floating):
                is_unit_range = (
                    image_array.size > 0
                    and image_array.min() >= 0.0
                    and image_array.max() <= 1.0
                )
                if is_unit_range:
                    image_array = image_array * 255.0
                image_array = np.clip(np.rint(image_array), 0, 255)
            image_array = image_array.astype('uint8')

            # Encode into a buffer and add the bytes directly to the archive.
            buffer = io.BytesIO()
            Image.fromarray(image_array).save(buffer, format=image_format)
            zipf.writestr(f'image_{idx}.{image_format.lower()}', buffer.getvalue())


In [None]:
import numpy as np
import os
import absl.logging
from PIL import Image
import os
import zipfile
# Initialize logging before any significant operations
absl.logging.set_verbosity(absl.logging.INFO)
absl.logging.use_absl_handler()

# Directory that receives the generated archives (Kaggle working directory)
output_dir = '/kaggle/working/'
os.makedirs(output_dir, exist_ok=True)

n_labels = [0, 1, 2, 3, 4, 5, 6, 7, 8]
SAMPLES_PER_ARCHIVE = 50  # number of images in each "worst" / "best" archive

# `tf.keras` is used because `tf` is imported in the notebook's first cell,
# whereas a bare `keras` name is not imported anywhere above.
opt = tf.keras.optimizers.Adam(learning_rate=0.000001, epsilon=0.002, amsgrad=True, weight_decay=1e-5)
loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])

# For every N class: rank its correctly classified samples by BvSB margin and
# archive the lowest-margin ("worst") and highest-margin ("best") images.
for i in n_labels:
    scored = []  # (margin, uint8 image tensor) pairs for class i

    # ds_train_n was re-batched to one sample per batch, so x[0] / y[0] is the sample.
    for x, y in ds_train_n:
        if int(np.argmax(y[0])) != i:
            continue  # sample belongs to another class
        prediction = model.predict(x, verbose=0)
        if int(np.argmax(prediction[0])) != i:
            continue  # only correctly classified samples are ranked
        margin = float(calculate_bvsb(prediction)[0])

        # The preprocessing pipeline rescales pixels by 1/255, so scale back
        # and store the image as uint8. This keeps memory use low and hands
        # save_images_to_zip a tensor it can write without further conversion.
        pixels = tf.clip_by_value(tf.round(x[0] * 255.0), 0.0, 255.0)
        scored.append((margin, tf.cast(pixels, tf.uint8)))

    scored.sort(key=lambda item: item[0])  # ascending margin
    worst = [image for _, image in scored[:SAMPLES_PER_ARCHIVE]]
    best = [image for _, image in scored[-SAMPLES_PER_ARCHIVE:]]

    # Lowest-margin images keep the original archive name; the highest-margin
    # images (previously computed but never written) get their own archive.
    save_images_to_zip(worst, os.path.join(output_dir, f'images_{i}.zip'), image_format='JPEG')
    save_images_to_zip(best, os.path.join(output_dir, f'best_images_{i}.zip'), image_format='JPEG')
    
    

In [5]:
# Peek at a single batch: print the first pixel row of the first image,
# then the whole batch tensor.
for x, _ in ds_train_n.take(1):
    print(x[0][0])
    print(x)

tf.Tensor(
[[0.22092624 0.2245467  0.22960743]
 [0.27307618 0.27605227 0.2818482 ]
 [0.32508153 0.32742912 0.3340498 ]
 [0.369316   0.37114206 0.37862673]
 [0.39494422 0.3964828  0.40478244]
 [0.39810482 0.39962596 0.40860102]
 [0.39989007 0.40140638 0.41102064]
 [0.4022001  0.40369022 0.41383538]
 [0.40493345 0.40637657 0.41693965]
 [0.40619096 0.407546   0.41817078]
 [0.39460206 0.39585945 0.40599236]
 [0.35526696 0.3563878  0.36540788]
 [0.30355173 0.30381417 0.3108064 ]
 [0.24937233 0.24810442 0.25284475]
 [0.19305035 0.1896677  0.19196136]
 [0.13566208 0.12956455 0.12919626]
 [0.08517879 0.07566963 0.07228711]
 [0.05562558 0.0419147  0.03498107]
 [0.05927802 0.04060604 0.02950482]
 [0.06821215 0.04424916 0.02892785]
 [0.07762614 0.04817171 0.02867757]
 [0.08721275 0.05216999 0.02855674]
 [0.09656693 0.05623604 0.02871513]
 [0.10436433 0.05950144 0.02866668]
 [0.10957858 0.06136088 0.02814972]
 [0.11296873 0.06281595 0.0281241 ]
 [0.11599881 0.06416867 0.02814169]
 [0.11871999 0.06