In [None]:
# It opens a file picker to upload a dataset ZIP.
from google.colab import files
import zipfile, os, io, sys

print("Pick your dataset ZIP (should unzip to a folder with train/val/test subfolders).")
uploaded = files.upload()

# Use the first uploaded file whose name ends in ".zip" (case-insensitive).
zip_name = next(
    (candidate for candidate in uploaded.keys() if candidate.lower().endswith(".zip")),
    None,
)
if zip_name is None:
    raise SystemExit("No .zip uploaded. Please upload a dataset zip")

print("Uploaded:", zip_name)

# Unpack the archive into a fixed location that the training cell reads from.
target_dir = "/content/dataset"
os.makedirs(target_dir, exist_ok=True)
with zipfile.ZipFile(zip_name, 'r') as archive:
    archive.extractall(target_dir)

print("Extracted to", target_dir)
# Show the extracted structure: directories only, down to MAX_TREE_DEPTH levels below target_dir.
# os.walk visits every subdirectory as its own `root`, so printing `root` alone lists each
# directory exactly once (additionally printing `dirs` listed every folder twice).
MAX_TREE_DEPTH = 2
# The third tuple element is named `_filenames` (not `files`) so the imported
# google.colab `files` module is not overwritten by a list of file names.
for root, dirs, _filenames in os.walk(target_dir):
    rel_path = os.path.relpath(root, target_dir)
    level = 0 if rel_path == os.curdir else rel_path.count(os.sep) + 1
    print(f"{'  ' * level}{os.path.basename(root)}/")
    if level >= MAX_TREE_DEPTH:
        # Prune in place so os.walk does not descend (and print) any deeper.
        dirs[:] = []
    else:
        # Sort in place so the listing order is deterministic.
        dirs.sort()


Pick your dataset ZIP (should unzip to a folder with train/val/test subfolders).


Saving dataset.zip to dataset (1).zip
Uploaded: dataset (1).zip
Extracted to /content/dataset
dataset/
  train/
  val/
  test/
  train/
    phone/
    bottle/
    id_card/
    backpack/
    keys/
    laptop/
    wallet/
    phone/
    bottle/
    id_card/
    backpack/
    keys/
    laptop/
    wallet/
  val/
    phone/
    bottle/
    id_card/
    backpack/
    keys/
    laptop/
    wallet/
    phone/
    bottle/
    id_card/
    backpack/
    keys/
    laptop/
    wallet/
  test/
    phone/
    bottle/
    id_card/
    backpack/
    keys/
    laptop/
    wallet/
    phone/
    bottle/
    id_card/
    backpack/
    keys/
    laptop/
    wallet/


In [None]:

# --- Configuration -------------------------------------------------------------------------
# Root folder holding the train/ (required) and val/, test/ (optional) class-folder splits.
DATA_DIR = "/content/dataset"
# Destination for the checkpoint, the exported model, embeddings.csv and model.tflite.
OUTPUT_DIR = "/content/saved_model"
# Images are resized to IMG_SIZE x IMG_SIZE for training, evaluation and calibration.
IMG_SIZE = 224
# Default batch size of the tf.data pipelines built by make_datasets.
BATCH_SIZE = 32
# Default (maximum) number of training epochs; early stopping may end training sooner.
EPOCHS = 12
# Number of units in the Dense layer of the embedding head.
EMBEDDING_DIM = 128
# When True, the TFLite converter is configured with Optimize.DEFAULT and a representative dataset.
QUANTIZE_TFLITE = True
# Upper bound on the number of images collected for the representative (calibration) dataset.
REP_CALIB_SAMPLES = 200

import os, sys, math, random
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import classification_report
import pandas as pd
import cv2
import matplotlib.pyplot as plt

print("TensorFlow version:", tf.__version__)

# Dataset loader (keeps the raw datasets around so class_names can be read from them).
# AUTOTUNE is passed to prefetch() so tf.data picks the buffer size itself.
AUTOTUNE = tf.data.AUTOTUNE

def make_datasets(data_dir, img_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE):
    """Build the train/val/test input pipelines from class-folder directories.

    Args:
        data_dir: Folder expected to contain a `train` subfolder and, optionally,
            `val` and `test` subfolders.
        img_size: (height, width) passed as `image_size` to the directory loader.
        batch_size: Batch size of every returned dataset.

    Returns:
        A tuple `(train_ds, val_ds, test_ds, class_names)`. `val_ds` / `test_ds` are None
        when their folder does not exist. `class_names` comes from the train split.
        Images are mapped through `x / 127.5 - 1` and each pipeline is prefetched.

    Raises:
        FileNotFoundError: If `data_dir/train` does not exist.
    """
    train_dir = os.path.join(data_dir, 'train')
    if not os.path.exists(train_dir):
        raise FileNotFoundError(f"train directory not found: {train_dir}")

    def load_split(split_name, shuffle):
        # A split whose folder is missing is reported as None.
        split_dir = os.path.join(data_dir, split_name)
        if not os.path.exists(split_dir):
            return None
        return tf.keras.preprocessing.image_dataset_from_directory(
            split_dir,
            image_size=img_size,
            batch_size=batch_size,
            label_mode='categorical',
            shuffle=shuffle,
        )

    # Only the training split is shuffled; val/test keep directory order.
    raw_train = load_split('train', shuffle=True)
    raw_val = load_split('val', shuffle=False)
    raw_test = load_split('test', shuffle=False)

    # class_names is only available on the raw dataset, before map() is applied.
    class_names = raw_train.class_names
    rescale = layers.Rescaling(1.0/127.5, offset=-1.0)

    def finalize(raw_ds):
        # Rescale the images, leave the labels untouched, then prefetch.
        if raw_ds is None:
            return None
        return raw_ds.map(lambda x, y: (rescale(x), y)).prefetch(AUTOTUNE)

    return finalize(raw_train), finalize(raw_val), finalize(raw_test), class_names

# Augmentation (applied in model): this Sequential is created once at module level and is
# called on the model input inside build_full_model, so it becomes part of the model graph.
aug = tf.keras.Sequential([
    layers.RandomFlip('horizontal'),        # random left/right mirroring
    layers.RandomRotation(0.08),            # random rotation, factor 0.08
    layers.RandomZoom(0.08),                # random zoom, factor 0.08
    layers.RandomTranslation(0.05, 0.05),   # random shift, height/width factors 0.05
], name='augmentation')

# Build full model (classifier + embedding head)
def build_full_model(num_classes, img_size=(IMG_SIZE, IMG_SIZE, 3), embedding_dim=EMBEDDING_DIM, trainable_backbone=False):
    """Assemble the two-output model: class probabilities plus an L2-normalised embedding.

    Args:
        num_classes: Number of units in the softmax classifier layer.
        img_size: Input shape (height, width, channels) of the model and the backbone.
        embedding_dim: Number of units in the Dense layer of the embedding branch.
        trainable_backbone: Value assigned to the MobileNetV2 backbone's `trainable` flag.

    Returns:
        A Keras Model whose outputs are `[classifier_output, embedding_output]`.

    NOTE(review): both branches read the pooled backbone features directly; the classifier
    does not consume the embedding. Training only on the classifier output therefore gives
    the embedding branch's Dense/BatchNormalization layers no gradient -- confirm whether
    that is intended.
    """
    image_in = layers.Input(shape=img_size)
    augmented = aug(image_in)

    # ImageNet-pretrained MobileNetV2 without its top, with global average pooling.
    backbone = MobileNetV2(include_top=False, input_shape=img_size, pooling='avg', weights='imagenet')
    backbone.trainable = trainable_backbone
    features = backbone(augmented)

    # Embedding branch: Dense -> BatchNorm -> ReLU -> L2 normalisation along axis 1.
    embedding = layers.Dense(embedding_dim)(features)
    embedding = layers.BatchNormalization()(embedding)
    embedding = layers.Activation('relu')(embedding)
    embedding = layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1), name='embedding')(embedding)

    # Classifier branch: hidden Dense + dropout, then softmax over the classes.
    hidden = layers.Dense(256, activation='relu')(features)
    hidden = layers.Dropout(0.4)(hidden)
    class_probs = layers.Dense(num_classes, activation='softmax', name='classifier')(hidden)

    return models.Model(inputs=image_in, outputs=[class_probs, embedding])

# File extensions treated as images when walking the test / calibration folders.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _export_saved_model(model, export_dir):
    """Write `model` to `export_dir` as a TensorFlow SavedModel.

    Keras 3 rejects extension-less paths in `Model.save()` (ValueError asking for `.keras` /
    `.h5`) and points to `Model.export()` for SavedModel output, so `export()` is used
    whenever the model provides it. Older Keras versions without `export()` fall back to
    `save()`.
    """
    if hasattr(model, 'export'):
        model.export(export_dir)
    else:
        model.save(export_dir)


def _evaluate_and_save_embeddings(full_model, data_dir, class_names, output_dir):
    """Predict on every image under `data_dir/test`, print a report and write embeddings.csv.

    The test folder is walked manually (instead of using the tf.data pipeline) so that each
    prediction can be stored next to its file path.
    """
    image_paths, images, labels = [], [], []
    for class_index, class_name in enumerate(class_names):
        folder = os.path.join(data_dir, 'test', class_name)
        if not os.path.isdir(folder):
            # A class without test images must not abort the whole evaluation.
            print("Warning: no test folder for class:", class_name)
            continue
        for fname in sorted(os.listdir(folder)):
            if not fname.lower().endswith(_IMAGE_EXTENSIONS):
                continue
            path = os.path.join(folder, fname)
            # Bilinear resizing matches image_dataset_from_directory's default, so test
            # images are preprocessed like the training images (load_img defaults to nearest).
            img = tf.keras.preprocessing.image.load_img(
                path, target_size=(IMG_SIZE, IMG_SIZE), interpolation='bilinear')
            arr = tf.keras.preprocessing.image.img_to_array(img)
            images.append(arr / 127.5 - 1.0)  # same scaling as make_datasets
            image_paths.append(path)
            labels.append(class_index)

    if not images:
        print("No images found in test set to evaluate.")
        return

    batch = np.stack(images, axis=0)
    outputs = full_model.predict(batch, batch_size=BATCH_SIZE)
    pred_labels = np.argmax(outputs[0], axis=1)   # outputs[0]: classifier probabilities
    embeddings = outputs[1]                       # outputs[1]: embedding vectors

    print("Classification report:")
    # Passing `labels` explicitly keeps target_names aligned even when a class has no test
    # samples or is never predicted; zero_division=0 reports 0 for such classes.
    print(classification_report(
        labels, pred_labels,
        labels=list(range(len(class_names))),
        target_names=class_names,
        zero_division=0,
    ))

    meta_df = pd.DataFrame({'image_path': image_paths, 'true_label': labels, 'pred_label': pred_labels})
    out_df = pd.concat([meta_df, pd.DataFrame(embeddings)], axis=1)
    emb_csv = os.path.join(output_dir, 'embeddings.csv')
    out_df.to_csv(emb_csv, index=False)
    print("Saved embeddings CSV to:", emb_csv)


def _collect_calibration_images(data_dir, max_samples=REP_CALIB_SAMPLES):
    """Load up to `max_samples` preprocessed images for TFLite calibration.

    Images come from `data_dir/val` when it exists, otherwise from `data_dir/train`. The
    budget is split evenly across class folders so calibration is not dominated by whichever
    classes happen to be listed first. Unreadable files are skipped.
    """
    val_dir = os.path.join(data_dir, 'val')
    calib_folder = val_dir if os.path.exists(val_dir) else os.path.join(data_dir, 'train')
    class_dirs = [
        os.path.join(calib_folder, name) for name in sorted(os.listdir(calib_folder))
        if os.path.isdir(os.path.join(calib_folder, name))
    ]
    if not class_dirs or max_samples <= 0:
        return []

    per_class = math.ceil(max_samples / len(class_dirs))
    rep_images = []
    for class_dir in class_dirs:
        taken = 0
        for fname in sorted(os.listdir(class_dir)):
            if taken >= per_class or len(rep_images) >= max_samples:
                break
            if not fname.lower().endswith(_IMAGE_EXTENSIONS):
                continue
            img = cv2.imread(os.path.join(class_dir, fname))
            if img is None:  # cv2 returns None for files it cannot decode
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)).astype('float32')
            rep_images.append(img / 127.5 - 1.0)  # same scaling as make_datasets
            taken += 1
    return rep_images


def _convert_to_tflite(saved_model_dir, tflite_out, data_dir):
    """Convert the SavedModel to TFLite; return `tflite_out` on success, else None."""
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
    if QUANTIZE_TFLITE:
        print("Preparing representative dataset for quantization...")
        rep_images = _collect_calibration_images(data_dir)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        if rep_images:
            def rep_gen():
                # One sample per step, with a leading batch dimension of 1.
                for img in rep_images:
                    yield [img[np.newaxis, ...]]
            converter.representative_dataset = rep_gen
        else:
            print("Warning: no representative images found; quantization may be suboptimal.")

    # Conversion is best-effort: a failure is reported but does not discard the other artifacts.
    try:
        tflite_model = converter.convert()
        with open(tflite_out, 'wb') as f:
            f.write(tflite_model)
    except Exception as e:
        print("TFLite conversion failed:", e)
        return None
    print("Saved TFLite model to:", tflite_out)
    return tflite_out


def train_and_export(data_dir=DATA_DIR, output_dir=OUTPUT_DIR, epochs=EPOCHS):
    """Train the classifier, export the two-output model, evaluate it and convert to TFLite.

    Args:
        data_dir: Dataset root passed to `make_datasets` (needs `train/`; `val/` and `test/`
            are optional).
        output_dir: Folder that receives `best.h5`, `final_saved_model/`, `embeddings.csv`
            and `model.tflite`. Created if missing.
        epochs: Maximum number of training epochs.

    Returns:
        dict with keys `"saved_model"` (export directory) and `"tflite"` (path of the
        converted model, or None when the conversion in this run failed).

    NOTE(review): training uses only the classifier output. In build_full_model the
    embedding branch is not on the classifier's path, so its Dense/BatchNormalization
    weights are not updated by this training loop -- confirm that is intended.
    """
    os.makedirs(output_dir, exist_ok=True)
    train_ds, val_ds, test_ds, class_names = make_datasets(data_dir)
    num_classes = len(class_names)
    print("Classes:", class_names)

    # Full model has two outputs; the training model exposes ONLY the classifier output so
    # that y_true matches y_pred. Both models share the same layer instances.
    full_model = build_full_model(num_classes, trainable_backbone=False)
    training_model = models.Model(inputs=full_model.input, outputs=full_model.outputs[0])
    training_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                           loss=tf.keras.losses.CategoricalCrossentropy(),
                           metrics=['accuracy'])

    # Without a validation split there are no val_* metrics, so monitor the training ones.
    if val_ds is not None:
        acc_metric, loss_metric = 'val_accuracy', 'val_loss'
    else:
        print("No validation dataset present; callbacks monitor training metrics instead.")
        acc_metric, loss_metric = 'accuracy', 'loss'

    ckpt_path = os.path.join(output_dir, 'best.h5')
    callbacks = [
        ModelCheckpoint(ckpt_path, monitor=acc_metric, save_best_only=True, save_weights_only=False),
        ReduceLROnPlateau(monitor=loss_metric, factor=0.5, patience=3, min_lr=1e-7),
        EarlyStopping(monitor=acc_metric, patience=7, restore_best_weights=True),
    ]

    print("Starting training (training_model -> classifier only)...")
    training_model.fit(train_ds, validation_data=val_ds, epochs=epochs, callbacks=callbacks)

    # Export the full model (augmentation + backbone + classifier + embedding head).
    saved_model_dir = os.path.join(output_dir, 'final_saved_model')
    _export_saved_model(full_model, saved_model_dir)
    print("Saved full SavedModel to:", saved_model_dir)

    if test_ds is not None:
        _evaluate_and_save_embeddings(full_model, data_dir, class_names, output_dir)
    else:
        print("No test dataset present; skipping evaluation/embeddings export.")

    tflite_out = os.path.join(output_dir, 'model.tflite')
    tflite_path = _convert_to_tflite(saved_model_dir, tflite_out, data_dir)

    return {"saved_model": saved_model_dir, "tflite": tflite_path}

# Run
if __name__ == "__main__":
    # Train with the configured constants, then tell the user where the artifacts are.
    print("DATA_DIR =", DATA_DIR)
    results = train_and_export(DATA_DIR, OUTPUT_DIR, epochs=EPOCHS)
    print("Done. Artifacts:", results)
    # Build the hint from OUTPUT_DIR so it stays correct if the output folder is changed.
    print(
        f"Download artifacts from {OUTPUT_DIR} in Colab Files sidebar or with: "
        f"!zip -r {OUTPUT_DIR}.zip {OUTPUT_DIR}"
    )


TensorFlow version: 2.19.0
DATA_DIR = /content/dataset
Found 350 files belonging to 7 classes.
Found 140 files belonging to 7 classes.
Found 140 files belonging to 7 classes.
Classes: ['backpack', 'bottle', 'id_card', 'keys', 'laptop', 'phone', 'wallet']
Starting training (training_model -> classifier only)...
Epoch 1/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.1195 - loss: 2.3210



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 337ms/step - accuracy: 0.1193 - loss: 2.3187 - val_accuracy: 0.1286 - val_loss: 2.0654 - learning_rate: 1.0000e-04
Epoch 2/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.1546 - loss: 2.1664



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step - accuracy: 0.1536 - loss: 2.1655 - val_accuracy: 0.1500 - val_loss: 2.0671 - learning_rate: 1.0000e-04
Epoch 3/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.1825 - loss: 2.1392



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 123ms/step - accuracy: 0.1809 - loss: 2.1362 - val_accuracy: 0.1571 - val_loss: 2.0344 - learning_rate: 1.0000e-04
Epoch 4/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step - accuracy: 0.1963 - loss: 2.0348 - val_accuracy: 0.1357 - val_loss: 2.0276 - learning_rate: 1.0000e-04
Epoch 5/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step - accuracy: 0.1209 - loss: 2.1523 - val_accuracy: 0.1071 - val_loss: 2.0193 - learning_rate: 1.0000e-04
Epoch 6/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step - accuracy: 0.1739 - loss: 1.9563 - val_accuracy: 0.1071 - val_loss: 2.0223 - learning_rate: 1.0000e-04
Epoch 7/12
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 95ms/step - accuracy: 0.1725 - loss: 1.9942 - val_accuracy: 0.1214 - val_loss: 2.0215 - learning_rate: 1.

ValueError: Invalid filepath extension for saving. Please add either a `.keras` extension for the native Keras format (recommended) or a `.h5` extension. Use `model.export(filepath)` if you want to export a SavedModel for use with TFLite/TFServing/etc. Received: filepath=/content/saved_model/final_saved_model.