Check the TensorFlow version and GPU support

In [None]:
import tensorflow as tf
# Show which TensorFlow build this kernel is running.
print(tf.__version__)
# List the GPU devices TensorFlow can see; an empty list means none were detected.
print(tf.config.list_physical_devices('GPU'))

Initialize file paths

In [None]:
# Root of the class-per-folder training data used by the later cells.
dataset_path = './dataset'
# Scratch folder that the downloaded archive is extracted into.
temp_dataset_path = './dataset-temp'
# Parent folder for the timestamped model export directories.
exported_model_path = './exported-models'

Check whether the dataset directory already exists; if not, create it

In [None]:
import os

# Create the dataset root on first run; on later runs just report that it is there.
if os.path.exists(dataset_path):
    print("Dataset directory already exists")
else:
    os.makedirs(dataset_path)
    print("Dataset directory created")

Download the Tiny ImageNet dataset if not already available. If the dataset directory is empty, the dataset will be downloaded from [https://cs231n.stanford.edu/tiny-imagenet-200.zip](https://cs231n.stanford.edu/tiny-imagenet-200.zip), extracted to a temporary folder, and prepared for further processing.

In [None]:
import requests
import zipfile
from tqdm import tqdm

def download_file(url, destination):
    """Stream a file over HTTP to disk while showing a tqdm progress bar.

    Args:
        url: Address of the file to download.
        destination: Local path to write to (opened in ``'wb'`` mode, so an
            existing file is overwritten); also used as the progress-bar label.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If ``destination`` cannot be opened for writing.
    """
    # Using the response as a context manager releases the connection even if
    # the download fails part-way. The (connect, read) timeout keeps a stalled
    # server from hanging the notebook indefinitely.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        # Fail here instead of silently saving an HTML error page under the
        # archive's name, which would only surface later as a bad zip file.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))

        with open(destination, 'wb') as file, tqdm(
            desc=destination,
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            for data in response.iter_content(chunk_size=1024):
                size = file.write(data)
                progress_bar.update(size)

# Only fetch Tiny ImageNet when the dataset directory has no entries yet.
if os.listdir(dataset_path):
    print("Dataset directory is not empty, skipping download")
else:
    # Replace with your actual dataset URL
    dataset_url = "https://cs231n.stanford.edu/tiny-imagenet-200.zip"
    zip_path = "./tiny-imagenet-200.zip"

    # Fetch the archive into the working directory.
    print("Downloading dataset...")
    download_file(dataset_url, zip_path)

    # Make sure the extraction target exists, reporting which case applied.
    if os.path.exists(temp_dataset_path):
        print("Temp dataset directory already exists")
    else:
        os.makedirs(temp_dataset_path)
        print("Temp dataset directory created")

    print("Extracting dataset...")
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(temp_dataset_path)

    # The archive is no longer needed once its contents are on disk.
    os.remove(zip_path)
    print("Dataset downloaded and extracted successfully")

Transform the Tiny ImageNet dataset into a hierarchical folder structure supported by MediaPipe Model Maker, where each class is represented by a folder named after its label (e.g., `tarantula`) inside the `./dataset` directory. Images for each class are copied into their respective folders, enabling easy ingestion for training custom image classifiers.

In [None]:
import shutil

# Reorganize the extracted Tiny ImageNet training images into one folder per
# class label under the dataset directory.
tiny_imagenet_path = os.path.join(temp_dataset_path, 'tiny-imagenet-200')  # Path to the extracted Tiny ImageNet directory
mp_train_data_path = dataset_path      # Path where you want to create the MediaPipe format data

# Create the target directory if it doesn't exist
os.makedirs(mp_train_data_path, exist_ok=True)

words_path = os.path.join(tiny_imagenet_path, 'words.txt')
train_path = os.path.join(tiny_imagenet_path, 'train')
wnid_to_words = {}
wnids = []

if not (os.path.isfile(words_path) and os.path.isdir(train_path)):
    # The download cell skips downloading and extracting whenever the dataset
    # directory already has content, so on a rerun there may be no extracted
    # archive. Skip instead of crashing on the missing files.
    print(f"Tiny ImageNet not found at {tiny_imagenet_path}, skipping reorganization")
else:
    # Load the WNID to word mapping (one "<wnid>\t<comma-separated words>" entry per line)
    with open(words_path, 'r', encoding='utf-8') as f:
        for line in f:
            # partition() tolerates lines without a tab (or with extra tabs),
            # which would make a two-value split('\t') unpacking raise.
            wnid, separator, word = line.strip().partition('\t')
            if not separator:
                continue
            # Use the first word and replace spaces with underscores
            wnid_to_words[wnid] = word.split(',')[0].replace(' ', '_')

    wnids = os.listdir(train_path)

    for wnid in wnids:
        # Only entries that are directories containing an 'images' folder are classes.
        image_folder = os.path.join(train_path, wnid, 'images')
        if not os.path.isdir(image_folder):
            continue

        class_label = wnid_to_words.get(wnid)
        if not class_label:
            print(f"Warning: No word found for WNID: {wnid}")
            continue

        target_class_folder = os.path.join(mp_train_data_path, class_label)
        os.makedirs(target_class_folder, exist_ok=True)
        for filename in os.listdir(image_folder):
            if filename.endswith('.JPEG'):
                source_path = os.path.join(image_folder, filename)
                destination_path = os.path.join(target_class_folder, filename)
                shutil.copy2(source_path, destination_path)  # Copy images, preserving metadata
        print(f"Processed class: {class_label} ({wnid})")

    print("Finished reorganizing the training data.")

⚠️ **Warning:** Before proceeding, add the tray dataset to the `dataset` folder. The tray images must be placed in a subfolder named `instrument-tray` (i.e. `dataset/instrument-tray`).

In [None]:
# Stop here unless the tray images have been added next to the other class folders.
if os.path.exists(os.path.join(dataset_path, 'instrument-tray')):
    print("Found instrument-tray folder in the dataset directory")
else:
    raise FileNotFoundError(
        "The 'instrument-tray' folder is missing in the dataset directory. "
        "Please add the tray dataset to the 'instrument-tray' folder before proceeding."
    )

In [None]:
import os
from datetime import datetime

# Tag each run with its start time so every export lands in its own folder.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
export_folder_name = f"./{exported_model_path}/trays_{timestamp}"

[Set retraining options](https://ai.google.dev/edge/mediapipe/solutions/customization/image_classifier#set_retraining_options)

In [None]:
from mediapipe_model_maker import image_classifier
# Retrain on the EfficientNet-Lite2 spec, exporting into this run's export folder.
spec = image_classifier.SupportedModels.EFFICIENTNET_LITE2
hparams = image_classifier.HParams(
    export_dir=export_folder_name,
    batch_size=16,
)
options = image_classifier.ImageClassifierOptions(
    supported_model=spec,
    hparams=hparams,
)

In [None]:
# Make sure this run's export folder exists, reporting which case applied.
if os.path.exists(export_folder_name):
    print(f"Export directory already exists: {export_folder_name}")
    # Optionally, you can add logic to handle existing directories (e.g., rename or skip)
else:
    os.makedirs(export_folder_name, exist_ok=True)
    print(f"Export directory created: {export_folder_name}")

[Create dataset](https://ai.google.dev/edge/mediapipe/solutions/customization/image_classifier#create_dataset)

In [None]:
# Load the images from the class-per-folder tree under dataset_path.
image_path = dataset_path
data = image_classifier.Dataset.from_folder(image_path)
# The first split passes 0.8 to carve off the training set; the remainder is
# split again with 0.5 into test and validation sets.
# NOTE(review): presumably 80/10/10 overall — confirm against the split() docs.
train_data, remaining_data = data.split(0.8)
test_data, validation_data = remaining_data.split(0.5)

[Run retraining](https://ai.google.dev/edge/mediapipe/solutions/customization/image_classifier#run_retraining)

In [None]:
# Retrain the classifier on the training split, passing the validation split
# and the options configured earlier in the notebook.
model = image_classifier.ImageClassifier.create(
    train_data=train_data,
    validation_data=validation_data,
    options=options,
)

[Evaluate performance](https://ai.google.dev/edge/mediapipe/solutions/customization/image_classifier#evaluate_performance)

In [None]:
# Evaluate the retrained model on the test split and print both returned metrics.
loss, acc = model.evaluate(test_data)
print(f'Test loss:{loss}, Test accuracy:{acc}')

[Export model](https://ai.google.dev/edge/mediapipe/solutions/customization/image_classifier#export_model)

In [None]:
from mediapipe_model_maker import quantization
# Build an int8 quantization config from the training split.
# NOTE(review): presumably train_data serves as the representative dataset for calibration — confirm.
quantization_config = quantization.QuantizationConfig.for_int8(train_data)
# Export the model with that quantization config; the file name embeds the chosen model spec's name.
model.export_model(model_name=f"trays_model_{spec.name}.tflite", quantization_config=quantization_config)