In [None]:
import numpy as np
import os

from tflite_model_maker.config import ExportFormat
from tflite_model_maker import model_spec
from tflite_model_maker import object_detector

# The assert stops the notebook early unless the installed TensorFlow version
# string starts with '2'.
import tensorflow as tf
assert tf.__version__.startswith('2')

# Raise both the TensorFlow logger and absl logging to ERROR level.
tf.get_logger().setLevel('ERROR')
from absl import logging
logging.set_verbosity(logging.ERROR)

In [None]:
# Custom dataset location ### CHANGE ME IF NEEDED ###
# The relative paths below are resolved against the directory printed here.
print(os.getcwd())
dataset_path = "../../dataset/name_of_dataset"
images_path = f"{dataset_path}/images"
labels_path = f"{dataset_path}/Annotations"

# Label Map: class id -> class name, handed to the Pascal VOC data loaders
label_map = {1: 'name_of_object'}

# Echo the resolved locations, one per line
print(f"Images: {images_path}", f"Labels: {labels_path}", sep="\n")

In [None]:
import os
import random
import shutil

def split_dataset(images_path, annotations_path, val_split, test_split, out_path):
  """Copies images and their VOC annotations into train/validation/test folders.

  The images are sorted by name and then shuffled with a fixed seed, so the same
  input directory always produces the same split. The first `val_split`
  fraction goes to validation, the next `test_split` fraction to test, and the
  remainder to training. Files are copied, never moved.

  NOTE: files already present under `out_path` are not removed. Re-running
  after the dataset has changed can therefore leave copies from an earlier
  split in the output folders; delete `out_path` first in that case.

  Args:
    images_path: Path to the directory with your images (.jpg / .jpeg, any case).
    annotations_path: Path to a directory with your VOC XML annotation files,
      named like the image but with an `.xml` extension. This may be the same
      path used for images_path.
    val_split: Fraction of data to reserve for validation (float between 0 and 1).
    test_split: Fraction of data to reserve for test (float between 0 and 1).
    out_path: Directory under which `train/`, `validation/` and `test/` are
      created, each with an `images/` and an `annotations/` subfolder.
  Returns:
    The paths for the split images/annotations (train_dir, val_dir, test_dir)
  Raises:
    ValueError: If a fraction is negative or the two fractions sum to more than 1.
    FileNotFoundError: If `images_path` does not exist, or if any image has no
      matching annotation file. Raised before anything is copied.
  """
  if val_split < 0 or test_split < 0 or val_split + test_split > 1:
    raise ValueError(
        'val_split and test_split must be non-negative and sum to at most 1, '
        f'got val_split={val_split}, test_split={test_split}')

  # Collect the images by extension (case-insensitive), in a stable order.
  image_names = sorted(
      name for name in os.listdir(images_path)
      if name.lower().endswith(('.jpg', '.jpeg')))

  # Pair every image with its annotation. Only the extension is swapped, so a
  # "jpg" elsewhere in the file name is left alone.
  pairs = []
  missing = []
  for name in image_names:
    stem, _ = os.path.splitext(name)
    annot_file = os.path.join(annotations_path, stem + '.xml')
    if not os.path.isfile(annot_file):
      missing.append(annot_file)
    pairs.append((os.path.join(images_path, name), annot_file))
  if missing:
    # Fail before copying so a bad dataset never leaves a half-written split.
    raise FileNotFoundError(
        f'{len(missing)} annotation file(s) not found, first: {missing[0]}')

  train_dir = os.path.join(out_path, 'train')
  val_dir = os.path.join(out_path, 'validation')
  test_dir = os.path.join(out_path, 'test')
  for split_dir in (train_dir, val_dir, test_dir):
    for subfolder in ('images', 'annotations'):
      os.makedirs(os.path.join(split_dir, subfolder), exist_ok=True)

  # Shuffle deterministically with a private generator so the global `random`
  # state is left untouched.
  random.Random(42).shuffle(pairs)

  # Get exact number of images for validation and test; the rest is for training
  val_count = int(len(pairs) * val_split)
  test_count = int(len(pairs) * test_split)
  for i, (image_file, annot_file) in enumerate(pairs):
    if i < val_count:
      target_dir = val_dir
    elif i < val_count + test_count:
      target_dir = test_dir
    else:
      target_dir = train_dir
    shutil.copy(image_file, os.path.join(target_dir, 'images'))
    shutil.copy(annot_file, os.path.join(target_dir, 'annotations'))
  return (train_dir, val_dir, test_dir)

In [None]:
def clean_xml_declaration(annotations_dir):
    """Strips a leading `<?xml ... ?>` declaration from every .xml file in a directory.

    Files are edited in place. A file is rewritten only when a complete
    declaration (opening `<?xml` and closing `?>`) is found at the very start;
    the declaration and any whitespace following it are removed. Files without
    a declaration, files with an unterminated declaration, and files that do
    not end in `.xml` are left untouched. Subdirectories are not visited.

    Args:
        annotations_dir: Directory containing the XML annotation files.
    """
    for filename in os.listdir(annotations_dir):
        if not filename.endswith('.xml'):
            continue
        path = os.path.join(annotations_dir, filename)
        # Work on raw bytes so the file's encoding never has to be guessed.
        with open(path, 'rb') as f:
            content = f.read()
        if not content.startswith(b'<?xml'):
            continue
        decl_end = content.find(b'?>')
        if decl_end == -1:
            # Unterminated declaration: slicing here would chop real content,
            # so leave the file exactly as it is.
            continue
        with open(path, 'wb') as f:
            f.write(content[decl_end + 2:].lstrip())

In [None]:
# Copy the dataset into train/validation/test folders under 'split-dataset'
train_dir, val_dir, test_dir = split_dataset(
    images_path,
    labels_path,
    val_split=0.2,
    test_split=0.2,
    out_path='split-dataset',
)

# Clean data coming in from Label Studio: drop the XML declaration in every split
for split_dir in (train_dir, val_dir, test_dir):
    clean_xml_declaration(os.path.join(split_dir, 'annotations'))

# Build one Pascal VOC data loader per split, in train / validation / test order
train_data, validation_data, test_data = [
    object_detector.DataLoader.from_pascal_voc(
        os.path.join(split_dir, 'images'),
        os.path.join(split_dir, 'annotations'),
        label_map=label_map,
    )
    for split_dir in (train_dir, val_dir, test_dir)
]

In [None]:
# Select a model: this spec object is passed as `model_spec` to
# object_detector.create() in the training cell.
spec = object_detector.EfficientDetLite0Spec()

In [None]:
# Train the model
# All training settings are gathered in one place so they are easy to tweak.
training_options = dict(
    train_data=train_data,
    model_spec=spec,
    validation_data=validation_data,
    epochs=50,
    batch_size=10,
    train_whole_model=True,
)
model = object_detector.create(**training_options)

In [None]:
# Evaluate the trained model on the test split; `metrics` is read by the
# plotting cell that follows.
metrics = model.evaluate(test_data)

In [None]:
import matplotlib.pyplot as plt

# Plot every metric except the entries whose key starts with 'AP_/'
labels = [name for name in metrics if not name.startswith('AP_/')]
values = [metrics[name] * 100 for name in labels]  # Convert to %

# Bar chart of the selected metrics on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(labels, values)
ax.set_ylabel('Metric (%)')
ax.set_title('Model Evaluation Metrics (COCO)')
plt.setp(ax.get_xticklabels(), rotation=45)  # slanted tick labels
ax.grid(axis='y')
fig.tight_layout()
plt.show()

In [None]:
# Output file names
TFLITE_FILENAME = 'name_of_model.tflite'  # Rename output here!
LABELS_FILENAME = 'objects.txt'

# Export the model and its label file into the current directory
model.export(
    export_dir='.',
    tflite_filename=TFLITE_FILENAME,
    label_filename=LABELS_FILENAME,
    export_format=[ExportFormat.TFLITE, ExportFormat.LABEL],
)

In [None]:
# Evaluate tflite file using test data
# TFLITE_FILENAME is a bare file name, so it is resolved against the current
# working directory, which is where the export cell wrote it (export_dir='.').
model.evaluate_tflite(TFLITE_FILENAME, test_data)