<a href="https://colab.research.google.com/github/kanbanyte/sepb/blob/collab/collab/object-detection-training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Check for NVIDIA GPU
# Runs the `nvidia-smi` command-line tool in the runtime's shell and shows its
# output in the cell, so the GPU state can be checked before training starts.
!nvidia-smi

In [None]:
# Set up workspace directory

import os

# Capture the directory the notebook is started from; the later cells build
# their dataset, clone and output paths on top of this value.
ROOT_DIR = os.getcwd()
print(f"Root directory is: {ROOT_DIR}")

In [None]:
# Install Roboflow and Dataset (with private API key)
print("Installing Roboflow package")
%pip install roboflow --quiet

from roboflow import Roboflow
import getpass

api_key = getpass.getpass("Enter your Roboflow project API key: ")
rf = Roboflow(api_key=api_key)

# Ensure that the following matches the intended dataset source:
# - Worspace name
# - Project name
# - Dataset version
# - Format
print(f"Downloading dataset from Roboflow")
project = rf.workspace("sepb").project("chips-cropped")
dataset = project.version(10).download("yolov5")

print("Fixing incorrect paths in 'data.yaml'")
import yaml
dataset_yaml_path = os.path.join(dataset.location, "data.yaml")
with open(dataset_yaml_path) as data_yaml:
    doc = yaml.safe_load(data_yaml)
doc['train'] = "../train/images"
doc['val'] = "../valid/images"
with open(dataset_yaml_path, 'w') as data_yaml:
    yaml.dump(doc, data_yaml)

print("Finished fixing incorrect paths in 'data.yaml'")


# Download YOLO model and start training.


In [None]:
# Install dependencies from Ultralytics in quiet mode

print("Installing Ultralytics dependencies")
from IPython import display
%pip install ultralytics==8.0.159 --quiet
import ultralytics
ultralytics.checks()

# Install YOLO model
from ultralytics import YOLO

print("Cloning YOLOv5 from GitHub")
!git clone https://github.com/ultralytics/yolov5 --quiet
%pip install -r {ROOT_DIR}/yolov5/requirements.txt --quiet

In [None]:
# Configure the model training parameters

# TODO(HUY): This code is experimental, needs more understanding
def freeze_backbone(trainer, frozen_layer_count=10):
    """Freeze the leading layers of the model held by ``trainer``.

    Intended to be registered as a training callback, which is why it takes
    the trainer as its only required argument. Every parameter whose name
    contains ``model.<i>.`` for some ``i`` in ``range(frozen_layer_count)``
    gets ``requires_grad = False``; every other parameter gets
    ``requires_grad = True``.

    Args:
        trainer: Object exposing ``model.named_parameters()``, which yields
            ``(name, parameter)`` pairs.
        frozen_layer_count: How many leading layer indices to freeze.
            Defaults to 10, the value that used to be hard-coded. Use
            ``functools.partial`` to pass another value when registering
            the function as a callback.
    """
    model = trainer.model
    print(f"Freezing {frozen_layer_count} layers")
    # The trailing dot keeps 'model.1.' from matching 'model.10.' and so on.
    frozen_prefixes = tuple(f'model.{x}.' for x in range(frozen_layer_count))
    for name, param in model.named_parameters():
        is_frozen = any(prefix in name for prefix in frozen_prefixes)
        if is_frozen:
            print(f'Freezing {name}')
        # Set the flag on every parameter so the final state never depends
        # on what it was before this callback ran.
        param.requires_grad = not is_frozen
    print(f"{frozen_layer_count} layers are freezed.")


%cd {ROOT_DIR}

epochs_input = input(f"Enter the number of epochs (default: 30): ")
epochs = int(epochs_input) if epochs_input else 30

image_size_input = input(f"Enter the image size (default: 1000): ")
image_size = int(image_size_input) if image_size_input else 1000

model_choice = input("Enter the model choice (0 for small, 1 for medium, 2 for large, 3 for extra large): ")
if model_choice == "0":
    model_name = "yolov5su.pt"
elif model_choice == "1":
    model_name = "yolov5m.pt"
elif model_choice == "2":
    model_name = "yolov5l.pt"
elif model_choice == "3":
    model_name = "yolov5x.pt"
else:
    print("Invalid model choice. Please choose 0-3.")
    exit()

print(f"Training model {model_name} with {epochs} epochs")

# YOLO calculates the final learning rate as final_lr_factor * initial learning rate
final_lr_factor = 0.01

model = YOLO(model_name)
# model.add_callback("on_train_start", freeze_backbone)
model.train(data=dataset_yaml_path, epochs=epochs, imgsz=image_size, cache=True, lrf=final_lr_factor)

# Setup result output paths for subsequent cells
train_folders = [folder for folder in os.listdir(f'{ROOT_DIR}/runs/detect') if folder.startswith("train") and not folder.endswith(".zip")]

# Extract the indices from folder names and find the highest index
# Retraining will output results in new folders with the name format: "train<index>"
indices = [int(folder[len("train"):] if folder[len("train"):] else 0) for folder in train_folders]
highest_index = "" if max(indices) == 0 else max(indices)
result_folder_path = f'{ROOT_DIR}/runs/detect/train{highest_index}'


In [None]:
# Display the result summary

from IPython.display import Image, display

# Both images are read from the folder of the most recent training run.
results_file_path = f'{result_folder_path}/results.png'
confusion_mat_file_path = f'{result_folder_path}/confusion_matrix.png'

# (description used in the log line, image file, display width in pixels)
result_images = (
    ("result summary", results_file_path, 2000),
    ("confusion matrix", confusion_mat_file_path, 1600),
)
for image_description, image_path, image_width in result_images:
    print(f"Displaying {image_description} from {image_path}")
    display(Image(filename=image_path, width=image_width))

In [None]:
# Downloading models files and results in a ZIP folder
from datetime import datetime, timedelta
import locale
# Fix Google Collab occasional encoding error when using shell commands
locale.getpreferredencoding = lambda: "UTF-8"

# Format the zip file name with local Australian time
current_utc_time = datetime.utcnow()
dst_change = datetime(2023, 10, 1, 2, 0, 0)
time_difference = timedelta(hours=10) if current_utc_time < dst_change else timedelta(hours=11)
current_datetime = (current_utc_time + time_difference).strftime("%Y-%m-%dT%H-%M-%S")
zip_folder_path = f'{ROOT_DIR}/runs/detect/train-{current_datetime}'

# Avoid empty parent folders in zip file by cd-ing to the folder before zipping it
%cd {result_folder_path}
!zip -r {zip_folder_path} *

from google.colab import files

zip_file_size_mb = os.path.getsize(f'{zip_folder_path}.zip') / (1024 * 1024)
download = input(f"Press 'y' to download result zip folder ({zip_file_size_mb:.2f} MB) '{zip_folder_path}': ")
if download == 'y':
    files.download(f"{zip_folder_path}.zip")
