In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from
# /content/drive/MyDrive/... (Colab-only import).
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
import os
# Sanity check that the mounted dataset is visible: print how many entries the
# training image folder contains (os.listdir counts every entry, not only images).
train_images_folder = "/content/drive/MyDrive/Colab Notebooks/FrogImages/Train/images"
print(len(os.listdir(train_images_folder)))

In [None]:
#Install ultralytics library for YOLO
!pip install --upgrade ultralytics -qq

In [None]:
# Confirm the ultralytics installation succeeded: the import must work, and the
# printed version records which release this run used.
import ultralytics
print(ultralytics.__version__)

In [None]:
import os
import re
import glob
import random
import yaml

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
import shutil

import IPython.display as display
from PIL import Image
import cv2

from ultralytics import YOLO

In [None]:
"""Create CFG class to configure training"""

class CFG:
    """Configuration constants for the frog-detection YOLO training run.

    Every setting is a class attribute and is read elsewhere as ``CFG.<NAME>``.
    """

    # When True, FRACTION and EPOCHS below shrink for a quick smoke run.
    DEBUG = False
    FRACTION = 0.05 if DEBUG else 1.0  # forwarded to model.train(fraction=...)
    SEED = 88  # used for training and for the random image preview

    # Detection class info (written into data.yaml as 'names' / 'nc')
    CLASSES = ["Frog"]
    NUM_CLASSES = len(CLASSES)

    # Training
    EPOCHS = 3 if DEBUG else 50
    BATCH_SIZE = 32

    BASE_MODEL = "yolov8x"
    BASE_MODEL_WEIGHTS = f'{BASE_MODEL}.pt'
    # Combined with BASE_MODEL to form the run name, and therefore the
    # runs/detect/<name> folder that results.csv is later read from.
    # NOTE(review): the "ppe_css" prefix looks unrelated to frogs - confirm it is intended.
    EXP_NAME = f"ppe_css_{EPOCHS}_epochs"

    OPTIMIZER = "Adam"
    LR = 1e-3  # forwarded to model.train as lr0
    LR_FACTOR = 0.01  # forwarded to model.train as lrf
    WEIGHT_DECAY = 5e-4
    DROPOUT = 0.3
    PATIENCE = 15
    PROFILE = False
    LABEL_SMOOTHING = 0.0

    # Paths: the dataset root (contains Train/ and Val/) and where data.yaml and
    # the training log are written. Both currently point at the same Drive folder.
    CUSTOM_DATASET_DIR =  "/content/drive/MyDrive/Colab Notebooks/FrogImages"
    OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/FrogImages"

In [None]:
# Build the dataset description that YOLO training reads and write it to data.yaml
yaml_path = os.path.join(CFG.OUTPUT_DIR, "data.yaml")

dict_file = dict(
    train=os.path.join(CFG.CUSTOM_DATASET_DIR, 'Train'),
    val=os.path.join(CFG.CUSTOM_DATASET_DIR, 'Val'),
    nc=CFG.NUM_CLASSES,
    names=CFG.CLASSES,
)

# Echo the destination so the written file is easy to locate on Drive
print(yaml_path)
with open(yaml_path, "w+") as file:
    yaml.dump(dict_file, file)

In [None]:
#Read YAML file
def read_yaml(file_path=None):
    """Load a YAML file and return its parsed content.

    Args:
        file_path: Path of the YAML file to read. When omitted, the
            ``data.yaml`` inside ``CFG.OUTPUT_DIR`` is read. (The previous
            default was the dataset *directory*, which can never be opened
            as a file.)

    Returns:
        The parsed YAML content, or ``None`` when the file is not valid YAML
        (the parse error is printed).

    Raises:
        OSError: if the file cannot be opened.
    """
    if file_path is None:
        file_path = os.path.join(CFG.OUTPUT_DIR, "data.yaml")

    with open(file_path, "r") as file:
        try:
            return yaml.safe_load(file)
        except yaml.YAMLError as e:
            print("Error reading YAML:", e)
            return None

def print_yaml(data):
    """Print ``data`` as block-style (one key per line) YAML.

    Args:
        data: Any object ``yaml.dump`` can serialise, e.g. the dict returned
            by ``read_yaml``.
    """
    # default_flow_style (not default_style, which selects scalar quoting)
    # is the option that forces the expanded block layout.
    formatted_yaml = yaml.dump(data, default_flow_style=False)
    print(formatted_yaml)

# Read back the data.yaml stored in OUTPUT_DIR and print it as a check.
yaml_path = os.path.join(CFG.OUTPUT_DIR, "data.yaml")
yaml_data = read_yaml(yaml_path)
# read_yaml returns None on a YAML parse error; an empty document is skipped here too.
if yaml_data:
    print_yaml(yaml_data)

In [None]:
def display_image(image, print_info = True, hide_axis = False):
    """Show an image with matplotlib, given a file path or an OpenCV-style array.

    Args:
        image: Either a path (``str`` or ``os.PathLike``) to an image file, or
            a NumPy array. Arrays with 3 channels are treated as BGR and
            arrays with 4 channels as BGRA; both are converted to RGB(A)
            channel order. 2-D (single-channel) arrays are shown unchanged.
        print_info: When True, print the PIL image type and the array shape.
        hide_axis: When True, hide the plot axes.

    Raises:
        ValueError: if ``image`` is neither a path nor a NumPy array.
    """
    if isinstance(image, (str, os.PathLike)):  # file path
        img = Image.open(image)
    elif isinstance(image, np.ndarray):  # in-memory array
        if image.ndim == 3 and image.shape[2] == 3:
            image = image[..., ::-1]  # BGR to RGB
        elif image.ndim == 3 and image.shape[2] == 4:
            image = image[..., [2, 1, 0, 3]]  # BGRA to RGBA, alpha stays last
        # 2-D arrays have no channel axis: reversing the last axis would
        # mirror the picture, so they are passed through untouched.
        img = Image.fromarray(image)
    else:
        raise ValueError("Unsupported image format")

    # cmap only affects single-channel images; RGB(A) data ignores it.
    plt.imshow(img, cmap='gray')

    if print_info:
        print('Type: ', type(img), '\n')
        print('Shape: ', np.array(img).shape, '\n')

    if hide_axis:
        plt.axis('off')

    plt.show()

In [None]:
# Pick one training image as the running example and display it.
# `example_image_path` is reused by later cells (image properties, baseline prediction).
example_image_path = '/content/drive/MyDrive/Colab Notebooks/FrogImages/Train/images/frogs1.jpg'
display_image(example_image_path, print_info = True, hide_axis = False)

In [None]:
def plot_random_images_from_folder(folder_path, num_images=20, seed=CFG.SEED):
    """Plot a reproducible random sample of images from a folder in a 5-column grid.

    Args:
        folder_path: Directory containing the image files.
        num_images: How many images to sample and display (at least 1).
        seed: Seed for the sampling; the same seed and folder contents always
            select the same images.

    Raises:
        ValueError: if ``num_images`` is less than 1 or the folder holds fewer
            than ``num_images`` images.
    """
    if num_images < 1:
        raise ValueError("num_images must be at least 1")

    image_extensions = ('.jpg', '.png', '.jpeg', '.gif')

    # Sort the listing: os.listdir returns entries in arbitrary order, which
    # would make the seeded sample differ between machines/filesystems.
    image_files = sorted(
        f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions)
    )

    # Ensure that we have at least num_images files to choose from
    if len(image_files) < num_images:
        raise ValueError("Not enough images in the folder")

    # A private generator gives the same seeded selection without reseeding
    # the global `random` state used by the rest of the notebook.
    selected_files = random.Random(seed).sample(image_files, num_images)

    # Create a subplot grid; squeeze=False always yields a 2-D axes array, so
    # the single-row case needs no special handling once it is flattened.
    num_cols = 5
    num_rows = (num_images + num_cols - 1) // num_cols
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(12, 8), squeeze=False)
    axes = axes.ravel()

    for ax, file_name in zip(axes, selected_files):
        # imshow copies the pixel data, so the file can be closed right away
        with Image.open(os.path.join(folder_path, file_name)) as img:
            ax.imshow(img)
        ax.axis('off')

    # Remove the unused cells at the end of the last row
    for ax in axes[num_images:]:
        fig.delaxes(ax)

    plt.tight_layout()
    plt.show()

In [None]:
# Preview a seeded random sample of 20 training images.
# Pass a different seed (e.g. 54) to look at another sample.
folder_path = f"{CFG.CUSTOM_DATASET_DIR}/Train/images"
plot_random_images_from_folder(folder_path, 20, CFG.SEED)

In [None]:
def get_image_properties(image_path):
    """Read an image with OpenCV and describe it.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A dict with the keys "width", "height", "channels" and "dtype".

    Raises:
        ValueError: if OpenCV could not read the file.
    """
    img = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising
    if img is None:
        raise ValueError("Could not read image file")

    height, width = img.shape[:2]
    # Arrays without a third axis are single-channel
    channels = img.shape[2] if img.ndim == 3 else 1

    return {
        "width": width,
        "height": height,
        "channels": channels,
        "dtype": img.dtype,
    }

In [None]:
# Read the example image's properties; later cells pass its height/width as `imgsz`.
img_properties = get_image_properties(example_image_path)
img_properties

In [None]:
# Report the distinct image sizes found in each dataset split.
# The split folders are named as in data.yaml ('Train', 'Val').
# NOTE(review): assumes Val/ stores its files in an 'images' subfolder like Train/ does - confirm.
for mode in ['Train', 'Val']:
    print(f'\nImage sizes in {mode} set:')

    image_sizes = set()
    split_pattern = os.path.join(CFG.CUSTOM_DATASET_DIR, mode, 'images', '*')
    for file in glob.glob(split_pattern):
        if file.lower().endswith(".jpg"):
            # Only the header is needed for .size; close the file right away
            with Image.open(file) as image:
                image_sizes.add(image.size)

    if image_sizes:
        for img_size in sorted(image_sizes):
            print(img_size)  # (width, height) as reported by PIL
    else:
        print('No .jpg images found')

In [None]:
# Baseline: run the pretrained (not yet fine-tuned) weights on the example image
model = YOLO(CFG.BASE_MODEL_WEIGHTS)

# NOTE(review): classes=[0] keeps only class index 0 of the *pretrained* label
# set, which may not correspond to "Frog" - confirm this filter is intended.
results = model.predict(
    source=example_image_path,
    classes=[0],
    conf=0.3,
    device=0, # GPU index 0; only works when a GPU runtime is on
    imgsz=(img_properties["height"], img_properties["width"]),  # size of the example image
    save=True,
    save_txt=True,
    save_conf=True,
    exist_ok=True
)

In [None]:
### Show the annotated image that the base-model prediction saved to disk
example_image_inference_output = example_image_path.rsplit('/', 1)[-1]
display_image('/content/runs/detect/predict/' + example_image_inference_output)

In [None]:
# Summarise the key training settings before starting the run
for label, value in (
    ('Model: ', CFG.BASE_MODEL_WEIGHTS),
    ('Epochs: ', CFG.EPOCHS),
    ('Batch: ', CFG.BATCH_SIZE),
):
    print(label, value)

In [None]:
### Load a fresh copy of the pre-trained YOLO weights; the next cell fine-tunes it
model = YOLO(CFG.BASE_MODEL_WEIGHTS)

In [None]:
### Fine-tune the loaded model on the frog dataset described by data.yaml
model.train(
    data = os.path.join(CFG.OUTPUT_DIR, 'data.yaml'),

    task = 'detect',

    # Training takes a single integer size. A (height, width) pair is only
    # accepted with a warning and reduced to its larger side, so pass that
    # value directly.
    imgsz = max(img_properties['height'], img_properties['width']),

    # Hyper-parameters, all taken from CFG so they are tuned in one place
    epochs = CFG.EPOCHS,
    batch = CFG.BATCH_SIZE,
    optimizer = CFG.OPTIMIZER,
    lr0 = CFG.LR,
    lrf = CFG.LR_FACTOR,
    weight_decay = CFG.WEIGHT_DECAY,
    dropout = CFG.DROPOUT,
    fraction = CFG.FRACTION,
    patience = CFG.PATIENCE,
    profile = CFG.PROFILE,
    label_smoothing = CFG.LABEL_SMOOTHING,

    # Run name: decides the runs/detect/<name> folder read by the log cells below
    name = f'{CFG.BASE_MODEL}_{CFG.EXP_NAME}',
    seed = CFG.SEED,

    val = True,
    amp = True,
    exist_ok = True,  # reuse the run folder instead of creating a numbered copy
    resume = False,
    device = 0,  # GPU index 0; needs a GPU runtime
    verbose = False,
)

In [None]:
# Load previously trained weights from Drive so inference can run without retraining.
# Note: this rebinds `model`, replacing the model object used in the cells above.
WEIGHTS = '/content/drive/MyDrive/Colab Notebooks/best frogs (4).pt'
model =  YOLO(WEIGHTS)

In [None]:
import random
# Run the loaded weights over every .jpg in the test folder and keep all results.
results_image_path = "/content/drive/MyDrive/Colab Notebooks/FrogImages/Test"
results_images = sorted(
    file for file in os.listdir(results_image_path) if file.lower().endswith(".jpg")
)

# One entry per processed image. Dedicated loop names are used so the
# notebook-wide `example_image_path` is not overwritten by this cell.
test_results = []
for image_name in results_images:
    test_image_path = os.path.join(results_image_path, image_name)
    results = model.predict(
        source=test_image_path,
        classes=[0],
        conf=0.5,
        device='cpu',  # inference runs on the CPU, so no GPU runtime is required
        imgsz=(img_properties["height"], img_properties["width"]),
        save=True,
        save_txt=True,
        save_conf=True,
        exist_ok=True
    )
    # `results` still holds the latest prediction; the list keeps them all
    test_results.extend(results)

In [None]:
# Load the per-epoch training log of the run, strip the spaces from its column
# names, and save a copy inside OUTPUT_DIR.
df = pd.read_csv(f'runs/detect/{CFG.BASE_MODEL}_{CFG.EXP_NAME}/results.csv')
df = df.rename(columns=lambda x: x.replace(" ", ""))
# os.path.join supplies the separator that plain string concatenation omitted
# (the file used to land next to OUTPUT_DIR as "FrogImagestraining_log_df.csv").
df.to_csv(os.path.join(CFG.OUTPUT_DIR, 'training_log_df.csv'), index=False)
df

In [None]:
fig, loss_axes = plt.subplots(3, 1, figsize=(10, 15), sharex=True)

# One panel per loss, top to bottom: (loss column suffix, panel title, y-axis label)
loss_panels = (
    ('box_loss', 'Box Loss', 'Box Loss'),
    ('cls_loss', 'Cls Loss', 'cls_loss'),
    ('dfl_loss', 'DFL Loss', 'dfl_loss'),
)

for ax, (loss_name, panel_title, y_label) in zip(loss_axes, loss_panels):
    ax.set_title(panel_title)
    # Training curve first, then validation, so legend order and colours match
    for split_label, column_prefix in (('Training', 'train'), ('Validation', 'val')):
        ax.plot(
            df['epoch'],
            df[f'{column_prefix}/{loss_name}'],
            label=f'{split_label} {loss_name}',
            marker='o',
            linestyle='-',
        )
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

# The x axis is shared, so only the bottom panel carries its label
loss_axes[-1].set_xlabel('Epochs')

plt.suptitle('Training Metrics vs. Epochs')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Reload the training log so this cell can run on its own
df = pd.read_csv(f'runs/detect/{CFG.BASE_MODEL}_{CFG.EXP_NAME}/results.csv')
df = df.rename(columns=lambda x: x.replace(" ", ""))
df.to_csv(os.path.join(CFG.OUTPUT_DIR, 'training_log_df.csv'), index=False)

# Extract metrics for plotting.
# The log has one mAP50 and one mAP50-95 column; they are treated as validation
# metrics here, so no separate "train" curve is plotted. The stricter metric
# is stored as 'metrics/mAP50-95(B)' (there is no 'mAP95' column).
epochs = df['epoch']
val_mAP50 = df['metrics/mAP50(B)']
val_mAP95 = df['metrics/mAP50-95(B)']

# Create the plot
plt.figure(figsize=(10, 5))
plt.plot(epochs, val_mAP50, label='Validation mAP50')
plt.plot(epochs, val_mAP95, label='Validation mAP50-95')

# Add labels and title
plt.xlabel('Epoch')
plt.ylabel('mAP')
plt.title('mAP50 and mAP50-95 over Epochs')
plt.legend()
plt.grid(True)

# Show the plot
plt.show()

# Print the final mAP values (last logged epoch, not necessarily the best one)
final_mAP50 = val_mAP50.iloc[-1]
final_mAP95 = val_mAP95.iloc[-1]
print(f'Final Validation mAP50: {final_mAP50:.4f}')
print(f'Final Validation mAP50-95: {final_mAP95:.4f}')