**Experiment 1**

En aquest notebook es realitza l'entrenament d'un model YOLO (versió 8) per a la base de dades original de fruites sintètiques. Inclou la configuració inicial, la càrrega i el preprocessament de les dades, l'entrenament i l'avaluació del rendiment del model amb diverses mètriques.

In [None]:
# Print the GPU information reported by nvidia-smi, to confirm that a GPU
# runtime is attached before starting the training
!nvidia-smi

In [None]:
# Install the ultralytics package
!pip install ultralytics

In [None]:
# Import the necessary libraries
from ultralytics import YOLO
import os
import matplotlib.pyplot as plt
from IPython.display import display, Image
from IPython import display
display.clear_output()
!yolo mode= yolo checks

In [None]:
#Import the dataset from Roboflow
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="api_key")
project = rf.workspace("fruitesd").project("fd-ndbok")
version = project.version(1)
dataset = version.download("yolov8")

In [None]:
import os
import subprocess

# Hyper-parameter grid: every number of epochs is combined with every batch size
epochs_list = [50, 100, 150]
batch_sizes = [4, 16, 32]
# Value passed as `patience` to the trainer, chosen per number of epochs
patience_dict = {50: 10, 100: 15, 150: 20}

# Path to the data's configuration file
data_file = '/content/FD-1/data.yaml'
model_file = 'yolov8m.pt'

# Base directory to save the results
base_results_dir = '/content/training_result'
os.makedirs(base_results_dir, exist_ok=True)

# Train with each combination of epochs and batch size
for epochs in epochs_list:
    for batch_size in batch_sizes:
        run_name = f'epochs_{epochs}_batch_{batch_size}'
        patience = patience_dict[epochs]

        command = [
            'yolo', 'task=detect', 'mode=train',
            f'model={model_file}',
            f'data={data_file}',
            f'epochs={epochs}',
            'imgsz=640',
            f'batch={batch_size}',
            f'patience={patience}',
            f'name={run_name}',
            f'project={base_results_dir}',
            # Reuse the run folder when this cell is executed again. Otherwise
            # a numeric suffix is appended to the folder name and the paths
            # built from `run_name` below would keep pointing at the old run.
            # NOTE: this overwrites the previous results of the same run.
            'exist_ok=True'
        ]

        # Execute the training command
        train_run = subprocess.run(command)

        # A failed training leaves no usable weights: report it and move on to
        # the next combination instead of validating a missing or stale model.
        if train_run.returncode != 0:
            print(f"L'entrenament ha fallat (codi de sortida {train_run.returncode}) "
                  f"per {epochs} epochs i batch size {batch_size}; s'omet la validació")
            continue

        print(f'Entrenament completat per {epochs} epochs i batch size {batch_size}')

        # Validation
        command_val = [
            'yolo', 'task=detect', 'mode=val',
            f'model={os.path.join(base_results_dir, run_name, "weights/best.pt")}',  #path to the trained model
            f'data={data_file}',
            'imgsz=640',
            f'project={base_results_dir}',
            f'name={run_name}_val'
        ]
        val_run = subprocess.run(command_val)

        if val_run.returncode != 0:
            print(f'La validació ha fallat (codi de sortida {val_run.returncode}) '
                  f'per {epochs} epochs i batch size {batch_size}')
            continue

        print(f'Validació completada per {epochs} epochs i batch size {batch_size}')


In [None]:
# Bundle the training results into a single archive for future use
import shutil

# make_archive appends the extension of the chosen format to base_name,
# so the archive is written to /content/trs.zip
shutil.make_archive(
    base_name='/content/trs',
    format='zip',
    root_dir='/content/training_result',
)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive
drive.mount('/content/drive')
# Copy the ZIP file to Google Drive.
# Note that the copy is stored as tr.zip, not under its local name trs.zip.
!cp /content/trs.zip /content/drive/MyDrive/tr.zip

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Path to the results folder
results_base_dir = '/content/training_result'

# (epochs, batch size) pairs of the training runs to analyse
combinations = [(50, 4), (50, 16), (50, 32), (100, 4), (100, 16), (100, 32), (150, 4), (150, 16), (150, 32)]

# Parallel lists: position i of every list describes the same training run
epochs_data = []
batch_data = []
map50_95_data = []
precision_data = []
recall_data = []
train_box_loss_data = []
train_cls_loss_data = []
val_box_loss_data = []
val_cls_loss_data = []

# results.csv column -> list that collects the value of its last row
metric_columns = {
    'metrics/mAP50-95(B)': map50_95_data,
    'metrics/precision(B)': precision_data,
    'metrics/recall(B)': recall_data,
    'train/box_loss': train_box_loss_data,
    'train/cls_loss': train_cls_loss_data,
    'val/box_loss': val_box_loss_data,
    'val/cls_loss': val_cls_loss_data,
}

# Read the results of each training
for epochs, batch_size in combinations:
    run_name = f'epochs_{epochs}_batch_{batch_size}'
    results_file = os.path.join(results_base_dir, run_name, 'results.csv')

    # Check if the results file exists
    if not os.path.exists(results_file):
        print(f'The file {results_file} does not exist')
        continue

    results = pd.read_csv(results_file)

    # Clean the column names (the header cells may carry surrounding spaces)
    results.columns = results.columns.str.strip()

    # A file with a header but no rows has no last row to read
    if results.empty:
        print(f'The file {results_file} contains no rows')
        continue

    # Take the last row of every metric before touching any list, so that a
    # missing column cannot leave the parallel lists with different lengths.
    # NOTE: the last row is the final logged epoch, not necessarily the best one.
    try:
        last_values = [results[column].iloc[-1] for column in metric_columns]
    except KeyError as e:
        print(f'The column {e} does not exist in {results_file}')
        continue

    epochs_data.append(epochs)
    batch_data.append(batch_size)
    for metric_values, last_value in zip(metric_columns.values(), last_values):
        metric_values.append(last_value)


def values_for_batch(values, batch_size):
    """Return the entries of ``values`` that belong to runs trained with ``batch_size``.

    ``values`` must be one of the parallel lists filled above, so that it is
    aligned position by position with ``batch_data``.
    """
    return [value for value, run_batch in zip(values, batch_data) if run_batch == batch_size]


def plot_losses(batch_size):
    """Plot the train/validation box and class losses of one batch size.

    The x axis is the number of epochs each run was configured with; every
    point is the value read from the last row of that run's results.csv.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 5))
    batch_epochs = values_for_batch(epochs_data, batch_size)

    curves = [
        (train_box_loss_data, 'Train Box Loss', '-'),
        (val_box_loss_data, 'Val Box Loss', '--'),
        (train_cls_loss_data, 'Train Cls Loss', '-.'),
        (val_cls_loss_data, 'Val Cls Loss', ':'),
    ]
    for values, label, linestyle in curves:
        # The marker keeps a batch size with a single available run visible
        ax.plot(batch_epochs, values_for_batch(values, batch_size),
                label=f'{label} Batch Size {batch_size}', linestyle=linestyle, marker='o')

    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title(f'Train and Validation Loss vs Epochs for Batch Size {batch_size}')
    ax.legend()

    plt.tight_layout()
    plt.show()
    # Release the figure once it has been displayed
    plt.close(fig)


def plot_metric_summary(sizes):
    """Plot mAP50-95 (top) and precision/recall (bottom) for every batch size in ``sizes``."""
    fig, ax = plt.subplots(2, 1, figsize=(10, 15))

    for batch_size in sizes:
        batch_epochs = values_for_batch(epochs_data, batch_size)

        # Plot mAP50-95
        ax[0].plot(batch_epochs, values_for_batch(map50_95_data, batch_size),
                   label=f'Batch Size {batch_size}', marker='o')

        # Plot precision and recall
        ax[1].plot(batch_epochs, values_for_batch(precision_data, batch_size),
                   label=f'Precision Batch Size {batch_size}', marker='o')
        ax[1].plot(batch_epochs, values_for_batch(recall_data, batch_size),
                   label=f'Recall Batch Size {batch_size}', linestyle='--', marker='o')

    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('mAP50-95')
    ax[0].set_title('mAP50-95 vs Epochs for Different Batch Sizes')
    ax[0].legend()

    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('Precision/Recall')
    ax[1].set_title('Precision and Recall vs Epochs for Different Batch Sizes')
    ax[1].legend()

    plt.tight_layout()
    plt.show()
    plt.close(fig)


# Sorted so that figures and legend entries always come out in ascending
# batch size order (iterating a set directly gives an arbitrary order)
batch_sizes = sorted(set(batch_data))

if batch_sizes:
    # Create separate plots for each batch size
    for batch_size in batch_sizes:
        plot_losses(batch_size)

    # Create combined plots
    plot_metric_summary(batch_sizes)
else:
    print('No training results could be loaded; nothing to plot')
