In [1]:
!pip install ultralytics==8.1.0
!pip install pandas numpy matplotlib seaborn scikit-learn pingouin

Collecting ultralytics==8.1.0
  Downloading ultralytics-8.1.0-py3-none-any.whl.metadata (39 kB)
Collecting thop>=0.1.1 (from ultralytics==8.1.0)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting hub-sdk>=0.0.2 (from ultralytics==8.1.0)
  Downloading hub_sdk-0.0.24-py3-none-any.whl.metadata (12 kB)
Downloading ultralytics-8.1.0-py3-none-any.whl (699 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m699.2/699.2 kB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading hub_sdk-0.0.24-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.9/44.9 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: hub-sdk, thop, ultralytics
  Attempting uninstall: ultralytics
    Found existing installation: ultralytics 8.3.228
    Uninstalling ultralytics-8.3.228:
      Successfully uninstalled ultralytics-8.3.228
Success

In [None]:
import itertools
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from ultralytics import YOLO

In [1]:
!git clone https://github.com/lisapraino/Deep_learning.git

fatal: destination path 'Deep_learning' already exists and is not an empty directory.


In [None]:
# Dataset YAML handed to model.train(data=...) by every experiment run.
yaml_path = "/content/Deep_learning/alternative-dataset/Fridge_detection.v1i.yolov11/data.yaml"

# Fail fast, and name the offending path, instead of discovering the problem
# inside the first training run. isfile() also rejects a directory that merely
# happens to carry this name. Requires `os` from the notebook's import cell.
if not os.path.isfile(yaml_path):
    raise FileNotFoundError(f"File data.yaml not found: {yaml_path}")

print("data.yaml found :", yaml_path)

In [None]:
# Pretrained YOLO11-small checkpoint. Ultralytics publishes the YOLO11 weights
# as "yolo11{n,s,m,l,x}.pt" (no "v"), so "yolov11s.pt" is not a resolvable
# asset name unless a local file with that exact name exists.
MODEL_PATH = "yolo11s.pt"

# Instantiating once here surfaces a bad checkpoint name before the long
# hyper-parameter sweep begins; each run later builds its own fresh model.
base_model = YOLO(MODEL_PATH)

In [None]:
# Factor levels of the full-factorial hyper-parameter study.
BATCH_SIZES = [
    8,
    16,
    32,
]
LEARNING_RATES = [
    0.001,
    0.0001,
    0.00001,
]
EPOCHS = [
    25,
    50,
    100,
]

# How many times every factor combination is trained.
N_REPETITIONS = 5

In [None]:
# Full-factorial design: one (batch size, learning rate, epochs) tuple per cell.
factorial_combinations = [
    combo for combo in itertools.product(BATCH_SIZES, LEARNING_RATES, EPOCHS)
]

print(f"Nombre total de combinaisons : {len(factorial_combinations)}")
print(f"Nombre total de runs (avec répétitions) : {N_REPETITIONS * len(factorial_combinations)}")

In [None]:
def _final_box_losses(save_dir):
    """Return the last-epoch ``(train, val)`` box losses logged in ``results.csv``.

    Args:
        save_dir: Run directory that is expected to contain ``results.csv``;
            may be ``None`` when no trainer is available.

    Returns:
        Tuple ``(train_box_loss, val_box_loss)``. Each entry is a float, or
        ``None`` when the file or the corresponding column is missing.
    """
    if save_dir is None:
        return None, None

    try:
        history = pd.read_csv(f"{save_dir}/results.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return None, None

    if history.empty:
        return None, None

    # Some Ultralytics releases pad the header cells with spaces.
    history.columns = [str(column).strip() for column in history.columns]
    last_epoch = history.iloc[-1]

    def _pick(column):
        return float(last_epoch[column]) if column in history.columns else None

    return _pick("train/box_loss"), _pick("val/box_loss")


def run_experiment(batch_size, learning_rate, epochs, rep,
                   optimizer="AdamW", imgsz=640, device=0):
    """Train and validate one YOLO model for a single run of the factorial design.

    Args:
        batch_size: Training batch size (``batch`` in Ultralytics).
        learning_rate: Initial learning rate (``lr0`` in Ultralytics).
        epochs: Number of training epochs.
        rep: 1-based repetition index. It is also used as the training seed so
            that repetitions of the same configuration are genuinely different
            runs; with the Ultralytics default (``seed=0``) every repetition
            would reproduce the same seeded run.
        optimizer: Optimizer name passed to Ultralytics. It must be explicit:
            with the default ``optimizer="auto"`` Ultralytics chooses its own
            learning rate and ignores ``lr0``, which would make the
            learning-rate factor of the study meaningless.
        imgsz: Training/validation image size.
        device: Device identifier passed to Ultralytics (``0`` = first GPU).

    Returns:
        dict with the factor levels (``batch_size``, ``learning_rate``,
        ``epochs``, ``rep``), the validation metrics (``mAP`` taken from
        ``metrics.box.map``, ``precision``, ``recall``, ``f1`` as the mean over
        classes), the wall-clock ``train_time`` in seconds and the last-epoch
        ``train_loss`` / ``val_loss`` (box loss, ``None`` if unavailable).
    """
    # Fresh pretrained weights for every run so runs never build on each other.
    model = YOLO(MODEL_PATH)

    start = time.time()
    model.train(
        data=yaml_path,
        epochs=epochs,
        batch=batch_size,
        lr0=learning_rate,
        optimizer=optimizer,
        seed=rep,
        imgsz=imgsz,
        device=device,
    )
    train_time = time.time() - start

    metrics = model.val()

    # metrics.box.f1 holds one value per class; reduce it to a scalar the same
    # way the mean precision/recall are reduced (0.0 when there is no class).
    f1_per_class = np.asarray(metrics.box.f1, dtype=float)
    mean_f1 = float(f1_per_class.mean()) if f1_per_class.size else 0.0

    # The object returned by train() only exposes validation metrics, so the
    # box losses are read from the per-epoch log written in the run directory.
    trainer = getattr(model, "trainer", None)
    train_loss, val_loss = _final_box_losses(getattr(trainer, "save_dir", None))

    return {
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "epochs": epochs,
        "rep": rep,
        "mAP": float(metrics.box.map),
        "precision": float(metrics.box.mp),
        "recall": float(metrics.box.mr),
        "f1": mean_f1,
        "train_time": train_time,
        "train_loss": train_loss,
        "val_loss": val_loss,
    }

In [None]:
AUTOSAVE_PATH = "results_autosave.csv"

# Resume support: reload whatever a previous (interrupted) session already
# saved, so the sweep continues instead of restarting from the first run.
# Delete the autosave file to force a completely fresh sweep.
try:
    all_results = pd.read_csv(AUTOSAVE_PATH).to_dict("records")
except (FileNotFoundError, pd.errors.EmptyDataError):
    all_results = []

# Keys of the runs that are already recorded in the autosave.
completed_runs = {
    (int(row["batch_size"]), float(row["learning_rate"]), int(row["epochs"]), int(row["rep"]))
    for row in all_results
}

for (batch_size, lr, epochs) in factorial_combinations:
    for rep in range(1, N_REPETITIONS + 1):
        if (batch_size, lr, epochs, rep) in completed_runs:
            continue

        print(f"➡ Running: batch={batch_size}, lr={lr}, epochs={epochs}, rep={rep}")

        result = run_experiment(batch_size, lr, epochs, rep)
        all_results.append(result)

        # Persist after every run so at most one run is lost on a crash.
        pd.DataFrame(all_results).to_csv(AUTOSAVE_PATH, index=False)

In [None]:
# Turn the collected run records into the analysis table and persist it.
df = pd.DataFrame(data=all_results)
df.to_csv(path_or_buf="final_experiment_results.csv", index=False)

# Preview of the first rows.
df.head(n=5)

In [None]:
# One box plot of mAP per experimental factor, each in its own figure.
for factor, title in (
    ("batch_size", "Distribution du mAP selon le Batch Size"),
    ("learning_rate", "Distribution du mAP selon le Learning Rate"),
    ("epochs", "Distribution du mAP selon le nombre d'epochs"),
):
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df, x=factor, y="mAP")
    plt.title(title)
    plt.show()

In [None]:
import pingouin as pg

# Three-way between-subjects ANOVA of mAP on the three experimental factors;
# the table is displayed as the cell output.
anova = pg.anova(dv="mAP", between=["batch_size", "learning_rate", "epochs"], data=df, detailed=True)
anova

In [None]:
import scipy.stats as stats
import matplotlib.pyplot as plt

# The ANOVA normality assumption concerns the residuals, not the raw response:
# pooled raw mAP is a mixture of the cell distributions and looks non-normal as
# soon as any factor has an effect. For the full-factorial model the residual
# of a run is its deviation from the mean of its (batch, lr, epochs) cell.
cell_means = df.groupby(["batch_size", "learning_rate", "epochs"])["mAP"].transform("mean")
residuals = df["mAP"] - cell_means

stats.probplot(residuals, dist="norm", plot=plt)
plt.title("Normal Probability Plot - mAP residuals")
plt.show()

In [None]:
from scipy.stats import levene

# Homogeneity-of-variance check for every ANOVA factor. Groups are derived from
# the data rather than from hard-coded level values, so the test stays correct
# when the factor levels in the configuration cell change.
levene_rows = []
for factor in ["batch_size", "learning_rate", "epochs"]:
    samples = [group["mAP"].to_numpy() for _, group in df.groupby(factor)]
    if len(samples) < 2:
        # Levene's test needs at least two groups to compare.
        statistic, p_value = float("nan"), float("nan")
    else:
        statistic, p_value = levene(*samples)
    levene_rows.append({"factor": factor, "W": statistic, "p_value": p_value})

# Displayed as the cell output: one row (statistic, p-value) per factor.
pd.DataFrame(levene_rows).set_index("factor")

In [None]:
# Rank the runs from highest to lowest mAP and keep the top one.
ranked_runs = df.sort_values("mAP", ascending=False)
best = ranked_runs.iloc[0]
best