# Establish a Baseline Training and Validation Pipeline

## Initial Setup

In [None]:
!pip install -U ultralytics wandb

In [None]:
import os
import yaml

from ultralytics import YOLO

import wandb
from wandb.integration.ultralytics import add_wandb_callback

## Initialize a WandB Run

In [None]:
# Start the W&B run for this notebook, tagged as a "baseline" job in the
# "object-detection-bdd" project.
wandb.init(
    project="object-detection-bdd",
    job_type="baseline",
)

## Fetch and preprocess the dataset

First, we fetch the BDD100K dataset, which is hosted as a W&B dataset artifact.

In [None]:
# Declare the dataset artifact as an input of the current run ...
artifact = wandb.use_artifact(
    "reviewco/object-detection-bdd/bdd100k-ultralytics-format:latest",
    type="dataset",
)

# ... and download it; the returned path is used below as the dataset directory.
artifact_dir = artifact.download()

Next, we preprocess the `data.yaml` file in the artifact to set the absolute path to the dataset. This file is used by Ultralytics to register the dataset and build the dataloaders for training and validation.

In [None]:
# Rewrite the dataset's `data.yaml` so that its `path` entry holds the
# absolute location of the downloaded artifact directory.
metadata_file = os.path.join(artifact_dir, "data.yaml")
with open(metadata_file, "r", encoding="utf-8") as yaml_file:
    metadata = yaml.safe_load(yaml_file)

# `__file__` is not defined inside a notebook kernel (it raises NameError),
# so resolve the artifact directory against the current working directory
# instead. This works whether `artifact_dir` is relative or already absolute.
metadata["path"] = os.path.abspath(artifact_dir)

with open(metadata_file, "w", encoding="utf-8") as yaml_file:
    yaml.dump(metadata, yaml_file)

## Training and Validation

In [None]:
# Build the YOLO model from the pre-trained `yolov8n.pt` checkpoint.
model = YOLO("yolov8n.pt")

# Attach the W&B callback for Ultralytics to the model (with model
# checkpointing enabled), which lets us use several advanced
# visualization features.
add_wandb_callback(
    model,
    enable_model_checkpointing=True,
)

In [None]:
# Train on the dataset described by the rewritten `data.yaml`,
# for 5 epochs with an image size of 640.
model.train(
    data=metadata_file,
    epochs=5,
    imgsz=640,
)

# Run validation with the trained model.
model.val()

# Close the W&B run now that training and validation are done.
wandb.finish()