In [1]:
# !pip install ultralytics

In [2]:
# !pip uninstall torch torchvision torchaudio -y

In [3]:
# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128

In [4]:
import yaml
from pathlib import Path

import torch

import ultralytics
from ultralytics import YOLO

In [5]:
!python --version

Python 3.10.14


In [6]:
!nvidia-smi

Tue Oct 28 22:57:21 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.52                 Driver Version: 576.52         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 5060 Ti   WDDM  |   00000000:2B:00.0  On |                  N/A |
|  0%   42C    P0             24W /  216W |     257MiB /   8151MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [7]:
ultralytics.__version__

'8.3.221'

In [8]:
!conda info --envs

# conda environments:
#
base                     C:\ProgramData\anaconda3
default-python-310    *  C:\ProgramData\anaconda3\envs\default-python-310
default-python-310       c:\ProgramData\anaconda3\envs\default-python-310



In [9]:
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())

if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

2.9.0+cu128
12.8
True
NVIDIA GeForce RTX 5060 Ti


In [10]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 5060 Ti'

In [11]:
!set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Device:", device)

Device: cuda


In [13]:
data_yaml_path = "./data.yaml"

In [14]:
with open(data_yaml_path) as f:
    data = yaml.safe_load(f)

print("Classes:", data['names'])
print("Train images:", len(list(Path(data['train']).rglob("*.jpg"))))
print("Val images:", len(list(Path(data['val']).rglob("*.jpg"))))

Classes: ['wheel', 'fire', 'inside_rail', 'LEP', 'outside_rail', 'person', 'rail_switch', 'rail', 'unknown', 'pole', 'train', 'sign']
Train images: 79
Val images: 27


In [15]:
model = YOLO("yolo11s-seg.pt")

In [16]:
model.train(
  data=data_yaml_path,
    epochs=200,
    imgsz=712,
    batch=14,
    device=0,
    workers=4,
    optimizer='AdamW',
    cache='ram',
    lr0=0.005,
)

Ultralytics 8.3.221  Python-3.10.14 torch-2.9.0+cu128 CUDA:0 (NVIDIA GeForce RTX 5060 Ti, 8151MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=14, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=./data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=200, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=712, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.005, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11s-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=AdamW, overlap_mask=True, patience=100, perspective=0.0, plot

ultralytics.utils.metrics.SegmentMetrics object with attributes:

ap_class_index: array([ 0,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000001830FF6B790>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)', 'Precision-Recall(M)', 'F1-Confidence(M)', 'Precision-Confidence(M)', 'Recall-Confidence(M)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.03

In [17]:
metrics = model.val()

metrics

Ultralytics 8.3.221  Python-3.10.14 torch-2.9.0+cu128 CUDA:0 (NVIDIA GeForce RTX 5060 Ti, 8151MiB)
YOLO11s-seg summary (fused): 113 layers, 10,071,460 parameters, 0 gradients, 32.8 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 1055.4596.0 MB/s, size: 64.4 KB)
[K[34m[1mval: [0mScanning D:\Dev\Projects\cv-yolo-train-segment\dataset\valid\labels.cache... 27 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 27/27 27.0Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 2/2 0.8it/s 2.6s7.3s
                   all         27        219      0.568       0.43        0.4      0.235       0.54      0.389      0.328      0.162
                 wheel         13         16      0.464      0.438      0.428      0.287      0.464      0.438      0.367      0.132
           inside_rail         16         31        0.5      0.548      0.531      0.324      0.441   

ultralytics.utils.metrics.SegmentMetrics object with attributes:

ap_class_index: array([ 0,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000018615FF3F70>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)', 'Precision-Recall(M)', 'F1-Confidence(M)', 'Precision-Confidence(M)', 'Recall-Confidence(M)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.03

In [18]:
results_pred = model.predict(source="./dataset/test/images", save=True)


image 1/25 d:\Dev\Projects\cv-yolo-train-segment\dataset\test\images\190227-03_jpg.rf.b7a4f22bc449516740722fdcaf07cd4b.jpg: 736x736 2 trains, 11.0ms
image 2/25 d:\Dev\Projects\cv-yolo-train-segment\dataset\test\images\1_jpg.rf.570bc327085ca0d30f6fb49f6abebb69.jpg: 736x736 1 train, 11.3ms
image 3/25 d:\Dev\Projects\cv-yolo-train-segment\dataset\test\images\2025-03-18T15_57_37_298_jpg.rf.5b75a9f3ad9534c2ffc4c8431a56e715.jpg: 736x736 1 inside_rail, 1 outside_rail, 1 person, 1 rail, 1 train, 11.0ms
image 4/25 d:\Dev\Projects\cv-yolo-train-segment\dataset\test\images\2025-03-18T15_57_37_299_jpg.rf.fe3a91219e584804180bf6c80cb6a34d.jpg: 736x736 2 outside_rails, 3 rails, 1 train, 10.8ms
image 5/25 d:\Dev\Projects\cv-yolo-train-segment\dataset\test\images\25_3_jpg.rf.5835e1259ab5863a273471c9bd978f55.jpg: 736x736 2 inside_rails, 2 LEPs, 1 outside_rail, 5 persons, 9 rails, 10.9ms
image 6/25 d:\Dev\Projects\cv-yolo-train-segment\dataset\test\images\2_jpg.rf.2cf748c946d93658e4caccac8a43ca65.jpg: 7