In [None]:
!pip install tensorboard
!pip install -qqq accelerate==0.28.0
!pip install ultralytics
!pip install fsspec==2024.10.0
!pip install -qqq datasets
!pip install torch


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ultralytics
  Downloading ultralytics-8.3.27-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.10-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.27-py3-none-any.whl (878 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m879.0/879.0 kB[0m [31m51.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.10-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.27 ultralytics-thop-2.0.10
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!pip install -U -q PyDrive
!pip install -U -q PyDrive tqdm
!pip install tqdm



In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Google Drive 내에서 파일 경로를 확인합니다.
!ls /content/drive/MyDrive/

# 확인된 zip 파일 경로를 설정합니다.
zip_file_path = '/content/drive/MyDrive/dataset.zip'  # 실제 zip 파일 경로로 변경
extract_to_path = '/dataset/data'  # 압축을 풀 폴더 경로 설정

import zipfile
import os
from tqdm import tqdm

# 압축을 풀 폴더가 존재하지 않으면 생성합니다.
if not os.path.exists(extract_to_path):
    os.makedirs(extract_to_path)

# zip 파일 압축 해제
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    # 파일 리스트를 얻습니다.
    file_list = zip_ref.namelist()

    # tqdm을 사용하여 진행 상황을 표시합니다.
    for file in tqdm(file_list, desc="Extracting"):
        zip_ref.extract(file, extract_to_path)

print('Zip file extracted successfully.')

Mounted at /content/drive
'Colab Notebooks'   dataset   dataset.zip   tensorboard_logs


Extracting: 100%|██████████| 9363/9363 [05:43<00:00, 27.25it/s]  

Zip file extracted successfully.





In [None]:
import os
import random
import numpy as np
import torch
import psutil
from ultralytics import YOLO
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import matplotlib.pyplot as plt
import time

# Seed every random number generator the notebook uses.
def set_seed(seed_value=42):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed_value: Seed applied to every generator (default 42).
    """
    # Record the seed in PYTHONHASHSEED; environment values must be strings.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    # Python's and NumPy's global generators.
    random.seed(seed_value)
    np.random.seed(seed_value)

    # PyTorch CPU and CUDA generators.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed_value)

    # Request deterministic cuDNN behaviour and switch off benchmark mode.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Apply the default seed before the model and dataloaders are created.
set_seed()

# Logical CPU count. psutil returns None when it cannot be determined, so fall
# back to a single core instead of failing in the arithmetic below.
cpu_count = psutil.cpu_count(logical=True) or 1
print(f"CPU 코어 수: {cpu_count}")

# Total memory, converted from bytes to GB.
memory_info = psutil.virtual_memory()
total_memory = memory_info.total / (1024 ** 3)
print(f"총 메모리 용량: {total_memory:.2f} GB")

# Automatic dataloader worker allocation.
# One eighth of total memory is budgeted for workers; adjust the fraction and
# the per-worker estimate as needed.
memory_limit = total_memory / 8  # memory budget for workers, in GB
worker_memory_usage = 0.5  # estimated memory per worker, in GB
max_workers_by_memory = int(memory_limit / worker_memory_usage)

# Use the smaller of the CPU core count and the memory-based limit. The
# previous `cpu_count * 2` bound asked for more workers than cores: the
# recorded run requested 3 workers and training reported using only 2.
workers = min(cpu_count, max_workers_by_memory)
print(f"자동 할당된 워커 수: {workers}")

# TensorBoard log directory (on Google Drive); also used as the training
# `project` directory below.
log_dir = '/content/drive/MyDrive/tensorboard_logs'
writer = SummaryWriter(log_dir)

# Load the model from the yolo11n.pt checkpoint.
model = YOLO("yolo11n.pt")

# Record the wall-clock time before training starts.
start_time = time.time()

# Train the model. The writer is closed in `finally` so its file handle is
# released even when training is interrupted; nothing in this cell logs
# through `writer` itself.
try:
    results = model.train(
        data='/dataset/data/dataset.yaml',  # dataset definition file
        epochs=30,  # number of training epochs
        patience=10,
        imgsz=640,  # image size
        optimizer='AdamW',  # optimizer
        lr0=0.001,  # initial learning rate
        freeze=[-1],  # layers to freeze — NOTE(review): confirm [-1] freezes the intended layer(s)
        workers=workers,  # automatically chosen dataloader worker count
        project=log_dir,  # directory that receives the run folder
        name='train_experiment',  # run name; the recorded run was saved as train_experiment11 (exist_ok=False)
        cache=False,  # caching disabled because of memory constraints
        batch=16,  # batch size
        verbose=True,  # verbose logging
        deterministic=True,  # deterministic behaviour for reproducibility
        seed=42,  # random seed
    )
finally:
    writer.close()

# Record the time after training and report the duration in minutes.
end_time = time.time()
total_time = end_time - start_time
print(f"학습 시간: {total_time / 60:.2f}분")

# Visualize the training results.
def _pick_column(frame, names, source):
    """Return the first column of ``frame`` whose name is in ``names``.

    Raises:
        KeyError: If none of ``names`` is a column of ``frame``.
    """
    for column in names:
        if column in frame.columns:
            return frame[column]
    raise KeyError(
        f"None of {list(names)} found in {source}; "
        f"available columns: {list(frame.columns)}"
    )


def plot_results(log_dir, run_name='train_experiment'):
    """Plot training losses and validation mAP from a run's ``results.csv``.

    The run folder gets a numeric suffix when the name is already taken (the
    recorded run of this notebook was saved as ``train_experiment11``), so the
    most recently modified ``results.csv`` among ``<run_name>``,
    ``<run_name>2``, ... inside ``log_dir`` is used. Column headers are
    stripped of surrounding whitespace and matched against the prefixed names
    (for example ``train/box_loss``) first, then the bare names (``box_loss``).

    Args:
        log_dir: Project directory that contains the run folders.
        run_name: Base name of the run folder (default ``'train_experiment'``).

    Raises:
        KeyError: If a required metric column is missing from the CSV.
    """
    default_file = os.path.join(log_dir, run_name, 'results.csv')

    # Collect results.csv from the base run folder and its numbered siblings.
    candidates = []
    if os.path.isdir(log_dir):
        for entry in os.listdir(log_dir):
            if not entry.startswith(run_name):
                continue
            suffix = entry[len(run_name):]
            if suffix and not suffix.isdigit():
                continue
            csv_path = os.path.join(log_dir, entry, 'results.csv')
            if os.path.isfile(csv_path):
                candidates.append(csv_path)

    if not candidates:
        print(f"Results file not found at {default_file}")
        return

    # The newest file belongs to the run that finished (or was updated) last.
    results_file = max(candidates, key=os.path.getmtime)
    results = pd.read_csv(results_file)
    # Headers may carry padding whitespace; normalize before lookup.
    results.columns = [str(column).strip() for column in results.columns]

    epochs = _pick_column(results, ('epoch',), results_file)
    figures = (
        (
            'Training Loss over Epochs',
            'Loss',
            (
                ('Box Loss', ('train/box_loss', 'box_loss')),
                ('Class Loss', ('train/cls_loss', 'cls_loss')),
                ('DFL Loss', ('train/dfl_loss', 'dfl_loss')),
            ),
        ),
        (
            'Validation mAP over Epochs',
            'mAP',
            (
                ('mAP@0.5', ('metrics/mAP50(B)', 'mAP50')),
                ('mAP@0.5:0.95', ('metrics/mAP50-95(B)', 'mAP50-95')),
            ),
        ),
    )

    for title, y_label, series in figures:
        # Resolve every column first so a missing metric fails before a
        # half-drawn figure is created.
        curves = [
            (label, _pick_column(results, names, results_file))
            for label, names in series
        ]
        plt.figure(figsize=(10, 6))
        for label, values in curves:
            plt.plot(epochs, values, label=label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.show()

# Plot the metrics of the run stored under log_dir.
plot_results(log_dir)


CPU 코어 수: 2
총 메모리 용량: 12.67 GB
자동 할당된 워커 수: 3
Ultralytics 8.3.27 🚀 Python-3.10.12 torch-2.5.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/dataset/data/dataset.yaml, epochs=30, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=3, project=/content/drive/MyDrive/tensorboard_logs, name=train_experiment11, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=42, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=[-1], multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_

[34m[1mtrain: [0mScanning /dataset/data/labels/train... 3227 images, 0 backgrounds, 12 corrupt: 100%|██████████| 3228/3228 [08:25<00:00,  6.38it/s]

[34m[1mtrain: [0mNew cache created: /dataset/data/labels/train.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /dataset/data/labels/val... 1450 images, 0 backgrounds, 4 corrupt: 100%|██████████| 1450/1450 [03:47<00:00,  6.37it/s]






[34m[1mval: [0mNew cache created: /dataset/data/labels/val.cache
Plotting labels to /content/drive/MyDrive/tensorboard_logs/train_experiment11/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1m/content/drive/MyDrive/tensorboard_logs/train_experiment11[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      2.63G      1.129      2.759      1.372         54        640: 100%|██████████| 201/201 [07:39<00:00,  2.28s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [03:07<00:00,  4.07s/it]


                   all       1446       2853      0.594      0.496      0.481      0.368

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      2.43G      1.103      1.864      1.342         81        640: 100%|██████████| 201/201 [07:33<00:00,  2.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:44<00:00,  3.58s/it]


                   all       1446       2853      0.719      0.508       0.56      0.436

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      2.43G      1.078       1.67      1.328         59        640: 100%|██████████| 201/201 [07:43<00:00,  2.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:42<00:00,  3.52s/it]


                   all       1446       2853      0.704      0.583      0.604      0.455

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      2.45G      1.065      1.577      1.324         79        640: 100%|██████████| 201/201 [07:29<00:00,  2.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:47<00:00,  3.63s/it]


                   all       1446       2853      0.794      0.625      0.673      0.542

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30      2.43G      1.011      1.445      1.295         73        640: 100%|██████████| 201/201 [08:01<00:00,  2.40s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:41<00:00,  3.51s/it]


                   all       1446       2853      0.834      0.616      0.688      0.556

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      2.43G     0.9923       1.38      1.279         76        640: 100%|██████████| 201/201 [07:32<00:00,  2.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:47<00:00,  3.64s/it]


                   all       1446       2853      0.812      0.623      0.671      0.549

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      2.45G     0.9391      1.305      1.244         71        640: 100%|██████████| 201/201 [07:29<00:00,  2.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:47<00:00,  3.65s/it]


                   all       1446       2853      0.862      0.648      0.706      0.586

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      2.43G     0.9318      1.268      1.237         75        640: 100%|██████████| 201/201 [07:35<00:00,  2.27s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:45<00:00,  3.59s/it]


                   all       1446       2853      0.861       0.66      0.709      0.603

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      2.43G     0.9055      1.204      1.216         75        640: 100%|██████████| 201/201 [07:29<00:00,  2.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:45<00:00,  3.59s/it]


                   all       1446       2853      0.872      0.678      0.727      0.618

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30      2.43G     0.8716      1.164      1.202         78        640: 100%|██████████| 201/201 [07:31<00:00,  2.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:44<00:00,  3.57s/it]


                   all       1446       2853      0.851      0.691      0.732      0.636

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      2.45G     0.8465      1.114      1.182         70        640: 100%|██████████| 201/201 [07:33<00:00,  2.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:45<00:00,  3.60s/it]


                   all       1446       2853       0.88      0.681      0.732      0.632

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      2.46G     0.8376      1.101      1.181         78        640: 100%|██████████| 201/201 [07:24<00:00,  2.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:42<00:00,  3.53s/it]


                   all       1446       2853      0.873      0.684      0.735      0.634

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      2.45G     0.8337       1.09      1.176         61        640: 100%|██████████| 201/201 [07:23<00:00,  2.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:46<00:00,  3.61s/it]


                   all       1446       2853      0.884      0.692      0.737      0.638

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      2.45G     0.8221      1.076      1.178         64        640: 100%|██████████| 201/201 [07:17<00:00,  2.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:42<00:00,  3.53s/it]


                   all       1446       2853      0.884      0.698      0.741      0.644

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      2.43G     0.7963      1.028      1.159         78        640: 100%|██████████| 201/201 [07:17<00:00,  2.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:43<00:00,  3.56s/it]


                   all       1446       2853      0.867      0.698      0.742      0.645

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      2.43G     0.7846     0.9958      1.142         73        640: 100%|██████████| 201/201 [07:34<00:00,  2.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:44<00:00,  3.58s/it]


                   all       1446       2853      0.898      0.695      0.748      0.655

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      2.43G     0.7658     0.9752      1.144         95        640: 100%|██████████| 201/201 [07:24<00:00,  2.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:46<00:00,  3.62s/it]


                   all       1446       2853      0.933      0.698      0.752      0.658

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      2.43G     0.7518     0.9482      1.126         83        640: 100%|██████████| 201/201 [07:27<00:00,  2.23s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:45<00:00,  3.60s/it]


                   all       1446       2853      0.897      0.713      0.757      0.668

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      2.43G     0.7445     0.9412      1.127         73        640: 100%|██████████| 201/201 [07:27<00:00,  2.23s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:42<00:00,  3.53s/it]


                   all       1446       2853      0.884      0.719       0.76      0.672

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      2.45G     0.7385     0.9241      1.121         77        640: 100%|██████████| 201/201 [07:24<00:00,  2.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:42<00:00,  3.53s/it]


                   all       1446       2853      0.896      0.713      0.752      0.666
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      2.43G     0.7943     0.9693      1.155         35        640: 100%|██████████| 201/201 [08:17<00:00,  2.47s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:40<00:00,  3.50s/it]


                   all       1446       2853       0.86      0.723      0.758      0.672

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      2.43G     0.7793     0.9124      1.149         35        640: 100%|██████████| 201/201 [07:17<00:00,  2.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [02:43<00:00,  3.55s/it]


                   all       1446       2853      0.861      0.727      0.761      0.675

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      2.43G     0.7473     0.8605      1.132         29        640:  42%|████▏     | 85/201 [03:09<04:07,  2.13s/it]