# 本番のデータを使ってモデルを作る

接続テスト

In [1]:
import torch

# Connection test: report the installed PyTorch build and whether CUDA is usable.
print(f"Pytorchのバージョン：{torch.__version__}")

print(f"CUDAの利用可否：{torch.cuda.is_available()}")

# List every GPU that PyTorch can see, together with its index and name.
device_num: int = torch.cuda.device_count()
print(f"発見されたGPUデバイス数：{device_num}")
for gpu_index in range(device_num):
    gpu_name = torch.cuda.get_device_name(gpu_index)
    print(f"デバイス番号：{gpu_index} | デバイス名：{gpu_name}")

Pytorchのバージョン：2.1.0.dev20230719+cu121
CUDAの利用可否：True
発見されたGPUデバイス数：1
デバイス番号：0 | デバイス名：NVIDIA GeForce RTX 3060 Ti


必要なライブラリをインポート

In [2]:
# Standard libraries
import os
import random
import shutil

# Third-party libraries
import matplotlib.pyplot as plt
from ultralytics import YOLO

# Jupyter magic command
%matplotlib inline

必要な関数を定義

In [3]:
# rawdata → 複数のフォルダへ振り分ける関数
# 引数：folder1_path, folder1_ratio, folder2_path, folder2_ratio, ...

def split_and_copy_data(*args, rawdata_path="../datasets/rawdata", seed=None):
    """Randomly distribute the paired image/label files of the raw-data folder.

    A "pair" is a ``<name>.png`` file that has a ``<name>.txt`` file next to it
    in ``rawdata_path``. The pairs are shuffled and copied (not moved) into the
    target folders according to the given ratios. Each target folder is created
    if it is missing and emptied before new files are copied into it.

    Split boundaries are computed from the cumulative ratio and the last folder
    receives every remaining pair, so no pair is dropped by rounding.

    Args:
        *args: Alternating ``folder_path, ratio, folder_path, ratio, ...``
            (at least two folders). Ratios must be non-negative and sum to
            approximately 1.
        rawdata_path: Folder that holds the source ``.png``/``.txt`` pairs.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If the arguments are not folder/ratio pairs, a ratio is
            negative, the ratios do not sum to about 1, or a target folder is
            the raw-data folder itself.
    """
    if len(args) % 2 != 0 or len(args) < 4:
        raise ValueError("Arguments should be even and at least consist of 2 folder paths and their ratios.")

    folder_paths = args[0::2]
    ratios = args[1::2]

    total_ratio = sum(ratios)
    if not 0.99 <= total_ratio <= 1.01:  # Minor flexibility to handle floating point approximations
        raise ValueError("Sum of all ratios should be close to 1 (or 100%).")
    if any(ratio < 0 for ratio in ratios):
        raise ValueError("Ratios must not be negative.")

    # Target folders are emptied below, so refuse to treat the source as a target.
    raw_real = os.path.realpath(rawdata_path)
    if any(os.path.realpath(path) == raw_real for path in folder_paths):
        raise ValueError("A target folder must not be the raw-data folder; its contents would be deleted.")

    # 1. Collect the png files that have a matching txt label file.
    #    A set keeps the membership test cheap; sorting makes a seeded shuffle
    #    independent of the directory listing order.
    all_files = set(os.listdir(rawdata_path))
    paired_files = sorted(
        f for f in all_files if f.endswith('.png') and f[:-4] + '.txt' in all_files
    )

    paired_files_count = len(paired_files)
    print(f"Total paired files: {paired_files_count}")

    # 2. Shuffle, then hand out consecutive slices to the target folders.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(paired_files)

    start = 0
    cumulative_ratio = 0.0
    last_position = len(folder_paths) - 1
    for position, (folder_path, ratio) in enumerate(zip(folder_paths, ratios)):
        os.makedirs(folder_path, exist_ok=True)

        # Empty the target folder (files, links and sub-directories alike).
        existing_entries = os.listdir(folder_path)
        if existing_entries:
            for entry in existing_entries:
                entry_path = os.path.join(folder_path, entry)
                if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                    shutil.rmtree(entry_path)
                else:
                    os.remove(entry_path)
            print(f"Contents in {folder_path} were deleted to proceed.")

        cumulative_ratio += ratio
        if position == last_position:
            # The last folder takes whatever is left so rounding never loses a pair.
            stop = paired_files_count
        else:
            stop = round(paired_files_count * cumulative_ratio)
            stop = min(paired_files_count, max(start, stop))
        selected_files = paired_files[start:stop]
        start = stop

        # Copy each png together with its txt label.
        for f in selected_files:
            shutil.copy(os.path.join(rawdata_path, f), folder_path)
            shutil.copy(os.path.join(rawdata_path, f[:-4] + '.txt'), folder_path)

    print("Finished copying files!")

# 実行例
# split_and_copy_data("../datasets/train", 0.8, "../datasets/valid", 0.2)
# split_and_copy_data("../datasets/train", 0.6, "../datasets/valid", 0.2, "../datasets/test", 0.2)


In [4]:
# 特定のフォルダ内のデータを削除する関数

def clear_data(directory_path):
    """Delete everything inside ``directory_path`` while keeping the folder itself.

    Files and symbolic links are unlinked; sub-directories are removed
    recursively. Deletion is best-effort: an entry that cannot be removed is
    reported and the remaining entries are still processed.

    Args:
        directory_path: Folder whose contents should be removed. A trailing
            path separator is accepted.

    Returns:
        None. A summary line is printed that says whether the folder was
        cleared completely or how many entries could not be deleted.
    """
    failed_count = 0
    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)  # remove a file or symbolic link
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)  # remove a directory tree
        except OSError as e:
            failed_count += 1
            print(f"Failed to delete {file_path}. Reason: {e}")

    # normpath strips a trailing separator, which would otherwise make
    # basename() return an empty string.
    directory_name = os.path.basename(os.path.normpath(directory_path))
    if failed_count:
        print(f"{directory_name} directory could not be fully cleared ({failed_count} item(s) failed).")
    else:
        print(f"{directory_name} directory has been cleared!")

# 実行例
# clear_data("../datasets/train")
# clear_data("../datasets/valid")


モデルの作成

In [5]:
# Distribute the raw data: 80% of the pairs go to train, 20% to valid.
train_path = "../datasets/train"
valid_path = "../datasets/valid"
split_and_copy_data(train_path, 0.8, valid_path, 0.2)


# Load the base model that will be fine-tuned.
model = YOLO('yolov8x.pt')


# Fine-tune the base model, timing the run with a pair of CUDA events.
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()
model.train(data="../datasets/data_path.yaml", batch=8, epochs=50 ,device=0, project="9_22_1")
end.record()

# Wait for outstanding GPU work so the end event has completed before the
# timer is read; the result is divided by 1000 to print seconds below.
torch.cuda.synchronize()
elapsed_time = start.elapsed_time(end) / 1000

print('---------------------------------')
print(f'学習時間 : {elapsed_time} 秒')


# Evaluate the performance of the trained model.
# NOTE(review): the recorded output of this cell shows validation scanning a
# valid.cache with only 10 images although 20% of 1372 pairs had just been
# copied to valid — possibly a stale label cache; verify.
metrics = model.val()


# Remove the copied data from train and valid again.
clear_data(train_path)
clear_data(valid_path)

Total paired files: 1372
Contents in ../datasets/train were deleted to proceed.
Contents in ../datasets/valid were deleted to proceed.
Finished copying files!


New https://pypi.org/project/ultralytics/8.0.184 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.149 🚀 Python-3.10.12 torch-2.1.0.dev20230719+cu121 CUDA:0 (NVIDIA GeForce RTX 3060 Ti, 7940MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=../datasets/data_path.yaml, epochs=50, patience=50, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=9_22_1, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=Fal

---------------------------------
学習時間 : 366.75040625 秒


Model summary (fused): 268 layers, 68128383 parameters, 0 gradients
[34m[1mval: [0mScanning /home/kotameyan/Programing/labo/workspace/datasets/valid.cache... 10 images, 0 backgrounds, 0 corrupt: 100%|██████████| 10/10 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:01<00:00,  1.26it/s]
                   all         10        103      0.955      0.854      0.952      0.566
             flowering         10        103      0.955      0.854      0.952      0.566
Speed: 0.1ms preprocess, 66.0ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1m9_22_1/val[0m


train directory has been cleared!
valid directory has been cleared!
