In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import zipfile
import shutil
import os
import glob
import random
from datetime import datetime
import yaml

from tqdm import tqdm

# 1.데이터 준비

## 1-1 경로 설정

In [None]:
# Mount Google Drive at /content/gdrive; the later cells read files from under this mount point.
from google.colab import drive

drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [None]:
# Source folder on the mounted Drive that the later copy cells read from.
data_path = "/content/gdrive/MyDrive/"

# Working root on the VM and the dataset root created beneath it.
ROOT_DIR = "/content"
DATA_ROOT_DIR = os.path.join(ROOT_DIR, "damage_detection")

# One sub-directory per dataset split.
TRAIN_DATA_ROOT_DIR, VALID_DATA_ROOT_DIR = (
    os.path.join(DATA_ROOT_DIR, split_name) for split_name in ("train", "valid")
)

In [None]:
# Show the resolved train directory and dataset root, one per line.
print(TRAIN_DATA_ROOT_DIR, DATA_ROOT_DIR, sep="\n")

/content/damage_detection/train
/content/damage_detection


## 1-2 train 용 images, labels 파일 준비

In [None]:
# Copy the training-image archive from the Drive folder (data_path) into ROOT_DIR.
shutil.copy(src=os.path.join(data_path, "damage_train_images_20000.zip"), dst=ROOT_DIR)

'/content/damage_train_images_20000.zip'

In [None]:
# Copy the training-label archive from the Drive folder (data_path) into ROOT_DIR.
shutil.copy(src=os.path.join(data_path, "damage_train_labels_20000.zip"), dst=ROOT_DIR)

'/content/damage_train_labels_20000.zip'

In [None]:
# Copy the best.pt checkpoint file from the Drive folder (data_path) into ROOT_DIR.
shutil.copy(src=os.path.join(data_path, "best.pt"), dst=ROOT_DIR)

'/content/best.pt'

In [None]:
# Unpack the training-image archive into the train split directory.
with zipfile.ZipFile(os.path.join(ROOT_DIR, "damage_train_images_20000.zip")) as archive:
  archive.extractall(path=TRAIN_DATA_ROOT_DIR)
  print("success!")

success!


In [None]:
# Rename the "damage" folder under the train directory to "images", the folder
# name the rest of the notebook uses for the training images.
old_folder_name = os.path.join(TRAIN_DATA_ROOT_DIR, "damage")
new_folder_name = os.path.join(TRAIN_DATA_ROOT_DIR, "images")

# Make the cell safe to re-run: if "images" already exists and "damage" is gone,
# a previous run has done the rename. Any other state still goes through
# os.rename so a genuinely missing source folder raises instead of being hidden.
already_renamed = os.path.isdir(new_folder_name) and not os.path.exists(old_folder_name)
if not already_renamed:
  os.rename(old_folder_name, new_folder_name)


In [None]:
# Unpack the training-label archive into the train split directory.
with zipfile.ZipFile(os.path.join(ROOT_DIR, "damage_train_labels_20000.zip")) as archive:
  archive.extractall(path=TRAIN_DATA_ROOT_DIR)
  print("success!")


success!


### train 용 images파일과 label파일의 이름이 맞는지 set()으로 확인

In [None]:
# "images" and "labels" sub-directories of the train split.
TRAIN_DATA_IMAGE_ROOT_DIR, TRAIN_DATA_LABEL_ROOT_DIR = (
    os.path.join(TRAIN_DATA_ROOT_DIR, leaf) for leaf in ("images", "labels")
)

# Same layout for the validation split.
VALID_DATA_IMAGE_ROOT_DIR, VALID_DATA_LABEL_ROOT_DIR = (
    os.path.join(VALID_DATA_ROOT_DIR, leaf) for leaf in ("images", "labels")
)

In [None]:
# Sanity check for the train split: every image should have a label file with the
# same base name (file name without its extension).
image_file_name_list = [
  os.path.splitext(file)[0] for file in os.listdir(TRAIN_DATA_IMAGE_ROOT_DIR)
]
label_file_name_list = [
  os.path.splitext(file)[0] for file in os.listdir(TRAIN_DATA_LABEL_ROOT_DIR)
]

print("train/images 폴더의 파일 갯수: ", len(image_file_name_list))
print("train/labels 폴더의 파일 갯수: ", len(label_file_name_list))

image_file_set = set(image_file_name_list)
label_file_set = set(label_file_name_list)

# Intersect the image names with the LABEL names. Intersecting a set with itself
# would always report the image count and could never expose a missing pair.
print("동일한 이름을 갖는 img파일과 txt파일의 갯수: ", len(image_file_set & label_file_set))

train/images 폴더의 파일 갯수:  18144
train/labels 폴더의 파일 갯수:  18144
동일한 이름을 갖는 img파일과 txt파일의 갯수:  18144


## 1-3 validation 용 images, labels 파일 준비

In [None]:
# Copy the validation-image archive from the Drive folder (data_path) into ROOT_DIR.
shutil.copy(src=os.path.join(data_path, "damage_valid_images_20000.zip"), dst=ROOT_DIR)

'/content/damage_valid_images_20000.zip'

In [None]:
# Copy the validation-label archive from the Drive folder (data_path) into ROOT_DIR.
shutil.copy(src=os.path.join(data_path, "damage_valid_labels_20000.zip"), dst=ROOT_DIR)

'/content/damage_valid_labels_20000.zip'

In [None]:
# Unpack the validation-image archive into the valid split directory.
with zipfile.ZipFile(os.path.join(ROOT_DIR, "damage_valid_images_20000.zip")) as archive:
  archive.extractall(path=VALID_DATA_ROOT_DIR)
  print("success!")

success!


In [None]:
# Unpack the validation-label archive into the valid split directory.
with zipfile.ZipFile(os.path.join(ROOT_DIR, "damage_valid_labels_20000.zip")) as archive:
  archive.extractall(path=VALID_DATA_ROOT_DIR)
  print("success!")

success!


### valid image 파일과 label 파일의 이름이 맞는지 set()으로 확인

In [None]:
# Sanity check for the valid split: every image should have a label file with the
# same base name (file name without its extension).
image_file_name_list = [
  os.path.splitext(file)[0] for file in os.listdir(VALID_DATA_IMAGE_ROOT_DIR)
]
label_file_name_list = [
  os.path.splitext(file)[0] for file in os.listdir(VALID_DATA_LABEL_ROOT_DIR)
]

print("valid/images 폴더의 파일 갯수: ", len(image_file_name_list))
print("valid/labels 폴더의 파일 갯수: ", len(label_file_name_list))

image_file_set = set(image_file_name_list)
label_file_set = set(label_file_name_list)

# Intersect the image names with the LABEL names. Intersecting a set with itself
# would always report the image count and could never expose a missing pair.
print("동일한 이름을 갖는 img파일과 txt파일의 갯수: ", len(image_file_set & label_file_set))

valid/images 폴더의 파일 갯수:  3217
valid/labels 폴더의 파일 갯수:  3217
동일한 이름을 갖는 img파일과 txt파일의 갯수:  3217


# 2.Yolov5 source 가져오기

In [None]:
YOLOv5_ROOT_DIR = os.path.join(ROOT_DIR, "yolov5")

%rm -rf {YOLOv5_ROOT_DIR}

%cd {ROOT_DIR}

!git clone https://github.com/ultralytics/yolov5

/content
Cloning into 'yolov5'...
remote: Enumerating objects: 15814, done.[K
remote: Counting objects: 100% (46/46), done.[K
remote: Compressing objects: 100% (44/44), done.[K
remote: Total 15814 (delta 9), reused 23 (delta 2), pack-reused 15768[K
Receiving objects: 100% (15814/15814), 14.64 MiB | 1.58 MiB/s, done.
Resolving deltas: 100% (10821/10821), done.


## 2-1 yolov5 필수라이브러리 설치

In [None]:
!pip install -r {os.path.join(YOLOv5_ROOT_DIR, "requirements.txt")}

Collecting gitpython>=3.1.30 (from -r /content/yolov5/requirements.txt (line 5))
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/184.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from -r /content/yolov5/requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting ultralytics>=8.0.111 (from -r /content/yolov5/requirements.txt (line 18))
  Downloading ultralytics-8.0.128-py3-none-any.whl (625 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m625.9/625.9 kB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30->-r /content/yolov5/requirements.txt (line 5))
  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/

## 2-2 Pretrained 가중치 다운로드

In [None]:
!wget -P /content/yolov5/ https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5m.pt

--2023-07-06 04:05:48--  https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5m.pt
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/264818686/0c5931c4-1273-4bc0-bd56-5c9da71cd35b?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230706%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230706T040548Z&X-Amz-Expires=300&X-Amz-Signature=6b85af2cdfa0868f5afc228529522b12ff048588a472a4c4a49a804f989ae284&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=264818686&response-content-disposition=attachment%3B%20filename%3Dyolov5m.pt&response-content-type=application%2Foctet-stream [following]
--2023-07-06 04:05:48--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/264818686/0c5931c4-1273-4bc0-bd56-5c9da71cd35b?X-Amz-Algorithm=AW

## 2-3 Yaml 파일 설정

In [None]:
# Class names of the dataset; the class count written below is derived from this list
# so the two can never disagree.
class_names = ['Crushed', 'Scratched', 'Breakage', 'Separated']

# Dataset description passed to train.py through --data: image folders of both
# splits, number of classes, and class names.
data = {
    'train': TRAIN_DATA_IMAGE_ROOT_DIR,
    'val': VALID_DATA_IMAGE_ROOT_DIR,
    'nc': len(class_names),
    'names': class_names,
}

# Save as data.yaml under DATA_ROOT_DIR, built the same way the training cells
# build the path they pass to --data.
with open(os.path.join(DATA_ROOT_DIR, "data.yaml"), 'w') as file:
    yaml.dump(data, file)

# 3.학습

In [None]:
img_size = 576
batch_size = 32
epochs = 30

data_path = os.path.join(DATA_ROOT_DIR, "data.yaml")                # data에 대한 yaml파일
yaml_path = os.path.join(YOLOv5_ROOT_DIR, "models/yolov5s.yaml")    # 훈련해둔 yolo모델에 대한 yaml파일
weights_path = os.path.join(YOLOv5_ROOT_DIR, "yolov5m.pt")          # 훈련해둔 모델에 대한 정보

!python3 {os.path.join(YOLOv5_ROOT_DIR, "train.py")} \
     --img {img_size}     \
     --batch {batch_size} \
     --epochs {epochs}    \
     --data {data_path}   \
     --cfg {yaml_path}    \
     --weights {weights_path}

[34m[1mtrain: [0mweights=/content/yolov5/yolov5m.pt, cfg=/content/yolov5/models/yolov5s.yaml, data=/content/damage_detection/data.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=30, batch_size=32, imgsz=576, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-192-g459dd49 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1

In [None]:
img_size = 576
batch_size = 32
epochs = 50

data_path = os.path.join(DATA_ROOT_DIR, "data.yaml")                # data에 대한 yaml파일
yaml_path = os.path.join(YOLOv5_ROOT_DIR, "models/yolov5s.yaml")    # 훈련해둔 yolo모델에 대한 yaml파일
weights_path = os.path.join(YOLOv5_ROOT_DIR, "yolov5m.pt")          # 훈련해둔 모델에 대한 정보
checkpoint_path = os.path.join(ROOT_DIR, "best.pt")

!python3 {os.path.join(YOLOv5_ROOT_DIR, "train.py")} \
     --img {img_size}     \
     --batch {batch_size} \
     --epochs {epochs}    \
     --data {data_path}   \
     --cfg {yaml_path}    \
     --weights {checkpoint_path}

[34m[1mtrain: [0mweights=/content/best.pt, cfg=/content/yolov5/models/yolov5s.yaml, data=/content/damage_detection/data.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=50, batch_size=32, imgsz=576, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-193-g485da42 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05

# 4.예측

In [None]:
# Collect every file in ROOT_DIR/test-image whose name contains a dot, then report how many were found.
test_image_list = glob.glob(os.path.join(ROOT_DIR, "test-image", "*.*").replace(os.sep, "/") if os.sep != "/" else os.path.join(ROOT_DIR, "test-image/*.*"))
print(len(test_image_list))

10


In [None]:
# Sort the test image paths in place and print each one with its index.
test_image_list.sort()

for i, image_path in enumerate(test_image_list):
  print("i = ", i, image_path)

i =  0 /content/test-image/0003413_as-0082529.jpg
i =  1 /content/test-image/0003883_sc-133816.jpg
i =  2 /content/test-image/0004227_sc-1033779.jpg
i =  3 /content/test-image/0004622_as-0068203.jpg
i =  4 /content/test-image/0004928_sc-151189.jpg
i =  5 /content/test-image/0005286_as-2544031.jpg
i =  6 /content/test-image/0005807_as-0059800.jpg
i =  7 /content/test-image/0006019_sc-121160.jpg
i =  8 /content/test-image/0006255_as-0022242.jpg
i =  9 /content/test-image/0006713_as-0029978.jpg


In [None]:
test_data_path = os.path.join(ROOT_DIR, "test-image")
best_weights_path = ("/content/gdrive/MyDrive/runs/train/exp/weights/best.pt")

!python3 {os.path.join(YOLOv5_ROOT_DIR, "detect.py")} \
        --weights {best_weights_path}   \
        --source {test_data_path} \
        --img   512   \
        --conf  0.2

[34m[1mdetect: [0mweights=['/content/gdrive/MyDrive/runs/train/exp/weights/best.pt'], source=/content/test-image, data=yolov5/data/coco128.yaml, imgsz=[512, 512], conf_thres=0.2, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5/runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-189-ga453a45 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
YOLOv5s summary: 157 layers, 7020913 parameters, 0 gradients, 15.8 GFLOPs
image 1/10 /content/test-image/0003413_as-0082529.jpg: 384x512 1 Crushed, 1 Scratched, 65.2ms
image 2/10 /content/test-image/0003883_sc-133816.jpg: 384x512 3 Scratcheds, 7.5ms
image 3/10 /content/test-image/0004227_sc-1033779.jpg: 384x512 (no detections), 7.5ms
image 4/10 /content/test-image/0