<a href="https://colab.research.google.com/github/shuheilocale/googlecolab-test/blob/main/yolov5_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. ライブラリimport

In [None]:
import os
import glob
import json
import yaml
import numpy as np
import pandas as pd
import cv2
from google.colab.patches import cv2_imshow
import xml.etree.ElementTree as ET

## 2. パス設定

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Directory on Google Drive that contains the train/ and test/ folders.
INPUT_PATH = '/content/drive/MyDrive/'

# Collect the training images, their XML annotations and the test images.
train_image_paths = glob.glob(os.path.join(INPUT_PATH, 'train', '*.jpeg'))
train_xml_paths = glob.glob(os.path.join(INPUT_PATH, 'train', '*.xml'))
test_image_paths = glob.glob(os.path.join(INPUT_PATH, 'test', '*.jpeg'))

# Show how many files were found in each group.
tuple(len(paths) for paths in (train_image_paths, train_xml_paths, test_image_paths))

(300, 300, 190)

## 3. 画像データおよびアノテーションデータ変換用関数の定義

In [None]:
# Binarize an image (inverted black/white) and remove the frame lines.
def transform(img, thresh=128, roi=(90, 890, 300, 1740)):
  """Return an inverted binary copy of ``img`` with everything outside ``roi`` set to 0.

  Args:
    img: BGR image array as returned by ``cv2.imread``.
    thresh: Gray level passed to ``cv2.threshold``; with ``THRESH_BINARY_INV``
      pixels above it become 0 and all others become 255.
    roi: ``(top, bottom, left, right)`` pixel bounds of the region to keep.
      The default is the window this notebook has always used.

  Returns:
    A single-channel ``uint8`` array with the same height and width as ``img``.

  Raises:
    ValueError: If ``img`` is ``None`` (``cv2.imread`` returns ``None`` when a
      file cannot be read).
  """
  if img is None:
    raise ValueError('transform() received None instead of an image')
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  # The first return value (the threshold actually used) is not needed.
  _, binary = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY_INV)
  top, bottom, left, right = roi
  black = np.zeros_like(binary, dtype=np.uint8)
  # Copy only the region of interest; the rest stays black.
  black[top:bottom, left:right] = binary[top:bottom, left:right]
  return black

In [None]:
# Class names; the position of a name in this list is its YOLO class id.
classes = ['circle', 'round_rectangle', 'rectangle', 'triangle']

# Convert one XML annotation file into a YOLO-format txt file.
def convert_annotation(xml_path, txt_path):
  """Write every ``object`` element of ``xml_path`` to ``txt_path`` as a label line.

  Each output line is ``<class id> <xcenter> <ycenter> <width> <height>``,
  where the four box values come from ``convert_xyxy_to_yolo``.

  Args:
    xml_path: Path of the XML annotation file to read.
    txt_path: Path of the txt file to write (opened in ``'w'`` mode).

  Raises:
    ValueError: If an object's ``name`` is not listed in ``classes``.
  """
  # Passing the path lets ElementTree open and close the file itself, so no
  # file handle is left open.
  root = ET.parse(xml_path).getroot()
  with open(txt_path, mode='w') as f:
    for obj in root.iter('object'):
      cls_id = classes.index(obj.find('name').text)
      xmlbox = obj.find('bndbox')
      # Parse via float first so values such as "12.0" are accepted, then
      # truncate to int.
      b = tuple(int(float(xmlbox.find(tag).text)) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
      f.write(str(cls_id)+' '+' '.join([str(x) for x in convert_xyxy_to_yolo(b)])+'\n')

# Convert a bounding box from corner coordinates to YOLO format.
def convert_xyxy_to_yolo(bbox, size=(2100, 1181)):
  """Return ``(xcenter, ycenter, width, height)`` of ``bbox`` divided by the image size.

  ``bbox`` is ``(xmin, ymin, xmax, ymax)``. x values are divided by
  ``size[0]`` and y values by ``size[1]``.
  """
  left, top, right, bottom = bbox
  img_w, img_h = size[0], size[1]
  center_x = (right+left)/2
  center_y = (bottom+top)/2
  return center_x/img_w, center_y/img_h, (right-left)/img_w, (bottom-top)/img_h

## 4. 画像データおよびアノテーションデータの変換

In [None]:
# Local directory that receives the transformed images and converted labels.
OUTPUT_PATH = '/content/transformed/'
for subdir in ('train', 'test'):
  os.makedirs(OUTPUT_PATH+subdir, exist_ok=True)

In [None]:
def _to_output_path(inpath):
  """Map a path under INPUT_PATH to the same relative location under OUTPUT_PATH."""
  return os.path.join(OUTPUT_PATH, os.path.relpath(inpath, INPUT_PATH))

def _transform_images(image_paths):
  """Apply transform() to every image and write the result to its mirrored output path."""
  for inpath in image_paths:
    img = cv2.imread(inpath)
    if img is None:
      # cv2.imread reports an unreadable file by returning None, not by raising.
      raise IOError('failed to read image: '+inpath)
    outpath = _to_output_path(inpath)
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(outpath, transform(img)):
      raise IOError('failed to write image: '+outpath)

# Training images.
_transform_images(train_image_paths)

# Training annotations: XML -> YOLO txt, written next to the transformed images.
for inpath in train_xml_paths:
  # Swap only the extension; a plain str.replace would also touch '.xml'
  # appearing elsewhere in the path.
  outpath = os.path.splitext(_to_output_path(inpath))[0]+'.txt'
  convert_annotation(inpath, outpath)

# Test images.
_transform_images(test_image_paths)

## 5. YOLOv5のインストール、パラメータ設定

YOLOv5の実装に際し下記を参照しました。  
YOLOv5 github: https://github.com/ultralytics/yolov5  
チュートリアル: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data  
参考記事1: https://qiita.com/shoku-pan/items/31bf3c975b73db153121  
参考記事2: https://www.alpha.co.jp/blog/202108_02  

In [None]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt -q

Cloning into 'yolov5'...
remote: Enumerating objects: 9149, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 9149 (delta 0), reused 2 (delta 0), pack-reused 9143[K
Receiving objects: 100% (9149/9149), 9.75 MiB | 36.45 MiB/s, done.
Resolving deltas: 100% (6345/6345), done.
/content/yolov5
[K     |████████████████████████████████| 636 kB 8.5 MB/s 
[?25h

In [None]:
with open('data.yaml', 'w') as yf:
    yaml.dump({
        'train': '../transformed/train',
        'val': '../transformed/test',
        'nc':4,
        'names':['circle', 'round_rectangle', 'rectangle', 'triangle']
        }, yf, default_flow_style=False)
%cat data.yaml

names:
- circle
- round_rectangle
- rectangle
- triangle
nc: 4
train: ../transformed/train
val: ../transformed/test


## 6. モデルの学習

In [None]:
# --weights: to start from pretrained weights, pass e.g. 'yolov5s.pt' (an empty string is passed here)
!python train.py --data data.yaml --cfg yolov5s.yaml --weights '' --batch-size 8 --epochs 100

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[34m[1mtrain: [0mweights=, cfg=yolov5s.yaml, data=data.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=100, batch_size=8, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=runs/train, entity=None, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=-1, artifact_alias=latest, local_rank=-1, freeze=0, patience=100
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v5.0-455-g59aae85 torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls

## 7. モデルによる切削穴の検出

In [None]:
# --save-txt: save the bounding boxes (YOLO format) to txt files
# --save-conf: append the confidence score to the txt files
!python detect.py --source ../transformed/test/ --weights runs/train/exp/weights/best.pt --save-txt --save-conf
# The results are saved to runs/detect/exp/labels

[34m[1mdetect: [0mweights=['runs/train/exp/weights/best.pt'], source=../transformed/test/, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False
YOLOv5 🚀 v5.0-455-g59aae85 torch 1.9.0+cu102 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

Fusing layers... 
Model Summary: 224 layers, 7062001 parameters, 0 gradients, 16.4 GFLOPs
image 1/190 /content/transformed/test/001.jpeg: 384x640 3 rectangles, 3 triangles, Done. (0.007s)
image 2/190 /content/transformed/test/002.jpeg: 384x640 2 circles, 6 round_rectangles, 4 rectangles, 4 triangles, Done. (0.007s)
image 3/190 /content/transformed/test/003.jpeg: 384x640 3 circles, 1 round_rectangle, Done. (0.007s)
image 4/190 /content/transformed/test/004.jpeg: 384x640

## 8. 提出ファイルの出力

In [None]:
# Convert a bounding box from YOLO format back to corner coordinates.
def convert_yolo_to_xyxy(bbox, size=(2100, 1181)):
  """Return ``(xmin, ymin, xmax, ymax)`` for a ``(xcenter, ycenter, width, height)`` box.

  x values are multiplied by ``size[0]`` and y values by ``size[1]``.
  """
  cx, cy, box_w, box_h = bbox
  half_w = box_w/2
  half_h = box_h/2
  scale_x, scale_y = size[0], size[1]
  return (cx-half_w)*scale_x, (cy-half_h)*scale_y, (cx+half_w)*scale_x, (cy+half_h)*scale_y

In [None]:
# Aggregate the per-image detection txt files into a single dictionary.
# detect.py is run with exist_ok=False and this notebook refers to both
# runs/detect/exp and runs/detect/exp2, so a hard-coded run name breaks as soon
# as the number of detect runs changes. Use the most recently modified labels
# directory instead.
DETECT_ROOT = '/content/yolov5/runs/detect'
label_dirs = [d for d in glob.glob(os.path.join(DETECT_ROOT, 'exp*', 'labels')) if os.path.isdir(d)]
if not label_dirs:
  raise FileNotFoundError('no labels directory under '+DETECT_ROOT+' (run detect.py with --save-txt first)')
TXT_PATH = max(label_dirs, key=os.path.getmtime)

dict_to_json = {}
# Sorted so the key order of the result does not depend on os.listdir order.
for text_file in sorted(os.listdir(TXT_PATH)):
  if not text_file.endswith('.txt'):
    continue

  detections = []  # (confidence, class name, [xmin, ymin, xmax, ymax])
  with open(os.path.join(TXT_PATH, text_file), 'r') as f:
    for line in f:
      fields = line.split()
      if not fields:
        continue  # tolerate blank lines
      # Field layout: class id, xcenter, ycenter, width, height, confidence.
      xcenter, ycenter, width, height, confidence = (float(v) for v in fields[1:6])
      box = list(convert_yolo_to_xyxy((xcenter, ycenter, width, height)))
      detections.append((confidence, classes[int(fields[0])], box))

  # Highest confidence first; list.sort is stable, so ties keep file order.
  detections.sort(key=lambda det: det[0], reverse=True)

  # Swap only the extension; str.replace('txt', 'jpeg') would also rewrite a
  # 'txt' that appears elsewhere in the file name.
  image_name = os.path.splitext(text_file)[0]+'.jpeg'

  # Group the boxes by class name, keeping the confidence order within a class.
  boxes_by_class = {}
  for confidence, label, box in detections:
    boxes_by_class.setdefault(label, []).append(box)
  dict_to_json[image_name] = boxes_by_class

In [None]:
# Preview only the first few images; displaying the whole dictionary floods
# the notebook output.
PREVIEW_COUNT = 3
display(dict(list(dict_to_json.items())[:PREVIEW_COUNT]))

{'001.jpeg': {'rectangle': [[1068.9995399999998,
    431.99976150000003,
    1127.99946,
    568.0001785],
   [856.9997099999999, 432.99947799999995, 1023.9996900000001, 567.999588],
   [861.0007350000001, 360.00039175000006, 1057.000665, 412.00041225]],
  'triangle': [[752.9989949999999,
    383.00007150000005,
    847.999005,
    584.0003665],
   [1188.000765, 393.000189, 1286.000835, 609.0003650000001],
   [1071.9996, 272.99978285, 1282.9992000000002, 367.99977715]]},
 '002.jpeg': {'circle': [[1182.998985,
    372.999954,
    1278.9990149999999,
    472.999948],
   [1020.0002400000001, 363.99984825, 1121.00016, 465.99986575]],
  'rectangle': [[1306.99989, 248.9997961, 1370.9999100000002, 318.9997919],
   [933.00039, 253.00043359999998, 997.0004099999999, 319.0003824],
   [1300.999245, 513.0005361, 1369.999155, 583.0005318999999],
   [935.99898, 513.9996621, 1000.99902, 583.9996579]],
  'round_rectangle': [[1153.0006950000002,
    470.9995702,
    1290.0007050000002,
    537.9995898]

In [None]:
# Write the aggregated dictionary to a JSON file, the submission format.
SAVE_PATH = '/content/'
submission_path = SAVE_PATH+'submission.json'
with open(submission_path, 'w') as contents:
  json.dump(dict_to_json, contents)