In [1]:
import ast
import os
import shutil

import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import Image, clear_output
from sklearn.model_selection import StratifiedKFold, train_test_split
from tqdm import tqdm
# Display the value of every expression statement in a cell, not only the last one
# (several cells below rely on this to show more than one result).
InteractiveShell.ast_node_interactivity = "all"

In [8]:
# shutil.rmtree('/app/_data/jpg_transformed')

In [2]:
# !git clone https://github.com/ultralytics/yolov5  # clone repo
# clear_output()

In [3]:
# Work from the yolov5 directory: later cells use paths relative to it
# (requirements.txt, train.py, detect.py, ./runs/...).
os.chdir("/app/_data/yolov5")

In [4]:
# Install the packages listed in requirements.txt (resolved against the current
# working directory), then wipe the installer log from the cell output.
!pip install -r requirements.txt
clear_output()

In [5]:
# Load the two annotation tables that are merged further down:
#   df     - the primary bounding-box table
#   labels - the annotations of the additional dataset
# df = pd.read_csv("/app/_data/transformed_bboxes_df.csv")
df = pd.read_csv("/app/_data/bboxes.csv")
labels = pd.read_csv('/app/_data/additional_dataset/labels_full.csv')

In [6]:
# Class balance of the additional dataset, followed by the shape of the subset
# where Target == 0 and x_center is missing.
labels['Target'].value_counts()
labels[(labels['Target']==0)&(labels['x_center']).isna()].shape

0    20672
1     9555
Name: Target, dtype: int64

(20672, 17)

In [7]:
# Map the additional dataset onto the column names used by `df`, derive the text
# label from Target, and keep only the columns `df` has after its first one.
labels = labels.assign(
    img=labels["patientId"],
    bbox=labels["Target"],
    width=labels["w"],
    height=labels["h"],
    label=labels["Target"].replace({0: "negative", 1: "positive"}),
)[df.columns[1:]]
labels.head()

Unnamed: 0,img,label,bbox,x_center,y_center,width,height,class
0,0004cfab-14fd-4e49-80ba-63a80b6bddd6,negative,0,,,,,0
1,00313ee0-9eaa-42f4-b0ab-c148ed3241cd,negative,0,,,,,0
2,00322d4d-1c29-4943-afc9-b6754be640eb,negative,0,,,,,0
3,003d8fa0-6bf1-40ed-b54c-ac657f8495c5,negative,0,,,,,0
4,00436515-870c-4b36-a041-de91049b9ab4,positive,1,0.361816,0.333496,0.208008,0.370117,0


In [8]:
# Keep every positive row but only a 1000-row sample of the negative rows.
# The sample is seeded so that the merged dataset, and the folds derived from it,
# are identical on every run of the notebook.
labels_1 = labels.query('bbox==1')
labels_0 = labels.query('bbox==0').sample(1000, random_state=42)


In [9]:
# Stack the original boxes (minus their first column) with the positive and the
# sampled negative rows of the additional dataset, then shuffle with a fixed seed.
df = (
    pd.concat(
        [df[df.columns[1:]], labels_1, labels_0],
        axis=0,
        ignore_index=True,
    )
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [10]:
# Sanity check: (rows, columns) of the merged annotation table.
df.shape

(20144, 8)

In [11]:
# Shared run settings. In the code visible in this notebook only "SEED" is read
# (by the StratifiedKFold split); image size and batch size are passed to
# train.py as literal flags rather than taken from this dict.
# NOTE(review): "IMG_HEIGH" looks like a typo for "IMG_HEIGHT"; the key is left
# unchanged in case code outside this view reads it.
base_config = {
    "IMG_HEIGH": 640,
    "IMG_WIDTH": 640,
    "BATCH_SIZE": 20,
    "SEED": 1488,
}

In [12]:
# Collapse the per-box table to one row per image, keeping the first `label` and
# `bbox` seen for that image, and expose the image id as a regular column.
groupped_data = (
    pd.pivot_table(df, index="img", values=["label", "bbox"], aggfunc="first")
    .assign(img=lambda per_image: per_image.index.tolist())
    .reset_index(drop=True)
)

In [13]:
# Preview the per-image table (columns: bbox, label, img).
groupped_data

Unnamed: 0,bbox,label,img
0,0,negative,0004cfab-14fd-4e49-80ba-63a80b6bddd6
1,1,typical,000a312787f2
2,0,negative,000c3a3f293f
3,1,positive,000db696-cf54-4385-b10b-6b16fbb3f985
4,1,positive,000fe35a-2649-43d4-b027-e67796d412e0
...,...,...,...
13037,1,typical,ffd9b6cf2961
13038,1,typical,ffdc682f7680
13039,1,typical,ffe942c8655f
13040,1,positive,fff0b503-72a5-446a-843d-f3d152e39053


In [14]:
# Number of images per bbox flag value.
groupped_data['bbox'].value_counts()

1    10306
0     2736
Name: bbox, dtype: int64

In [15]:
# Five shuffled folds over the per-image table, stratified by the text label and
# seeded from base_config. Each entry of train_ids / val_ids is the index array
# returned by the splitter for one fold.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=base_config["SEED"],
)
train_ids, val_ids = [], []
for train_index, valid_index in skf.split(X=groupped_data, y=groupped_data["label"]):
    train_ids.append(train_index)
    val_ids.append(valid_index)

In [16]:
# Pick fold `m` and turn its row indices into lists of image ids.
m = 0
train_imgs = groupped_data.loc[train_ids[m], "img"].tolist()
val_imgs = groupped_data.loc[val_ids[m], "img"].tolist()


In [17]:
def make_yolo_dataset(
    df,
    train_imgs,
    val_imgs,
    fold_name,
    images_dir="/app/_data/jpg/",
    dataset_root="/app/_data/yolo5_dataset/",
):
    """Lay out one train/val fold on disk as images plus per-image label files.

    Creates ``<dataset_root>/<fold_name>/{images,labels}/{train,val}``, copies
    ``<images_dir>/<img>.jpg`` for every image into the matching ``images``
    split, and writes ``labels/<split>/<img>.txt`` for images whose first row
    has ``bbox == 1``. Images whose first row has any other ``bbox`` value get
    no label file.

    Args:
        df: DataFrame with one row per box and the columns ``img``, ``bbox``,
            ``class``, ``x_center``, ``y_center``, ``width`` and ``height``.
            The four box values are written verbatim; they are assumed to be
            in the normalised form the trainer expects and are not checked here.
        train_imgs: Iterable of image ids that belong to the training split.
        val_imgs: Iterable of image ids that belong to the validation split.
        fold_name: Name of the sub-directory created under ``dataset_root``.
        images_dir: Directory holding the source ``<img>.jpg`` files.
        dataset_root: Directory under which the fold directory is created.

    Raises:
        FileExistsError: If any of the four split directories already exists.
            This is kept on purpose so files from an earlier fold are never
            mixed into a new one; remove the old fold directory first.
        FileNotFoundError: If a source image is missing.
    """
    path = os.path.join(dataset_root, fold_name)
    img_path = os.path.join(path, "images")
    labels_path = os.path.join(path, "labels")
    for split in ("train", "val"):
        os.makedirs(os.path.join(img_path, split))
        os.makedirs(os.path.join(labels_path, split))

    # Sets give constant-time membership tests; lists made the loop quadratic.
    train_set = set(train_imgs)
    val_set = set(val_imgs)
    box_columns = ["class", "x_center", "y_center", "width", "height"]

    # One pass over the table instead of a boolean mask per image.
    # sort=False keeps the images in order of first appearance.
    for img_name, img_rows in tqdm(df.groupby("img", sort=False)):
        if img_name in train_set:
            split = "train"
        elif img_name in val_set:
            split = "val"
        else:
            # Not assigned to either split: skip it rather than reuse whatever
            # split the previous iteration happened to leave behind.
            continue

        shutil.copy(
            os.path.join(images_dir, img_name + ".jpg"),
            os.path.join(img_path, split, img_name + ".jpg"),
        )

        # The first row decides whether the image carries boxes.
        if img_rows["bbox"].iloc[0] == 1:
            label_file = os.path.join(labels_path, split, img_name + ".txt")
            with open(label_file, "w") as f:
                for cls, *coords in img_rows[box_columns].values:
                    # The class id is an index, so write "0" rather than "0.0".
                    fields = [str(int(cls))] + [str(value) for value in coords]
                    f.write(" ".join(fields))
                    f.write("\n")

In [20]:
# shutil.rmtree('/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/')

In [21]:
# Write the images and label files for the selected fold to disk.
make_yolo_dataset(df, train_imgs, val_imgs, "yolo5l6_skf_voi_1")

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 13042/13042 [00:59<00:00, 218.86it/s]


In [22]:
# File counts per split. Label counts can be lower than image counts because
# make_yolo_dataset writes a label file only for images whose first row has bbox == 1.
len(os.listdir("/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/labels/train"))
len(os.listdir("/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/images/train"))
len(os.listdir("/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/labels/val"))
len(os.listdir("/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/images/val"))

8245

10433

2061

2609

In [23]:
# customize iPython writefile so we can write variables

from IPython.core.magic import register_line_cell_magic


@register_line_cell_magic
def writetemplate(line, cell=None):
    """Write the cell body to the file named on the magic line.

    The body is passed through ``str.format`` with the current module globals,
    so ``{name}`` placeholders are replaced by the values of notebook
    variables. Literal braces in the template must therefore be doubled
    (``{{`` / ``}}``).

    Args:
        line: Text following the magic name; used as the output file path.
        cell: The cell body to use as the template. It is ``None`` when the
            magic is invoked in line mode, which carries no template.

    Raises:
        ValueError: If invoked as a line magic (no cell body).
        KeyError: If the template references a name that is not a global.
    """
    if cell is None:
        # Line-mode invocation: fail with a clear message instead of an
        # opaque TypeError about a missing positional argument.
        raise ValueError(
            "writetemplate has no template to write; use it as a cell magic: "
            "%%writetemplate <path>"
        )
    with open(line, "w") as f:
        f.write(cell.format(**globals()))

In [24]:
# Directory that receives the generated data/model YAML files. exist_ok makes the
# cell safe to re-run without a separate (racy) existence check.
os.makedirs("/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile", exist_ok=True)

In [25]:
%%writetemplate /app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile/data.yaml
# train and val data
train: /app/_data/yolo5_dataset/yolo5l6_skf_voi_1/images/train
val: /app/_data/yolo5_dataset/yolo5l6_skf_voi_1/images/val
# number of classes
nc: 1
# class names
names: ["opacity"]

In [26]:
%%writetemplate /app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile/customYOLOv5l6.yaml
# parameters
nc: 1  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [ 19,27,  44,40,  38,94 ]  # P3/8
  - [ 96,68,  86,152,  180,137 ]  # P4/16
  - [ 140,301,  303,264,  238,542 ]  # P5/32
  - [ 436,615,  739,380,  925,792 ]  # P6/64

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [ [ -1, 1, Focus, [ 64, 3 ] ],  # 0-P1/2
    [ -1, 1, Conv, [ 128, 3, 2 ] ],  # 1-P2/4
    [ -1, 3, C3, [ 128 ] ],
    [ -1, 1, Conv, [ 256, 3, 2 ] ],  # 3-P3/8
    [ -1, 9, C3, [ 256 ] ],
    [ -1, 1, Conv, [ 512, 3, 2 ] ],  # 5-P4/16
    [ -1, 9, C3, [ 512 ] ],
    [ -1, 1, Conv, [ 768, 3, 2 ] ],  # 7-P5/32
    [ -1, 3, C3, [ 768 ] ],
    [ -1, 1, Conv, [ 1024, 3, 2 ] ],  # 9-P6/64
    [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
    [ -1, 3, C3, [ 1024, False ] ],  # 11
  ]

# YOLOv5 head
head:
  [ [ -1, 1, Conv, [ 768, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 8 ], 1, Concat, [ 1 ] ],  # cat backbone P5
    [ -1, 3, C3, [ 768, False ] ],  # 15

    [ -1, 1, Conv, [ 512, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 6 ], 1, Concat, [ 1 ] ],  # cat backbone P4
    [ -1, 3, C3, [ 512, False ] ],  # 19

    [ -1, 1, Conv, [ 256, 1, 1 ] ],
    [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
    [ [ -1, 4 ], 1, Concat, [ 1 ] ],  # cat backbone P3
    [ -1, 3, C3, [ 256, False ] ],  # 23 (P3/8-small)

    [ -1, 1, Conv, [ 256, 3, 2 ] ],
    [ [ -1, 20 ], 1, Concat, [ 1 ] ],  # cat head P4
    [ -1, 3, C3, [ 512, False ] ],  # 26 (P4/16-medium)

    [ -1, 1, Conv, [ 512, 3, 2 ] ],
    [ [ -1, 16 ], 1, Concat, [ 1 ] ],  # cat head P5
    [ -1, 3, C3, [ 768, False ] ],  # 29 (P5/32-large)

    [ -1, 1, Conv, [ 768, 3, 2 ] ],
    [ [ -1, 12 ], 1, Concat, [ 1 ] ],  # cat head P6
    [ -1, 3, C3, [ 1024, False ] ],  # 32 (P6/64-xlarge)

    [ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ],  # Detect(P3, P4, P5, P6)
  ]

In [27]:
# if os.path.exists('/app/_data/yolov5/runs/train/yolov5l6_voi_0'):
#     shutil.rmtree('/app/_data/yolov5/runs/train/yolov5l6_voi_0')

In [None]:
%%time
# Train starting from the yolov5l6.pt weights, using the data.yaml and
# customYOLOv5l6.yaml files in the fold's DataFile directory.
# --img and --batch repeat the 640 / 20 values held in base_config.
!python train.py --img 640 \
                 --batch 20 \
                 --epochs 150 \
                 --data /app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile/data.yaml \
                 --cfg /app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile/customYOLOv5l6.yaml \
                 --weights yolov5l6.pt  \
                 --name yolov5l6_voi_1 --cache

[34m[1mgithub: [0mskipping check (Docker image), for updates see https://github.com/ultralytics/yolov5
YOLOv5 ðŸš€ v5.0-150-gabb2a96 torch 1.8.1+cu111 CUDA:0 (NVIDIA GeForce RTX 3090, 24268.3125MB)

Namespace(adam=False, artifact_alias='latest', batch_size=20, bbox_interval=-1, bucket='', cache_images=True, cfg='/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile/customYOLOv5l6.yaml', data='/app/_data/yolo5_dataset/yolo5l6_skf_voi_1/DataFile/data.yaml', device='', entity=None, epochs=150, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], label_smoothing=0.0, linear_lr=False, local_rank=-1, multi_scale=False, name='yolov5l6_voi_1', noautoanchor=False, nosave=False, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/yolov5l6_voi_1', save_period=-1, single_cls=False, sync_bn=False, total_batch_size=20, upload_dataset=False, weights='yolov5l6.pt', workers=8, world_size=1)
[34m

In [None]:
# shutil.rmtree('/app/_data/yolov5/runs/train/yolov5x_trans_0')

In [None]:
# Show the training curves image of a run.
# NOTE(review): "Result15" is not the run name passed to train.py in this notebook
# (yolov5l6_voi_1) - confirm which run this should display.
Image("./runs/train/Result15/results.png")

In [None]:
# Show the confusion matrix image of a run.
# NOTE(review): this path is under /app/_data/Yolov5/yolov5 and run "Result15", while the
# notebook works in /app/_data/yolov5 and trains "yolov5l6_voi_1" - confirm the path.
Image("/app/_data/Yolov5/yolov5/runs/train/Result15/confusion_matrix.png", width=600)

In [None]:
# Run detection at confidence threshold 0.3 with --augment, saving the predictions
# as text files that include confidences (--save-txt --save-conf).
# NOTE(review): --source ("tr_test_42") and --weights ("Result15" under
# /app/_data/Yolov5/yolov5) do not match the dataset and run created in this notebook - confirm.
!python detect.py --img-size 640  --conf 0.3 --source /app/_data/yolo5_dataset/tr_test_42/images/val/ --weights /app/_data/Yolov5/yolov5/runs/train/Result15/weights/best.pt --augment --save-txt --save-conf

In [None]:
# Show one annotated image from a detection run.
# NOTE(review): reads from runs/detect/exp, whereas the listing cell below reads
# runs/detect/exp2 - confirm both refer to the intended run.
Image("/app/_data/Yolov5/yolov5/runs/detect/exp/ffdc682f7680.jpg")

In [None]:
# Print every entry of the detection labels directory whose name does not
# contain ".jpg".
for i in os.listdir("/app/_data/Yolov5/yolov5/runs/detect/exp2/labels/"):
    if ".jpg" in i:
        continue
    print(i)

In [None]:
# Remove wandb. -y answers pip's confirmation prompt, which cannot be answered
# interactively from a notebook shell command.
!pip uninstall -y wandb
clear_output()