In [None]:
import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
from PIL import Image
import SimpleITK as sitk
import cv2

import os
from os import listdir, mkdir
from tqdm.auto import tqdm


In [None]:
# Load the training metadata table (one row per image) from the Kaggle input dataset.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/data-covid-png-640/train.csv')

In [None]:
# Build the absolute PNG path of every image from its id.
# Vectorised string concatenation replaces the row-wise `apply(lambda ...)`.
df['path'] = '/kaggle/input/data-covid-png-640/train/' + df['image'] + '.png'

添加opacity标签

In [None]:
# Binary image-level label: 1 when the row has a `boxes` value, 0 when it is null.
# (The previous `df['image_label'] = 0` was a dead store, overwritten immediately.)
df['image_label'] = np.where(df['boxes'].isnull(), 0, 1)

### 生成训练集和验证集

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split on the binary image-level label; the fixed seed keeps
# the partition reproducible between runs.
df_train, df_valid = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['image_label'].values,
    shuffle=True,
)

# `train_test_split` hands back slices of `df`; writing into them with
# `.loc[:, 'split'] = ...` triggers pandas' SettingWithCopyWarning.
# `.assign` returns fresh frames carrying the new column instead.
df_train = df_train.assign(split='train')
df_valid = df_valid.assign(split='valid')

# Re-join both parts so the following cells can iterate over a single frame.
df = pd.concat([df_train, df_valid])

### 模型下载

In [None]:
# Switch to /kaggle/working before running the following cells.
%cd /kaggle/working

In [None]:
# Fetch the YOLOv5 repository into the current directory and install its
# requirements into the kernel environment.
# NOTE(review): the clone is not pinned to a tag/commit, so the checked-out code
# (including train.py's CLI flags) follows the repository's default branch.
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
# Install dependencies
%pip install -qr requirements.txt  # install dependencies

# Return to the parent directory and report the torch version plus the GPU name
# (or 'CPU' when CUDA is not available).
%cd ../
print(f"Setup complete. Using torch {torch.__version__}({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

In [None]:
import wandb
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()

# The W&B API token is read from the Kaggle secret stored under the label "wandb_key".
# If your secret uses a different label, change the string passed to get_secret below.
wandb_api = user_secrets.get_secret("wandb_key") 

# Authenticate with Weights & Biases using the retrieved token.
wandb.login(key=wandb_api)

### 准备数据集

In [None]:
os.makedirs('/kaggle/covid/images/train', exist_ok=True)
os.makedirs('/kaggle/covid/images/valid', exist_ok=True)

os.makedirs('/kaggle/covid/labels/train', exist_ok=True)
os.makedirs('/kaggle/covid/labels/valid', exist_ok=True)

! ls /kaggle/covid/images

In [None]:
# Make sure the working directory is /kaggle/working for the cells that follow.
%cd /kaggle/working

In [None]:
# Target image side length; also used when scaling the bounding boxes.
# NOTE(review): the images are written back without resizing, so this presumably
# matches the size of the input PNGs (dataset name ends in "-640") - confirm.
IMG_SIZE = 640

# Copy every image into the train/ or valid/ image folder as an uncompressed
# grayscale PNG.
for i in tqdm(df.index):
    src_path = df.loc[i, 'path']
    image = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with the offending path instead of with an opaque error in imwrite.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {src_path}')

    # Any split value other than 'train' is routed to the validation folder.
    subset = 'train' if df.loc[i, 'split'] == 'train' else 'valid'
    png_path = f'/kaggle/covid/images/{subset}/' + df.loc[i, 'image'] + '.png'
    cv2.imwrite(png_path, image, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])

In [None]:
# Get the raw bounding box by parsing the row value of the label column.
# Ref: https://www.kaggle.com/yujiariyasu/plot-3positive-classes
def get_bbox(row):
    """Parse ``row.label`` into a list of four-float bounding boxes.

    The label string is split on single spaces and read in groups of six
    tokens. The first two tokens of each group are skipped (presumably the
    class name and a confidence value); the remaining four are converted to
    ``float``. A trailing group with fewer than six tokens is still converted
    but not returned.

    Args:
        row: Object exposing a ``label`` string attribute.

    Returns:
        A list of ``[v0, v1, v2, v3]`` float lists, one per complete group.
    """
    tokens = row.label.split(' ')
    group_size = 6

    boxes = []
    for start in range(0, len(tokens), group_size):
        group = tokens[start:start + group_size]
        coords = [float(token) for token in group[2:]]
        if len(group) == group_size:
            boxes.append(coords)

    return boxes

# Scale the bounding boxes according to the size of the resized image.
def scale_bbox(row, bboxes):
    """Rescale pixel boxes from the original image size to ``IMG_SIZE``.

    Args:
        row: Object exposing ``dim_w`` and ``dim_h``, the width and height the
            boxes are currently expressed in.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` boxes.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` boxes scaled by
        ``IMG_SIZE / dim_w`` horizontally and ``IMG_SIZE / dim_h`` vertically,
        each value rounded to 4 decimals.
    """
    # Per-axis scaling factors (IMG_SIZE is the module-level constant).
    factor_w = IMG_SIZE / row.dim_w
    factor_h = IMG_SIZE / row.dim_h

    return [
        [
            np.round(box[0] * factor_w, 4),  # xmin
            np.round(box[1] * factor_h, 4),  # ymin
            np.round(box[2] * factor_w, 4),  # xmax
            np.round(box[3] * factor_h, 4),  # ymax
        ]
        for box in bboxes
    ]

# Convert the bounding boxes in YOLO format.
def get_yolo_format_bbox(IMG_SIZE, bboxes):
    """Convert corner-format boxes to normalised ``[xc, yc, w, h]`` boxes.

    Args:
        IMG_SIZE: Side length in pixels used to normalise every coordinate
            (the same value is applied to both axes).
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` boxes in pixels.

    Returns:
        A list of ``[x_center, y_center, width, height]`` lists of plain
        floats, each value divided by ``IMG_SIZE``.
    """
    yolo_boxes = []
    for bbox in bboxes:
        xmin, ymin, xmax, ymax = bbox[:4]
        w = xmax - xmin
        h = ymax - ymin
        # Use the exact midpoint. The previous np.round(w/2) / np.round(h/2)
        # rounded the half-extent to a whole pixel, shifting the centre by up
        # to half a pixel relative to the box it describes.
        xc = xmin + w / 2
        yc = ymin + h / 2

        yolo_boxes.append([
            float(xc / IMG_SIZE),
            float(yc / IMG_SIZE),
            float(w / IMG_SIZE),
            float(h / IMG_SIZE),
        ])  # x_center y_center width height

    return yolo_boxes

In [None]:
# Prepare the txt files for bounding box
# One file per image with image_label == 1, one line per box:
# "0 x_center y_center width height" (class index 0, values normalised by IMG_SIZE).
# Rows with any other image_label value get no label file.
for i in tqdm(df.index):
    row = df.loc[i]

    if row['image_label'] != 1:
        continue

    # Any split value other than 'train' is routed to the validation folder.
    subset = 'train' if row['split'] == 'train' else 'valid'
    file_name = f"/kaggle/covid/labels/{subset}/{row['image']}.txt"

    # Parse the raw boxes, rescale them to IMG_SIZE, then convert to YOLO format.
    bboxes = get_bbox(row)
    scale_bboxes = scale_bbox(row, bboxes)
    yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, scale_bboxes)

    with open(file_name, 'w') as f:
        for bbox in yolo_bboxes:
            # Prepend the single class index (0) and write space-separated values.
            f.write(' '.join(str(value) for value in [0] + bbox))
            f.write('\n')

### 模型训练

In [None]:
# Create .yaml file 
import yaml

data_yaml = dict(
    train = '/kaggle/covid/images/train',
    val = '/kaggle/covid/images/valid',
    nc = 1,
    names = ['opacity']
)

# Note that I am creating the file in the yolov5/data/ directory.
with open('/kaggle/working/yolov5/data/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)

%cat /kaggle/working/yolov5/data/data.yaml

In [None]:
hyp_yaml = dict(
lr0= 0.01,  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf= 0.02 , # final OneCycleLR learning rate (lr0 * lrf)
momentum= 0.937 , # SGD momentum/Adam beta1
weight_decay= 0.0005 , # optimizer weight decay 5e-4
warmup_epochs= 3.0 , # warmup epochs (fractions ok)
warmup_momentum= 0.8 , # warmup initial momentum
warmup_bias_lr= 0.1 , # warmup initial bias lr
box= 0.1 , # box loss gain
cls= 0.0 , # cls loss gain
cls_pw= 0.0 , # cls BCELoss positive_weight
obj= 3.0 , # obj loss gain (scale with pixels)
obj_pw= 1.5 , # obj BCELoss positive_weight
iou_t= 0.0 , # IoU training threshold
anchor_t= 4.0 , # anchor-multiple threshold
anchors= 0 , # anchors per output layer (0 to ignore)
fl_gamma= 0.0 , # focal loss gamma (efficientDet default gamma=1.5)
hsv_h= 0.015 , # image HSV-Hue augmentation (fraction)
hsv_s= 0.7 , # image HSV-Saturation augmentation (fraction)
hsv_v= 0.4 , # image HSV-Value augmentation (fraction)
degrees= 0.0 , # image rotation (+/- deg)
translate= 0.2 , # image translation (+/- fraction)
scale= 0.5 , # image scale (+/- gain)
shear= 0.0 , # image shear (+/- deg)
perspective= 0.0 , # image perspective (+/- fraction), range 0-0.001
flipud= 0.0 , # image flip up-down (probability)
fliplr= 0.5 , # image flip left-right (probability)
mosaic= 1.0 , # image mosaic (probability)
mixup= 0.0 , # image mixup (probability)
copy_paste= 0.0  # segment copy-paste (probability) 
)

with open('/kaggle/working/yolov5/data/hyps/hyp.scratch.yaml', 'w') as hypfile:
    yaml.dump(hyp_yaml,  hypfile, default_flow_style=True)

%cat /kaggle/working/yolov5/data/hyps/hyp.scratch.yaml

In [None]:
# Batch size and number of epochs, interpolated into the train.py command.
BATCH_SIZE, EPOCHS = 32, 30

In [None]:
# train.py is invoked by relative path in the next cell, so run from the repository root.
%cd /kaggle/working/yolov5

In [None]:
!python train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --cfg yolov5m.yaml \
                 --weights /kaggle/input/image-detection-model-1/yolov5/kaggle-siim-covid/exp/weights/best.pt \
                 --save_period 1\
                 --hyp /kaggle/working/yolov5/data/hyps/hyp.scratch.yaml\
                 --project kaggle-siim-covid