In [None]:
# Download YOLOv5: clone the repository into the current working directory.
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5

# Install dependencies
%pip install -qr requirements.txt  

# Go back to the parent directory; later cells use paths relative to it.
%cd ../
import torch
# Report the torch version and the name of GPU 0, or 'CPU' when CUDA is unavailable.
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

In [None]:

# Install W&B 
!pip install -q --upgrade wandb

# Login 
import wandb

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient() 



!wandb login 2475fd7c9b2de3f6cc173a97474a8ab98233627c


In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import os
import gc
import cv2

from tqdm import tqdm
import shutil
from shutil import copyfile
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


from IPython.core.magic import register_line_cell_magic

from os import listdir
from os.path import isfile, join
from glob import glob
import yaml

@register_line_cell_magic
def writetemplate(line, cell):
    """Cell magic body: render `cell` with the module globals and write it to the file `line`.

    `line` is used as the output path and `cell` as a `str.format` template whose
    placeholders are filled from `globals()`. The target file is opened (and
    truncated) before the template is rendered.
    """
    with open(line, 'w') as template_file:
        rendered = cell.format(**globals())
        template_file.write(rendered)

In [None]:
# Hyperparameters
# Directory of the resized training images (the path points at the img_sz_512 variant).
TRAIN_PATH = '/kaggle/input/siim-covid19-resized-384512-and-640px/SIIM-COVID19-Resized/img_sz_512/train/'
IMG_SIZE = 512  # image size used for bbox scaling, training (--img) and detection (--img)
BATCH_SIZE = 16  # passed to train.py as --batch
EPOCHS = 10  # passed to train.py as --epochs

In [None]:
# Load the image-level CSV file.
df = pd.read_csv('../input/siimcovid19-detection-training-label/train_image_df.csv')
# Add the absolute path of each resized training image. String concatenation
# is vectorised over the whole column instead of a row-wise apply().
df['path'] = TRAIN_PATH + df['id'] + '.jpg'
df.head(3)

In [None]:
# Remove the negative class (integer_label == 2).
# NOTE: this cell rewrites `df` in place and is therefore not idempotent -
# re-running it would drop the rows that were just relabelled to 2.
df = df[df.integer_label != 2].reset_index(drop=True)
# Relabel "typical" from 3 to 2 so the class ids are contiguous.
# Use .loc: chained indexing (df[col][mask] = ...) may assign to a temporary
# copy and raises SettingWithCopyWarning.
df.loc[df.integer_label == 3, 'integer_label'] = 2

# Collect the distinct (class id, class name) pairs, ordered by class id.
class_ids, class_names = zip(*sorted(set(zip(df.integer_label, df.y_label))))
classes = [str(name) for name in class_names]
classes

In [None]:
# Split into a held-out "final" set, a training set and a validation set.
#
# The split is drawn over unique image ids and then mapped back to rows, so
# that if an image has several rows, all of them land in the same split and
# no image leaks between train / valid / final. When every id is unique this
# selects the same images as a plain row-level split with the same seed.
image_level = df.drop_duplicates(subset=['id'])
dev_images, final_images = train_test_split(
    image_level, test_size=0.2, random_state=42,
    stratify=image_level.integer_label.values)
train_images, valid_images = train_test_split(
    dev_images, test_size=0.2, random_state=42,
    stratify=dev_images.integer_label.values)

# .assign() returns new frames, which avoids SettingWithCopyWarning from
# writing a column into the slices returned by train_test_split.
final_df = df[df.id.isin(final_images.id)].assign(split='final').reset_index(drop=True)
train_df = df[df.id.isin(train_images.id)].assign(split='train')
valid_df = df[df.id.isin(valid_images.id)].assign(split='valid')

# `df` now holds only train + valid rows, with a fresh 0..n-1 index that the
# later positional loops (df.loc[i]) rely on.
df = pd.concat([train_df, valid_df]).reset_index(drop=True)
final_df.head()

In [None]:
# Show the number of rows in each split.
print(f'Size of final: {len(final_df)}')
print(f'Size of dataset: {len(df)}, training images: {len(train_df)}. validation images: {len(valid_df)}')

In [None]:
# Give final_df a fresh 0..n-1 index; later loops address its rows with final_df.loc[i].
final_df=final_df.reset_index(drop=True)
final_df.head()

In [None]:
# Copy the held-out ("final") images into their own folder; detect.py is later
# pointed at this folder to measure accuracy.
os.makedirs('/kaggle/working/CorrectRate', exist_ok=True)
for i in tqdm(range(len(final_df))):
    # Read from final_df, not df: df holds only the train/valid rows, so
    # indexing it here would copy training images into the evaluation folder.
    row = final_df.iloc[i]
    copyfile(row.path, f'/kaggle/working/CorrectRate/{row.id}.jpg')

In [None]:
# Build the dataset directory structure. Paths are relative to the current
# working directory; later cells address the same folders as
# /kaggle/working/covid19/...
#
# /kaggle/working
#     /covid19
#         /images
#             /train/img0.jpg
#             /valid
#         /labels
#             /train/img0.txt
#             /valid
#     /yolov5
for kind in ('images', 'labels'):
    for split_name in ('train', 'valid'):
        os.makedirs(f'covid19/{kind}/{split_name}', exist_ok=True)

In [None]:
# Copy every image into the folder of its split: rows marked 'train' go to
# images/train, all other rows go to images/valid.
for _, image_row in tqdm(df.iterrows(), total=len(df)):
    split_dir = 'train' if image_row.split == 'train' else 'valid'
    copyfile(image_row.path, f'covid19/images/{split_dir}/{image_row.id}.jpg')


In [None]:
# Build the dataset configuration files.
with open('/kaggle/working/covid19/train.txt', 'w') as f:
    for path in glob('/kaggle/working/covid19/images/train/*'):
        f.write(path+'\n')

with open('/kaggle/working/covid19/val.txt', 'w') as f:
    # The validation images are stored under images/valid (not images/val);
    # globbing the wrong folder would leave val.txt empty.
    for path in glob('/kaggle/working/covid19/images/valid/*'):
        f.write(path+'\n')

data = dict(
    train = '/kaggle/working/covid19/images/train',
    val = '/kaggle/working/covid19/images/valid',

    nc    = len(classes), # number of classes, kept in sync with `names`
    names = classes # classes
    )

with open('/kaggle/working/yolov5/data/data.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Read the file back (read-only) to show what was written; the context
# manager closes the handle.
with open('/kaggle/working/yolov5/data/data.yaml', 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
# List the YOLOv5 data directory, where data.yaml was written.
!ls '/kaggle/working/yolov5/data'

In [None]:
# Read the original bounding box position from a label row.
def get_bbox(row):
    """Return the row's bounding box as a one-element list of float boxes.

    Reads `row.x_min`, `row.y_min`, `row.x_max` and `row.y_max` and returns
    `[[x_min, y_min, x_max, y_max]]` with every coordinate converted to float.
    """
    corners = (row.x_min, row.y_min, row.x_max, row.y_max)
    return [[float(value) for value in corners]]

# Rescale bounding boxes from the original image size to the resized image.
def scale_bbox(row, bboxes, img_size=None):
    """Scale boxes from the original image resolution to a square resized image.

    Args:
        row: object exposing `width` and `height`, the original image size.
        bboxes: iterable of `[xmin, ymin, xmax, ymax]` in original pixels.
        img_size: side length of the resized image. Defaults to the
            module-level `IMG_SIZE` when omitted, so existing two-argument
            calls keep working.

    Returns:
        List of `[xmin, ymin, xmax, ymax]` floats in resized-image pixels.
    """
    if img_size is None:
        img_size = IMG_SIZE

    # x and y are scaled independently because the resize does not preserve
    # the aspect ratio (both sides map to img_size).
    scale_x = img_size/row.width
    scale_y = img_size/row.height

    scaled_bboxes = []
    for bbox in bboxes:
        scaled_bboxes.append([
            float(bbox[0]*scale_x),  # xmin
            float(bbox[1]*scale_y),  # ymin
            float(bbox[2]*scale_x),  # xmax
            float(bbox[3]*scale_y),  # ymax
        ])

    return scaled_bboxes

# Convert bounding boxes to the YOLO format.
def get_yolo_format_bbox(img_w, img_h, bboxes):
    """Convert `[xmin, ymin, xmax, ymax]` boxes to normalised YOLO boxes.

    Each returned box is `[x_center, y_center, width, height]`, with x values
    divided by `img_w` and y values divided by `img_h`.
    """
    def _to_yolo(box):
        box_w = box[2] - box[0]  # xmax - xmin
        box_h = box[3] - box[1]  # ymax - ymin
        center_x = box[0] + float(box_w/2)
        center_y = box[1] + float(box_h/2)
        return [center_x/img_w, center_y/img_h, box_w/img_w, box_h/img_h]

    return [_to_yolo(box) for box in bboxes]

In [None]:
# dfu is another name for the same DataFrame object as df (no copy is made).
dfu=df
# One row per image id (first occurrence kept); its index drives the label-writing loop.
dfuu=dfu.drop_duplicates(subset=['id'])

In [None]:
# Write one YOLO label file (.txt) per image for training.
# Grouping by id gives every image exactly the rows that belong to it; reading
# rows 0..k-1 of df instead would write boxes of unrelated images.
for img_id, img_rows in tqdm(df.groupby('id', sort=False)):
    # The first row of the image decides the split folder and the
    # image-level class label.
    first_row = img_rows.iloc[0]
    split = first_row.split
    label = first_row.integer_label
    split_dir = 'train' if split == 'train' else 'valid'
    file_name = f'covid19/labels/{split_dir}/{img_id}.txt'

    with open(file_name, 'w') as f:
        for _, box_row in img_rows.iterrows():
            bboxes = get_bbox(box_row)
            # Scale bounding boxes to the resized image
            scale_bboxes = scale_bbox(box_row, bboxes)
            # Format for YOLOv5: "<class> <x_center> <y_center> <width> <height>"
            yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, IMG_SIZE, scale_bboxes)
            for bbox in yolo_bboxes:
                f.write(' '.join(str(value) for value in [label] + bbox))
                f.write('\n')

In [None]:
# Enter the YOLOv5 repository so that train.py can be run from it.
%cd yolov5/

In [None]:

# Train YOLOv5 starting from the yolov5s.pt weights, using the image size,
# batch size and epoch count defined in the hyperparameter cell.
!python train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --weights yolov5s.pt \
                 --project kaggle-siim-covid19 \
                 --cache


In [None]:
# Go back up one directory and delete the generated dataset folder.
%cd "../"
path = "covid19"
shutil.rmtree(path) # remove the folder

In [None]:
# List the run directory under the kaggle-siim-covid19 project
# (presumably the output of the training cell - confirm the run name is 'exp').
!ls '/kaggle/working/yolov5/kaggle-siim-covid19/exp'

In [None]:
# Competition test set: TEST_PATH = '/kaggle/input/siim-covid19-resized-384512-and-640px/SIIM-COVID19-Resized/img_sz_512/test/'
# Accuracy check: run detection on the held-out images copied to CorrectRate.
TEST_PATH="/kaggle/working/CorrectRate/"
# Checkpoint path, relative to the yolov5 directory.
weights_dir = 'kaggle-siim-covid19/exp/weights/best.pt'

In [None]:
# Enter the YOLOv5 repository again; detect.py and weights_dir are relative to it.
%cd 'yolov5'

In [None]:
# Make sure the weights directory exists (no error if it already does).
os.makedirs('/kaggle/working/yolov5/kaggle-siim-covid19/exp/weights', exist_ok=True)

In [None]:

import shutil
# Copy a checkpoint from an attached input dataset to the path detect.py loads.
# NOTE(review): this overwrites any best.pt already at the destination, e.g.
# one produced by the training cell - confirm that is intended.
src=r"/kaggle/input/yolov5-3class/best.pt"
# Alternative checkpoint source:
#src="/kaggle/input/correct-rate/best1.pt"
des=r'/kaggle/working/yolov5/kaggle-siim-covid19/exp/weights/best.pt'
shutil.copy(src,des)

In [None]:
# Run inference on TEST_PATH with the selected checkpoint. --save-txt and
# --save-conf write one label file per image, including the confidence;
# --exist-ok reuses the existing run directory.
!python detect.py --weights {weights_dir} \
                  --source {TEST_PATH} \
                  --img {IMG_SIZE} \
                  --conf 0.28 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf \
                  --exist-ok

In [None]:
# Show the prediction file of one image.
# integer_label: 0=atypical 1=indeterminate 2=typical
# NOTE(review): the image id is hard-coded; the file exists only if this image
# was in the detection source and received a detection.
!cat runs/detect/exp/labels/ffcc16bbf428.txt

In [None]:
# Read the sample submission file and show its last rows.
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
sub_df.tail()

In [None]:
# Show the first rows of the held-out set used for the accuracy check.
final_df.head()

In [None]:
# The submission requires xmin, ymin, xmax, ymax format.
# YOLOv5 returns x_center, y_center, width, height
def correct_bbox_format(bboxes, img_size=None):
    """Convert normalised YOLO boxes to integer pixel corner boxes.

    Args:
        bboxes: iterable of `[x_center, y_center, width, height]`, each value
            normalised to the range of the image side.
        img_size: side length in pixels used to de-normalise. Defaults to the
            module-level `IMG_SIZE` when omitted, so existing one-argument
            calls keep working.

    Returns:
        List of `[xmin, ymin, xmax, ymax]` integer boxes, in the order stated
        in the comment above.
    """
    if img_size is None:
        img_size = IMG_SIZE

    correct_bboxes = []
    for b in bboxes:
        xc, yc = int(np.round(b[0]*img_size)), int(np.round(b[1]*img_size))
        w, h = int(np.round(b[2]*img_size)), int(np.round(b[3]*img_size))

        half_w = int(np.round(w/2))
        half_h = int(np.round(h/2))

        # Emit the corners as xmin, ymin, xmax, ymax; returning
        # xmin, xmax, ymin, ymax would contradict the required format.
        correct_bboxes.append([xc - half_w, yc - half_h, xc + half_w, yc + half_h])

    return correct_bboxes

# Parse a txt file generated by YOLOv5 during inference and pull out the
# confidence and the bounding box values of every line.
def get_conf_bboxes(file_path):
    """Return `(confidence, bboxes)` parsed from a space-separated label file.

    Every line is split on single spaces and converted to floats. The last
    value of a line is its confidence; the values between the first and the
    last are its bounding box.
    """
    confidence, bboxes = [], []
    with open(file_path, 'r') as label_file:
        for raw_line in label_file:
            values = [float(token) for token in raw_line.strip('\n').split(' ')]
            confidence.append(values[-1])
            bboxes.append(values[1:-1])
    return confidence, bboxes
def get_class(file_path):
    """Return the class id of the first detection in a YOLOv5 label file.

    The class id is the first whitespace-separated field of a line. Only the
    first non-blank line is used; the rest of the file is not read.

    Args:
        file_path: path of the label file.

    Returns:
        The class id as an int, or None when the file holds no detection.
    """
    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            if fields:
                # Go through float first so that "2.0" parses as well as "2".
                return int(float(fields[0]))
    return None

In [None]:
# Sanity check: print the class parsed from one prediction file (hard-coded image id).
print(get_class("runs/detect/exp/labels/ffcc16bbf428.txt"))

In [None]:
# Evaluation loop: count the held-out images whose predicted class matches
# the ground-truth integer_label.
predictions = []
PRED_PATH="runs/detect/exp/labels"
prediction_files=os.listdir(PRED_PATH)
# Set for O(1) membership tests inside the loop.
predicted_names = set(prediction_files)
count=0

for _, row in tqdm(final_df.iterrows(), total=len(final_df)):
    label_file = f'{row.id}.txt'
    if label_file not in predicted_names:
        # No label file for this image, so it cannot count as correct.
        continue
    category = get_class(f'{PRED_PATH}/{label_file}')
    if category == row.integer_label:
        count += 1

print(count)
# Also report the rate; skipped for an empty frame to avoid dividing by zero.
if len(final_df):
    print(f'Accuracy: {count / len(final_df):.4f} ({count}/{len(final_df)})')