# Main: multi-scale YOLO test inference and Weighted Boxes Fusion

In [1]:
import pandas as pd, numpy as np
import sys,os,shutil,gc,re,json,glob,math,time,random,warnings
from tqdm import tqdm
from sklearn.model_selection import StratifiedGroupKFold
import sklearn.metrics as skm
import torch
import cv2
import yaml
import albumentations as A
from ultralytics.data.build import YOLODataset
import ultralytics.data.build as build
from ultralytics import YOLO
from ensemble_boxes import *

# Run-level configuration.
N_SPLITS = 5          # number of cross-validation folds
RANDOM_STATE = 41     # seed handed to fix_seed
FOLD = 0              # fold whose weights/predictions this notebook works with

def fix_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for repeatable runs.

    Parameters
    ----------
    seed : int
        Seed value applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running kernel is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    # Turning off the autotuner alone does not make cuDNN deterministic:
    # the deterministic flag must be set as well.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all RNGs before any model or data work happens.
fix_seed(RANDOM_STATE)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  check_for_updates()


In [2]:
# Root folder holding Test.csv and the images/ directory.
DIR_DATA = 'data'

# Load the test table and attach the on-disk location of every image.
df_test = pd.read_csv(f'{DIR_DATA}/Test.csv').assign(
    path=lambda frame: f'{DIR_DATA}/images/' + frame['Image_ID']
)
df_test.shape

(2101, 8)

In [3]:
# Every test image must appear in exactly one row of the table.
assert df_test.Image_ID.nunique() == len(df_test)

In [4]:
# Inference resolutions; the model is run once per size on every image.
IMGSZS = [
    640, 768, 800, 896, 960, 1024, 1280,
    1344, 1440, 1536, 1600, 1760, 1856, 1920,
]
len(IMGSZS)

14

In [5]:
# Class name -> integer label id.
class2id = {
    'Corn_Cercospora_Leaf_Spot': 0,
    'Corn_Common_Rust': 1,
    'Corn_Healthy': 2,
    'Corn_Northern_Leaf_Blight': 3,
    'Corn_Streak': 4,
    'Pepper_Bacterial_Spot': 5,
    'Pepper_Cercospora': 6,
    'Pepper_Early_Blight': 7,
    'Pepper_Fusarium': 8,
    'Pepper_Healthy': 9,
    'Pepper_Late_Blight': 10,
    'Pepper_Leaf_Blight': 11,
    'Pepper_Leaf_Curl': 12,
    'Pepper_Leaf_Mosaic': 13,
    'Pepper_Septoria': 14,
    'Tomato_Bacterial_Spot': 15,
    'Tomato_Early_Blight': 16,
    'Tomato_Fusarium': 17,
    'Tomato_Healthy': 18,
    'Tomato_Late_Blight': 19,
    'Tomato_Leaf_Curl': 20,
    'Tomato_Mosaic': 21,
    'Tomato_Septoria': 22,
}
# Inverse lookup: integer label id -> class name.
id2class = {label_id: name for name, label_id in class2id.items()}
print(id2class)

{0: 'Corn_Cercospora_Leaf_Spot', 1: 'Corn_Common_Rust', 2: 'Corn_Healthy', 3: 'Corn_Northern_Leaf_Blight', 4: 'Corn_Streak', 5: 'Pepper_Bacterial_Spot', 6: 'Pepper_Cercospora', 7: 'Pepper_Early_Blight', 8: 'Pepper_Fusarium', 9: 'Pepper_Healthy', 10: 'Pepper_Late_Blight', 11: 'Pepper_Leaf_Blight', 12: 'Pepper_Leaf_Curl', 13: 'Pepper_Leaf_Mosaic', 14: 'Pepper_Septoria', 15: 'Tomato_Bacterial_Spot', 16: 'Tomato_Early_Blight', 17: 'Tomato_Fusarium', 18: 'Tomato_Healthy', 19: 'Tomato_Late_Blight', 20: 'Tomato_Leaf_Curl', 21: 'Tomato_Mosaic', 22: 'Tomato_Septoria'}


In [6]:
%%time
from collections import defaultdict
FOLD = 0
model = YOLO(f'runs/detect/train_{FOLD}/weights/best.pt',task='detect')
model.eval();
model.training=False

all_data = defaultdict(list)

minconf = 0.0
for _,row in tqdm(df_test.iterrows(),total=len(df_test)):
    img = cv2.imread(row.path)
    h,w,c = img.shape

    for imgsz in IMGSZS:
        results = model(img,imgsz=imgsz, verbose=False,conf=minconf,augment=True,iou=0.4,max_det=500)[0]

        boxes = results.boxes.xyxy.cpu().numpy()
        classes = results.boxes.cls.cpu().numpy()
        confidences = results.boxes.conf.cpu().numpy()
        ixs = confidences>=minconf
        boxes = boxes[ixs]
        classes = classes[ixs]
        confidences = confidences[ixs]
        if len(boxes)==0:
            # If no detections, add default class
            print('No detection', row.Image_ID)
            all_data[imgsz].append({
                'Image_ID': row.Image_ID,
                'class': 'Corn_Healthy',
                'confidence': 0.5,
                'ymin': 0,
                'xmin': 0,
                'ymax': 0,
                'xmax': 0
            })
        else:
            # Iterate through the results for this image
            for box, cls, conf in zip(boxes, classes, confidences):
                x1, y1, x2, y2 = box
                detected_class = id2class[int(cls)]
                # Add the result to the all_data list
                all_data[imgsz].append({
                    'Image_ID': row.Image_ID,
                    'class': detected_class,
                    'confidence': conf,
                    'ymin': y1,
                    'xmin': x1,
                    'ymax': y2,
                    'xmax': x2
                })


100%|██████████| 2101/2101 [1:16:41<00:00,  2.19s/it]

CPU times: user 52min 52s, sys: 23min 10s, total: 1h 16min 2s
Wall time: 1h 16min 41s





In [7]:
# Write one CSV per inference resolution; os.makedirs replaces the
# shell-only `mkdir -p` so the cell does not depend on a POSIX shell.
os.makedirs('test_preds', exist_ok=True)
for imgsz in IMGSZS:
    preds = pd.DataFrame(all_data[imgsz])
    preds.to_csv(f'test_preds/preds_{FOLD}_{imgsz}.csv',index=False)
    print(imgsz,preds.shape)


640 (1050500, 7)
768 (1050500, 7)
800 (1050500, 7)
896 (1050500, 7)
960 (1050500, 7)
1024 (1050500, 7)
1280 (1050500, 7)
1344 (1050500, 7)
1440 (1050500, 7)
1536 (1050500, 7)
1600 (1050500, 7)
1760 (1050500, 7)
1856 (1050500, 7)
1920 (1050500, 7)


In [8]:
def merge_bboxes(dfs,iou_thr,skip_box_thr,df_meta=None):
    """Fuse the detections of several prediction sources, image by image.

    For every image the boxes of each source are normalised by the image
    width/height, degenerate boxes are dropped, and the remainder is combined
    with ``weighted_boxes_fusion`` using equal weights for all sources.

    Parameters
    ----------
    dfs : list of pandas.DataFrame
        One frame per source with columns ``Image_ID``, ``class``,
        ``confidence``, ``xmin``, ``ymin``, ``xmax``, ``ymax`` (pixels).
    iou_thr : float
        Forwarded to ``weighted_boxes_fusion`` as ``iou_thr``.
    skip_box_thr : float
        Forwarded to ``weighted_boxes_fusion`` as ``skip_box_thr``.
    df_meta : optional
        Unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        Fused boxes in pixel coordinates with columns ``Image_ID``,
        ``confidence``, ``ymin``, ``xmin``, ``ymax``, ``xmax``, ``class``.
        An image with no fused box gets one zero-area ``Corn_Healthy`` row
        with confidence 0.5.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read from ``{DIR_DATA}/images``.
    """
    cols = ['xmin','ymin','xmax','ymax']
    image_ids = pd.concat(dfs).Image_ID.unique().tolist()

    # Split every frame by image once. Filtering the full frame with a boolean
    # mask inside the image loop rescans all rows for every image and source.
    per_image = [
        {image_id: group for image_id, group in df_res.groupby('Image_ID', sort=False)}
        for df_res in dfs
    ]
    # Every source contributes equally to the fusion.
    weights = [1] * len(dfs)

    df_res_wbf = []
    for Image_ID in tqdm(image_ids):
        img = cv2.imread(f'{DIR_DATA}/images/{Image_ID}')
        if img is None:
            # cv2.imread returns None rather than raising on a bad path.
            raise FileNotFoundError(f'Could not read image {DIR_DATA}/images/{Image_ID}')
        h, w = img.shape[:2]
        scale = np.array([w, h, w, h], dtype=float)

        boxes_list = []; scores_list = []; labels_list = []
        for groups in per_image:
            d = groups.get(Image_ID)
            if d is None:
                # This source has no rows for the image: contribute empty lists.
                boxes_list.append([])
                scores_list.append([])
                labels_list.append([])
                continue

            boxes = d[cols].to_numpy(dtype=float)
            scores = d.confidence.to_numpy()
            labels = d['class'].map(class2id).to_numpy()

            # Keep only boxes with positive width and height; this also drops
            # the zero-area placeholder rows written for "no detection".
            valid = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])

            # Boxes are passed to the fusion step in normalised coordinates.
            boxes_list.append((boxes[valid] / scale).tolist())
            scores_list.append(scores[valid].tolist())
            labels_list.append(labels[valid].tolist())

        # boxes_list always holds one entry per source here, so the fusion
        # call is unconditional (pd.concat above already rejects empty dfs).
        boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)

        if len(boxes)==0:
            d_res = pd.DataFrame([{ 'Image_ID': Image_ID, 'class': 'Corn_Healthy', 'confidence': 0.5, 'ymin': 0, 'xmin': 0, 'ymax': 0, 'xmax': 0 }])
        else:
            # Back to pixel coordinates.
            boxes = np.asarray(boxes) * scale
            d_res = pd.DataFrame(dict(
                Image_ID=Image_ID,
                confidence=scores,
                ymin=boxes[:, 1],
                xmin=boxes[:, 0],
                ymax=boxes[:, 3],
                xmax=boxes[:, 2],
            ))
            # Fused labels come back as floats; cast before the id lookup.
            d_res['class'] = [id2class[int(l)] for l in labels]

        df_res_wbf.append(d_res)

    df_res_wbf = pd.concat(df_res_wbf)
    return df_res_wbf



In [9]:
# Reload the per-resolution predictions for the current fold. The file name
# uses FOLD (not a literal 0) so it always matches what the writer cell saved.
dfs = [pd.read_csv(f'test_preds/preds_{FOLD}_{imgsz}.csv') for imgsz in IMGSZS]
print(len(dfs))

# Fuse all resolutions into a single prediction set and write the submission.
iou_thr = 0.5
df_res_wbf = merge_bboxes(dfs,iou_thr = iou_thr, skip_box_thr=0.001)
df_res_wbf.to_csv('submission.csv',index=False)
print(iou_thr)

14


100%|██████████| 2101/2101 [53:30<00:00,  1.53s/it]


0.5


In [10]:
# Summary statistics of the fused predictions (confidence and box coordinates).
df_res_wbf.describe()

Unnamed: 0,confidence,ymin,xmin,ymax,xmax
count,669659.0,669659.0,669659.0,669659.0,669659.0
mean,0.018402,1151.838746,1709.500306,1329.656518,1919.912918
std,0.081542,755.638911,1100.768919,772.254385,1129.585117
min,7.1e-05,0.0,0.0,0.000941,0.001427
25%,0.000194,574.399556,849.475908,734.742393,1005.547028
50%,0.000664,1043.595469,1595.572321,1236.948392,1818.708143
75%,0.003258,1644.293977,2479.476892,1832.133145,2731.222186
max,0.964719,4478.825,6686.7036,4480.000534,6720.000801
