# Version note

# Setup for Notebook

## Import libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm.notebook import tqdm
import seaborn as sns
import os
from sklearn.model_selection import GroupKFold
import cv2
import PIL.Image as Image

print('Setup Completed')

## Global value

In [None]:
# Box-fusion settings for the regular classes (everything not in class_large).
iou_thr = 0.5
skip_box_thr = 0.03
sigma = 0.1

# Box-fusion settings for the classes listed in class_large, fused separately.
iou_thr_large = 0.4
skip_box_thr_large = 0.03
# Own name so it no longer silently re-assigns `sigma` above.
sigma_large = 0.1

# Class ids that are fused with the *_large thresholds.
class_large = [0, 1, 3, 4, 12]

## Global function

In [None]:
def Preprocessing(input_path, output_path):
    """Contrast-enhance one image with CLAHE and save the result.

    The image is read with OpenCV, converted to grayscale, equalised with
    CLAHE (clip limit 2.0, 8x8 tiles), expanded back to three channels,
    min-max normalised to the 0-255 range and written with PIL.

    Args:
        input_path: Path of the image file to read.
        output_path: Path the processed image is saved to.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``input_path``.
    """
    # Local alias: a later cell rebinds the notebook-global name `Image` to
    # IPython.display.Image, which has no `fromarray`.
    from PIL import Image as PILImage

    img = cv2.imread(input_path)
    # cv2.imread signals a missing/unreadable file by returning None
    if img is None:
        raise FileNotFoundError(f'Could not read image: {input_path}')
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Contrast-limited adaptive histogram equalisation
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = clahe.apply(gray_image)
    img_f = cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR)

    # Min-max normalisation; dst=None lets OpenCV allocate an output that
    # matches the input's size and type
    n_img = cv2.normalize(img_f, None, 0, 255, cv2.NORM_MINMAX)

    PILImage.fromarray(n_img).save(output_path)

# Import Data

In [None]:
# Path of the CSV describing the (custom) test set
TEST_DIR = '/kaggle/input/test640/test640/test_image.csv'

# Load it, preview the first rows and collect the distinct image ids
test_df = pd.read_csv(TEST_DIR)
display(test_df.head())
image_ids = test_df['image_id'].unique()
print(f'Number of tets image: {len(image_ids)} ')

In [None]:
# Folder with the raw test images, and the folder the preprocessed copies go to
input_path = '/kaggle/input/test640/test640/Test'
output_path = '/kaggle/working/testimage'
# Create the output folder from the variable so the two cannot drift apart
os.makedirs(output_path, exist_ok=True)

## Preprocessing

In [None]:
# Preprocess every test image, writing the result under the same file name
# in output_path
for test_image_id in tqdm(image_ids, total=len(image_ids)):
    jpg_name = test_image_id + '.jpg'
    Preprocessing(
        os.path.join(input_path, jpg_name),
        os.path.join(output_path, jpg_name),
    )

In [None]:
# Path of the training-set CSV (custom dataset); the following cells read
# the per-image width/height columns from it
TRAIN_DIR = '/kaggle/input/vinbigdata-640pixel/train_vin.csv'
train_df = pd.read_csv(TRAIN_DIR)
# Preview the first rows and report (rows, columns)
display(train_df.head())
print(train_df.shape)

In [None]:

# Build image_id -> width / height lookups for the images in image_ids,
# taking the first value recorded for each image in train_df.
# One pass over train_df replaces two full-table scans per image.
size_lookup = (
    train_df[train_df['image_id'].isin(image_ids)]
    .drop_duplicates(subset='image_id', keep='first')
    .set_index('image_id')
)

# Fail with a readable message if some image has no size entry
missing_ids = [image_id for image_id in image_ids if image_id not in size_lookup.index]
if missing_ids:
    raise KeyError(
        f'{len(missing_ids)} image ids have no width/height entry in train_df, '
        f'e.g. {missing_ids[:5]}'
    )

width = size_lookup['width'].to_dict()
height = size_lookup['height'].to_dict()

In [None]:
# Attach each image's width and height (looked up by image_id) to test_df.
# Check first so an unknown id raises instead of silently becoming NaN.
unknown_ids = [
    image_id for image_id in test_df['image_id'].unique()
    if image_id not in width or image_id not in height
]
if unknown_ids:
    raise KeyError(
        f'{len(unknown_ids)} image ids have no width/height entry, e.g. {unknown_ids[:5]}'
    )

# Vectorised dictionary lookup instead of a Python call per row
test_df['width'] = test_df['image_id'].map(width)
test_df['height'] = test_df['image_id'].map(height)

In [None]:
# Preview test_df again (it now carries width/height) and refresh the id list
display(test_df.head())
image_ids = test_df['image_id'].unique()
print(f'Number of tets image: {len(image_ids)} ')

# Yolov5 Predict

In [None]:
# Clone the YOLOv5 repository; later cells run its detect.py
!git clone https://github.com/ultralytics/yolov5

# Clone NVIDIA apex (intended to speed up the process).
# NOTE(review): no visible cell installs or imports apex - confirm it is still needed.
!git clone https://github.com/NVIDIA/apex.git

In [None]:
import torch
# NOTE: this rebinds the notebook-global name `Image` (bound to PIL.Image by
# the first import cell) to IPython.display.Image.
from IPython.display import Image, clear_output  # to display images
# Report the torch version and the first CUDA device's properties, or 'CPU' when CUDA is unavailable
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Move the contents of the cloned yolov5 folder into the current directory,
# so detect.py and requirements.txt can be referenced without a prefix.
# NOTE(review): not idempotent - a second run finds nothing left to move.
!mv yolov5/* ./
# Install the packages listed in requirements.txt
!pip install -r requirements.txt

In [None]:
# Smoke test: run detect.py with the stock yolov5s.pt weights on a single
# sample image (presumably the one shipped in the cloned repo's data/images).
!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source /kaggle/working/data/images/zidane.jpg

In [None]:
# Display the smoke-test output image from runs/detect/exp
Image(filename='/kaggle/working/runs/detect/exp/zidane.jpg', width=600)

In [None]:
# Install the ensemble-boxes package and wildcard-import it.
# weighted_boxes_fusion, called in the fusion cell, presumably comes from this import.
!pip install ensemble-boxes
from ensemble_boxes import *

In [None]:
from glob import glob

# Column-wise accumulators filled by the per-fold parsing cells: position i in
# every list belongs to the same detection. Each name gets its own empty list.
(
    image_id_list,
    label_id_list,
    conf_id_list,
    x_id_list,
    y_id_list,
    w_id_list,
    h_id_list,
    fold_list,
) = ([] for _ in range(8))

### Fold 0

In [None]:
# Run detect.py with the fold-0 weights over the preprocessed test images.
# --save-txt / --save-conf produce the per-image label files (with a confidence column) parsed in the next cell.
!python detect.py --weights /kaggle/input/weight/best_fold0.pt --img 640 --conf 0.01 --iou 0.5 --source /kaggle/working/testimage --save-txt --save-conf --augment

In [None]:

# Parse the label files of the fold-0 run into the shared accumulator lists.
# NOTE(review): 'exp2' is hard-coded, so this presumably relies on the fold-0
# run being the second detect.py invocation in this working directory (the
# sample-image run wrote to runs/detect/exp) - confirm before re-running cells
# out of order.
for file_path in tqdm(glob('runs/detect/exp2/labels/*txt')):
    # File name without directory and extension is the image id
    image_id = os.path.splitext(os.path.basename(file_path))[0]
    # Context manager closes the handle even if parsing fails
    with open(file_path, 'r') as label_file:
        tokens = label_file.read().split()
    # Six values per detection; columns 0..5 are label, x_mid, y_mid, w, h, conf
    detections = np.array(tokens).astype(np.float32).reshape(-1, 6)
    n_boxes = len(detections)
    image_id_list.extend([image_id] * n_boxes)
    label_id_list.extend(detections[:, 0])
    conf_id_list.extend(detections[:, 5])
    x_id_list.extend(detections[:, 1])
    y_id_list.extend(detections[:, 2])
    w_id_list.extend(detections[:, 3])
    h_id_list.extend(detections[:, 4])
    fold_list.extend([0] * n_boxes)



### Fold 1

In [None]:
# Run detect.py with the fold-1 weights over the preprocessed test images.
# --save-txt / --save-conf produce the per-image label files (with a confidence column) parsed in the next cell.
!python detect.py --weights /kaggle/input/weight/best_fold1.pt --img 640 --conf 0.01 --iou 0.5 --source /kaggle/working/testimage --save-txt --save-conf --augment

In [None]:
# Parse the label files of the fold-1 run into the shared accumulator lists.
# NOTE(review): 'exp3' is hard-coded, so this presumably relies on the fold-1
# run being the third detect.py invocation in this working directory -
# confirm before re-running cells out of order.
for file_path in tqdm(glob('runs/detect/exp3/labels/*txt')):
    # File name without directory and extension is the image id
    image_id = os.path.splitext(os.path.basename(file_path))[0]
    # Context manager closes the handle even if parsing fails
    with open(file_path, 'r') as label_file:
        tokens = label_file.read().split()
    # Six values per detection; columns 0..5 are label, x_mid, y_mid, w, h, conf
    detections = np.array(tokens).astype(np.float32).reshape(-1, 6)
    n_boxes = len(detections)
    image_id_list.extend([image_id] * n_boxes)
    label_id_list.extend(detections[:, 0])
    conf_id_list.extend(detections[:, 5])
    x_id_list.extend(detections[:, 1])
    y_id_list.extend(detections[:, 2])
    w_id_list.extend(detections[:, 3])
    h_id_list.extend(detections[:, 4])
    fold_list.extend([1] * n_boxes)
        


### Fold 2

In [None]:
# Run detect.py with the fold-2 weights over the preprocessed test images.
# --save-txt / --save-conf produce the per-image label files (with a confidence column) parsed in the next cell.
!python detect.py --weights /kaggle/input/weight/best_fold2.pt --img 640 --conf 0.01 --iou 0.5 --source /kaggle/working/testimage --save-txt --save-conf --augment

In [None]:
# Parse the label files of the fold-2 run into the shared accumulator lists.
# NOTE(review): 'exp4' is hard-coded, so this presumably relies on the fold-2
# run being the fourth detect.py invocation in this working directory -
# confirm before re-running cells out of order.
for file_path in tqdm(glob('runs/detect/exp4/labels/*txt')):
    # File name without directory and extension is the image id
    image_id = os.path.splitext(os.path.basename(file_path))[0]
    # Context manager closes the handle even if parsing fails
    with open(file_path, 'r') as label_file:
        tokens = label_file.read().split()
    # Six values per detection; columns 0..5 are label, x_mid, y_mid, w, h, conf
    detections = np.array(tokens).astype(np.float32).reshape(-1, 6)
    n_boxes = len(detections)
    image_id_list.extend([image_id] * n_boxes)
    label_id_list.extend(detections[:, 0])
    conf_id_list.extend(detections[:, 5])
    x_id_list.extend(detections[:, 1])
    y_id_list.extend(detections[:, 2])
    w_id_list.extend(detections[:, 3])
    h_id_list.extend(detections[:, 4])
    fold_list.extend([2] * n_boxes)

### Fold 3

In [None]:
# Run detect.py with the fold-3 weights over the preprocessed test images.
# --save-txt / --save-conf produce the per-image label files (with a confidence column) parsed in the next cell.
!python detect.py --weights /kaggle/input/weight/best_fold3.pt --img 640 --conf 0.01 --iou 0.5 --source /kaggle/working/testimage --save-txt --save-conf --augment

In [None]:
# Parse the label files of the fold-3 run into the shared accumulator lists.
# NOTE(review): 'exp5' is hard-coded, so this presumably relies on the fold-3
# run being the fifth detect.py invocation in this working directory -
# confirm before re-running cells out of order.
for file_path in tqdm(glob('runs/detect/exp5/labels/*txt')):
    # File name without directory and extension is the image id
    image_id = os.path.splitext(os.path.basename(file_path))[0]
    # Context manager closes the handle even if parsing fails
    with open(file_path, 'r') as label_file:
        tokens = label_file.read().split()
    # Six values per detection; columns 0..5 are label, x_mid, y_mid, w, h, conf
    detections = np.array(tokens).astype(np.float32).reshape(-1, 6)
    n_boxes = len(detections)
    image_id_list.extend([image_id] * n_boxes)
    label_id_list.extend(detections[:, 0])
    conf_id_list.extend(detections[:, 5])
    x_id_list.extend(detections[:, 1])
    y_id_list.extend(detections[:, 2])
    w_id_list.extend(detections[:, 3])
    h_id_list.extend(detections[:, 4])
    fold_list.extend([3] * n_boxes)

### Fold 4

In [None]:
# Run detect.py with the fold-4 weights over the preprocessed test images.
# --save-txt / --save-conf produce the per-image label files (with a confidence column) parsed in the next cell.
!python detect.py --weights /kaggle/input/weight/best_fold4.pt --img 640 --conf 0.01 --iou 0.5 --source /kaggle/working/testimage --save-txt --save-conf --augment

In [None]:
# Parse the label files of the fold-4 run into the shared accumulator lists.
# NOTE(review): 'exp6' is hard-coded, so this presumably relies on the fold-4
# run being the sixth detect.py invocation in this working directory -
# confirm before re-running cells out of order.
for file_path in tqdm(glob('runs/detect/exp6/labels/*txt')):
    # File name without directory and extension is the image id
    image_id = os.path.splitext(os.path.basename(file_path))[0]
    # Context manager closes the handle even if parsing fails
    with open(file_path, 'r') as label_file:
        tokens = label_file.read().split()
    # Six values per detection; columns 0..5 are label, x_mid, y_mid, w, h, conf
    detections = np.array(tokens).astype(np.float32).reshape(-1, 6)
    n_boxes = len(detections)
    image_id_list.extend([image_id] * n_boxes)
    label_id_list.extend(detections[:, 0])
    conf_id_list.extend(detections[:, 5])
    x_id_list.extend(detections[:, 1])
    y_id_list.extend(detections[:, 2])
    w_id_list.extend(detections[:, 3])
    h_id_list.extend(detections[:, 4])
    fold_list.extend([4] * n_boxes)

In [None]:
# Combine the accumulator lists into one long table: one row per detection per fold
columns =['image_id', 'label', 'conf', 'x_mid', 'y_mid', 'w', 'h','fold']
submit_df = pd.DataFrame(list(zip(image_id_list, label_id_list,conf_id_list,x_id_list,y_id_list,w_id_list,h_id_list,fold_list)),
               columns =columns)
# Preview the last rows and report (rows, columns)
display(submit_df.tail())
print(submit_df.shape)
# Save the raw per-fold detections.
# NOTE(review): the last cell of the notebook writes Final_df to this same
# path, so this file is overwritten when the notebook runs to the end.
csv_path = os.path.join('/kaggle/working','result_vin'+'.csv')
submit_df.to_csv(csv_path,index = False)

In [None]:
# Report, for each of the five folds, how many distinct images received at
# least one detection from that fold's model.
for fold_idx in range(5):
    print(f'Number image of Fold {fold_idx}: ')
    print(len(submit_df[submit_df['fold'] == fold_idx].image_id.unique()))

In [None]:
# Fuse the per-fold detections of every test image with weighted_boxes_fusion.
# Classes listed in class_large are fused in a separate pass with the *_large
# thresholds; every fold that contributed a detection gets weight 1.0.
RESULT_COLUMNS = ["image_id", "class_id", "conf", "x_min", "y_min", "x_max", "y_max"]

# Split submit_df once up front instead of re-scanning the whole table for
# every image; row order inside each group is unchanged.
detections_by_image = dict(tuple(submit_df.groupby("image_id", sort=False)))

results = []
for image_id in tqdm(image_ids, total=len(image_ids)):
    # fold id -> {"regular": (boxes, scores, labels), "large": (boxes, scores, labels)}
    # Dict insertion order keeps the folds in the order they first appear.
    per_fold = {}
    image_rows = detections_by_image.get(image_id)
    if image_rows is not None:
        # itertuples avoids building a Series object for every row
        for det in image_rows.itertuples(index=False):
            if det.fold not in per_fold:
                per_fold[det.fold] = {
                    "regular": ([], [], []),
                    "large": ([], [], []),
                }
            size_key = "large" if det.label in class_large else "regular"
            boxes_acc, scores_acc, labels_acc = per_fold[det.fold][size_key]
            # Centre/size representation -> corner coordinates
            boxes_acc.append([
                det.x_mid - det.w / 2,
                det.y_mid - det.h / 2,
                det.x_mid + det.w / 2,
                det.y_mid + det.h / 2,
            ])
            scores_acc.append(det.conf)
            labels_acc.append(det.label)

    # One equal weight per fold that produced at least one detection
    weights = [1.0] * len(per_fold)

    # Regular classes first, then the large classes
    fusion_passes = (
        ("regular", iou_thr, skip_box_thr),
        ("large", iou_thr_large, skip_box_thr_large),
    )
    for size_key, pass_iou_thr, pass_skip_box_thr in fusion_passes:
        boxes, scores, labels = weighted_boxes_fusion(
            [groups[size_key][0] for groups in per_fold.values()],
            [groups[size_key][1] for groups in per_fold.values()],
            [groups[size_key][2] for groups in per_fold.values()],
            weights=weights,
            iou_thr=pass_iou_thr,
            skip_box_thr=pass_skip_box_thr,
        )
        for box, score, label in zip(boxes, scores, labels):
            results.append({
                "image_id": image_id,
                "class_id": int(label),
                "conf": score,
                "x_min": box[0],
                "y_min": box[1],
                "x_max": box[2],
                "y_max": box[3],
            })

# Explicit columns keep the frame's schema stable even when nothing was fused
Final_df = pd.DataFrame(results, columns=RESULT_COLUMNS)
display(Final_df.head())
print(f'Size of Final_df Dataframe: {Final_df.shape}')

In [None]:
# Spot-check one image: its raw per-fold detections next to its fused boxes
sample_image_id = '002a34c58c5b758217ed1f584ccbcfe9'
raw_rows = submit_df[submit_df["image_id"] == sample_image_id]
display(raw_rows)
fused_rows = Final_df[Final_df["image_id"] == sample_image_id]
display(fused_rows)

# Number of distinct classes and distinct images in the fused result
print(len(Final_df["class_id"].unique()))
print(len(Final_df["image_id"].unique()))

In [None]:
# Write the fused detections to result_vin.csv in /kaggle/working (no index column).
# This is the same path the raw submit_df table was saved to earlier, so that file is replaced.
csv_path = os.path.join('/kaggle/working','result_vin'+'.csv')
Final_df.to_csv(csv_path,index = False)