## This notebook is a quick-save version; if you want to see the results, please see version 9.


# References

1. https://www.kaggle.com/trungthanhnguyen0502/eda-vinbigdata-chest-x-ray-abnormalities
2. https://www.kaggle.com/bhallaakshit/dicom-wrangling-and-enhancement
3. https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-train
4. https://www.kaggle.com/kuuuuub/x-ray-image-enhancement-test/data

Thanks to the authors of the great works above!



# What to do?

1. Introduce another way to enhance chest X-ray images (BCET)
2. Train a YOLOv5 model on each image variant (Original, CLAHE, BCET, BCET+CLAHE), then run inference with each model
3. Compare the predictions with the ground-truth (GT) labels
4. Compare the mAP scores



# Balance Contrast Enhancement Technique (BCET)

Paper : LIU JIAN GUO (1991) Balance contrast enhancement technique and its application in image colour composition, International Journal of Remote Sensing, 12:10, 2133-2151, DOI: 10.1080/01431169108955241

MATLAB implementation: https://www.imageeprocessing.com/2017/11/balance-contrast-enhancement-technique.html

The `bcet` function below is a modified Python port of the MATLAB code above.

In [None]:
import numpy as np
import pandas as pd 
import os
import shutil
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import matplotlib.pyplot as plt
from skimage import exposure
from glob import glob

from scipy.io import wavfile
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import GroupKFold

In [None]:
# Root folder of the competition data; the raw DICOM files are globbed from its train/ subfolder below.
dataset_dir = '../input/vinbigdata-chest-xray-abnormalities-detection'

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Path of the DICOM file to read.
        voi_lut: If True, pass the pixel data through ``apply_voi_lut`` (the
            VOI LUT, when provided by the DICOM device, maps the raw data to
            a "human-friendly" view); otherwise use the raw pixel array.
        fix_monochrome: If True, invert images whose PhotometricInterpretation
            is "MONOCHROME1" so they do not look inverted.

    Returns:
        A ``np.uint8`` array rescaled so the minimum maps to 0 and the maximum
        to 255. A constant image (no dynamic range) is returned as all zeros
        instead of dividing by zero.
    """
    # dcmread is the supported reader; read_file is a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    peak = np.max(data)
    # Guard the normalisation: a constant image has peak == 0 after the shift.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return data
        
    
def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Display a single image in a new figure.

    Args:
        img: Image to show (anything ``imshow`` accepts).
        size: Figure size passed to ``plt.figure(figsize=...)``.
        is_rgb: Accepted for call compatibility; not used by this function.
        title: Text used as the figure's suptitle.
        cmap: Colormap forwarded to ``imshow``.
    """
    figure = plt.figure(figsize=size)
    axes = figure.gca()
    axes.imshow(img, cmap=cmap)
    figure.suptitle(title)
    plt.show()


def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(1000,1000)):
    """Display a list of images on a grid with ``cols`` columns.

    Args:
        imgs: Sequence of images to show.
        cols: Number of grid columns.
        size: Edge length (in figure units) given to each grid cell.
        is_rgb: Accepted for call compatibility; not used by this function.
        title: Text used as the figure's suptitle.
        cmap: Colormap forwarded to ``imshow``.
        img_size: If not None, every image is resized to this (width, height)
            with ``cv2.resize`` before being shown.
    """
    # Ceiling division: the previous `len(imgs)//cols + 1` reserved an empty
    # extra row whenever len(imgs) was an exact multiple of cols. Keep at
    # least one row so an empty list still yields a valid figure size.
    rows = max(1, (len(imgs) + cols - 1) // cols)
    fig = plt.figure(figsize=(cols*size, rows*size))
    for i, img in enumerate(imgs):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        fig.add_subplot(rows, cols, i+1)
        plt.imshow(img, cmap=cmap)
    plt.suptitle(title)
    plt.show()

In [None]:
def clahe(image, clipLimit = 2., tileGridSize = (10,10)):
    """Apply OpenCV's CLAHE to ``image`` and return the result.

    Args:
        image: Image handed to the CLAHE object's ``apply`` method.
        clipLimit: First argument of ``cv2.createCLAHE``.
        tileGridSize: Second argument of ``cv2.createCLAHE``.

    Returns:
        The image returned by ``apply``.
    """
    # Named `equalizer` so the local does not shadow this function's own name.
    equalizer = cv2.createCLAHE(clipLimit, tileGridSize)
    return equalizer.apply(image)

In [None]:
def bcet(img, Gmin=0, Gmax=255, Gmean=110):
    """Balance Contrast Enhancement Technique (BCET).

    Stretches ``img`` with the parabola ``y = a * (x - b)**2 + c`` whose
    coefficients are chosen so that the input minimum maps to ``Gmin``, the
    input maximum maps to ``Gmax`` and the output mean equals ``Gmean``.

    Args:
        img: Array-like grayscale image of any numeric dtype.
        Gmin: Minimum of the output image.
        Gmax: Maximum of the output image.
        Gmean: Mean of the output image.

    Returns:
        ``np.uint8`` array with the same shape as ``img``. Values are rounded
        to the nearest integer and clipped to the output range (never wrapped).
        A constant image has no contrast to balance and is returned unchanged
        (clipped to 0..255). If the parabola is undefined for the given
        statistics, a linear min/max stretch is used instead.
    """
    # All statistics are computed in float64: on uint8 input, `img * img` and
    # `Lmax * Lmax` overflow and silently corrupt the coefficients.
    x = np.asarray(img, dtype=np.float64)
    if x.size == 0:
        return x.astype(np.uint8)

    Lmin = x.min()  # minimum of input image
    Lmax = x.max()  # maximum of input image
    if Lmax == Lmin:
        return np.clip(x, 0, 255).astype(np.uint8)

    Lmean = x.mean()         # mean of input image
    LMssum = np.mean(x * x)  # mean square sum of input image

    bnum = Lmax * Lmax * (Gmean - Gmin) - LMssum * (Gmax - Gmin) + Lmin * Lmin * (Gmax - Gmean)
    bden = 2 * (Lmax * (Gmean - Gmin) - Lmean * (Gmax - Gmin) + Lmin * (Gmax - Gmean))

    y = None
    if bden != 0:
        b = bnum / bden
        spread = (Lmax - Lmin) * (Lmax + Lmin - 2 * b)
        if spread != 0:
            a = (Gmax - Gmin) / spread
            c = Gmin - a * (Lmin - b) * (Lmin - b)
            y = a * (x - b) * (x - b) + c  # parabolic function
    if y is None:
        # bden == 0 means a plain linear stretch already yields Gmean; the
        # other degenerate case (vertex midway between Lmin and Lmax) cannot
        # separate the two ends. Fall back to the linear stretch in both.
        y = (x - Lmin) * ((Gmax - Gmin) / (Lmax - Lmin)) + Gmin

    # The parabola may leave [Gmin, Gmax] between its end points; clip instead
    # of letting the uint8 cast wrap around.
    lo, hi = max(Gmin, 0), min(Gmax, 255)
    return np.clip(np.rint(y), lo, hi).astype(np.uint8)

# Visual comparison of the images

In [None]:
# Load the first four training DICOMs and derive the CLAHE and BCET variants.
dicom_paths = glob(f'{dataset_dir}/train/*.dicom')
imgs = list(map(dicom2array, dicom_paths[:4]))
clahe_img = list(map(clahe, imgs))
bcet_img = list(map(bcet, imgs))

# One figure per variant, in the order: original, CLAHE, BCET.
for variant_imgs in (imgs, clahe_img, bcet_img):
    plot_imgs(variant_imgs)
    plt.show()


1st row : original images \
2nd row : clahe images \
3rd row : bcet images

CLAHE highlights the bones and blood vessels. \
BCET emphasizes the information about the volume of the lungs.

# Histogram analysis

In [None]:
# cv2.calcHist takes a *list* of images as its first argument. Passing the
# 2-D array itself makes the binding treat it as a sequence of arrays, so the
# histogram is not computed over the whole image. Wrap each image in a list.
hist_ori = cv2.calcHist([imgs[0]], [0], None, [256], [0, 256])
hist_clahe = cv2.calcHist([clahe_img[0]], [0], None, [256], [0, 256])
hist_bcet = cv2.calcHist([bcet_img[0]], [0], None, [256], [0, 256])

# Overlay the three 256-bin histograms of the first sample image.
plt.plot(hist_ori, color='r', label='original')
plt.plot(hist_clahe, color='g', label='clahe')
plt.plot(hist_bcet, color='b', label='bcet')
plt.legend();


BCET emphasizes the dark areas.

In [None]:
# Recompute the CLAHE and BCET variants, then apply CLAHE on top of BCET.
clahe_img = list(map(clahe, imgs))
bcet_img = list(map(bcet, imgs))
bcet_clahe = list(map(clahe, bcet_img))

# Rows shown: CLAHE, BCET, BCET -> CLAHE.
for variant_imgs in (clahe_img, bcet_img):
    plot_imgs(variant_imgs)
    plt.show()
plot_imgs(bcet_clahe)

1st row : clahe images  \
2nd row : bcet images  \
3rd row : bcet -> clahe images

How about the reverse order (CLAHE -> BCET)?

In [None]:
# Reverse order: BCET applied to the CLAHE images.
clahe_bcet = list(map(bcet, clahe_img))
plot_imgs(clahe_bcet)

Terrible...

# Train and compare performances

In [None]:
dim = 512 #1024, 256, 'original'
test_dir = f'/kaggle/input/vinbigdata-{dim}-image-dataset/vinbigdata/test'
weights_dir = '/kaggle/input/vinbigdata-cxr-ad-yolov5-14-class-train/yolov5/runs/train/exp/weights/best.pt'

train_df = pd.read_csv(f'../input/vinbigdata-{dim}-image-dataset/vinbigdata/train.csv')
train_df['image_path'] = f'/kaggle/input/vinbigdata-{dim}-image-dataset/vinbigdata/train/'+train_df.image_id+('.png' if dim!='original' else '.jpg')

# Drop class_id 14 rows (presumably the "no finding" label - confirm against
# the dataset) so only annotated abnormalities remain.
train_df = train_df[train_df.class_id!=14].reset_index(drop = True)

# Index of the fold held out for validation.
fold = 4
gkf  = GroupKFold(n_splits = 5)
train_df['fold'] = -1
# Group by image_id so all boxes of one image land in the same fold.
# The loop variable is named fold_idx so it cannot overwrite `fold`: before,
# `fold` always ended up as the last split index whatever was configured.
for fold_idx, (train_idx, val_idx) in enumerate(gkf.split(train_df, groups = train_df.image_id.tolist())):
    train_df.loc[val_idx, 'fold'] = fold_idx
# Select the configured fold instead of a hard-coded 4.
val_df = train_df[train_df['fold']==fold]


In [None]:
# Unique image paths of the held-out fold (validation) and of all other folds (training).
is_val_row = train_df.fold == fold
val_files = list(train_df[is_val_row].image_path.unique())
train_files = list(train_df[~is_val_row].image_path.unique())
len(train_files), len(val_files)

In [None]:
label_dir = '/kaggle/input/vinbigdata-yolo-labels-dataset/labels'

# Create the images/labels x train/val folder layout for the original images.
for folder in (
    '/kaggle/working/vinbigdata/labels/train',
    '/kaggle/working/vinbigdata/labels/val',
    '/kaggle/working/vinbigdata/images/train',
    '/kaggle/working/vinbigdata/images/val',
):
    os.makedirs(folder, exist_ok = True)

# Copy every image together with the label file that shares its base name.
for split_files, image_dst, label_dst in (
    (train_files, '/kaggle/working/vinbigdata/images/train', '/kaggle/working/vinbigdata/labels/train'),
    (val_files, '/kaggle/working/vinbigdata/images/val', '/kaggle/working/vinbigdata/labels/val'),
):
    for file in split_files:
        shutil.copy(file, image_dst)
        filename = file.rsplit('/', 1)[-1].partition('.')[0]
        shutil.copy(os.path.join(label_dir, filename+'.txt'), label_dst)

val_dir = f'/kaggle/working/vinbigdata/images/val'

In [None]:
# Build one dataset folder per enhancement variant: empty images/{train,val}
# folders (filled by the enhancement cells) plus a copy of the label files of
# the original dataset.
for variant in ('clahe', 'bcet', 'bcet_clahe'):
    variant_root = f'/kaggle/working/vinbigdata_{variant}'
    for split in ('train', 'val'):
        os.makedirs(f'{variant_root}/images/{split}', exist_ok = True)

        label_dst = f'{variant_root}/labels/{split}'
        # shutil.copytree raises if the destination already exists, which made
        # this cell fail on a second run while the makedirs calls above are
        # idempotent. Remove any previous copy first so the labels always
        # mirror the current original split.
        if os.path.isdir(label_dst):
            shutil.rmtree(label_dst)
        shutil.copytree(f'/kaggle/working/vinbigdata/labels/{split}', label_dst)

In [None]:
# Write the three enhanced copies of the training images.
# Order matters: the "bcet -> clahe" step reads the files the "bcet" step writes.
# The variables save_path, save_path_bcet, save_path_bclahe and images are
# reused by the sample-check cells that follow.

# clahe: original train images -> vinbigdata_clahe
path = '/kaggle/working/vinbigdata/images/train'
save_path = '/kaggle/working/vinbigdata_clahe/images/train'
images = os.listdir(path)

for image in images:
    img_path = os.path.join(path, image)
    # Read as single-channel grayscale.
    ori_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    dst_img = clahe(ori_img)

    # Saved under the same file name as the source image.
    cv2.imwrite(os.path.join(save_path, image), dst_img)
    
print('clahe done')

# bcet: original train images -> vinbigdata_bcet
path = '/kaggle/working/vinbigdata/images/train'
save_path_bcet = '/kaggle/working/vinbigdata_bcet/images/train'
images_bcet = os.listdir(path)

for image in images_bcet:
    img_path = os.path.join(path, image)
    ori_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    dst_img = bcet(ori_img)

    cv2.imwrite(os.path.join(save_path_bcet, image), dst_img)
    
print('bcet done')

# bcet -> clahe: the BCET images written above -> vinbigdata_bcet_clahe
path = '/kaggle/working/vinbigdata_bcet/images/train'
save_path_bclahe = '/kaggle/working/vinbigdata_bcet_clahe/images/train'
images_bcet_clahe = os.listdir(path)

for image in images_bcet_clahe:
    img_path = os.path.join(path, image)
    ori_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    dst_img = clahe(ori_img)

    cv2.imwrite(os.path.join(save_path_bclahe, image), dst_img)
    
print('bcet_clahe done')

Check samples

In [None]:
# Sanity check: number of files in save_path and a preview of the first image.
sample_file = os.path.join(save_path, images[0])
print(len(os.listdir(save_path)))
sample = cv2.imread(sample_file)
plt.imshow(sample)

In [None]:
# Sanity check: number of files in save_path_bcet and a preview of the first image.
sample_file = os.path.join(save_path_bcet, images[0])
print(len(os.listdir(save_path_bcet)))
sample = cv2.imread(sample_file)
plt.imshow(sample)

In [None]:
# Sanity check: number of files in save_path_bclahe and a preview of the first image.
sample_file = os.path.join(save_path_bclahe, images[0])
print(len(os.listdir(save_path_bclahe)))
sample = cv2.imread(sample_file)
plt.imshow(sample)

In [None]:
# Write the three enhanced copies of the validation images.
# Order matters: the "bcet -> clahe" step reads the files the "bcet" step writes.
# NOTE: this cell rebinds save_path (now the CLAHE *val* folder) and images
# (finally the listing of the BCET val folder); the check cells below rely on that.

# clahe: original val images -> vinbigdata_clahe
val_path = '/kaggle/working/vinbigdata/images/val'
save_path = '/kaggle/working/vinbigdata_clahe/images/val'
images = os.listdir(val_path)

for image in images:
    img_path = os.path.join(val_path, image)
    # Read as single-channel grayscale.
    ori_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    dst_img = clahe(ori_img)

    # Saved under the same file name as the source image.
    cv2.imwrite(os.path.join(save_path, image), dst_img)
    
print('clahe done')

# bcet: original val images -> vinbigdata_bcet
val_path = '/kaggle/working/vinbigdata/images/val'
val_save_path_bcet = '/kaggle/working/vinbigdata_bcet/images/val'
images = os.listdir(val_path)

for image in images:
    img_path = os.path.join(val_path, image)
    ori_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    dst_img = bcet(ori_img)

    cv2.imwrite(os.path.join(val_save_path_bcet, image), dst_img)
    
print('bcet done')

# bcet -> clahe: the BCET val images written above -> vinbigdata_bcet_clahe
val_path = '/kaggle/working/vinbigdata_bcet/images/val'
val_save_path_bclahe = '/kaggle/working/vinbigdata_bcet_clahe/images/val'
images = os.listdir(val_path)

for image in images:
    img_path = os.path.join(val_path, image)
    ori_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    dst_img = clahe(ori_img)

    cv2.imwrite(os.path.join(val_save_path_bclahe, image), dst_img)
    
print('bcet_clahe done')

In [None]:
# Sanity check: number of files in save_path and a preview of the first image.
sample_file = os.path.join(save_path, images[0])
print(len(os.listdir(save_path)))
sample = cv2.imread(sample_file)
plt.imshow(sample)

In [None]:
# Sanity check: number of files in val_save_path_bcet and a preview of the first image.
sample_file = os.path.join(val_save_path_bcet, images[0])
print(len(os.listdir(val_save_path_bcet)))
sample = cv2.imread(sample_file)
plt.imshow(sample)

In [None]:
# Sanity check for the BCET -> CLAHE validation folder. The count previously
# listed save_path (the CLAHE folder) while the preview came from
# val_save_path_bclahe; both now refer to the same folder.
print(len(os.listdir(val_save_path_bclahe)))
sample = cv2.imread(os.path.join(val_save_path_bclahe, images[0]))
plt.imshow(sample)

# Setups for training yolo v5


In [None]:
# Build the list of class names ordered by ascending class_id.
unique_pairs = set(zip(train_df.class_id, train_df.class_name))
class_ids, class_names = list(zip(*unique_pairs))
id_order = np.argsort(class_ids)
classes = [str(class_names[position]) for position in id_order]
classes

# train with original image

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

cwd = '/kaggle/working/'

# train.txt / val.txt: one image path per line.
with open(join( cwd , 'train.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata/images/train/*'):
        f.write(path+'\n')
            
with open(join( cwd , 'val.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata/images/val/*'):
        f.write(path+'\n')

# Dataset description handed to train.py via --data.
data = dict(
    train =  join( cwd , 'train.txt') ,
    val   =  join( cwd , 'val.txt' ),
    nc    = 14,
    names = classes
    )

with open(join( cwd , 'vinbigdata.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Print the file back; the context manager closes the handle, which the
# previous bare open() never did.
with open(join( cwd , 'vinbigdata.yaml'), 'r') as f:
    print('\nyaml:')
    print(f.read())


In [None]:
# Copy the YOLOv5 code into the working directory and run from there.
# NOTE(review): shutil.copytree raises if the destination already exists, so
# this cell cannot be re-run without removing /kaggle/working/yolov5 first.
shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5')
os.chdir('/kaggle/working/yolov5') # the train.py / detect.py commands below are run from this folder

import torch
from IPython.display import Image, clear_output  # to display images

# Clear the cell output, then report the torch version and the GPU (or 'CPU').
clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Train from the yolov5s.pt weights on the original images (vinbigdata.yaml): 20 epochs, batch 16, --img 640.
!WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 20 --data /kaggle/working/vinbigdata.yaml --weights yolov5s.pt --cache

# train with clahe image

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

cwd = '/kaggle/working/'

# train_clahe.txt / val_clahe.txt: one image path per line.
with open(join( cwd , 'train_clahe.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata_clahe/images/train/*'):
        f.write(path+'\n')
            
with open(join( cwd , 'val_clahe.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata_clahe/images/val/*'):
        f.write(path+'\n')

# Dataset description handed to train.py via --data.
data = dict(
    train =  join( cwd , 'train_clahe.txt') ,
    val   =  join( cwd , 'val_clahe.txt' ),
    nc    = 14,
    names = classes
    )

with open(join( cwd , 'vinbigdata_clahe.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Print the file back; the context manager closes the handle, which the
# previous bare open() never did.
with open(join( cwd , 'vinbigdata_clahe.yaml'), 'r') as f:
    print('\nyaml:')
    print(f.read())


In [None]:
# Separate copy of the YOLOv5 code for the CLAHE run, so its runs/ folder stays apart.
# NOTE(review): shutil.copytree raises if the destination already exists, so
# this cell cannot be re-run without removing /kaggle/working/yolov5_clahe first.
shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5_clahe')
os.chdir('/kaggle/working/yolov5_clahe') # the train.py / detect.py commands below are run from this folder

import torch
from IPython.display import Image, clear_output  # to display images

# Clear the cell output, then report the torch version and the GPU (or 'CPU').
clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Train from the yolov5s.pt weights on the CLAHE images (vinbigdata_clahe.yaml): 20 epochs, batch 16, --img 640.
!WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 20 --data /kaggle/working/vinbigdata_clahe.yaml --weights yolov5s.pt --cache


# Train with bcet images

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

cwd = '/kaggle/working/'

# train_bcet.txt / val_bcet.txt: one image path per line.
with open(join( cwd , 'train_bcet.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata_bcet/images/train/*'):
        f.write(path+'\n')
            
with open(join( cwd , 'val_bcet.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata_bcet/images/val/*'):
        f.write(path+'\n')

# Dataset description handed to train.py via --data.
data = dict(
    train =  join( cwd , 'train_bcet.txt') ,
    val   =  join( cwd , 'val_bcet.txt' ),
    nc    = 14,
    names = classes
    )

with open(join( cwd , 'vinbigdata_bcet.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Print the file back; the context manager closes the handle, which the
# previous bare open() never did.
with open(join( cwd , 'vinbigdata_bcet.yaml'), 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
# Separate copy of the YOLOv5 code for the BCET run, so its runs/ folder stays apart.
# NOTE(review): shutil.copytree raises if the destination already exists, so
# this cell cannot be re-run without removing /kaggle/working/yolov5_bcet first.
shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5_bcet')
os.chdir('/kaggle/working/yolov5_bcet') # the train.py / detect.py commands below are run from this folder

import torch
from IPython.display import Image, clear_output  # to display images

# Clear the cell output, then report the torch version and the GPU (or 'CPU').
clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Train from the yolov5s.pt weights on the BCET images (vinbigdata_bcet.yaml): 20 epochs, batch 16, --img 640.
!WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 20 --data /kaggle/working/vinbigdata_bcet.yaml --weights yolov5s.pt --cache


# Train with bcet_clahe images

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

cwd = '/kaggle/working/'

# train_bcet_clahe.txt / val_bcet_clahe.txt: one image path per line.
with open(join( cwd , 'train_bcet_clahe.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata_bcet_clahe/images/train/*'):
        f.write(path+'\n')
            
with open(join( cwd , 'val_bcet_clahe.txt'), 'w') as f:
    for path in glob('/kaggle/working/vinbigdata_bcet_clahe/images/val/*'):
        f.write(path+'\n')

# Dataset description handed to train.py via --data.
data = dict(
    train =  join( cwd , 'train_bcet_clahe.txt') ,
    val   =  join( cwd , 'val_bcet_clahe.txt' ),
    nc    = 14,
    names = classes
    )

with open(join( cwd , 'vinbigdata_bcet_clahe.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Print the file back; the context manager closes the handle, which the
# previous bare open() never did.
with open(join( cwd , 'vinbigdata_bcet_clahe.yaml'), 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
# Separate copy of the YOLOv5 code for the BCET+CLAHE run, so its runs/ folder stays apart.
# NOTE(review): shutil.copytree raises if the destination already exists, so
# this cell cannot be re-run without removing /kaggle/working/yolov5_bcet_clahe first.
shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5_bcet_clahe')
os.chdir('/kaggle/working/yolov5_bcet_clahe') # the train.py / detect.py commands below are run from this folder

import torch
from IPython.display import Image, clear_output  # to display images

# Clear the cell output, then report the torch version and the GPU (or 'CPU').
clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Train from the yolov5s.pt weights on the BCET+CLAHE images (vinbigdata_bcet_clahe.yaml): 20 epochs, batch 16, --img 640.
!WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 20 --data /kaggle/working/vinbigdata_bcet_clahe.yaml --weights yolov5s.pt --cache

# Inference

In [None]:
# Run detect.py with the weights trained on the original images over the
# original validation images (--conf 0.15, --iou 0.5). The visualisation cell
# further down reads the results from runs/detect/exp inside this folder.
os.chdir('/kaggle/working/yolov5')
!python detect.py --weights '/kaggle/working/yolov5/runs/train/exp/weights/best.pt'\
--img 640\
--conf 0.15\
--iou 0.5\
--source /kaggle/working/vinbigdata/images/val\
--exist-ok

In [None]:
# Run detect.py with the weights trained on the CLAHE images over the CLAHE
# validation images (--conf 0.15, --iou 0.5). The visualisation cell further
# down reads the results from runs/detect/exp inside this folder.
os.chdir('/kaggle/working/yolov5_clahe')
!python detect.py --weights '/kaggle/working/yolov5_clahe/runs/train/exp/weights/best.pt'\
--img 640\
--conf 0.15\
--iou 0.5\
--source /kaggle/working/vinbigdata_clahe/images/val\
--exist-ok

In [None]:
# Run detect.py with the weights trained on the BCET images over the BCET
# validation images (--conf 0.15, --iou 0.5). The visualisation cell further
# down reads the results from runs/detect/exp inside this folder.
os.chdir('/kaggle/working/yolov5_bcet')
!python detect.py --weights '/kaggle/working/yolov5_bcet/runs/train/exp/weights/best.pt'\
--img 640\
--conf 0.15\
--iou 0.5\
--source /kaggle/working/vinbigdata_bcet/images/val\
--exist-ok

In [None]:
# Run detect.py with the weights trained on the BCET+CLAHE images over the
# BCET+CLAHE validation images (--conf 0.15, --iou 0.5). The visualisation cell
# further down reads the results from runs/detect/exp inside this folder.
os.chdir('/kaggle/working/yolov5_bcet_clahe')
!python detect.py --weights '/kaggle/working/yolov5_bcet_clahe/runs/train/exp/weights/best.pt'\
--img 640\
--conf 0.15\
--iou 0.5\
--source /kaggle/working/vinbigdata_bcet_clahe/images/val\
--exist-ok

# Inference result (train with original images)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

files = glob('/kaggle/working/yolov5/runs/detect/exp/*')

# Show up to 16 detection outputs (original-image model) on a 4x4 grid.
row = 4
col = 4
grid_files = files[:16]
# OpenCV loads BGR; convert to RGB for matplotlib.
images = [
    cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    for image_path in tqdm(grid_files)
]

fig = plt.figure(figsize=(col*5, row*5))
# row == col here, so the (col, row) order gives the same 4x4 grid of axes.
grid = ImageGrid(fig, 111, nrows_ncols=(col, row), axes_pad=0.05)

# Iterating over the grid yields its Axes; hide the ticks on each.
for ax, im in zip(grid, images):
    ax.imshow(im)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

# Inference result (train with clahe images)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

files = glob('/kaggle/working/yolov5_clahe/runs/detect/exp/*')

# Show up to 16 detection outputs (CLAHE model) on a 4x4 grid.
row = 4
col = 4
grid_files = files[:16]
# OpenCV loads BGR; convert to RGB for matplotlib.
images = [
    cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    for image_path in tqdm(grid_files)
]

fig = plt.figure(figsize=(col*5, row*5))
# row == col here, so the (col, row) order gives the same 4x4 grid of axes.
grid = ImageGrid(fig, 111, nrows_ncols=(col, row), axes_pad=0.05)

# Iterating over the grid yields its Axes; hide the ticks on each.
for ax, im in zip(grid, images):
    ax.imshow(im)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

# Inference result (train with bcet images)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

files = glob('/kaggle/working/yolov5_bcet/runs/detect/exp/*')

# Show up to 16 detection outputs (BCET model) on a 4x4 grid.
row = 4
col = 4
grid_files = files[:16]
# OpenCV loads BGR; convert to RGB for matplotlib.
images = [
    cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    for image_path in tqdm(grid_files)
]

fig = plt.figure(figsize=(col*5, row*5))
# row == col here, so the (col, row) order gives the same 4x4 grid of axes.
grid = ImageGrid(fig, 111, nrows_ncols=(col, row), axes_pad=0.05)

# Iterating over the grid yields its Axes; hide the ticks on each.
for ax, im in zip(grid, images):
    ax.imshow(im)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

# Inference result (train with bcet_clahe images)

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

files = glob('/kaggle/working/yolov5_bcet_clahe/runs/detect/exp/*')

# Show up to 16 detection outputs (BCET+CLAHE model) on a 4x4 grid.
row = 4
col = 4
grid_files = files[:16]
# OpenCV loads BGR; convert to RGB for matplotlib.
images = [
    cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    for image_path in tqdm(grid_files)
]

fig = plt.figure(figsize=(col*5, row*5))
# row == col here, so the (col, row) order gives the same 4x4 grid of axes.
grid = ImageGrid(fig, 111, nrows_ncols=(col, row), axes_pad=0.05)

# Iterating over the grid yields its Axes; hide the ticks on each.
for ax, im in zip(grid, images):
    ax.imshow(im)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

# Compare with GT labels

In [None]:
import random
from random import randint

imgs = []
file = '/kaggle/working/yolov5/runs/detect/exp/'
original_file = '/kaggle/working/vinbigdata/images/val/'
img_ids = os.listdir(file)
class_ids = val_df['class_id'].unique()

# map label_id to specify color
label2color = {class_id:[randint(0,255) for i in range(3)] for class_id in class_ids}
thickness = 3
# NOTE(review): dividing the box coordinates by a fixed 5 assumes every
# original image is 5x the size of the saved image - verify; per-image
# scaling from the original width/height may be needed.
scale = 5

# Slice instead of range(16) so fewer than 16 detection outputs cannot raise
# IndexError.
for img_png in img_ids[:16]:
    # The image id is the file name without its extension.
    img_id = os.path.splitext(img_png)[0]
    # Draw on the clean validation image, not on the detection output.
    img = cv2.imread(os.path.join(original_file, img_png))

    gt_rows = val_df.loc[val_df['image_id'] == img_id]
    boxes = gt_rows[['x_min', 'y_min', 'x_max', 'y_max']].values/scale
    # Keep labels 1-D: the previous `.values.squeeze()` collapsed a single
    # label into a 0-d array, which zip() cannot iterate.
    labels = gt_rows['class_id'].values

    for label_id, box in zip(labels, boxes):
        color = label2color[label_id]
        img = cv2.rectangle(
            img,
            (int(box[0]), int(box[1])),
            (int(box[2]), int(box[3])),
            color, thickness
        )
    imgs.append(img)

plot_imgs(imgs, cmap=None)

# Conclusion

> mAP compare


1. Original
    * mAP_0.5 0.30146
    * mAP_0.5:0.95 0.12736

2. Clahe
    * mAP_0.5 0.29131  
    * mAP_0.5:0.95 0.12017

3. Bcet
    * mAP_0.5 0.28734
    * mAP_0.5:0.95 0.12204

4. Bcet-clahe
    * mAP_0.5 0.28066
    * mAP_0.5:0.95 0.1193
    
    
Setup: trained on 3515 abnormal images for 20 epochs and validated on 879 abnormal images; 512x512 images, yolov5s.

### The original images (without CLAHE or BCET) give the best mAP scores.
#### However, this experiment uses abnormal images only, so I will also experiment with normal images.
### Bcet-clahe is the worst, so stacking multiple preprocessing steps is not recommended.