In [1]:
import pandas as pd
from torch.utils.data.dataset import Dataset
from torchvision.transforms import transforms
import os
import cv2
from os.path import isfile
import numpy as np
from tqdm import tqdm

In [2]:
def crop_image1(img, tol=7):
    """Drop every row and column of ``img`` that has no value above ``tol``.

    Args:
        img: Array of pixel intensities. The row/column masks are reduced
            over axes 1 and 0 and combined with ``np.ix_``, so a 2-D array
            is expected.
        tol: Intensity threshold; a pixel counts as "content" only when it
            is strictly greater than this value.

    Returns:
        A new array made of the rows and columns of ``img`` that contain at
        least one pixel greater than ``tol``. If no pixel qualifies the
        result is empty (shape ``(0, 0)``).
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)  # rows holding at least one bright pixel
    keep_cols = bright.any(axis=0)  # columns holding at least one bright pixel
    selector = np.ix_(keep_rows, keep_cols)
    return img[selector]


def crop_image_from_gray(img, tol=7):
    """Crop away rows and columns that contain no pixel brighter than ``tol``.

    For 2-D input the brightness mask is computed on the image itself. For
    3-D input it is computed on a grayscale conversion
    (``cv2.COLOR_RGB2GRAY``) and the same row/column selection is applied to
    the first three channels.

    Args:
        img: 2-D array, or 3-D array laid out as (rows, columns, channels).
            3-D input is converted with ``COLOR_RGB2GRAY``, so it is
            presumably in RGB channel order.
        tol: Intensity threshold; a pixel counts as "content" only when it
            is strictly greater than this value.

    Returns:
        A cropped copy of ``img``. When no pixel exceeds ``tol`` (the image
        is too dark and everything would be cropped out) the original
        ``img`` is returned unchanged, for both 2-D and 3-D input.

    Raises:
        ValueError: If ``img`` is neither 2-D nor 3-D.
    """
    if img.ndim == 2:
        mask = img > tol
    elif img.ndim == 3:
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError(
            "expected a 2-D or 3-D image, got ndim={}".format(img.ndim)
        )

    keep_rows = mask.any(axis=1)
    keep_cols = mask.any(axis=0)

    # A row survives only if it has a bright pixel, in which case that
    # pixel's column survives too, so checking the rows is sufficient.
    if not keep_rows.any():
        return img  # too dark: cropping would leave nothing

    selector = np.ix_(keep_rows, keep_cols)
    if img.ndim == 2:
        return img[selector]
    # Index all channels at once instead of cropping each one and
    # re-stacking; only the first three channels are kept, as before.
    return img[:, :, :3][selector]

def expand_path(p, train_path):
    """Resolve an image id to the path of its PNG file, if that file exists.

    Args:
        p: Image identifier; converted with ``str`` before use.
        train_path: Prefix prepended verbatim to ``<p>.png`` (plain string
            concatenation, so a directory needs its trailing separator).

    Returns:
        ``train_path + str(p) + ".png"`` when that file exists, otherwise
        ``str(p)`` on its own.
    """
    name = str(p)
    candidate = train_path + name + ".png"
    return candidate if isfile(candidate) else name

In [3]:
def save_preprocessing(train_path, save_path, df, idx, size=256, sigma=30):
    """Preprocess one image listed in ``df`` and write it to ``save_path``.

    Steps: read the image, convert BGR -> RGB, crop dark borders with
    ``crop_image_from_gray``, resize to ``size`` x ``size``, then compute
    ``4 * image - 4 * GaussianBlur(image, sigma) + 128`` and save as PNG.

    Args:
        train_path: Prefix of the source images (passed to ``expand_path``).
        save_path: Prefix prepended verbatim to ``<id_code>.png`` for the
            output file (plain string concatenation).
        df: DataFrame with an ``id_code`` column.
        idx: Positional index of the row in ``df`` to process.
        size: Side length in pixels of the square output image.
        sigma: Standard deviation of the Gaussian blur that is subtracted.

    Raises:
        OSError: If the source image cannot be read or the result cannot be
            written (``cv2.imread`` / ``cv2.imwrite`` only signal failure
            through their return value).
    """
    # Stringify once so the input and output file names are built from the
    # same text even when id_code is not stored as a string.
    p = str(df.id_code.values[idx])
    p_path = expand_path(p, train_path)

    image = cv2.imread(p_path)
    if image is None:
        raise OSError(
            "could not read image for id_code {!r} from {!r}".format(p, p_path)
        )

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (size, size))
    # Kernel size (0, 0) lets OpenCV derive the kernel from sigma.
    blurred = cv2.GaussianBlur(image, (0, 0), sigma)
    image = cv2.addWeighted(image, 4, blurred, -4, 128)

    # NOTE(review): `image` is in RGB order here (converted above) while
    # cv2.imwrite interprets arrays as BGR, so the saved PNG has its red and
    # blue channels swapped. Left unchanged because downstream consumers of
    # the existing files may rely on it -- confirm before changing.
    out_path = save_path + p + '.png'
    if not cv2.imwrite(out_path, image):
        raise OSError("could not write preprocessed image to {!r}".format(out_path))

In [4]:
# Source and destination prefixes; both end with a separator because the
# helpers build file names by plain string concatenation.
TRAIN_IMAGE_DIR = './input/train_mixed_images/'
PREPROCESSED_DIR = './input/train_mixed_BEN_preprocessing/'

# Stack the two CSV files into one frame. The original row indices are kept
# (so they may repeat); rows are addressed by position below.
train_new_csv = pd.read_csv('./input/train_new.csv')
train_old_csv = pd.read_csv('./input/train_old.csv')
train_csv = pd.concat([train_new_csv, train_old_csv], axis=0)

# cv2.imwrite does not create missing directories, so make sure the output
# folder exists before starting the long-running loop.
os.makedirs(PREPROCESSED_DIR, exist_ok=True)

for i in tqdm(range(len(train_csv))):
    save_preprocessing(TRAIN_IMAGE_DIR,
                       PREPROCESSED_DIR,
                       train_csv,
                       i)

100%|██████████| 38788/38788 [45:28<00:00, 14.22it/s] 
