# Unsupervised Computer Vision Segmentation with Type-Separation

In this notebook the training data is not used to train a model; it is used only for exploration and for checking the algorithm.

Type-separation means that there are different types of images (colored / gray / gray with light background). I try to detect which type an image belongs to and then apply a different separation algorithm to each type.

I am not saying that this is the solution to the problem; it is only an experiment.

The output of the notebook reaches **0.230 LB**

In [None]:
import glob
import os
import numpy as np
import pandas as pd
import cv2
%matplotlib inline
import matplotlib.pyplot as plt

# Prepare data

## Train DataFrame

In [None]:
# Start from an empty frame; its columns are assigned in a later cell.
train_df = pd.DataFrame()

In [None]:
# Walk every sample directory of the training set and record, per sample,
# its id (the directory name), the path of its image and the paths of all
# PNG files found under its masks/ folder.
train_image_ids = []
train_image_paths = []
train_image_mask_paths = []

for sample_dir in glob.glob("../input/stage1_train/*"):
    # Only the first PNG found under images/ is used for a sample.
    image_png = glob.glob(os.path.join(sample_dir, "images", "*.png"))[0]
    mask_pngs = glob.glob(os.path.join(sample_dir, "masks", "*.png"))

    train_image_ids.append(os.path.basename(sample_dir))
    train_image_paths.append(image_png)
    train_image_mask_paths.append(mask_pngs)

In [None]:
# One row per training sample; "mask_path" holds a list of paths per row.
train_df["image_id"] = train_image_ids
train_df["image_path"] = train_image_paths
train_df["mask_path"] = train_image_mask_paths

In [None]:
# Show five random rows as a quick sanity check of the collected paths.
train_df.sample(5)

In [None]:
# Persist the training index to the working directory (the row index is written as well).
train_df.to_csv("train_df.csv")

## Test DataFrame

In [None]:
# Start from an empty frame; its columns are assigned in a later cell.
test_df = pd.DataFrame()

In [None]:
# Walk every sample directory of the test set and record, per sample,
# its id (the directory name) and the path of its image.
test_image_ids = []
test_image_paths = []

for sample_dir in glob.glob("../input/stage1_test/*"):
    # Only the first PNG found under images/ is used for a sample.
    image_png = glob.glob(os.path.join(sample_dir, "images", "*.png"))[0]

    test_image_ids.append(os.path.basename(sample_dir))
    test_image_paths.append(image_png)

In [None]:
# One row per test sample.
test_df["image_id"] = test_image_ids
test_df["image_path"] = test_image_paths

In [None]:
# Show five random rows as a quick sanity check of the collected paths.
test_df.sample(5)

In [None]:
# Persist the test index to the working directory (the row index is written as well).
test_df.to_csv("test_df.csv")

# Type-Separation and processing

- Type gray with white bg: `3594684b9ea0e16196f498815508f8d364d55fea2933a2e782122b6f00375d04`
- Type color: `74a7785530687a11ecd073e772f90912d9967d02407a192bfab282c35f55ab94`
- Type gray with black bg: `f113626a04125d97b27f21b45a0ce9a686d73dee7b5dbc0725d49194ba0203bd`

In [None]:
# Draw one training sample at random and unpack the fields that the
# exploration cells use: its id, its image path and its list of mask paths.
tmp_image_row = train_df.sample(1)
tmp_sample = tmp_image_row.iloc[0]

tmp_image_id = tmp_sample["image_id"]
print("Imge id is: {0}".format(tmp_image_id))
tmp_image_path = tmp_sample["image_path"]
tmp_image_masks = tmp_sample["mask_path"]

In [None]:
# With the default flag cv2.imread decodes to a 3-channel image in BGR channel order.
tmp_image = cv2.imread(tmp_image_path)

In [None]:
# OpenCV loads images in BGR order while matplotlib interprets 3-channel
# arrays as RGB, so convert before displaying; otherwise red and blue are
# swapped in the preview.
plt.imshow(cv2.cvtColor(tmp_image, cv2.COLOR_BGR2RGB))

In [None]:
def create_unified_mask(mask_image_paths):
    """Merge several mask images into a single mask.

    Every path is read as a grayscale image and the images are combined
    with a pixel-wise bitwise OR.

    Parameters
    ----------
    mask_image_paths : iterable of str
        Paths of the individual mask images.

    Returns
    -------
    The combined mask, or ``None`` when ``mask_image_paths`` is empty.
    """
    combined = None
    for mask_path in mask_image_paths:
        single_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # The very first mask seeds the accumulator (it is OR-ed with itself).
        accumulator = single_mask if combined is None else combined
        combined = cv2.bitwise_or(accumulator, single_mask)
    return combined

In [None]:
# Union of all masks that belong to the sampled training image.
tmp_image_mask = create_unified_mask(tmp_image_masks)

In [None]:
# Show the sampled image (left) next to the union of its masks (right).
fig, axs = plt.subplots(1, 2, figsize=(10,10))

# cv2.imread returns BGR but matplotlib expects RGB; convert so that the
# colors of the preview are not swapped.
axs[0].imshow(cv2.cvtColor(tmp_image, cv2.COLOR_BGR2RGB))
axs[0].grid()

axs[1].imshow(tmp_image_mask)
axs[1].grid()

### Type-separation

In [None]:
# Convert the BGR image to HSV and split it into hue, saturation and value planes.
hsv_image = cv2.cvtColor(tmp_image, cv2.COLOR_BGR2HSV)
h, s, v =cv2.split(hsv_image)

In [None]:
# Plot the hue, saturation and value planes side by side (left to right).
fig, axs = plt.subplots(1, 3, figsize=(20,20))
axs[0].imshow(h)
axs[1].imshow(s)
axs[2].imshow(v)

In [None]:
def get_image_type(image, light_bg_min_area=65000):
    """Classify a BGR image into one of the image types used in this notebook.

    Type codes:
        0 -- gray image with black background
        1 -- gray image with white/gray background
        2 -- colored image

    An image counts as gray when its hue plane is zero everywhere. A gray
    image is Otsu-thresholded on its value plane; when the largest contour
    of the thresholded image has an area above ``light_bg_min_area`` the
    background is considered light (type 1), otherwise dark (type 0).

    Parameters
    ----------
    image : array
        Image in BGR channel order, as returned by ``cv2.imread``.
    light_bg_min_area : float, optional
        Contour-area threshold (in pixels) that separates light from dark
        backgrounds. The default keeps the previously hard-coded value.
        Note that this is an absolute area and is not scaled by image size.

    Returns
    -------
    tuple
        ``(image_type, (h, s, v))`` -- the type code and the hue, saturation
        and value planes of the image.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv_image)

    # Decide if it is a colored image or not: any non-zero hue means color.
    if np.any(h):
        # TODO: here we can separate colored images based on the lightness of the BG. Just like we did it
        # for the gray images
        return 2, (h, s, v)

    _, thresh = cv2.threshold(v, 0, 255, cv2.THRESH_OTSU)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x/4.x; the contours are always second to last.
    cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # A uniform image yields no contour at all; treat that as area 0
    # instead of failing on an empty list.
    max_cnt_area = max((cv2.contourArea(cnt) for cnt in cnts), default=0)

    # Decide which type of gray it is
    image_type = 1 if max_cnt_area > light_bg_min_area else 0
    return image_type, (h, s, v)

In [None]:
# Classify the sampled image and keep its HSV planes for the thresholding cell.
image_type, (h, s, v) = get_image_type(tmp_image)

### Method 1: Thresholding only

In [None]:
# Build a first binary mask with a thresholding rule chosen per image type.
if image_type == 2:
    # Colored image: Otsu threshold on the lightly blurred saturation plane.
    s_blurred = cv2.GaussianBlur(s, (7,7), 1)
    ret, thresh = cv2.threshold(s_blurred,0, 255, cv2.THRESH_OTSU)
    print("Type COLOR with light bg")
elif image_type == 1:
    # Gray image on a light background: fixed, inverted threshold on the value plane.
    ret, thresh = cv2.threshold(v, 100, 150, cv2.THRESH_BINARY_INV)
    print("Type GRAY with white/light-gray bg")
elif image_type == 0:
    # Gray image on a black background: Otsu threshold on the lightly blurred value plane.
    v_blurred = cv2.GaussianBlur(v, (7,7), 1)
    ret, thresh = cv2.threshold(v_blurred, 0, 255, cv2.THRESH_OTSU)
    print("Type GRAY with black bg")

In [None]:
# Compare the thresholded image (left) with the union of the provided masks (right).
fig, axs = plt.subplots(1, 2, figsize=(10,10))

axs[0].imshow(thresh)
axs[0].grid()

axs[1].imshow(tmp_image_mask)
axs[1].grid()

In [None]:
# 4x4 elliptical structuring element used by the closing/opening in the next cell.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))

In [None]:
# Clean up the thresholded image: closing, then opening (two iterations each),
# followed by a single dilation with a 3x3 elliptical element.
mask = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=2)
mask = cv2.dilate(mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3)))

In [None]:
# Compare the cleaned-up mask (left) with the union of the provided masks (right).
fig, axs = plt.subplots(1, 2, figsize=(10,10))
axs[0].imshow(mask)
axs[1].imshow(tmp_image_mask)

### Method 2: Watershed

# Create submission with test images

In [None]:
def preproces_image_based_on_type(image_type, saturation_image, value_image):
    """Threshold an image plane with the rule that matches its image type.

    The planes passed in as arguments are used (and not module-level
    variables), so the result depends only on the function's inputs.
    No blurring is applied before thresholding.

    Parameters
    ----------
    image_type : int
        0 (gray, black background), 1 (gray, white/gray background) or
        2 (colored), as produced by ``get_image_type``.
    saturation_image : array
        Saturation plane of the image; used for type 2.
    value_image : array
        Value plane of the image; used for types 0 and 1.

    Returns
    -------
    The thresholded image returned by ``cv2.threshold``.

    Raises
    ------
    ValueError
        If ``image_type`` is not 0, 1 or 2.
    """
    if image_type == 0:
        # Gray on black background: Otsu threshold on the value plane.
        _, thresh = cv2.threshold(value_image, 0, 255, cv2.THRESH_OTSU)
    elif image_type == 1:
        # Gray on light background: fixed, inverted threshold on the value plane.
        _, thresh = cv2.threshold(value_image, 100, 150, cv2.THRESH_BINARY_INV)
    elif image_type == 2:
        # Colored image: Otsu threshold on the saturation plane.
        _, thresh = cv2.threshold(saturation_image, 0, 255, cv2.THRESH_OTSU)
    else:
        raise ValueError("Not known image type")
    return thresh

In [None]:
def apply_morphology(mask_image):
    """Clean a thresholded mask with one closing followed by one opening.

    Both operations use the same 3x3 elliptical structuring element and a
    single iteration. No final dilation is applied.

    Parameters
    ----------
    mask_image : array
        Thresholded image to clean up.

    Returns
    -------
    The mask after closing and opening.
    """
    ellipse_3x3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    cleaned = mask_image
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        cleaned = cv2.morphologyEx(cleaned, operation, ellipse_3x3, iterations=1)
    return cleaned

In [None]:
# Segment every test image: classify its type, threshold the matching plane
# and clean the result with morphology. Masks are stored in the same order
# as the ids in submission_image_ids.
submission_image_ids = test_df["image_id"].values
submission_image_masks = []

for image_path in test_df["image_path"].values:
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image_type, (h, s, v) = get_image_type(image)
    mask = apply_morphology(preproces_image_based_on_type(image_type, s, v))
    submission_image_masks.append(mask)

In [None]:
# Run length Encoding from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python

from skimage.morphology import label

def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that are equal to 1.

    The array is traversed in column-major order (``x.T`` flattened) and
    positions are 1-based.

    Parameters
    ----------
    x : numpy array
        2-D mask; pixels equal to 1 (or ``True``) are encoded.

    Returns
    -------
    list
        Flat list ``[start_1, length_1, start_2, length_2, ...]``; empty when
        no pixel is set.
    """
    set_positions = np.flatnonzero(x.T.flatten() == 1)
    runs = []
    last = None
    for pos in set_positions:
        if last is None or pos - last > 1:
            # Gap before this pixel: open a new run of length one.
            runs.append(pos + 1)
            runs.append(1)
        else:
            # Directly follows the previous pixel: lengthen the current run.
            runs[-1] += 1
        last = pos
    return runs

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of a mask.

    ``x`` is binarised with ``x > cutoff``, the result is labelled with
    ``label`` and every label from 1 up to the maximum label is encoded
    with ``rle_encoding``.

    Parameters
    ----------
    x : numpy array
        Mask or score image.
    cutoff : float, optional
        Pixels strictly above this value are treated as foreground.

    Yields
    ------
    list
        The run-length encoding of one labelled region.
    """
    labelled = label(x > cutoff)
    region_count = labelled.max()
    for region_id in range(1, region_count + 1):
        yield rle_encoding(labelled == region_id)

In [None]:
# Encode every region of every test mask and repeat the image id once per
# encoded region so that both lists stay aligned row by row.
new_test_ids = []
rles = []
for n in range(len(submission_image_ids)):
    image_rles = list(prob_to_rles(submission_image_masks[n]))
    rles += image_rles
    new_test_ids += [submission_image_ids[n]] * len(image_rles)

In [None]:
# One row per encoded region: the image id and its run-length encoding
# written as space-separated numbers.
submission_df = pd.DataFrame()
submission_df['ImageId'] = new_test_ids
submission_df['EncodedPixels'] = pd.Series(rles).apply(lambda run: ' '.join(map(str, run)))

In [None]:
# Preview the first five submission rows.
submission_df.head(5)

In [None]:
# Total number of encoded regions (one submission row each).
len(submission_df)

In [None]:
# Every test image must contribute at least one row to the submission;
# report the ids that produced no region at all.
submitted_ids = set(np.unique(submission_df["ImageId"]))
if len(submitted_ids) == len(test_image_ids):
    print("Submission is ready")
else:
    print("Submission is not complete")
    print("Missing test ids: {0}".format(set(test_image_ids).difference(submitted_ids)))

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
submission_df.to_csv('submission_computer_vision.csv', index=False)