# Imports

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import glob
import scipy
import json
import cv2
import os
import string
from matplotlib import pyplot as plt
import seaborn as sns
from IPython.display import clear_output
from sklearn.decomposition import PCA
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import RobustScaler, Normalizer
from tensorflow.keras import Sequential
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.layers import Dense, Input, Dropout, Conv2D, MaxPooling2D, Flatten, concatenate, add
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model

!pip install keras_tuner
import keras_tuner as kt

# Loading Files (Test and Train Data)

In [None]:
# Root folder of the competition data set.
in_path = "/kaggle/input/sartorius-cell-instance-segmentation"

# Collect the path of every PNG in the train and test image folders.
train_img_paths = glob.glob(f'{in_path}/train/*.png')
test_img_paths = glob.glob(f'{in_path}/test/*.png')

# Load the training CSV; the bare `df` expression below displays it when
# this runs as the last statement of a notebook cell.
df = pd.read_csv(f'{in_path}/train.csv')
df

# Image Pre-processing

In [None]:
class ImagePreProcess:
    """
    Turn images into binary masks using a histogram-derived threshold.

    For every path in ``image_paths`` the image is read, a threshold is
    computed from its pixel histogram, the image is binarised with that
    threshold and the result is written as ``<name>.png`` below
    ``<cwd>/train/`` (when ``train`` is truthy) or ``<cwd>/test/``
    (otherwise).
    """

    def __init__(self, image_paths, train=None):
        """
        Args:
            image_paths: iterable of image file paths to process.
            train: truthy to write results to ``train/``, falsy (including
                the default ``None``) to write them to ``test/``.
        """
        self.train_true = train
        self.image_paths = image_paths
        # Subtracted from the detected histogram position; the original
        # author suggests 2 or 3 (higher numbers remove more background).
        self.offset = 2
        self.cwd = '/kaggle/working'

    def __load_image_default(self, image_path):
        """Read ``image_path`` with ``cv2.imread`` and return the pixel array.

        Raises:
            OSError: if ``cv2.imread`` returns ``None``, which it does instead
                of raising when the file is missing or cannot be decoded.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise OSError(f'Could not read image: {image_path}')
        return img

    def _histogram_threshold(self, img):
        """Return the binarisation threshold for an already-loaded image.

        The threshold is the index of the steepest rise in the 256-bin pixel
        histogram, minus ``self.offset``.
        """
        hist, _ = np.histogram(img.ravel(), 256, [0, 256])  # one bin per pixel value
        grad = np.gradient(hist, edge_order=2)
        # argmax returns the first position of the maximum gradient.
        return int(np.argmax(grad)) - self.offset

    def _binarize(self, img, threshold):
        """Return ``img`` thresholded with ``cv2.THRESH_BINARY`` (max value 255)."""
        _, img_binary = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)
        return img_binary

    def __threshold_identification(self, image_path):
        """Load ``image_path`` and return its histogram-derived threshold."""
        return self._histogram_threshold(self.__load_image_default(image_path))

    def __binary_mask(self, image_path, threshold):
        """Load ``image_path`` and return its binary mask for ``threshold``."""
        return self._binarize(self.__load_image_default(image_path), threshold)

    def __erode_dilate(self, img, erode_or_dilate='erode'):
        """Apply a 2x2 erosion or dilation to ``img``.

        Args:
            img: image array to transform.
            erode_or_dilate: ``'erode'`` or ``'dilate'``.

        Raises:
            ValueError: for any other mode (previously this surfaced as an
                ``UnboundLocalError`` on return).
        """
        kernel = np.ones((2, 2), np.uint8)
        if erode_or_dilate == 'erode':
            return cv2.erode(img, kernel)
        if erode_or_dilate == 'dilate':
            return cv2.dilate(img, kernel)
        raise ValueError(
            f"erode_or_dilate must be 'erode' or 'dilate', got {erode_or_dilate!r}"
        )

    def __remask(self, image_path, img_binary):
        """Return the bitwise OR of ``img_binary`` and the image at ``image_path``."""
        img = self.__load_image_default(image_path)
        return cv2.bitwise_or(img_binary, img)

    def __save_image(self, image_name, img):
        """Write ``img`` as ``<image_name>.png`` into the train or test folder.

        Both output folders are created if missing. They are created
        independently, so an existing ``train/`` no longer prevents ``test/``
        from being created.
        """
        for subdir in ('train', 'test'):
            os.makedirs(f'{self.cwd}/{subdir}/', exist_ok=True)

        # Keep the notebook output to a single, continuously updated line.
        clear_output(wait=True)
        print(image_name)

        target = 'train' if self.train_true else 'test'
        cv2.imwrite(f'{self.cwd}/{target}/{image_name}.png', img)

    def process_images(self):
        """Binarise and save every image, then show the last result.

        Each image is decoded once and reused for both the threshold
        computation and the binarisation. With no input paths, nothing is
        written or plotted and only the completion message is printed.
        """
        img = None
        for image_path in self.image_paths:
            # File name without directory and extension.
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            source = self.__load_image_default(image_path)
            threshold = self._histogram_threshold(source)
            img = self._binarize(source, threshold)
            self.__save_image(image_name, img=img)
        if img is not None:
            plt.imshow(img)
        print("Completed.")


# One pre-processor per split; `train` selects which output folder is used.
IMPP_train = ImagePreProcess(image_paths=train_img_paths, train=True)
IMPP_test = ImagePreProcess(image_paths=test_img_paths, train=False)

# Process the training images first, then the test images.
IMPP_train.process_images()
IMPP_test.process_images()

# Making Annotation Masks

In [None]:
class generateAnnotations:
    """
    Decode run-length encoded annotations into mask images.

    The frame passed in is read through the columns ``id``, ``annotation``,
    ``height`` and ``width``. One mask per ``id`` is written to
    ``<cwd>/annotation_masks/<id>.png``.
    """

    def __init__(self, df):
        """
        Args:
            df: pandas DataFrame holding the annotation rows.
        """
        self.df = df
        self.cwd = '/kaggle/working'

    def __rle_decode(self, mask_rle, shape, color=0):
        '''
        Decode a run-length string into a mask array.

        mask_rle: whitespace-separated "start length" pairs, starts 1-based
        shape: (height, width, channels) of array to return
        color: value written into every pixel covered by a run
        Returns a float32 numpy array of the given shape, zero outside runs
        '''
        tokens = mask_rle.split()
        starts = [int(tok) - 1 for tok in tokens[0::2]]  # to 0-based offsets
        lengths = [int(tok) for tok in tokens[1::2]]

        # Work on a flat (pixels, channels) view and reshape at the end.
        img = np.zeros((shape[0] * shape[1], shape[2]), dtype=np.float32)
        for start, length in zip(starts, lengths):
            img[start:start + length] = color

        return img.reshape(shape)

    def __save_image(self, image_name, img):
        """Write ``img`` to ``<cwd>/annotation_masks/<image_name>.png``."""
        # Keep the notebook output to a single, continuously updated line.
        clear_output(wait=True)
        print(image_name)
        os.makedirs(f'{self.cwd}/annotation_masks/', exist_ok=True)
        cv2.imwrite(f'{self.cwd}/annotation_masks/{image_name}.png', img)

    def create_annotation_masks(self):
        """Build and save one combined mask per image id, then show the last.

        Uses the DataFrame given to the constructor rather than a global of
        the same name. Returns immediately when that frame is empty.
        """
        if self.df.empty:
            return

        per_image = self.df.groupby(by='id')['annotation'].apply(list).reset_index()

        # Normally wouldn't fix this, but all imgs are the same HxW.
        # Positional access so a non-default index still works.
        h = int(self.df['height'].iloc[0])
        w = int(self.df['width'].iloc[0])

        mask = None
        for image_name, image_annotations in per_image.values:
            # All runs of one image are decoded into a single mask.
            annotations = " ".join(image_annotations)
            mask = self.__rle_decode(mask_rle=annotations, shape=(h, w, 3), color=255)
            self.__save_image(image_name=image_name, img=mask)
        if mask is not None:
            plt.imshow(mask)

# Decode the annotations of every image in `df` and write them out as mask PNGs.
GA = generateAnnotations(df)
GA.create_annotation_masks()

# Mask Handling

In [None]:
class combineMasks:
    """
    Pair each pre-processed image with its annotation mask.

    Images are looked up below ``<cwd>/train/`` (falling back to
    ``<cwd>/test/``) and masks below ``<cwd>/annotation_masks/``.
    """

    def __init__(self, df):
        """
        Args:
            df: pandas DataFrame; the ``id`` and ``cell_type`` columns are read.
        """
        self.df = df
        self.cwd = '/kaggle/working'

    def _resolve_base_path(self, img_name):
        """Return the path of the pre-processed image called ``img_name``.

        The train path is preferred. The test path is returned only when the
        train file is missing and the test file exists. The original
        try/except could never reach its fallback, because building a path
        string does not raise.
        """
        train_path = f'{self.cwd}/train/{img_name}.png'
        test_path = f'{self.cwd}/test/{img_name}.png'
        if not os.path.exists(train_path) and os.path.exists(test_path):
            return test_path
        return train_path

    def pull_image_path(self):
        """Load the base image and mask of every image id and print progress.

        For each id, the id and one of its cell types are printed.
        """
        cell_labels = self.df.groupby(by='id')['cell_type'].apply(set).reset_index()
        for img_name, cell_types in cell_labels.values:
            # Take one element of the set of cell types for this image.
            cell_type = next(iter(cell_types))

            clear_output(wait=True)
            print(img_name)

            path_base = self._resolve_base_path(img_name)
            path_mask = f'{self.cwd}/annotation_masks/{img_name}.png'
            # NOTE(review): both images are loaded but not used further in
            # this method - presumably the combining step is still to come.
            img_mask = cv2.imread(path_mask)
            img_base = cv2.imread(path_base)
            print(cell_type)
            
# Walk over every image id in `df`, loading its pre-processed image and mask.
CM = combineMasks(df)
CM.pull_image_path()

# Model Define

In [None]:
def model_builder(hp):
    