In [None]:
# Import libraries and install

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib.image as image
import os as os
import imageio
import tqdm
from tqdm import trange
import glob
import cv2
import scipy.ndimage as ndi
import shutil
from PIL import Image

#For the models
import keras
from keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Activation, GlobalAveragePooling2D, Dense, Input
import tensorflow as tf

#For EfficientNet
#!pip install -U efficientnet
#import efficientnet.keras as efn

#For HPA Cell Segmentator
#!pip install https://github.com/CellProfiling/HPA-Cell-Segmentation/archive/master.zip
!pip install "../input/hpacellsegmentatormaster/HPA-Cell-Segmentation-master"
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei

#For encoding masks
#!pip install pycocotools
!pip install "../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"
import base64
from pycocotools import _mask as coco_mask
import typing as t
import zlib

In [None]:
!pip install '../input/kerasapplications'

In [None]:
!pip install '../input/efficientnet-keras-source-code'

In [None]:
import efficientnet.keras as efn 

In [None]:
!pip install "../input/hpapytorchzoozip/pytorch_zoo-master"

In [None]:
# Input
# Root directory of the competition dataset; the test-image glob patterns
# in this notebook are built by appending '/test/...' to this path.
competition_data_dir = '../input/hpa-single-cell-image-classification'

In [None]:
#DATAFRAMES - only submission csv used for final submission

# Main dataframe from the train.csv file.
# The 'Label' column is split on '|' so every row holds a list of label strings.
print('\033[95m'+'\033[1m'+'DF (train.csv)')
df = pd.read_csv("../input/hpa-single-cell-image-classification/train.csv")
df["Label"] = df["Label"].apply(lambda x: x.split("|"))
display(df)

# df_Labels: one copy of the main dataframe per label index (0..18). In copy i
# the 'Label' column is 1 when label i is present in the row's label list and
# 0 otherwise.
# The column is computed in one vectorised step and attached with assign():
# writing to the rows yielded by DataFrame.iterrows() is not guaranteed to
# modify the frame (pandas may hand out copies), so the previous row-by-row
# assignment could silently leave the labels unchanged.
print('\033[95m'+'\033[1m'+'DF_LABELS')
df_Labels = [
    df.assign(
        Label=df["Label"].apply(lambda labels, target=str(i): int(target in labels))
    )
    for i in range(19)
]
display(df_Labels[5])

# Base submission csv to dataframe
print('\033[95m'+'\033[1m'+'SUBMISSION DF (submission.csv)')
df_submission = pd.read_csv("../input/base-submission-csv-2/submission.csv")
display(df_submission)

In [None]:
#TEST IMAGES from test folder - 'images_test'
# Individual sorted path lists for the red, yellow, blue and green filters.
# The segmentator only needs red, yellow and blue, so green is left out of images_test.
red_filters_test = sorted(glob.glob(competition_data_dir + '/test/' + '*_red.png'))
yellow_filters_test = sorted(glob.glob(competition_data_dir + '/test/' + '*_yellow.png'))
blue_filters_test = sorted(glob.glob(competition_data_dir + '/test/' + '*_blue.png'))
green_filters_test = sorted(glob.glob(competition_data_dir + '/test/' + '*_green.png'))

images_test = [red_filters_test, yellow_filters_test, blue_filters_test]#, green_filters_test]

# Quick visual sanity check on the first green-channel image.
proba = cv2.imread(green_filters_test[0])
plt.imshow(proba)
plt.show()

#Read new ids and size of images in test folder for final submission
# Each red-channel file is opened once and only its header is consulted for the
# size (PIL's Image.open is lazy), instead of fully decoding every image twice.
ids = []
widths = []
height = []
for red_path in red_filters_test:
    with Image.open(red_path) as red_image:
        image_width, image_height = red_image.size
    # The image id is the file name without the trailing '_red.png'.
    ids.append(os.path.basename(red_path)[:-len('_red.png')])
    widths.append(image_width)
    height.append(image_height)
df_submission = pd.DataFrame(data={'ID': ids, 'ImageWidth': widths, 'ImageHeight': height})

In [None]:
#We use separate models for each label, models is a list with these efficientnet models
def create_models(num_models=19, weights_dir='../input/models-weights/Models copy'):
    """Build one binary classifier per label and load its saved weights.

    Each model is a Sequential stack of:
      1. a 3x3 'same' convolution mapping the 4-channel 224x224 input to 3 channels,
      2. an EfficientNetB0 backbone created without pretrained weights and without top,
      3. global average pooling, Dense(128, relu) and a Dense(1, sigmoid) output.

    Args:
        num_models: number of per-label models to create (default 19).
        weights_dir: directory containing one 'MODEL<i>/' weights location per
            model index i.

    Returns:
        A list of compiled models, index i holding the model whose weights were
        loaded from '<weights_dir>/MODEL<i>/'.
    """
    models = []
    for i in trange(num_models):
        base_model = efn.EfficientNetB0(weights=None, include_top=False, input_shape=(224, 224, 3))

        # Adapter that turns the 4-channel input into the 3 channels the backbone was built for.
        model_1 = Sequential()
        model_1.add(Conv2D(3, (3, 3), padding='same', input_shape=(224, 224, 4)))

        # NOTE(review): relies on the private `_layers` attribute to drop the
        # backbone's first layer (presumably its Input layer) - confirm this
        # still works with the installed Keras version.
        base_model._layers.pop(0)

        model = keras.Sequential([
            model_1,
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.Dense(128, activation='relu'),
            layers.Dense(1, activation='sigmoid'),
        ])

        # Unfreeze the layers
        for layer in model.layers:
            layer.trainable = True

        model.compile(keras.optimizers.Adam(learning_rate=1e-4), loss="binary_crossentropy", metrics=["accuracy"])
        model.load_weights(weights_dir + '/MODEL' + str(i) + '/')
        models.append(model)
    return models


In [None]:
#LOAD MODELS
# Create new model instances
# create_models() builds one compiled model per label and loads its saved
# weights; the resulting list is indexed by label number.
models_loaded = create_models()


In [None]:
#Prepare cell segmentator
# Paths to the pretrained HPA segmentation weights (nuclei model and 3-channel cell model).
NUC_MODEL = "../input/hpacellsegmentatormodelweights/dpn_unet_nuclei_v1.pth"
CELL_MODEL = "../input/hpacellsegmentatormodelweights/dpn_unet_cell_3ch_v1.pth"

# Single segmentator instance reused for every batch in the prediction loop.
# NOTE(review): device is hard-coded to "cuda"; presumably the notebook is
# expected to run on a GPU session - confirm behaviour when no GPU is present.
segmentator = cellsegmentator.CellSegmentator(
    NUC_MODEL,
    CELL_MODEL,
    scale_factor=0.25,
    device="cuda",
    # NOTE: setting padding=True seems to solve most issues that have been encountered
    #       during our single cell Kaggle challenge.
    padding=True,#False,
    multi_channel_model=True,
)

In [None]:
#Encode cell mask to submission format (RLE, compress, base64)
def encode_binary_mask(mask: np.ndarray) -> bytes:
    """Converts a binary mask into OID challenge encoding ascii text.

    Args:
        mask: boolean array that is 2-d after squeezing out size-1 axes.

    Returns:
        The base64-encoded, zlib-compressed COCO RLE counts as a bytes object
        (ASCII characters only). Callers that need a str must decode it.

    Raises:
        ValueError: if `mask` is not of boolean dtype, or is not 2-d after
            squeezing.
    """

    # check input mask --
    # np.bool_ is used because the `np.bool` alias no longer exists in recent NumPy.
    if mask.dtype != np.bool_:
        raise ValueError(
            "encode_binary_mask expects a binary mask, received dtype == %s" %
            mask.dtype)

    mask = np.squeeze(mask)
    if mask.ndim != 2:
        # The shape tuple is wrapped in a 1-tuple: passing it directly to `%`
        # would be unpacked as several format arguments and raise TypeError.
        raise ValueError(
            "encode_binary_mask expects a 2d mask, received shape == %s" %
            (mask.shape,))

    # convert input mask to expected COCO API input --
    # (height, width, 1) uint8 array in Fortran (column-major) order.
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str

In [None]:
import gc
import torch

In [None]:
# NOTE(review): this writes the *train* dataframe (`df`) to submission.csv.
# The file is overwritten with the real predictions at the end of the notebook;
# presumably this is only a placeholder so a submission.csv exists if a later
# cell fails - confirm it is intentional.
df.to_csv('submission.csv', index=False)

In [None]:
#Predict
#cell segmentation, masks, crop cells, get an array of cells

batch_size = 16
NUM_CLASSES = 19   # one model (and one prediction entry per cell) per label
submissions = []   # one prediction string per test image, in images_test order

for i in trange(0, len(images_test[0]), batch_size):
    # sub_images      - one batch of image paths (red, yellow, blue lists)
    # cell_masks      - labelled cell mask for every image in the batch
    # cells           - all resized cell crops in the batch
    # encoded_masks   - encoded mask of every cell, in the same order as `cells`
    # cells_per_image - number of cells found in every image of the batch

    #one batch of images
    sub_images = [channel_paths[i:i + batch_size] for channel_paths in images_test]

    #segment cells
    cell_segmentations = segmentator.pred_cells(sub_images)
    nuc_segmentations = segmentator.pred_nuclei(sub_images[2])

    # get masks from segmentations (label_cell returns (nuclei_mask, cell_mask))
    cell_masks = [
        label_cell(nuc_seg, cell_seg)[1]
        for nuc_seg, cell_seg in zip(nuc_segmentations, cell_segmentations)
    ]

    cells = []
    encoded_masks = []
    cells_per_image = []
    for j, cell_mask in enumerate(cell_masks):
        # Labels actually present in the mask (0 is background). The crop and
        # the encoded mask of a cell are both derived from this one list so
        # that they can never get out of step with each other.
        labels = np.unique(cell_mask)
        labels = labels[labels != 0]

        # Bounding box of label k is stored at index k-1. The mask is cast to
        # int32 rather than uint8 so that labels >= 256 do not wrap around.
        bboxes = ndi.find_objects(cell_mask.astype(np.int32))

        # Strip the trailing '_red.png' to get the common path prefix of the image.
        path = images_test[0][j + i][:-8]
        #read filters
        # NOTE(review): the second positional parameter of plt.imread is
        # `format`, not an OpenCV flag. The call is kept exactly as it was
        # because changing it could alter the pixel value range the models
        # were trained on - confirm before cleaning up.
        microtubule = plt.imread(path + "_red.png", cv2.IMREAD_GRAYSCALE)
        endoplasmicrec = plt.imread(path + "_yellow.png", cv2.IMREAD_GRAYSCALE)
        nuclei = plt.imread(path + "_blue.png", cv2.IMREAD_GRAYSCALE)
        protein = plt.imread(path + "_green.png", cv2.IMREAD_GRAYSCALE)
        #stack filters: channel order is red, green, blue, yellow
        img = np.stack((microtubule, protein, nuclei, endoplasmicrec), axis=-1)

        for label in labels:
            #crop the cell's bbox and resize it to the model input size
            img_cropped = img[bboxes[int(label) - 1]]
            img_resized = cv2.resize(img_cropped, (224, 224))
            cells.append(np.asarray(img_resized).astype(np.float32))

            #SUBMISSION ENCODING of this cell's binary mask
            encoded_masks.append(encode_binary_mask(cell_mask == label))
        cells_per_image.append(len(labels))

    #PREDICTION
    # list_of_predictions[c][k] is the output of model c for cell k of the batch.
    # Skipped when the batch contains no cells, since there is nothing to predict.
    list_of_predictions = []
    if cells:
        cell_batch = np.asarray(cells)
        for index_models in range(NUM_CLASSES):
            list_of_predictions.append(models_loaded[index_models].predict(cell_batch))

    #Create submission rows
    # For every cell the row holds, for each label: "<label> <confidence> <mask> ".
    offset = 0  # index of the current image's first cell within the batch
    for n_cells in cells_per_image:
        row_parts = []
        for cell_index in range(offset, offset + n_cells):
            encoded = encoded_masks[cell_index]
            # The encoder returns ASCII bytes; decode instead of slicing the repr.
            mask_text = encoded.decode() if isinstance(encoded, bytes) else str(encoded)
            for index_predictions in range(NUM_CLASSES):
                # str() of the 1-element prediction array is "[value]"; strip the brackets.
                confidence = str(list_of_predictions[index_predictions][cell_index])[1:-1]
                row_parts.append(str(index_predictions) + " " + confidence + " " + mask_text + " ")
        submissions.append("".join(row_parts))
        offset += n_cells

    # Release Python and CUDA memory before the next batch.
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
#SUBMISSION CSV from dataset and the predicted submissions
# Build a new frame from df_submission with the per-image prediction strings
# attached as the 'PredictionString' column, then write it out as the final
# submission file.
df_submission_test = df_submission.assign(PredictionString=submissions)

df_submission_test.to_csv('submission.csv', index=False)

In [None]:
df_submission_test