In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os
import cv2 
import tensorflow as tf

from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Walk the Kaggle input tree and tally its contents: tabular files are listed
# by path, images are only counted, and anything else counts as an annotation.
files = images = annotations = 0

for dirname, _, filenames in os.walk('/kaggle/input'):
    for position, filename in enumerate(filenames):
        # Flag crowded folders once, when the third entry is reached.
        if position == 2:
            print(dirname, "  ... many on this folder")

        is_table = filename.endswith(("xlsx", "txt", "csv"))
        is_image = filename.endswith(("png", "jpeg", "jpg",))
        if is_table:
            files += 1
            print(os.path.join(dirname, filename))
        elif is_image:
            images += 1
        else:
            annotations += 1

print("...\n")
print("#"*10, "        files: {} images: {} annotations: {}".format(files, images, annotations))

In [None]:
# Original unet taken from https://www.kaggle.com/stpeteishii/cell-instance-segmentation-unet

 # data understanding

In [None]:
# Read the data first. The csv is the core: later cells read ids, annotations
# and cell types from `train_data`.
train_data = pd.read_csv('../input/sartorius-cell-instance-segmentation/train.csv')
print(train_data.shape)  # (rows, columns)
train_data.head(3)  # preview the first three rows

In [None]:
# Submission sample: its `id` column is used later to list the test images.
sample_submission=pd.read_csv('../input/sartorius-cell-instance-segmentation/sample_submission.csv')
print(sample_submission.shape)  # (rows, columns)
sample_submission.head()

In [None]:
# We focus on the train folder, as it contains the images to process.
# The other folders are not used for now.
print("Number of images in the folder train:")
# Number of directory entries in the train folder.
len(os.listdir('../input/sartorius-cell-instance-segmentation/train'))

In [None]:
# Count the distinct image ids referenced by train.csv.
print("Number of unique id on the file:")
train_data.id.unique().shape

The id column contains the codes of all the training images. However, each id is repeated because every image has several annotations, and the number of annotations differs from image to image.

In [None]:
# Bar chart of the number of annotation rows per image id. The x tick labels
# are hidden because there is one bar per image.
fig, ax = plt.subplots(figsize=(20, 6))
annotations_per_id = train_data.groupby("id").size()
annotations_per_id.plot.bar(ax=ax)
ax.set_xticks([])
plt.show()

In [None]:
# Number of annotation rows for two example image ids.
print(train_data[train_data["id"]=="0030fd0e6378"]["id"].count())
print(train_data[train_data["id"]=="0140b3c8f445"]["id"].count())

In [None]:
# other columns
train_data.sample_id.unique().shape

In [None]:
train_data[train_data["id"]=="0030fd0e6378"]["sample_id"].unique()

In [None]:
# sample_id is bound to the id: count the distinct (id, sample_id) pairs and
# compare the result with the number of unique ids.
train_data.groupby(["id", "sample_id"]).size().count()

In [None]:
# Check for missing values: total number of null cells over all columns.
# (Original note: none were found.)
train_data.isnull().sum().sum()

In [None]:
# Number of annotation rows per cell type.
# (pyplot is already imported in the first cell; this re-import is redundant but harmless.)
import matplotlib.pyplot as plt
train_data.groupby('cell_type').size().plot.bar()
plt.show()

In [None]:
# Number of annotation rows per (width, height) combination.
# (pyplot is already imported in the first cell; this re-import is redundant but harmless.)
import matplotlib.pyplot as plt
train_data.groupby(['width', 'height']).size().plot.bar()
plt.show()

Summary of the columns:
    - id: name of the training picture
    - annotation: run-length-encoded target mask of one cell
    - width, height: width and height of the images (constant: 704x520)
    - plate_time: not useful
    - sample_date: not useful
    - sample_id
    - elapsed_timedelta: not useful

In [None]:
# Check the images: display one picture from the train_semi_supervised folder.
img = cv2.imread("../input/sartorius-cell-instance-segmentation/train_semi_supervised/astro[hippo]_D1-1_Vessel-361_2020-09-14_13h00m00s_Ph_1.png")
plt.imshow(img);  # trailing ';' suppresses the repr output of imshow

In [None]:
img = cv2.imread("../input/sartorius-cell-instance-segmentation/train/0140b3c8f445.png")
plt.imshow(img);

In [None]:
# Apply a contrast filter to see the cells better.
from PIL import Image, ImageEnhance
img = cv2.imread("../input/sartorius-cell-instance-segmentation/train/042c17cd9143.png")
# Round-trip through PIL to apply a contrast enhancement with factor 16,
# then convert back to a numpy array for plotting.
img = np.asarray(ImageEnhance.Contrast(Image.fromarray(img)).enhance(16))

plt.figure()
plt.imshow(img);

In [None]:
# So an image id has several annotations, and the same id repeats many times
# with the same sample_id. Show all rows for one image.
train_data[train_data["id"]=="0030fd0e6378"]

In [None]:
# How to process annotations.

# Read one annotation: the first annotation string of one image.
mask_rle = train_data[train_data["id"] == "0030fd0e6378"]["annotation"].tolist()[0]
# Target array shape used by the next cell: (height, width, channels).
shape=(520, 704, 3)
# Whitespace-separated tokens; the next cell reads them as alternating
# start / length values.
s = mask_rle.split()

In [None]:
# Step-by-step decoding of the annotation tokenised in the previous cell.
starts = list(map(lambda x: int(x) - 1, s[0::2])) # starting positions (tokens at even indices), converted from 1-based to 0-based
lengths = list(map(int, s[1::2])) # run lengths (tokens at odd indices)
ends = [x + y for x, y in zip(starts, lengths)] # exclusive end of each run (start + length)

# create a blank image, flattened to (pixels, channels)
img = np.zeros((shape[0] * shape[1], shape[2]), dtype=np.float32)

# fill the positions on the image with a color to create the mask
for start, end in zip(starts, ends):
    img[start : end] = 1

plt.figure()  
# reshape the flat pixel list back to (height, width, channels) for display
plt.imshow(img.reshape(shape));

In [None]:
len(train_data[train_data["id"] == "0030fd0e6378"]["annotation"].tolist())

In [None]:
# Reference: https://www.kaggle.com/ihelon/cell-segmentation-run-length-decoding
#https://www.kaggle.com/susnato/understanding-run-length-encoding-and-decoding?scriptVersionId=77552323
# coding packed into one function

def rle_decode(mask_rle, shape, color=3):     #color=1,3
    '''
    Decode a run-length string into a float32 mask.

    mask_rle: run-length string formatted as "start length start length ..."
              with 1-based starts
    shape: (height, width, channels) of the array to return
    color: value (scalar or per-channel vector) written into the masked pixels
    Returns a numpy array of the given shape, zero outside the runs.

    Note: a later cell of this notebook defines another `rle_decode`, which
    replaces this one once that cell has run.
    '''
    tokens = mask_rle.split()

    # Work on a flat (pixels, channels) canvas; it is reshaped on return.
    canvas = np.zeros((shape[0] * shape[1], shape[2]), dtype=np.float32)

    for start_token, length_token in zip(tokens[0::2], tokens[1::2]):
        begin = int(start_token) - 1          # 1-based -> 0-based
        canvas[begin : begin + int(length_token)] = color

    return canvas.reshape(shape)

In [None]:
# other version
# https://www.kaggle.com/c/sartorius-cell-instance-segmentation/discussion/291627
def rle_decode(mask_rle, shape=(520, 704, 1), color=1):
    """Decode a run-length string into a mask array.

    This definition replaces the earlier `rle_decode` of the notebook, so it
    keeps that version's `color` argument and multi-channel support; callers
    such as `plot_masks(..., colors=True)` rely on both.

    Args:
        mask_rle: string "start length start length ..." with 1-based starts.
        shape: shape of the returned array, (height, width) or
            (height, width, channels).
        color: value written into masked pixels; a scalar, or a vector with
            one entry per channel. Defaults to 1 (binary mask).

    Returns:
        Array of the requested shape, zero outside the runs. The dtype is
        uint8 for integer colors (the default) and float32 otherwise.
    """
    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1   # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    n_channels = shape[2] if len(shape) > 2 else 1
    fill = np.asarray(color)
    # Keep the original uint8 output for integer fills; fractional colors
    # (e.g. random RGB triples) need a float buffer.
    dtype = np.uint8 if np.issubdtype(fill.dtype, np.integer) else np.float32

    img = np.zeros((shape[0] * shape[1], n_channels), dtype=dtype)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = fill
    return img.reshape(shape)  # Needed to align to RLE direction

def rle_encode(img):
    """Run-length encode a mask as "start length start length ...".

    The array is flattened, padded with one zero on each side, and every
    position where the value changes is recorded (1-based). Each second
    change position is then turned into a run length by subtracting the
    position before it.
    """
    padded = np.concatenate(([0], img.flatten(), [0]))
    changes = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd slots hold the position where a run stops; convert to a length.
    changes[1::2] -= changes[::2]
    return ' '.join(map(str, changes))

In [None]:
# Each mask annotation covers one area: decode the first annotation of one
# image with the default shape of rle_decode and show it.
mask = train_data[train_data["id"] == "0030fd0e6378"]["annotation"].tolist()[0]
img = rle_decode(mask)
plt.imshow(img, cmap="gray");

In [None]:
mask

In [None]:
img.shape

In [None]:
rle_encode(img)

In [None]:
# Round-trip check: does decoding the re-encoded mask reconstruct the mask?
plt.imshow(rle_decode(rle_encode(img)));

# data processing

In [None]:
def plot_masks(image_id, colors=False):
    """Show a training image, the image with its mask overlaid, and the mask alone.

    Args:
        image_id: id of the training image (a value of the `id` column of
            `train_data`, also the file name without ".png").
        colors: if True every annotation is drawn in its own random RGB
            color; otherwise all annotations are merged into one
            single-channel mask.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    labels = train_data.loc[train_data["id"] == image_id, "annotation"].tolist()

    mask = np.zeros((520, 704, 3 if colors else 1))
    for label in labels:
        # Always decode as a single channel with the default fill. The
        # rle_decode defined last in this notebook does not accept a `color`
        # argument or a 3-channel shape, so the color is applied here instead.
        instance = rle_decode(label, shape=(520, 704, 1))
        if colors:
            # (520, 704, 1) * (3,) broadcasts to one RGB color per instance.
            instance = (instance > 0) * np.random.rand(3)
        mask += instance
    # Overlapping annotations add up; bring the values back into [0, 1].
    mask = mask.clip(0, 1)

    image_path = f"../input/sartorius-cell-instance-segmentation/train/{image_id}.png"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(18,6))
    plt.subplot(1, 3, 1)
    plt.imshow(image)
    plt.title('Input image')
    plt.axis("off")
    
    plt.subplot(1, 3, 2)
    plt.imshow(image)
    plt.imshow(mask, alpha=0.1)
    plt.title('Input image with mask')
    plt.axis("off")
    
    plt.subplot(1, 3, 3)
    plt.imshow(mask)
    plt.title('Only mask')
    plt.axis("off")
    
    plt.show();

In [None]:
# Plot a few example images together with their merged masks.
sample_ids = ['0030fd0e6378','0140b3c8f445','01ae5a43a2ab']

for sample_id in sample_ids:
    # Cell type as recorded on the first annotation row of this image.
    celltype = train_data.loc[train_data['id'] == sample_id, 'cell_type'].iloc[0]
    print('ID:', sample_id, ', CellType:',celltype)
    # plot_masks reads the image file itself, so no separate imread is needed here.
    plot_masks(sample_id, colors=False)

In [None]:
# Reference: https://www.kaggle.com/keegil/keras-u-net-starter-lb-0-277
# Size of the images fed to the network; originals are resized to this.
IMG_HEIGHT = 256
IMG_WIDTH = 256
IMG_CHANNELS = 3
TRAIN_PATH = '../input/sartorius-cell-instance-segmentation/train/'

train_ids = train_data['id'].unique().tolist()
test_ids = sample_submission['id'].unique().tolist()

# Pre-allocate the resized train images and masks, one slot per entry of train_ids.
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
# Use the builtin `bool`: the `np.bool` alias has been removed from NumPy.
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

In [None]:
from tqdm import tqdm

# Group the annotation strings once instead of filtering the full table for
# every image inside the loop.
annotations_by_id = train_data.groupby("id")["annotation"].agg(list)

for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    image_path = TRAIN_PATH + id_ + '.png'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # cv2.resize expects dsize as (width, height).
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation = cv2.INTER_LINEAR)
    X_train[n] = img

    # Merge all instance annotations of the image into one binary mask.
    mask = np.zeros((520, 704, 1))
    for label in annotations_by_id.loc[id_]:
        mask += rle_decode(label, shape=(520, 704, 1))
    mask = mask.clip(0, 1)
    mask = mask[:,:,0]

    mask = cv2.resize(mask, (IMG_WIDTH, IMG_HEIGHT), interpolation = cv2.INTER_LINEAR)

    # Y_train is boolean: any pixel with a non-zero interpolated value counts
    # as foreground. This is the same result the implicit float->bool cast gave.
    Y_train[n] = np.expand_dims(mask > 0, axis=-1)
print("Done")

In [None]:
# Get and resize test images
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
test_images_id = []
# The test folder sits next to the train folder.
TEST_PATH = TRAIN_PATH.replace('train', 'test')
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    image_path = TEST_PATH + id_ + '.png'
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # cv2.resize expects dsize as (width, height).
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation = cv2.INTER_LINEAR)
    X_test[n] = img
    # Keep the ids in the same order as the rows of X_test.
    test_images_id.append(id_)
print("Done")

In [None]:
print(X_train.shape,Y_train.shape,X_test.shape)

In [None]:
# Visual and numeric check of one resized training pair (index 40):
# first channel of the input image, then its mask.
sample_id_num = 40
plt.imshow(X_train[sample_id_num][:,:,0], cmap = 'gray')
plt.show()
plt.imshow(Y_train[sample_id_num][:,:,0])
plt.show()

# Value range and mean of the first image channel and of the mask.
print('Input image:','Min:', X_train[sample_id_num][:,:,0].min(), '; Max:', X_train[sample_id_num][:,:,0].max(), '; Mean:', X_train[sample_id_num][:,:,0].mean())
print('Mask:','Min:', Y_train[sample_id_num][:,:,0].min(), '; Max:', Y_train[sample_id_num][:,:,0].max(), '; Mean:', Y_train[sample_id_num][:,:,0].mean())

# modeling

In [None]:
#dice_coefficient
def dice_coefficient(y_true, y_pred):
    """Dice coefficient over the whole batch: 2*sum(t*p) / (sum(t + p) + eps).

    The sums run over every element of the two tensors. Keras' epsilon is
    added to the denominator so that two all-zero tensors do not divide by zero.
    """
    overlap = tf.reduce_sum(y_true * y_pred)
    total = tf.reduce_sum(y_true + y_pred)
    return (2 * overlap) / (total + tf.keras.backend.epsilon())

In [None]:
# Build U-Net model
inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
activation='elu'


def _double_conv(x, filters, dropout_rate):
    """Conv 3x3 -> Dropout -> Conv 3x3, both convolutions with `filters` channels."""
    x = Conv2D(filters, (3, 3), activation=activation, kernel_initializer='he_normal', padding='same') (x)
    x = Dropout(dropout_rate) (x)
    return Conv2D(filters, (3, 3), activation=activation, kernel_initializer='he_normal', padding='same') (x)


# Scale the uint8 pixel values to [0, 1] inside the model.
s = Lambda(lambda x: x / 255) (inputs)

# Contracting path: four double-conv blocks, each followed by 2x2 max pooling.
# The output of every block (before pooling) is kept for the skip connections.
skips = []
x = s
for filters, dropout_rate in [(16, 0.1), (32, 0.1), (64, 0.2), (128, 0.2)]:
    x = _double_conv(x, filters, dropout_rate)
    skips.append(x)
    x = MaxPooling2D((2, 2)) (x)

# Bottleneck.
x = _double_conv(x, 256, 0.3)

# Expanding path: upsample with a transposed convolution, concatenate the
# matching encoder output on the channel axis, then a double-conv block.
for filters, dropout_rate in [(128, 0.2), (64, 0.2), (32, 0.1), (16, 0.1)]:
    x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same') (x)
    x = concatenate([x, skips.pop()])
    x = _double_conv(x, filters, dropout_rate)

# One sigmoid channel: per-pixel foreground probability.
outputs = Conv2D(1, (1, 1), activation='sigmoid') (x)

model = Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coefficient])
#model.summary()

In [None]:
# Fit model
# Early stopping with a patience of 40 epochs (out of at most 71 below).
earlystopper = EarlyStopping(patience=40, verbose=1)
#checkpointer = ModelCheckpoint('best_model.h5', verbose=1, save_best_only=True)

# 12% of the samples are held out for validation; the returned history is
# plotted in the next cells.
results = model.fit(X_train, Y_train, validation_split=0.12, batch_size=10, epochs=71, 
                    callbacks=[earlystopper])

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots(figsize=(14, 4))
for history_key in ('loss', 'val_loss'):
    ax.plot(results.history[history_key])
ax.set_title('model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
# Training vs. validation Dice coefficient per epoch.
fig, ax = plt.subplots(figsize=(14, 4))
for history_key in ('dice_coefficient', 'val_dice_coefficient'):
    ax.plot(results.history[history_key])
ax.set_title('dice_coefficient')
ax.set_ylabel('dice_coefficient')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
X_train.shape[0]*0.85

In [None]:
# Predict on train, val and test
#model = load_model('best_model.h5', custom_objects={'dice_coefficient': dice_coefficient})
# Only the first 85% of X_train is predicted, so preds_train is shorter than Y_train.
# NOTE(review): the fit cell uses validation_split=0.12, which does not match
# this 0.85 cut; confirm which split is intended.
preds_train = model.predict(X_train[:int(X_train.shape[0]*0.85)], verbose=1)
preds_test = model.predict(X_test, verbose=1)

In [None]:
# by default I use threshold of 0.5. It is worth optimize it? In some notebooks the use it per class type. 
from statistics import mean
def get_threshold(Y, pred):
    """Find the score threshold that maximises the IoU for one image.

    Pixels are ranked by predicted score, highest first. Keeping the k
    best-scored pixels as foreground gives
        IoU(k) = TP(k) / (k + positives - TP(k))
    where TP(k) is the number of true foreground pixels among those k.

    Args:
        Y: ground-truth mask (any shape; it is flattened).
        pred: predicted scores with the same number of elements as Y.

    Returns:
        (best_threshold, best_IoU): the score of the last pixel kept at the
        best k, and the IoU reached there.
    """
    flat_scores = pred.ravel()
    flat_truth = np.asarray(Y.ravel())

    ranking = np.argsort(flat_scores)[::-1]          # highest score first
    true_positives = np.cumsum(flat_truth[ranking])  # TP(k) for k = 1..n
    kept = np.arange(1, flat_truth.size + 1)         # k = 1..n

    iou_curve = true_positives / (kept + flat_truth.sum() - true_positives)
    best = iou_curve.argmax()

    return flat_scores[ranking[best]], iou_curve[best]


img_thresholds = []         # one for each image
img_IoUs = []
# zip stops at the shorter sequence. preds_train covers only part of the
# training set, so the progress bar total is its length, not Y_train's.
for Y, P in tqdm(zip(Y_train, preds_train), total=len(preds_train)):

    best_img_threshold, best_img_IoU = get_threshold(Y, P)
    img_thresholds.append(best_img_threshold)
    img_IoUs.append(best_img_IoU)
    
# The global threshold is the mean of the per-image optima; the standard
# deviation shows how much they vary between images.
best_threshold = np.mean(img_thresholds)
best_threshold_spread = np.std(img_thresholds)
avg_IoU = mean(img_IoUs)

print(f"Best threshold: {best_threshold:.3g} (+-{best_threshold_spread:.3g}), Avg. Train IoU: {avg_IoU:.3f}")

In [None]:
# Threshold predictions
default_threshold = False

# Either the conventional 0.5 cut-off or the mean per-image optimum
# (`best_threshold`) computed in the previous cell.
threshold = 0.5 if default_threshold else best_threshold

# Binarise the predicted probabilities.
preds_train_t = (preds_train > threshold).astype(np.uint8)
preds_test_t = (preds_test > threshold).astype(np.uint8)

# Create list of upsampled test masks: probabilities resized back to the
# original image size. cv2.resize expects dsize as (width, height), so
# (704, 520) yields arrays of shape (520, 704).
preds_test_upsampled = [
    cv2.resize(np.squeeze(pred), (704, 520), interpolation=cv2.INTER_LINEAR)
    for pred in preds_test
]

In [None]:
# Perform a sanity check on some random training samples
from random import randint
# randint is inclusive on both ends, so the upper bound must be len - 1.
ix = randint(0, len(preds_train_t) - 1)
plt.figure(figsize=(18,6))
plt.subplot(1, 3, 1)
plt.imshow(X_train[ix])
plt.title('Input image')
plt.axis("off")

plt.subplot(1, 3, 2)
plt.imshow(np.squeeze(Y_train[ix]))
plt.title('Mask')
plt.axis("off")

plt.subplot(1, 3, 3)
plt.imshow(np.squeeze(preds_train_t[ix]))
plt.title('predicted mask')
plt.axis("off")

In [None]:
# Second random sample: input image, ground-truth mask and thresholded prediction.
# randint is inclusive on both ends, so the upper bound must be len - 1.
ix = randint(0, len(preds_train_t) - 1)
plt.figure(figsize=(18,6));
plt.subplot(1, 3, 1)
plt.imshow(X_train[ix])
plt.title('Input image')
plt.axis("off")

plt.subplot(1, 3, 2)
plt.imshow(np.squeeze(Y_train[ix]))
plt.title('Mask')
plt.axis("off")

plt.subplot(1, 3, 3)
plt.imshow(np.squeeze(preds_train_t[ix]))
plt.title('predicted mask')
plt.axis("off")

In [None]:
#predictions not empty right?
print(np.count_nonzero(preds_train_t[ix]))

In [None]:
from matplotlib.colors import ListedColormap
cmap = ListedColormap(['black', 'gray', 'orange', 'green'])

def plot_colored(img_Y, img_pred):
    """Plot a per-pixel comparison of a ground-truth mask and a predicted mask.

    Pixel classes, drawn with the module-level 4-color `cmap`:
        0 - both masks are 0 (background)
        1 - predicted but not in the ground truth (false positive)
        2 - in the ground truth but not predicted (false negative)
        3 - in both (correct prediction)

    Args:
        img_Y: ground-truth mask with values 0/1.
        img_pred: predicted mask with values 0/1, same shape as img_Y.
    """
    output = np.zeros_like(img_Y)
    output = np.where((img_Y == 0) & (img_pred == 1), 1, output)
    output = np.where((img_Y == 1) & (img_pred == 0), 2, output)
    output = np.where((img_Y == 1) & (img_pred == 1), 3, output)

    plt.figure(figsize=(10,10))
    # Pin the color scale to the four class codes. Without vmin/vmax the
    # scale follows the values present in this image, so colors would change
    # meaning whenever a class (e.g. "correct") is absent.
    plt.imshow(output, cmap=cmap, vmin=0, vmax=3)
    plt.xticks([])
    plt.yticks([]);

In [None]:
# Compare ground truth and thresholded prediction for the first N training images.
N = 4
for i in range(N):
    plot_colored(Y_train[i], preds_train_t[i])
    plt.show()
# green: correct prediction
# gray: false positive (too much)
# orange: false negative (missed)

# prepare the submission file

In [None]:
# test_masks: thresholded test predictions resized back to the original image
# size (cv2 dsize is (width, height) = (704, 520)) and reshaped to
# (520, 704, 1), before any overlap fixing.
test_masks = [cv2.resize(pred,dsize=(704,520),interpolation=cv2.INTER_CUBIC).reshape(520,704,1) for pred in preds_test_t]
# Print the shapes of the first three masks as a quick check.
print(test_masks[0].shape)
print(test_masks[1].shape)
print(test_masks[2].shape)

# fix_overlap
https://www.kaggle.com/c/sartorius-cell-instance-segmentation/discussion/279995

In [None]:
def check_overlap(msk):
    """Return True if any pixel is set in more than one channel of `msk`.

    Args:
        msk: mask whose last axis indexes the instances; any non-zero value
            counts as "set".
    """
    # Use the builtin `bool`: the `np.bool` alias has been removed from NumPy.
    msk = msk.astype(bool).astype(np.uint8)
    return np.any(np.sum(msk, axis=-1)>1)

def fix_overlap(msk):
    """
    Make the instance channels of a mask mutually exclusive.

    Args:
        msk: multi-channel mask, each channel is an instance of cell, shape:(520,704,None)
    Returns:
        multi-channel mask with non-overlapping values, shape:(520,704,None)
    """
    # Prepend an all-zero channel so that argmax yields 0 where no instance is set.
    with_background = np.pad(np.array(msk), [[0,0],[0,0],[1,0]])
    n_slots = with_background.shape[-1]

    # Keep a single channel index per pixel, then expand it back to one-hot.
    winner = np.argmax(with_background, axis=-1)
    one_hot = tf.keras.utils.to_categorical(winner, num_classes=n_slots)

    # Drop the prepended channel, then every instance channel left empty.
    instances = one_hot[...,1:]
    non_empty = np.any(instances, axis=(0,1))
    return instances[...,non_empty]

def remove_isolated_points_from_rle(strin):
    """Drop every run of length 1 from a run-length string.

    Args:
        strin: string "start length start length ..."; may be empty.

    Returns:
        The same encoding without the (start, length) pairs whose length is
        "1". An empty input gives an empty string; an unpaired trailing token
        is ignored.
    """
    # split() without an argument tolerates empty input and repeated spaces.
    tokens = strin.split()
    kept = []
    for start, length in zip(tokens[0::2], tokens[1::2]):
        if length != "1":
            kept.extend((start, length))
    return ' '.join(kept)

In [None]:
for test_mask in test_masks:
    print(check_overlap(test_mask))

In [None]:
#test_mask2: after reshape after fix_overlapping. No need right now
#test_masks2=[]
#for test_mask in test_masks:
#    test_mask2 = fix_overlap(test_mask).reshape(520,704,1)
#    print(test_mask2.shape)
#    test_masks2+=[test_mask2]

#for test_mask2 in test_masks2:
 #   print(check_overlap(test_mask2))

In [None]:
# revert the output codes to the file

In [None]:
# Run-length encode every full-size test mask (one string per test image).
# `predicted2` is assigned again later, from the per-nucleus masks.
predicted2 = [rle_encode(test_mask2) for test_mask2 in test_masks]
#print(predicted2[0])

# split the mask into each cluster nucleus

In [None]:
# split the mask into each cluster nucleus for the submision
# seen on https://www.kaggle.com/c/sartorius-cell-instance-segmentation/discussion/288376
def post_process(mask, min_size=80, shape=(520, 704,)):
    """Split a binary mask into one mask per connected component.

    Args:
        mask: binary mask; it is cast to uint8 before labelling.
        min_size: components with at most this many pixels are discarded.
        shape: shape of each returned mask; the boolean component map is
            used to index an array of this shape.

    Returns:
        List of float32 arrays, each 1 on one kept component and 0 elsewhere.
    """
    n_labels, label_map = cv2.connectedComponents(mask.astype(np.uint8))

    predictions = []
    # The loop starts at label 1, so label 0 never produces a mask.
    for label in range(1, n_labels):
        region = label_map == label
        if region.sum() <= min_size:
            continue
        instance = np.zeros(shape, np.float32)
        instance[region] = 1
        predictions.append(instance)
    return predictions

In [None]:
# test the nucleus thing. Take one simple mask
plt.imshow(Y_train[4], cmap="gray");

In [None]:
# connectedComponents returns the number of components of the image and a
# label image in which each pixel holds the index of its component.
num_component, component = cv2.connectedComponents(Y_train[4].astype(np.uint8))
print(num_component)
plt.imshow(component, cmap="gray");

In [None]:
# extraction of the component of value 5 as example
compenent_5 = (component == 5)
plt.imshow(compenent_5, cmap="gray");

In [None]:
# All together: split one training mask into its components.
# Note that the training masks are resized, so the minimum component size has
# to shrink as well. The minimum size in this case is 20.
final = post_process(Y_train[4], min_size=20, shape=(IMG_HEIGHT, IMG_WIDTH,))
print(final[0].shape)
plt.imshow(final[0], cmap="gray");  # first component that was kept

# create submission file

In [None]:
preds_test_t[0].shape

In [None]:
# New version: split every thresholded test mask into its individual nuclei.
# predicted_nucleus[i] is one nucleus mask and test_nucleus_image_id[i] is the
# id of the test image it came from.
predicted_nucleus = []
test_nucleus_image_id = []

for image_id, pred_mask in zip(test_images_id, preds_test_t):
    # Resize to the original image size first (cv2 dsize is (width, height)),
    # so post_process runs with its default shape and min_size.
    full_size = cv2.resize(pred_mask, (704,520,), interpolation = cv2.INTER_LINEAR)
    nuclei = post_process(full_size)
    predicted_nucleus.extend(nuclei)
    test_nucleus_image_id.extend([image_id] * len(nuclei))

In [None]:
plt.imshow(predicted_nucleus[0], cmap="gray");
predicted_nucleus[0].shape

In [None]:
# Run-length encode every nucleus mask, then drop the runs of length 1 from
# each encoding. The first string is printed before and after filtering.
predicted2 = [rle_encode(test_mask2) for test_mask2 in predicted_nucleus]
print(predicted2[0])
predicted_filt = [remove_isolated_points_from_rle(s) for s in predicted2]
print(predicted_filt[0])

In [None]:
# One row per predicted nucleus: the id of its test image and its filtered
# run-length string. (The previous copy of sample_submission was overwritten
# immediately and is not needed.)
submit = pd.DataFrame({'id':test_nucleus_image_id, 'predicted':predicted_filt})
print(submit.shape)
submit.head()

In [None]:
submit.to_csv('submission.csv', index=False)