# Intro
Hello! This rather quick and dirty kernel shows how to get started on segmenting nuclei using a neural network in Keras.

The architecture used is the so-called U-Net, which is very common for image segmentation problems such as this. I believe U-Nets also tend to work quite well even on small datasets.

> Let's get started importing everything we need!

The nucleus is an organelle present within all eukaryotic cells, including human cells. Aberrant nuclear shape can be used to identify cancer cells (e.g. pap smear tests and the diagnosis of cervical cancer). Likewise, a growing body of literature suggests that there is some connection between the shape of the nucleus and human disease states such as cancer and aging. As such, the quantitative assessment of nuclear size and shape has important biomedical applications. Methods for assessing nuclear size and shape typically involve identifying the nucleus via traditional image segmentation approaches. Here we demonstrate a deep learning approach for the identification and segmentation of nuclei from images of cells.

For more information about the relationship between nuclear shape and human disease, please refer to the following resources: [https://www.ncbi.nlm.nih.gov/pubmed/15343274](https://www.ncbi.nlm.nih.gov/pubmed/15343274), [https://www.ncbi.nlm.nih.gov/pubmed/26940517](https://www.ncbi.nlm.nih.gov/pubmed/26940517)


# Step 1: Import libraries

In [None]:
from zipfile import ZipFile 
import os
import cv2 
import numpy as np
import matplotlib.pyplot as plt

# Step 2: Explore and Read Data

The image data can easily be extracted, read and explored with the Python libraries ZipFile, OpenCV and Matplotlib, but we want to use deep learning to obtain an even more accurate segmentation result.

In [None]:
# Unzip the train and test archives into local working directories.
# The archive handle is called `archive` rather than `zip` so that the
# builtin zip() is not rebound for the rest of the notebook.
for file_name, target_dir in (
    ("../input/data-science-bowl-2018/stage1_train.zip", "traindata"),
    ("../input/data-science-bowl-2018/stage1_test.zip", "testdata"),
):
    with ZipFile(file_name, 'r') as archive:
        print('Extracting the files')
        archive.extractall(target_dir)
        print('Done!')

In [None]:
# Train path: one sub-folder per sample
trainPath = "./traindata"
folder = os.listdir(trainPath)
print(folder[:5])
print()

# Contents of one sample folder
pathfile1 = os.path.join(trainPath, folder[5])
file1 = os.listdir(pathfile1)
print(file1)

# Image directory of that sample. It is addressed by name because
# os.listdir() returns entries in arbitrary order, so a positional index
# such as file1[1] may point at the masks directory instead.
imagePath = os.path.join(pathfile1, "images")
images = os.listdir(imagePath)
print(images)





In [None]:
# Read one sample image and display its grayscale conversion.
from skimage.color import rgb2gray

readimage = plt.imread(os.path.join(imagePath, images[0]))

# rgb2gray expects exactly three colour channels in recent scikit-image
# releases, so only the RGB planes are passed (any alpha plane is dropped).
# `readimage` itself is left untouched for the cells that follow.
plt.imshow(rgb2gray(readimage[:, :, :3]))

In [None]:
# Display the same sample exactly as it was read from disk.
plt.imshow(readimage)

In [None]:
# Show every individual mask of the sample selected above.

# Mask directory of the sample, addressed by name because os.listdir()
# returns entries in arbitrary order (a positional index is unreliable).
maskPath = os.path.join(pathfile1, "masks")
masks = os.listdir(maskPath)
print("the number of masks", len(masks))

# Ten thumbnails per row. The grid keeps at least 15 rows and grows when a
# sample has more than 150 masks, so plt.subplot never receives an index
# outside the grid.
n_cols = 10
n_rows = max(15, (len(masks) + n_cols - 1) // n_cols)

plt.figure(figsize=(50, 50))
for i, mask_name in enumerate(masks):
    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(plt.imread(os.path.join(maskPath, mask_name)), cmap="gray")
plt.show()

In [None]:

# Combine the individual masks of the sample into a single mask image.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24.
mask = np.zeros((128, 128, 1), dtype=bool)
for mask_name in masks:
    mask_ = plt.imread(os.path.join(maskPath, mask_name))
    # Resize to the working resolution and add a trailing channel axis
    mask_ = np.expand_dims(cv2.resize(mask_, (128, 128)), axis=-1)
    # Element-wise maximum accumulates every mask into one image
    mask = np.maximum(mask, mask_)
plt.imshow(np.squeeze(mask))
plt.title(np.squeeze(mask).shape)

In [None]:
import pathlib
import imageio
import numpy as np

# Collect the path of every PNG found under */images/ in the train folder.
training_images = list(pathlib.Path("./traindata").glob('*/images/*.png'))
print(training_images)

# Load one of the collected images and report its array shape.
imagePath = training_images[10]
image = plt.imread(imagePath)
print("image shape ", image.shape)

In [None]:
# Read train data: for every sample folder, one RGB image resized to
# 128x128 goes into X_Train and one merged 128x128x1 mask into Y_Train.
X_Train = []
Y_Train = []

for patient in folder:
    # Folder of this sample
    pathfile = os.path.join(trainPath, patient)

    # Image directory, addressed by name because os.listdir() returns
    # entries in arbitrary order (a positional index could hit the masks).
    imagePath = os.path.join(pathfile, "images")
    images = os.listdir(imagePath)

    # Read the image, keep the first three channels and resize it
    image = plt.imread(os.path.join(imagePath, images[0]))[:, :, :3]
    image = cv2.resize(image, (128, 128))
    print(image.shape)

    X_Train.append(np.array(image))

    # Mask directory of this sample
    maskPath = os.path.join(pathfile, "masks")
    masks = os.listdir(maskPath)
    print("the number of masks", len(masks))

    # Combine the individual masks into a single mask image. The builtin
    # `bool` replaces the `np.bool` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    mask = np.zeros((128, 128, 1), dtype=bool)
    for mask_name in masks:
        mask_ = plt.imread(os.path.join(maskPath, mask_name))
        mask_ = np.expand_dims(cv2.resize(mask_, (128, 128)), axis=-1)
        # Element-wise maximum accumulates every mask into one image
        mask = np.maximum(mask, mask_)
    Y_Train.append(np.array(mask))

        
        
        


In [None]:
# Turn the per-sample lists into arrays and report both shapes.
X_Train = np.array(X_Train)
Y_Train = np.array(Y_Train)
print(X_Train.shape, Y_Train.shape, sep="\n")

In [None]:
# List the extracted test samples.
testPath = "./testdata"
testfolder = os.listdir(testPath)
print(testfolder)

# Look inside the first listed sample folder.
first_sample = testfolder[0]
pathfile = os.path.join(testPath, first_sample)
file = os.listdir(pathfile)
print(file)

# #image for patient 
# imagePath=os.path.join(pathfile1,file1[1])
# images=os.listdir(imagePath)
# print(images)



In [None]:

# Read test data: one RGB image per sample folder, resized to 128x128.
X_Test = []

testPath = "./testdata"
testfolder = os.listdir(testPath)
for patient in testfolder:
    # Folder of this sample
    pathfile = os.path.join(testPath, patient)

    # Image directory, addressed by name rather than by its position in
    # os.listdir(), whose ordering is arbitrary.
    imagePath = os.path.join(pathfile, "images")
    images = os.listdir(imagePath)

    # Read the image, keep the first three channels and resize it
    image1 = plt.imread(os.path.join(imagePath, images[0]))[:, :, :3]
    image1 = cv2.resize(image1, (128, 128))
    print(image1.shape)
    X_Test.append(np.array(image1))
    
  
        
        
        


In [None]:
# Stack the test images into one array, then pass every entry through
# cv2.resize with the 128x128 target size once more.
X_Test = np.array(X_Test)
for idx, test_image in enumerate(X_Test):
    X_Test[idx] = cv2.resize(test_image, (128, 128))


In [None]:
# Preview the first five training images next to their merged masks.
for sample_idx in range(5):
    plt.figure(figsize=(10, 10))
    panels = (
        ("Real", X_Train[sample_idx]),
        ("Mask", np.squeeze(Y_Train[sample_idx])),
    )
    for column, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, column)
        plt.title(caption)
        plt.imshow(picture)
    plt.show()

# Build and train our neural network
Next we build our U-Net model, loosely based on [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/pdf/1505.04597.pdf) and very similar to [this repo](https://github.com/jocicmarko/ultrasound-nerve-segmentation) from the Kaggle Ultrasound Nerve Segmentation competition.

![](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/u-net-architecture.png)

In [None]:
# TODO: add Dropout layers
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D ,Flatten ,Dense ,Concatenate ,UpSampling2D,Conv2DTranspose
from tensorflow.keras import Sequential


def _double_conv(x, filters):
    """Apply two 3x3, same-padded, ReLU-activated convolutions to `x`."""
    for _ in range(2):
        x = Conv2D(filters, kernel_size=(3, 3), padding="same",
                   kernel_initializer='he_normal', activation="relu")(x)
    return x


def _upsample(x, filters):
    """Apply a 2x2 transposed convolution with stride 2 to `x`."""
    return Conv2DTranspose(filters, kernel_size=(2, 2), strides=(2, 2),
                           padding="same", kernel_initializer='he_normal',
                           activation="relu")(x)


# Build model
inputs = tf.keras.layers.Input(shape=(128, 128, 3))
# No x/255 rescaling layer: the notebook loads its PNG images with
# plt.imread, which already returns float pixel values in [0, 1], so a
# further division would shrink the inputs to roughly [0, 0.004].
# NOTE(review): re-introduce a rescaling step if 0-255 integer images are
# ever fed to this model.
s = inputs

# Contraction path
c1 = _double_conv(s, 16)  # parameters: 16*(3*3)*3+16, then 16*(3*3)*16+16
print("first convolutional layer", c1.shape)
p1 = MaxPooling2D((2, 2), strides=2)(c1)
print("first maxPool layer", p1.shape, "\n")

c2 = _double_conv(p1, 32)  # first convolution: 16*(3*3)*32+32 parameters
print("second convolutional layer", c2.shape)
p2 = MaxPooling2D((2, 2), strides=2)(c2)
print("second maxPool layer", p2.shape, "\n")

c3 = _double_conv(p2, 64)
print("third convolutional layer", c3.shape)
p3 = MaxPooling2D((2, 2), strides=2)(c3)
print("third maxPool layer", p3.shape, "\n")

c4 = _double_conv(p3, 128)
print("fourth convolutional layer", c4.shape)
p4 = MaxPooling2D((2, 2), strides=2)(c4)
print("fourth  maxPool layer", p4.shape, "\n")

# Bottleneck
c5 = _double_conv(p4, 256)
print("fifth convolutional layer", c5.shape)

# Expansive path: upsample, concatenate with the matching contraction
# output (skip connection), then convolve twice.
U6 = _upsample(c5, 128)
print("\nfirst upSampling", U6.shape)
U6 = Concatenate()([U6, c4])
print("first Concatenate", U6.shape, "\n")
c6 = _double_conv(U6, 128)
print("sixth convolutional layer", c6.shape)

U7 = _upsample(c6, 64)
print("\nsecond upSampling", U7.shape)
U7 = Concatenate()([U7, c3])
# Report U7 here (the original printed U6 under this label)
print("second Concatenate", U7.shape)
c7 = _double_conv(U7, 64)
print("\nseventh  convolutional layer", c7.shape)

U8 = _upsample(c7, 32)
print("\nthird upSampling", U8.shape)
U8 = Concatenate()([U8, c2])
print("third Concatenate", U8.shape)
c8 = _double_conv(U8, 32)
print("\nEIGTH  convolutional layer", c8.shape)

U9 = _upsample(c8, 16)
print("\nfourth upSampling", U9.shape)
U9 = Concatenate()([U9, c1])
print("fourth Concatenate", U9.shape)
c9 = _double_conv(U9, 16)
print("\nninth  convolutional layer", c9.shape)

# 1x1 convolution with sigmoid: one value in (0, 1) per pixel
output = Conv2D(1, kernel_size=(1, 1), activation="sigmoid")(c9)
print("\nOutput layer", output.shape)

model = tf.keras.Model(inputs=[inputs], outputs=[output])



In [None]:

# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer overview of the network.
model.summary()

In [None]:
# Checkpoint callback writing to modelForNuclei.h5 (save_best_only=True)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "modelForNuclei.h5",
    verbose=1,
    save_best_only=True,
)

# Remaining callbacks: early stopping on val_loss and TensorBoard logging
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, monitor="val_loss")
tensorboard = tf.keras.callbacks.TensorBoard(log_dir='log')
Callbacks = [early_stopping, tensorboard, checkpoint]

# Fit the model.
# NOTE(review): validation_split=0.01 holds out only 1% of the samples for
# validation - confirm this is intended.
history = model.fit(
    X_Train,
    Y_Train,
    validation_split=0.01,
    batch_size=4,
    epochs=25,
    callbacks=Callbacks,
)

In [None]:
# Training curves recorded in `history`

print("- the Accuracy and Loss  With 25 Epochs")

plt.figure(figsize=(40, 20))
# One panel per metric in a 5x5 grid: accuracy in slot 1, loss in slot 2.
# Each panel overlays the training curve and the validation curve.
for slot, metric in enumerate(('accuracy', 'loss'), start=1):
    plt.subplot(5, 5, slot)
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
from tensorflow.keras.models import load_model
# Reload the checkpointed model from disk and evaluate it on the training arrays.
saved_model = load_model("./modelForNuclei.h5")
saved_model.evaluate(X_Train, Y_Train)

In [None]:
# Predict the mask of training sample 9, sliced so it stays a batch of one.
single_batch = X_Train[9:10]
preds_train = model.predict(single_batch, verbose=1)
# Drop the singleton axes and pass the result through a 128x128 resize.
predict = cv2.resize(np.squeeze(preds_train), (128, 128))

In [None]:
# Side-by-side view for training sample 9: prediction, ground truth, input.
sample_idx = 9
plt.figure(figsize=(10, 10))

# Left panel: predicted mask
plt.subplot(1, 3, 1)
plt.title("Predict")
plt.imshow(predict)

# Middle panel: merged ground-truth mask as a 2-D image
truth_2d = Y_Train[sample_idx].reshape(128, 128)
plt.subplot(1, 3, 2)
plt.title("ground truth")
plt.imshow(truth_2d)

# Right panel: the input image
plt.subplot(1, 3, 3)
plt.title("real")
plt.imshow(X_Train[sample_idx])

In [None]:



# Show the predicted mask beside the input for the first nine test images.
for idx in range(9):
    # Slice instead of index so the model receives a batch of one
    preds_test = model.predict(X_Test[idx:idx + 1], verbose=1)
    predict = cv2.resize(np.squeeze(preds_test), (128, 128))

    plt.figure(figsize=(10, 10))
    panels = (("Predict", predict), ("real", X_Test[idx]))
    for column, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, column)
        plt.title(caption)
        plt.imshow(picture)
