In this notebook, we aim to create a classifier that simply outputs the cell class of each input image. The purpose of this is to split data based on its class and then train a separate model for each class. The classifier uses a resnet50 architecture as its backbone. We use the available 606 images from the train dataset and 1972 semi-supervised images in order to develop our train our model.

# Importing libraries

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
from shutil import copyfile
from glob import glob

import random
random.seed(7)
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from sklearn.model_selection import train_test_split
        
my_data_dir = './'
my_data_train = os.path.join(my_data_dir, 'train_class')
my_data_val = os.path.join(my_data_dir, 'val_class')
data_dir = '../input/sartorius-cell-instance-segmentation/'
train_data_dir = '../input/sartorius-cell-instance-segmentation/train'
classes = ['shsy5y', 'cort', 'astro']

def preprocess_input(img):
    return tf.keras.applications.resnet50.preprocess_input(img)

def get_model():
    resnet50 = tf.keras.applications.resnet50.ResNet50(
        include_top=False,
        weights="imagenet",
        input_shape=(704, 520, 3),
        pooling='avg'
    )

    fc1 = tf.keras.layers.Dense(128, activation='relu', name="Dense1")(resnet50.output)
    fc2 = tf.keras.layers.Dense(3, name="Dense2")(fc1)

    model = tf.keras.models.Model(inputs=resnet50.input, outputs=fc2)
    return model

epochs = 1
batch_size = 16

# Indexing all the training and semi supervised data

In [2]:
images = []
image_class = []

train_df = pd.read_csv('../input/sartorius-cell-instance-segmentation/train.csv')

for astro_img in glob('../input/sartorius-cell-instance-segmentation/train_semi_supervised/astro*'):
    images.append(astro_img)
    image_class.append('astro')
    
for cort_img in glob('../input/sartorius-cell-instance-segmentation/train_semi_supervised/cort*'):
    images.append(cort_img)
    image_class.append('cort')
    
for shsy5y_img in glob('../input/sartorius-cell-instance-segmentation/train_semi_supervised/shsy5y*'):
    images.append(shsy5y_img)
    image_class.append('shsy5y')

images_to_cell_type = {}    

for _, row in train_df.iterrows():
    images_to_cell_type[row['id']] = row['cell_type']

for image, cell_type in images_to_cell_type.items():
    images.append('../input/sartorius-cell-instance-segmentation/train/' + image + '.png')
    image_class.append(cell_type)

image_class = np.array(image_class)
images = np.array(images)

# Generating stratified train test split (80/20)

In [3]:
train_images, val_images, train_class, val_class = train_test_split(images, image_class, stratify=image_class, test_size=0.2, random_state=17, shuffle=True)

Distribution of train data:

In [4]:
t, train_counts = np.unique(train_class, return_counts=True)
print(t, train_counts / train_counts.sum())

['astro' 'cort' 'shsy5y'] [0.44180407 0.31668283 0.24151309]


Distribution of val data:

In [5]:
t,  val_counts = np.unique(val_class, return_counts=True)
print(t, val_counts / val_counts.sum())

['astro' 'cort' 'shsy5y'] [0.44186047 0.31589147 0.24224806]


This shows that our validation and training data have a similar distribution between classes.

# Creating directory structure for training and validation data

We define the following variables in order to make the training code cleaner:

In [6]:
TRAIN_DIR = './train'
VAL_DIR = './val'

os.makedirs(TRAIN_DIR, exist_ok=True)
os.makedirs(VAL_DIR, exist_ok=True)

os.makedirs(TRAIN_DIR + '/astro', exist_ok=True)
os.makedirs(TRAIN_DIR + '/cort', exist_ok=True)
os.makedirs(TRAIN_DIR + '/shsy5y', exist_ok=True)

os.makedirs(VAL_DIR + '/astro', exist_ok=True)
os.makedirs(VAL_DIR + '/cort', exist_ok=True)
os.makedirs(VAL_DIR + '/shsy5y', exist_ok=True)

for image, category in zip(train_images, train_class):
    copyfile(image, TRAIN_DIR + '/' +  category + '/' + image.split('/')[-1])
for image, category in zip(val_images, val_class):
    copyfile(image, VAL_DIR + '/' +  category + '/' + image.split('/')[-1])

In [7]:
model = get_model()

model.compile(optimizer='adam',loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

2021-12-15 01:11:00.184927: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-15 01:11:00.327100: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-15 01:11:00.327856: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-15 01:11:00.329033: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


Performing data augmentations as shown below:

In [8]:
train_datagen = ImageDataGenerator(vertical_flip=True, zoom_range=0.2, shear_range=0.2, preprocessing_function=preprocess_input)
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, target_size=(704, 520), color_mode='rgb', batch_size=batch_size) 
val_generator = train_datagen.flow_from_directory(VAL_DIR, target_size=(704, 520), color_mode='rgb', batch_size=batch_size)

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='./model_checkpoint',
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1)

Found 2062 images belonging to 3 classes.
Found 516 images belonging to 3 classes.


In [9]:
history = model.fit(
    x=train_generator,
    steps_per_epoch=int(np.ceil(train_generator.n / float(batch_size))),
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=int(np.ceil(val_generator.n / float(batch_size))),
    callbacks=[model_checkpoint_callback],
    verbose=2
)

2021-12-15 01:11:08.044505: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2021-12-15 01:11:15.167932: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


129/129 - 243s - loss: 0.2322 - accuracy: 0.9355 - val_loss: 308.4874 - val_accuracy: 0.5349

Epoch 00001: val_accuracy improved from -inf to 0.53488, saving model to ./model_checkpoint


In [10]:
new_model= get_model()
new_model.load_weights('./model_checkpoint')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f43fda43b10>

In [11]:
new_model.compile(optimizer='adam',loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),metrics=['accuracy'])
new_model.evaluate(
    val_generator,
    verbose=1)



[291.6203918457031, 0.5290697813034058]