# 1. Load the libraries

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import cv2
import glob
import numpy as np
import pandas as pd  
from PIL import Image
from skimage import measure
from keras import backend as K
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.patches as patches
import tensorflow as tf
from skimage.measure import label, regionprops, regionprops_table

2024-11-22 21:20:58.506582: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# 2. MobileNetV2

## 2.1 Load the model

In [2]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Flatten, Dense, GlobalAveragePooling2D, MaxPooling2D, AveragePooling2D
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input


# Pre-trained MobileNetV2 backbone: ImageNet weights, classification head
# removed, built for 90x90 three-channel inputs (the single-cell crop size
# used further down in this notebook).
model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(90, 90, 3),
)


  model = MobileNetV2(include_top=False, weights="imagenet", input_shape=(90, 90, 3))
2024-11-22 21:21:01.096995: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9612 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:17:00.0, compute capability: 7.5


## 2.2 Assign Feature extraction layer

In [3]:
# Tap the backbone at its "block_16_project" layer. The rest of this notebook
# treats the pooled output as a 320-dimensional feature vector.
bottleneck_layer = model.get_layer(name="block_16_project")
layer = bottleneck_layer.output

# Average over the spatial dimensions so each input image yields one vector.
spatial_pooling = GlobalAveragePooling2D()
output = spatial_pooling(layer)

# Wrap "backbone input -> pooled features" as a stand-alone model and print
# its layer summary.
feature_extraction_model = Model(inputs=model.inputs, outputs=output)
feature_extraction_model.summary()

# Preprocessing applied to every crop before feature extraction
def preprocess_image(image):
    """Resize `image` to 90x90 and apply the MobileNetV2 input preprocessing.

    Args:
        image: image data accepted by `tf.image.resize`.

    Returns:
        The resized image after `preprocess_input` has been applied.
    """
    resized = tf.image.resize(image, (90, 90))  # match the model input size
    return preprocess_input(resized)

In [4]:
# Folders holding the field-of-view images and the corresponding Cellpose masks.
image_path = '/4tbint/Corrected Merged FOVs/Set5/20200408_N7863__2020-04-08T10_57_43-Measurement 2'
mask_path = '/4tbint/Cellpose Masks/Set5/20200408_N7863__2020-04-08T10_57_43-Measurement 2'

# Sorted base names of every TIFF in each folder.
images = sorted(os.path.basename(x) for x in glob.glob(image_path + '/*.tiff'))
cellpose_mask = sorted(os.path.basename(x) for x in glob.glob(mask_path + '/*.tiff'))

# The files are consumed three at a time (one triplet per field of view).
# The number of fields is derived from the folder contents instead of being
# hard-coded, so plates with a different number of fields work as well.
# A trailing incomplete triplet, if any, is ignored.
n_fields = len(images) // 3
if n_fields == 0:
    raise FileNotFoundError(f"No complete 3-channel field found in {image_path!r}")

ch1 = [] #fibroblast
ch2 = [] #cancer
ch4 = [] #dapi

for i in range(0, n_fields * 3, 3):
    # (file name, destination list) for the three channels of this field.
    channel_files = (
        (images[i], ch1),
        (images[i + 2], ch2),  # for MHB ch3 is cancer
        (images[i + 1], ch4),  # for MHB ch2 is DAPI
    )
    for file_name, channel_stack in channel_files:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into a NumPy array.
        with Image.open(os.path.join(image_path, file_name)) as img:
            channel_stack.append(np.array(img))

# NOTE(review): every field of every channel is held in memory at once and
# np.max / np.quantile build a stacked copy on top of that; this cell is
# memory hungry on large plates.
ch1_max = np.max(ch1)
ch2_max = np.max(ch2)
ch4_max = np.max(ch4)
print("Maximum intensity for channel 1:", ch1_max)
print("Maximum intensity for channel 2:", ch2_max)
print("Maximum intensity for channel 4:", ch4_max)

# Plate-wide 99th percentile per channel; used later as the intensity scale
# when the single-cell crops are normalised.
ch1_q099 = np.quantile(ch1, 0.99)
ch2_q099 = np.quantile(ch2, 0.99)
ch4_q099 = np.quantile(ch4, 0.99)
print("Quantile_099 channel 1:", ch1_q099)
print("Quantile_099 channel 2:", ch2_q099)
print("Quantile_099 channel 4:", ch4_q099)

Maximum intensity for channel 1: 65289
Maximum intensity for channel 2: 65535
Maximum intensity for channel 4: 64458
Quantile_099 channel 1: 1959.0
Quantile_099 channel 2: 2514.0
Quantile_099 channel 4: 5185.0


## 2.3 Read images as tensors and extract features

In [None]:
# Folders holding the field-of-view images and the corresponding Cellpose masks.
image_path = '/4tbint/Corrected Merged FOVs/Set5/20200408_N7863__2020-04-08T10_57_43-Measurement 2'
mask_path = '/4tbint/Cellpose Masks/Set5/20200408_N7863__2020-04-08T10_57_43-Measurement 2'

# Sorted base names of every TIFF in each folder. Images are consumed three at
# a time (one triplet per field of view) and masks one per field, in the same
# sorted order -- presumably the two folders list the fields identically;
# verify when adding new plates.
images = sorted(os.path.basename(x) for x in glob.glob(image_path + '/*.tiff'))
cellpose_mask = sorted(os.path.basename(x) for x in glob.glob(mask_path + '/*.tiff'))

CHANNELS_PER_FIELD = 3
half_size = 45              # half of the crop edge; 2 * 45 = 90 matches the model input
FEATURE_BATCH_SIZE = 256    # number of cell crops pushed through the network at once

# Derive the number of fields from the folder contents instead of hard-coding
# it, and fail early (before any expensive work) when masks are missing.
n_fields = len(images) // CHANNELS_PER_FIELD
if n_fields == 0:
    raise FileNotFoundError(f"No complete 3-channel field found in {image_path!r}")
if len(cellpose_mask) < n_fields:
    raise ValueError(
        f"Found {len(cellpose_mask)} masks for {n_fields} fields of view in {mask_path!r}"
    )

# "mean_fetaure_list" collects one mean feature tensor per field of view and
# "image_list" the channel-1 file name of that field (same order, same length).
mean_fetaure_list = []
image_list = []


def load_image_array(path):
    """Read the image at `path` into a NumPy array and close the file."""
    with Image.open(path) as img:
        return np.array(img)


def extract_features(image):
    """Preprocess cell crops and run them through the feature extraction model.

    Args:
        image: batch of crops shaped (batch, height, width, 3).

    Returns:
        Tensor with one feature vector per crop, shaped (batch, n_features).
    """
    return feature_extraction_model(preprocess_image(image), training=False)


for i in range(0, n_fields * CHANNELS_PER_FIELD, CHANNELS_PER_FIELD):

    image_array_ch1 = load_image_array(os.path.join(image_path, images[i]))
    image_array_ch2 = load_image_array(os.path.join(image_path, images[i + 2]))  # for MHB ch3 is cancer
    image_array_ch4 = load_image_array(os.path.join(image_path, images[i + 1]))  # for MHB ch2 is DAPI

    # One Cellpose mask per field; relabel it so every connected region gets
    # its own consecutive integer label.
    masks_ch2 = load_image_array(os.path.join(mask_path, cellpose_mask[i // CHANNELS_PER_FIELD]))
    labels_ch2 = label(masks_ch2)

    n_rows, n_cols = image_array_ch2.shape[:2]
    scaled_channels = (
        (image_array_ch1, ch1_q099),
        (image_array_ch2, ch2_q099),
        (image_array_ch4, ch4_q099),
    )

    single_cell_bbox = []
    for target_region in regionprops(labels_ch2):
        centroid_row, centroid_col = target_region.centroid

        # 90x90 window centred on the cell. Anchoring on the integer part of
        # the centroid guarantees the window is exactly 2 * half_size wide.
        min_row = int(centroid_row) - half_size
        max_row = min_row + 2 * half_size
        min_col = int(centroid_col) - half_size
        max_col = min_col + 2 * half_size

        # Keep only cells whose window lies strictly inside the field; cells
        # close to the border are skipped rather than padded.
        if not (min_row > 0 and max_row < n_rows and min_col > 0 and max_col < n_cols):
            continue

        window = (slice(min_row, max_row), slice(min_col, max_col))

        # Pixels of this cell only. Comparing inside the window avoids
        # building a full-field boolean mask for every single cell.
        target_cell_mask = labels_ch2[window] == target_region.label

        # Zero everything outside the cell, then scale each channel so that
        # its plate-wide 99th percentile maps to 255.
        # NOTE(review): pixels brighter than the 99th percentile end up above
        # 255 and are not clipped -- confirm this is intended.
        normalised_channels = [
            (np.where(target_cell_mask, channel[window], 0).astype('float32') / q099) * 255
            for channel, q099 in scaled_channels
        ]
        single_cell_bbox.append(np.stack(normalised_channels, axis=-1))

    # Fields without any usable cell contribute nothing to the output lists.
    if not single_cell_bbox:
        continue

    print(images[i])

    single_cells = np.stack(single_cell_bbox)  # (n_cells, 90, 90, 3)

    # Run the network on mini-batches and accumulate the per-batch feature
    # sums; dividing by the cell count gives the mean feature vector of the
    # field, shaped (1, n_features). Results match the per-cell evaluation up
    # to floating-point rounding.
    feature_sum = None
    for start in range(0, len(single_cells), FEATURE_BATCH_SIZE):
        batch_features = extract_features(single_cells[start:start + FEATURE_BATCH_SIZE])
        batch_sum = tf.reduce_sum(batch_features, axis=0, keepdims=True)
        feature_sum = batch_sum if feature_sum is None else feature_sum + batch_sum

    mean_feature = feature_sum / len(single_cells)

    mean_fetaure_list.append(mean_feature)
    image_list.append(images[i])

r01c01ch1.tiff


2024-11-22 21:24:08.125221: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


r01c02ch1.tiff


2024-11-22 21:24:11.656577: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


r01c03ch1.tiff


In [None]:
# Materialise every per-field mean feature tensor as a NumPy array.
feature_list = list(map(lambda mean_tensor: mean_tensor.numpy(), mean_fetaure_list))

In [10]:
# Sanity check: the number of feature vectors and of image names, one per line.
print(len(feature_list), len(image_list), sep="\n")

382
382


In [None]:
# Stack the per-field feature arrays row-wise into one 2D matrix
# (one row per field of view).
feature_array = np.vstack(feature_list)

# Name the columns feature1..featureN. N is taken from the data so the cell
# keeps working if the feature extraction layer (and its width) changes.
n_features = feature_array.shape[1]
df = pd.DataFrame(
    feature_array,
    columns=[f'feature{k}' for k in range(1, n_features + 1)],
)

# Put the image name of each field in the first column.
df.insert(0, 'Image', image_list)
df

In [None]:
# Save the dataframe as an Excel workbook. The target folder is created first
# so that a missing directory does not discard the results of the long
# feature-extraction run.
output_path = "/home/gauss/Desktop/Revised_Data_Features/Pretrained baseline MobileNetV2 Bbox Size 90/MHB/Plate_7.xlsx"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_excel(output_path)
