In [1]:
import cv2 as cv
import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.cluster.vq import kmeans, whiten
import PIL.ImageColor as ImageColor

2023-03-08 21:08:43.948933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 21:08:44.244850: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-03-08 21:08:45.312015: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/leonkl/anaconda3/envs/LAB/lib/python3.9/site-packages/cv2/../../lib64:
2023-03-08

In [2]:
# TMA core identifiers used in this notebook; each one names both a core
# image (<id>.png) and an mxIF measurement table (<id>.csv).
train_index = [
    "A-7",
    "A-15",
    "B-11",
    "H-15",
    "F-9",
    "G-3",
]

# Directory prefixes (trailing slash included) that the file names are appended to.
path_cores = "TMA_cores_M06_M07_panels/M06/Cores/"
path_mxIF = "Texts_small_coregistered/"

In [3]:
def _load_core_image(image_path):
    """Read one core image with OpenCV, raising if the file cannot be decoded.

    cv.imread signals failure by returning None rather than raising, which
    would otherwise only surface much later as an AttributeError on `.shape`.
    """
    image = cv.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read core image: {image_path}")
    return image


# One image and one mxIF table per entry of train_index, in the same order.
train_cores = [_load_core_image(path_cores + index + ".png") for index in train_index]
train_mxIF = [pd.read_csv(path_mxIF + index + ".csv") for index in train_index]

In [4]:
BUFFER = 50
BATCH = 32
VAL_SPLIT = 0.04
# Half-extent (x, y) of a cell patch in pixels; patches span 2 * CELL_SIZE.
CELL_SIZE = (32, 32)

# The mxIF column names are the full product
#   compartment x marker x statistic
# with the statistic varying fastest and the compartment slowest.
_MXIF_COMPARTMENTS = ("Nucleus", "Cytoplasm")
_MXIF_MARKERS = (
    "PD1 (PPD520)",
    "FOXP3 (PPD540)",
    "CD20 (PPD620)",
    "CD3 (PPD650)",
    "PANCK (PPD690)",
)
_MXIF_STATISTICS = ("Mean", "Max", "Std Dev")

MXIF_FEATURES = [
    f"{compartment} {marker} {statistic} (Normalized Counts, Total Weighting)"
    for compartment in _MXIF_COMPARTMENTS
    for marker in _MXIF_MARKERS
    for statistic in _MXIF_STATISTICS
]

In [5]:
# Per-feature extrema over all loaded cores, used to scale the mxIF features.
# Both accumulators start at zero, so TOTAL_MAX is never below 0 and
# TOTAL_MIN is never above 0.
# NOTE(review): starting from zero means these are not the true data extrema
# when a feature is strictly positive (or strictly negative). Left as-is
# because the saved model was presumably trained with this scaling -- confirm
# before changing.
TOTAL_MAX = np.zeros(len(MXIF_FEATURES))
TOTAL_MIN = np.zeros(len(MXIF_FEATURES))

for core in train_mxIF:
    for i, feature in enumerate(MXIF_FEATURES):
        column = core.loc[:, feature]
        # max()/min() keep the running value unless the candidate compares
        # strictly greater/smaller, so a NaN column extremum is ignored.
        TOTAL_MAX[i] = max(TOTAL_MAX[i], column.max())
        TOTAL_MIN[i] = min(TOTAL_MIN[i], column.min())

In [6]:
def data_generator():
    """Yield one (cell_image, cell_features) pair per usable cell.

    Iterates the cores in train_index order and, within each core, the rows of
    its mxIF table in file order. A cell is skipped when

    * its X or Y position is NaN,
    * the patch of half-extent CELL_SIZE around it does not lie strictly
      inside the core image, or
    * any of its scaled MXIF_FEATURES values is NaN.

    Yields:
        cell_image: the image patch of 2 * CELL_SIZE pixels centred on the
            cell, divided by 255.
        cell_features: the MXIF_FEATURES values of the cell scaled as
            (value - TOTAL_MIN) / TOTAL_MAX.
    """
    half_width, half_height = CELL_SIZE

    for i in range(len(train_index)):
        core_image = train_cores[i]
        core_table = train_mxIF[i]
        image_height, image_width = core_image.shape[:2]

        # Extract and scale the whole feature matrix once per core. Rows are
        # then addressed by position, which matches the enumerate() counter
        # below even if the table does not carry a default 0..n-1 index.
        # NOTE(review): the divisor is TOTAL_MAX, not (TOTAL_MAX - TOTAL_MIN);
        # kept unchanged because the saved model presumably expects it.
        core_features = core_table.loc[:, MXIF_FEATURES].to_numpy(dtype=np.float32)
        core_features = (core_features - TOTAL_MIN) / TOTAL_MAX

        X = core_table.loc[:, 'Cell X Position']
        Y = core_table.loc[:, 'Cell Y Position']

        for j, (x, y) in enumerate(zip(X, Y)):
            x = float(x)
            y = float(y)
            if np.isnan(x) or np.isnan(y):
                continue

            left, right = round(x - half_width), round(x + half_width)
            top, bottom = round(y - half_height), round(y + half_height)
            if left < 0 or right >= image_width:
                continue
            if top < 0 or bottom >= image_height:
                continue

            cell_features = core_features[j]
            if np.isnan(cell_features).any():
                continue

            cell_image = core_image[top:bottom, left:right] / 255
            yield (cell_image, cell_features)

In [7]:
def cell_coordinate_generator():
    """Yield the (x, y) position of every cell that data_generator emits.

    The cores and rows are visited in the same order as in data_generator and
    the same three filters are applied (NaN position, patch not strictly
    inside the image, NaN among the scaled MXIF_FEATURES), so the n-th
    coordinate yielded here belongs to the n-th sample of data_generator.
    Without the feature filter the two sequences drift apart after the first
    cell with a missing feature value.

    Yields:
        (x, y): the cell position as floats, in the coordinates stored in the
        'Cell X Position' / 'Cell Y Position' columns.
    """
    half_width, half_height = CELL_SIZE

    for i in range(len(train_index)):
        core_table = train_mxIF[i]
        image_height, image_width = train_cores[i].shape[:2]

        # Same scaling expression as data_generator; only its NaN pattern is
        # needed here, so numeric warnings from the division are silenced.
        core_features = core_table.loc[:, MXIF_FEATURES].to_numpy(dtype=np.float32)
        with np.errstate(divide="ignore", invalid="ignore"):
            core_features = (core_features - TOTAL_MIN) / TOTAL_MAX
        has_nan_feature = np.isnan(core_features).any(axis=1)

        X = core_table.loc[:, 'Cell X Position']
        Y = core_table.loc[:, 'Cell Y Position']

        for j, (x, y) in enumerate(zip(X, Y)):
            x = float(x)
            y = float(y)
            if np.isnan(x) or np.isnan(y):
                continue
            if round(x - half_width) < 0 or round(x + half_width) >= image_width:
                continue
            if round(y - half_height) < 0 or round(y + half_height) >= image_height:
                continue
            if has_nan_feature[j]:
                continue

            yield (x, y)

In [8]:
# Element layout produced by data_generator: an image patch of
# (2 * CELL_SIZE[1], 2 * CELL_SIZE[0], 3) and a vector with one entry per
# mxIF feature, both delivered as float32.
image_spec = tf.TensorSpec(shape=(2 * CELL_SIZE[1], 2 * CELL_SIZE[0], 3), dtype=tf.float32)
feature_spec = tf.TensorSpec(shape=(len(MXIF_FEATURES),), dtype=tf.float32)

# No shuffling: the samples stay in generator order.
data = tf.data.Dataset.from_generator(
    data_generator,
    output_signature=(image_spec, feature_spec),
).batch(BATCH)

2023-03-08 21:08:55.146595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/leonkl/anaconda3/envs/LAB/lib/python3.9/site-packages/cv2/../../lib64:
2023-03-08 21:08:55.146642: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-08 21:08:55.146682: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (icme-gpu1): /proc/driver/nvidia/version does not exist
2023-03-08 21:08:55.147389: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorF

In [9]:
# SavedModel directory of the autoencoder whose encoders are applied below.
load_dir = "logs/autoencoder/baseline/20230307-012705"
loaded_model = tf.saved_model.load(export_dir=load_dir)

In [10]:
# Encode every batch: the image patches go through encoder_conv, the mxIF
# features through encoder_fnn, and the two codes are joined along axis 1.
data_latent = []
for batch_number, (he_batch, mxif_batch) in enumerate(data):
    he_code = loaded_model.encoder_conv(he_batch).numpy()
    mxif_code = loaded_model.encoder_fnn(mxif_batch).numpy()
    data_latent.append(np.concatenate([he_code, mxif_code], axis=1))

    # Progress marker every 500 batches.
    if batch_number % 500 == 0:
        print(batch_number)

0
500
1000
1500
2000
2500


KeyboardInterrupt: 

In [None]:
# Requested number of clusters. kmeans may return fewer centroids than this
# if a cluster ends up empty.
K = 10
# Fixed seed so the centroids (and therefore the cluster ids and colours
# derived from them) are the same on every run.
KMEANS_SEED = 0

# Stack the per-batch latent codes into one (n_cells, latent_dim) matrix and
# rescale every dimension to unit variance before clustering.
latent_vectors = np.concatenate(data_latent, axis=0)
latent_vectors = whiten(latent_vectors)
centroids, distortion = kmeans(latent_vectors, K, seed=KMEANS_SEED)

In [None]:
# Assign every latent vector to its nearest centroid (Euclidean distance).
# The number of centroids is taken from `centroids` itself rather than from K,
# because kmeans can return fewer centroids than were requested.
# Distances are computed one centroid at a time to keep the temporary arrays
# at (n_cells, latent_dim) instead of (n_cells, n_centroids, latent_dim).
centroid_distances = np.stack(
    [np.linalg.norm(latent_vectors - centroid, axis=1) for centroid in centroids],
    axis=1,
)
# Kept as a float array, matching the original np.zeros-based result.
clusters = np.argmin(centroid_distances, axis=1).astype(np.float64)

In [None]:
# One colour per cluster id, converted from hex notation to (R, G, B) tuples.
colormap = [
    ImageColor.getcolor(hex_code, "RGB")
    for hex_code in (
        '#0000FF', '#8A2BE2', '#FF4040', '#8A360F', '#98F5FF', '#FF6103',
        '#7FFF00', '#EEE8CD', '#FFB90F', '#556B2F', '#EE1289',
    )
]

In [None]:
# Paint one filled circle per cell onto a copy of the first core image,
# coloured by the cell's cluster, and save the result as test.png.
X = train_mxIF[0].loc[:,'Cell X Position']
Y = train_mxIF[0].loc[:,'Cell Y Position']

# Work on a copy: drawing directly on train_cores[0] would leave the circles
# in the image that data_generator cuts its patches from.
src = train_cores[0].copy()

# Drawing stops after the cell with this running index has been painted.
# NOTE(review): cell_coordinate_generator walks through all cores in order,
# so this cap is the only thing keeping cells of later cores off this image --
# confirm it matches the number of usable cells in the first core.
LAST_CELL_INDEX = 15000

# zip() also ends the loop when `clusters` runs out, so a shorter label array
# cannot cause an out-of-range lookup.
for i, ((x, y), cluster) in enumerate(zip(cell_coordinate_generator(), clusters)):
    red, green, blue = colormap[int(cluster)]
    # colormap holds RGB tuples, while images read by cv.imread and written
    # by cv.imwrite are in BGR channel order.
    cv.circle(src, (int(x),int(y)), radius=5, color=(blue, green, red), thickness=-1)
    if i > LAST_CELL_INDEX:
        break

cv.imwrite("test.png", src)

(16551, 144)