In [1]:
import os
import numpy as np
import pandas as pd

import tensorflow as tf
from micron2.codexutils import stream_dataset

In [2]:
from tensorflow.keras.layers import (Dense, Conv2D, Dropout, BatchNormalization, Conv2DTranspose)

In [3]:
!ls tests

dataset.hdf5  test_codexutils__pull_nuclei.ipynb  test_imports.ipynb


In [4]:
import h5py

# Peek at the HDF5 layout: channel names plus the top-level and per-cell groups.
# The context manager guarantees the handle is closed even if a lookup or print
# raises (e.g. a missing 'meta/channel_names' key).
with h5py.File('tests/dataset.hdf5', 'r') as f:
    print(f['meta/channel_names'][:])
    print(f.keys())
    print(f['cells'].keys())

[b'DAPI' b'OX40L' b'CD45' b'CD20' b'CD134' b'CD68' b'CD31' b'CD103'
 b'HLA-DR' b'CXCR5' b'IgG' b'CD3e' b'Ki-67' b'LAG3' b'CXCL13' b'IgA'
 b'CD89' b'PNaD' b'PD-L1' b'PD-1' b'CD11c' b'CD80' b'CD69' b'TIM3'
 b'CD45RO' b'CD40LG' b'FOXP3' b'CD64' b'GZMB' b'C1q' b'CD40' b'CD45RA'
 b'CD138' b'IL7R' b'IgM' b'PDGFRb' b'aSMA' b'CD8' b'CD4' b'PanCytoK']
<KeysViewHDF5 ['cells', 'meta']>
<KeysViewHDF5 ['C1q', 'CD103', 'CD11c', 'CD134', 'CD138', 'CD20', 'CD31', 'CD3e', 'CD4', 'CD40', 'CD40LG', 'CD45', 'CD45RA', 'CD45RO', 'CD64', 'CD68', 'CD69', 'CD8', 'CD80', 'CD89', 'CXCL13', 'CXCR5', 'DAPI', 'FOXP3', 'GZMB', 'HLA-DR', 'IL7R', 'IgA', 'IgG', 'IgM', 'Ki-67', 'LAG3', 'OX40L', 'PD-1', 'PD-L1', 'PDGFRb', 'PNaD', 'PanCytoK', 'TIM3', 'aSMA']>


In [5]:
# x = load_dataset('tests/dataset.hdf5', use_channels=['DAPI', 'CD45', 'PanCytoK', 'CD31', 'PDGFRb', 'aSMA', 'Ki-67'],
#                  verbose=True)
# print(x.shape)

def process(x):
    """Cast `x` to float32 and divide it by 255.

    NOTE(review): presumably `x` holds 8-bit image intensities so the result
    lands in [0, 1] -- confirm against what `stream_dataset` yields.
    """
    return tf.cast(x, tf.float32) / 255.
    
# Training input pipeline: stream the selected channels from the HDF5 file,
# repeat the stream 10 times, shuffle with a 4096-element buffer, rescale each
# element with `process`, and group into batches of 32.
dataset = (
    stream_dataset('tests/dataset.hdf5', use_channels=['DAPI', 'CD45', 'PanCytoK', 'CD31', 'PDGFRb', 'aSMA', 'Ki-67'],)
    .repeat(10)
    .shuffle(4096)
    .map(process)
    .batch(32)
)

# Pull a single batch into `x`; later cells use it to size and smoke-test the model.
for x in dataset.take(1):
    pass

In [6]:
class Autoencoder(tf.keras.Model):
    """Convolutional autoencoder: ResNet50V2 encoder plus a transposed-conv decoder.

    The encoder is a randomly initialised ResNet50V2 without its classification
    head and without global pooling, so it emits a spatial feature map. The
    decoder applies five stride-2 transposed convolutions (32x upsampling in
    total), interleaved with stride-1 transposed convolutions, and ends with
    `n_channels` output filters.

    NOTE(review): the decoder layers have no activation functions, so the
    decoder is a purely linear map of the encoder features -- confirm this is
    intended.

    Args:
        input_shape: (height, width, channels) of a single input image.
    """

    def __init__(self, input_shape=(64, 64, 3)):
        super().__init__()
        self.n_channels = input_shape[-1]
        # pooling=None keeps the 4-D feature map that the decoder needs.
        # The previous value 'average' is not a recognised option (only None,
        # 'avg' and 'max' are) and was silently ignored, so this is the same
        # behaviour stated explicitly.
        self.conv_1 = tf.keras.applications.ResNet50V2(include_top=False, weights=None,
                                                       input_shape=input_shape,
                                                       pooling=None)
        self.deconv_1 = Conv2DTranspose(filters=256,  kernel_size=(2,2),
                                        strides=(2,2), padding='same')
        self.deconv_1_1 = Conv2DTranspose(filters=256,  kernel_size=(2,2),
                                        strides=(1,1), padding='same')
        self.deconv_2 = Conv2DTranspose(filters=128,  kernel_size=(4,4),
                                        strides=(2,2), padding='same')
        self.deconv_2_1 = Conv2DTranspose(filters=128,  kernel_size=(4,4),
                                        strides=(1,1), padding='same')
        self.deconv_3 = Conv2DTranspose(filters=64,  kernel_size=(5,5),
                                        strides=(2,2), padding='same')
        self.deconv_3_1 = Conv2DTranspose(filters=64,  kernel_size=(5,5),
                                        strides=(1,1), padding='same')
        self.deconv_4 = Conv2DTranspose(filters=64,  kernel_size=(5,5),
                                        strides=(2,2), padding='same')
        self.deconv_4_1 = Conv2DTranspose(filters=64,  kernel_size=(5,5),
                                        strides=(1,1), padding='same')
        self.deconv_5 = Conv2DTranspose(filters=self.n_channels,  kernel_size=(5,5),
                                        strides=(2,2), padding='same')

    def call(self, x, training=None):
        """Encode `x` and decode it back to an image-shaped tensor.

        Args:
            x: batch of input images.
            training: forwarded to the encoder so that its BatchNormalization
                layers can switch between training and inference behaviour.
                `None` defers to the surrounding Keras call context.

        Returns:
            The reconstruction produced by the decoder.
        """
        x = self.conv_1(x, training=training)
        x = self.deconv_1(x)
        x = self.deconv_1_1(x)
        x = self.deconv_2(x)
        x = self.deconv_2_1(x)
        x = self.deconv_3(x)
        x = self.deconv_3_1(x)
        x = self.deconv_4(x)
        x = self.deconv_4_1(x)
        x = self.deconv_5(x)
        return x

    def encode(self, x, training=False):
        """Return one feature vector per image.

        Runs the encoder only and averages its feature map over the two
        spatial axes.

        Args:
            x: batch of input images.
            training: forwarded to the encoder; defaults to inference mode.
        """
        features = self.conv_1(x, training=training)
        return tf.reduce_mean(features, axis=[1,2])
        
# Build the model for the per-image shape of the probe batch `x`, then run one
# forward pass through the full autoencoder and one through the encoder alone.
ae_model = Autoencoder(input_shape=x.shape[1:])
y = ae_model(x)
z = ae_model.encode(x)
# Reconstruction shape first, embedding shape second, one per line.
print(y.shape, z.shape, sep='\n')



(32, 64, 64, 7)
(32, 2048)


In [7]:
# Print the per-layer output shapes and parameter counts of the built model.
ae_model.summary()

Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50v2 (Functional)      (None, 2, 2, 2048)        23577344  
_________________________________________________________________
conv2d_transpose (Conv2DTran multiple                  2097408   
_________________________________________________________________
conv2d_transpose_1 (Conv2DTr multiple                  262400    
_________________________________________________________________
conv2d_transpose_2 (Conv2DTr multiple                  524416    
_________________________________________________________________
conv2d_transpose_3 (Conv2DTr multiple                  262272    
_________________________________________________________________
conv2d_transpose_4 (Conv2DTr multiple                  204864    
_________________________________________________________________
conv2d_transpose_5 (Conv2DTr multiple                  

In [None]:
import tqdm.auto as tqdm

# Reconstruction objective and optimiser for the custom training loop.
mse_fn = tf.keras.losses.MeanSquaredError()
optim = tf.keras.optimizers.Adam(learning_rate = 1e-4)

pbar = tqdm.tqdm(enumerate(dataset))

losses = []
for i, batch in pbar:
    with tf.GradientTape() as tape:
        # training=True is required in a hand-written loop: without it Keras
        # runs the model in inference mode, so the BatchNormalization layers in
        # the ResNet encoder would use their (initial) moving statistics and
        # never update them.
        xout = ae_model(batch, training=True)
        loss = mse_fn(batch, xout)
    grads = tape.gradient(loss, ae_model.trainable_variables)
    optim.apply_gradients(zip(grads, ae_model.trainable_variables))
    losses.append(loss.numpy())

    # Every 100 steps show the mean loss since the last report, then reset.
    if i % 100 == 0:
        pbar.set_description(f'mean loss = {np.mean(losses):3.3e}')
        losses = []

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

In [None]:
# Single, unshuffled pass over the data to extract one embedding per element.
# `process` is the rescaling function defined alongside the training pipeline.
dataset = stream_dataset('tests/dataset.hdf5', use_channels=['DAPI', 'CD45', 'PanCytoK', 'CD31', 'PDGFRb', 'aSMA', 'Ki-67'],)
# The encoder needs a leading batch axis; the stream yields individual images,
# so they must be batched before being passed to `encode`.
dataset = dataset.map(process).batch(32)

# One (batch, features) array per batch, stacked along the sample axis.
z = [ae_model.encode(batch).numpy() for batch in dataset]
z = np.concatenate(z, axis=0)
print(z.shape)

In [None]:
# Persist the embedding matrix to disk in NumPy's .npy format.
np.save(file='z.npy', arr=z)

In [None]:
# Export the trained model in TensorFlow SavedModel format.
# NOTE(review): despite the '.h5' suffix this writes a SavedModel *directory*,
# not an HDF5 file -- consider renaming once downstream consumers are checked.
tf.saved_model.save(obj=ae_model, export_dir='model.h5')