In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from tensorflow.python.keras.layers import Input, Dense, Lambda, Flatten, Reshape, Concatenate
from tensorflow.python.keras.layers import Conv2D, Conv2DTranspose, BatchNormalization, Activation, LeakyReLU
from tensorflow.python.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.python.keras import metrics
from tensorflow.python.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.python.keras.datasets import mnist
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.optimizers import Adam

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import pyarrow
from sklearn.model_selection import train_test_split

import tensorflow as tf 

In [4]:
#tensorflow.version

def lr_scheduler(initial_lr=1e-3, decay_factor=0.75, step_size=5, min_lr=1e-5):
    '''
    Wrapper function to create a LearningRateScheduler with step decay schedule.
    '''
    def schedule(epoch):
        lr = initial_lr * (decay_factor ** np.floor(epoch / step_size))
        if lr > min_lr:
            return lr
        return min_lr

    return LearningRateScheduler(schedule, verbose=1)


class Autoencoder():
    def __init__(self, shape_data = (11,1), intermediate_dim=128, latent_dim=32):
        self.data_size = shape_data
        #self.intermediate_dim = intermediate_dim
        #self.latent_dim = latent_dim
        self.input = Input(shape=self.data_size)
        #self.params = {
#             'enc_filters': [16, 32, 48, 16, 32],
#             'enc_kernels': [3, 3, 3, 4, 1],
#             'enc_strides': [2, 2, 2, 1, 1],
#             'dec_filters': [16, 16, 16, 32],
#             'dec_kernels': [4, 3, 3, 3],
#             'dec_strides': [1, 2, 2, 2],
#         }
        self.reconstruction_shape = []
        self.params = {'enc_dim': [8, 4] , 'dec_dim': [4, 8, 11] }

    def build_encoder(self, dims=[8, 4]): #, kernels=[3, 3, 3, 4, 1], strides=[2, 2, 2, 1, 1]):
        def f(x):
            for intermediate_dim in dims: #num_filter, kernel, stride in zip(filters, kernels, strides):
                x = Dense( units=intermediate_dim,
                           activation=tf.nn.sigmoid)(x)
                x = BatchNormalization()(x)
                self.reconstruction_shape += [x.get_shape().as_list()]
            return x
        return f

    def build_decoder(self, dims=[4, 8, 11]): #filters=[64, 64, 64, 32], kernels=[4, 3, 3, 3], strides=[1, 2, 2, 2]):
        def f(x):
            for intermediate_dim in dims: #for i, (num_filter, kernel, stride) in enumerate(zip(filters, kernels, strides)):
                x = Dense(units=intermediate_dim,
                           activation=tf.nn.sigmoid)(x)
                x = BatchNormalization()(x)
                decoder = x
            return decoder
        return f

    def build_model(self):
        hidden = self.build_encoder(
            dims=self.params['enc_dim'])(self.input)
        dec = self.build_decoder(
            dims=self.params['dec_dim'])(hidden)
        # instantiate VAE model
        vae = Model(self.input, dec)
        return vae


class ADAE(object):
    def __init__(self, data_size = (11, 1)):#image_size=(28, 28, 1), latent_dim=100):
        self.data_size = data_size
        #self.latent_dim = latent_dim

        self.input1 = Input(shape=data_size)
        self.input2 = Input(shape=data_size)
        
        # Build the generator
        self.generator = Autoencoder(shape_data = data_size).build_model()
        # Build and compile the discriminator
        self.discriminator = Autoencoder(shape_data = data_size).build_model()

        self.gx1 = self.generator(self.input1)      # g(x1)
        self.dx2 = self.discriminator(self.input2)  # d(x2)
        self.dgx1 = self.discriminator(self.gx)    # d(g(x1))
        
        #print(self.gx.shape, self.dx.shape, self.dgx.shape, self.input.shape )
        self.d_loss = Lambda(lambda x: K.mean(K.abs(x[0] - x[1]), axis=1) -
                             K.mean(K.abs(x[2] - x[3]), axis=1), name='d_loss')([self.input2, self.dx2, self.gx1, self.dgx1])
        
        self.g_loss = Lambda(lambda x: K.mean(K.abs(x[0] - x[1]), axis=1) +
                            K.mean(K.abs(x[1] - x[2]), axis=1), name='g_loss')([self.input1, self.gx1, self.dgx1])

        self.model = Model(inputs=[self.input1, self.input2], outputs=[self.g_loss, self.d_loss])
        self.model.summary()
        # self.generator.summary()
        # self.discriminator.summary()

    def get_anomaly_score(self):
#         """ Compute the anomaly score. Call it after training. """
#         score_out = Lambda(lambda x:
#                            K.mean(K.mean(K.mean((x[0] - x[1]) ** 2, axis=1), axis=1), axis=1)
#                            )([self.model.inputs[0], self.model.layers[2](self.model.layers[1](self.model.inputs[0]))])
#         return Model(self.model.inputs[0], score_out)

    def get_generator_trained_model(self):
        """ Get the generator to reconstruct the input. Call it after training. """
        return Model(self.model.inputs[0], self.model.layers[1](self.model.inputs[0]))

    def get_discrinminator_trained_model(self):
        """ Get the discrinminator to reconstruct the input. Call it after training. """
        return Model(self.model.inputs[0], self.model.layers[2](self.model.layers[1](self.model.inputs[0])))

    def train(self, x1_train, x2_train, x1_test, x2_test, epochs=1):
        self.model.add_loss(K.mean(self.g_loss))
        self.model.add_metric(self.g_loss, aggregation='mean', name="g_loss")
        self.model.add_loss(K.mean(self.d_loss))
        self.model.add_metric(self.d_loss, aggregation='mean', name="d_loss")

        for epoch in range(epochs):
            print('Epoch %d/%d' % (epoch + 1, epochs))
            # Train generator only
            self.model.layers[1].trainable = True
            self.model.layers[2].trainable = False
            self.model.compile('adam', loss_weights={'g_loss': 1, 'd_loss': 0})
            print('Training on Generator')
            self.model.fit(
                [x1_train, x2_train],
                batch_size=10, #64,
                steps_per_epoch=10, #200,
                epochs=epoch,
                callbacks=[
                        lr_scheduler(initial_lr=1e-3, decay_factor=0.75, step_size=10, min_lr=1e-5)
                ],
                initial_epoch=epoch - 1
            )
            # Train discriminator only
            self.model.layers[1].trainable = False
            self.model.layers[2].trainable = True
            self.model.compile('adam', loss_weights={'g_loss': 0, 'd_loss': 1})
            print('Training on Discriminator')
            self.model.fit(
                [x1_train, x2_train],
                batch_size=10, #64,
                steps_per_epoch=10, #200,
                epochs=epoch,
                callbacks=[
                    ModelCheckpoint(
                        'model_checkpoint/model_%d_gloss_{g_loss:.4f}_dloss_{d_loss:.4f}.h5' % epoch, 
                        verbose=1),
                    lr_scheduler(initial_lr=1e-3, decay_factor=0.75, step_size=10, min_lr=1e-5)
                ],
                initial_epoch=epoch - 1
            )

IndentationError: expected an indented block (<ipython-input-4-b4f065f01db4>, line 99)

In [None]:
# Load 
path = '/Users/laurieprelot/Documents/Projects/2019_Deep_learning/data/Chevrier-et-al/chevrier_data_pooled_nona.parquet'
#path = 'chevrier_data_pooled_nona.parquet'
chve = pd.read_parquet(path, engine='pyarrow')
chve.shape
#np.random.seed(123456)
#idx = np.random.choice(a = np.arange(chve.shape[0]), size = 2000, replace = False)
#chve_s = #chve.iloc[idx, ]
ID = 'rcc7'
select_cols = [col for col in chve.columns if not "metadata" in col]
chve = chve.loc[:,select_cols]
chve_s_patient = chve.reset_index()
chve_s_patient = chve_s_patient.rename({'level_0':'batch', 'level_1':'patient', 'level_2':'cell'} , axis = 1)
chve_s_patient = chve_s_patient.loc[chve_s_patient['patient'] == ID, :]

chve_s_patient_batch1 = chve_s_patient.loc[ chve_s_patient['batch'] == "experiment_101725_files"]
chve_s_patient_batch2 = chve_s_patient.loc[ chve_s_patient['batch'] == "experiment_102007_files"]

# target, split
chve_s_patient_batch1 = chve_s_patient_batch1.iloc[1:1000,:]
chve_s_patient_batch2 = chve_s_patient_batch2.iloc[1:1000,:]
y = chve_s_patient_batch1["batch"]
x1 = chve_s_patient_batch1.drop(["batch", 'cell', 'patient'], axis = 1) 
x2 = chve_s_patient_batch2.drop(["batch", 'cell', 'patient'], axis = 1) 
x1_train, x1_test, x2_train, x2_test = train_test_split(x1, x2, test_size=0.33, random_state=42)

In [None]:
adae = ADAE(data_size=(11,))
adae.train(x1_train.values, x2_train.values, x1_test.values, x2_test.values, epochs=15)

In [None]:
print(np.shape(x_train.values))
print(np.shape(y_train.values))
print(np.shape(x_test.values))
print(np.shape(y_test.values))

312    experiment_101725_files
606    experiment_101725_files
440    experiment_101725_files
1      experiment_101725_files
317    experiment_101725_files
                ...           
107    experiment_101725_files
271    experiment_101725_files
861    experiment_101725_files
436    experiment_101725_files
103    experiment_101725_files
Name: batch, Length: 669, dtype: object