In [None]:
%load_ext autoreload
%autoreload 2

import tensorflow as tf
from utils import load_data, split_data
import os

# Report the TensorFlow and bundled Keras versions so runs can be tied to a
# specific library release.
print(f"tensorflow: {tf.__version__}")
print(f"keras: {tf.keras.__version__}")

In [None]:
%env DATA_DIR ../data/GSE92742_Broad_LINCS

# Dataset root is read back from the DATA_DIR environment variable set above.
data_dir = os.environ['DATA_DIR']

# File names expected inside the dataset root: the .gctx data matrix and the
# instance-info table.
data_fname = 'GSE92742_Broad_LINCS_Level4_ZSPCINF_mlr12k_n1319138x12328.gctx'
sample_meta_fname = 'GSE92742_Broad_LINCS_inst_info.txt'

# Paths handed to the loader.
data_path = os.path.join(data_dir, data_fname)
sample_meta_path = os.path.join(data_dir, sample_meta_fname)

In [None]:
# Load the raw data, restricted to the perturbation types and cell lines below.

# Perturbation types to keep.
pert_types = [
    'trt_cp',       # treated with compound
    'ctl_vehicle',  # control for compound treatment (e.g. DMSO)
    'ctl_untrt',    # untreated samples
]

# Cell lines to keep.
cell_ids = ['MCF7', 'PC3', 'HA1E', 'HCC515']

sample_meta, gene_labels, data = load_data(
    data_path,
    sample_meta_path,
    pert_types,
    cell_ids,
)
print(f"data size: {data.shape}")

In [None]:
# Split data into training, validation, and testing sets.
# NOTE(review): this call previously passed `sample_labs`, which is never
# defined in this notebook and raises NameError on a fresh kernel. The
# per-sample metadata returned by `load_data` (`sample_meta`) is assumed to be
# the intended argument -- confirm against `utils.split_data`.
# The 0.2 is presumably the held-out fraction -- confirm against `utils.split_data`.
train, val, test = split_data(data, sample_meta, 0.2)

# Each split is indexed with [0] to get the array whose first dimension is the
# number of samples in that split.
print(f"training size: {train[0].shape[0]}")
print(f"validation size: {val[0].shape[0]}")
print(f"testing size: {test[0].shape[0]}")

In [None]:
from tensorflow.keras import Sequential, layers


def create_AE(hidden_layers, output_dim=978):
    """Build a fully-connected autoencoder as a Keras ``Sequential`` model.

    Args:
        hidden_layers: Iterable of unit counts. One ReLU ``Dense`` layer is
            added per entry, in the order given.
        output_dim: Number of units in the final linear ``Dense`` layer.
            Defaults to 978, the value that was previously hard-coded
            (presumably the number of input features -- confirm against the
            data).

    Returns:
        The uncompiled ``Sequential`` model. No input shape is declared here,
        so Keras defers building the weights until the model first sees data.
    """
    model = Sequential()

    # Hidden stack: one ReLU layer per requested width.
    for nunits in hidden_layers:
        model.add(layers.Dense(nunits, activation='relu'))

    # Linear output layer so reconstructions are unbounded real values.
    model.add(layers.Dense(output_dim, activation='linear'))
    return model

def create_dataset(X, y, shuffle_buffer_size=10_000, shuffle=True, batch_size=32):
    """Wrap ``(X, y)`` in a batched, prefetching ``tf.data.Dataset``.

    Args:
        X: Inputs; sliced along the first dimension into individual examples.
        y: Targets; sliced along the first dimension in step with ``X``.
        shuffle_buffer_size: Size of the shuffle buffer. Only used when
            ``shuffle`` is true.
        shuffle: Whether to shuffle examples before batching.
        batch_size: Number of examples per batch. Defaults to 32, the value
            that was previously hard-coded.

    Returns:
        A ``tf.data.Dataset`` yielding ``(X_batch, y_batch)`` pairs.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.batch(batch_size=batch_size)
    # `prefetch` lets the dataset fetch batches in the background while the
    # model is training; AUTOTUNE lets tf.data pick the buffer size.
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset

In [None]:
# The same array is passed as both input and target, so the model is trained to
# reconstruct its input. The shuffle buffer is sized to the number of training
# rows.
train_data = create_dataset(
    train[0],
    train[0],
    shuffle_buffer_size=train[0].shape[0],
)

In [None]:
# Hidden widths 120 -> 32 -> 120; the 32-unit layer is the narrowest.
model = create_AE([120, 32, 120])

# The model ends in a linear layer and is trained with mean squared error, so
# it is a regression problem. 'accuracy' is a classification metric and gives
# no meaningful signal here; track mean absolute error alongside the loss
# instead.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])