In [1]:
import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input, Dense, Dropout
from keras import optimizers, regularizers, losses

from keras.models import Model
from keras import backend as K
from keras.callbacks import CSVLogger
from keras import metrics

import pickle
import keras

import sys

from sklearn.model_selection import ParameterGrid


import os
# Expose only CUDA device "0" to this process.
# NOTE(review): this runs after `import tensorflow` above; presumably the GPU
# is not initialised until a session is created, so it still takes effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Calls a private helper of the Keras TensorFlow backend. The return value is
# discarded here (this is not the last expression of the cell), so the call
# only matters for whatever side effects it has.
K.tensorflow_backend._get_available_gpus()



from configs import * 
from models import ignorenans_categorical_accuracy, ordloss, ignorenans_mse, ignorenans_scaled_mse
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [16]:
# Selects which split the cells below transform and where they write:
#   "TEST"  -> validation split (X_valid / y_valid), written under
#              last_shared_layer_transformations/
#   "TRAIN" -> training split (X_train / y_train), written under
#              last_shared_layer_transformations_TRAIN/
# The model cells treat any value other than "TEST" as the training case;
# the unsupervised cell treats any value other than "TRAIN" as the test case.
mode = "TEST"
# mode = "TRAIN"

# Point to model files

In [17]:

# Directory holding the trained model checkpoints for this dataset variant.
# An earlier location
#   "../../AD_Project/analyses/MTL_variable_tasks/6vars-continuous/%s/models/"
# used to be assigned here and was immediately overwritten, so only the path
# below was ever in effect.
path_to_models = "../../Pipeline_Outputs_Submitted/%s/models/"%SPECIFIC_FOLDER

# Folder with one "<fold_idx>.h5" file per cross-validation fold.
path_to_split_data = path_to_MDAD_data_folders + "%s/%s"%(SPECIFIC_FOLDER, split_pca_dataset)

# Chosen hyperparameter settings, looked up later as
#   MTL_FINAL_MODELS[fold_idx]                      and
#   MLP_BASELINES_FINAL_MODELS[fold_idx][phenotype].
# The files are opened in `with` blocks so the handles are closed right after
# loading instead of being left to the garbage collector.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(path_to_final_models_chosen + "MTL/folds.p", "rb") as folds_file:
    MTL_FINAL_MODELS = pickle.load(folds_file)
with open(path_to_final_models_chosen + "MLP_baselines/folds.p", "rb") as folds_file:
    MLP_BASELINES_FINAL_MODELS = pickle.load(folds_file)

In [18]:
def get_model_layers(model_file, num_layers):
    """Load a saved Keras model and return a truncated copy of it.

    The returned model shares the input of the loaded model and outputs the
    activations of ``model.layers[num_layers - 1]``.

    Parameters
    ----------
    model_file : path of the saved model, passed to ``keras.models.load_model``.
    num_layers : 1-based position (within ``model.layers``) of the layer whose
        output becomes the output of the truncated model.

    Returns
    -------
    A compiled ``Model`` ending at the requested layer.
    """
    # The saved model references custom losses/metrics by name, so they must be
    # supplied for deserialisation even though they are never evaluated here.
    custom_objects = {
        "ordloss_cur_params": ordloss(0),
        "ignorenans_mse": ignorenans_mse,
        "cat_acc": ignorenans_categorical_accuracy(0),
        "ignorenans_scaled_mse": ignorenans_scaled_mse,
    }
    full_model = keras.models.load_model(model_file, custom_objects=custom_objects)

    # Keep everything up to (and including) the requested layer.
    last_kept_layer = full_model.layers[num_layers-1]
    truncated_model = Model(inputs=full_model.input, outputs=last_kept_layer.output)

    # Again, an optimizer and loss have to be specified, but they are not used
    # since the model is never retrained.
    truncated_model.compile(optimizer=optimizers.adam(), loss="mse")

    return truncated_model


def kmeans_centroids_for_test(X_test, cluster_labels):
    """Get centroids for a new set of points from existing cluster labels.

    Each column (feature) of ``X_test`` belongs to the cluster given by the
    matching entry of ``cluster_labels``. For every cluster, the columns that
    belong to it are averaged, giving one value per row (sample) of ``X_test``.

    Parameters
    ----------
    X_test : 2-D array of shape (n_samples, n_features).
    cluster_labels : 1-D array of length n_features with one cluster id per
        column of ``X_test``.

    Returns
    -------
    Array of shape (n_clusters, n_samples). Row ``k`` is the centroid of the
    k-th smallest cluster id present in ``cluster_labels``; when the ids are
    exactly ``0..n_clusters-1`` this is simply cluster ``k``.

    Raises
    ------
    ValueError
        If ``cluster_labels`` does not have one entry per column of ``X_test``.
    """
    X_test = np.asarray(X_test)
    cluster_labels = np.asarray(cluster_labels).ravel()
    n_samples, n_features = X_test.shape

    if cluster_labels.shape[0] != n_features:
        raise ValueError(
            "cluster_labels has %d entries but X_test has %d columns"
            % (cluster_labels.shape[0], n_features))

    # Iterate over the ids that actually occur instead of assuming they are
    # 0..k-1; a missing id would otherwise select nothing and yield a NaN row.
    present_labels = np.unique(cluster_labels)

    new_centroids = np.zeros([len(present_labels), n_samples])
    features_by_sample = X_test.T  # shape (n_features, n_samples)
    for row, label in enumerate(present_labels):
        new_centroids[row] = np.mean(features_by_sample[cluster_labels == label], axis=0)

    return new_centroids

### Get model slice (up to last shared layer)

In [19]:
# last shared layer number
# NOTE(review): none of the visible cells read this variable; the calls to
# get_model_layers below pass a literal 4, which selects model.layers[3].
# Presumably this value is that index — confirm before relying on it.
layer_number = 3

### Transform data with models (MTL)

In [20]:
# Write, for every fold, the activations of the truncated MTL model together
# with the matching labels. Any `mode` other than "TEST" is treated as the
# training case.
if mode == "TEST":
    path_to_new_files = "last_shared_layer_transformations/%s/MTL/"%SPECIFIC_FOLDER
else:
    path_to_new_files = "last_shared_layer_transformations_TRAIN/%s/MTL/"%SPECIFIC_FOLDER

for fold_idx in range(25,30):

    # Name of the hyperparameter setting chosen for this fold.
    hy_name = MTL_FINAL_MODELS[fold_idx]

    with h5py.File(path_to_split_data + "/" + str(fold_idx) + ".h5", 'r') as hf:
        X_train = hf["X_train_transformed"][:,:num_components].astype(np.float64)
        X_valid = hf["X_valid_transformed"][:,:num_components].astype(np.float64)
        labels_train = hf["y_train"][:]
        labels_valid = hf["y_valid"][:]
        labels_names = hf["labels_names"][:]

    # The trailing 4 makes the truncated model end at model.layers[3];
    # 200 is the number embedded in the checkpoint file name.
    model = get_model_layers(path_to_models + "MTL/ACT_MSBBRNA_ROSMAP_PCASplit/%s/%i/%i.hdf5"%(hy_name, fold_idx, 200), 4)

    output_dir = path_to_new_files + hy_name + "/"
    output_file = path_to_new_files + "%s/%i.h5"%(hy_name, fold_idx)

    print(output_file)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Run the model before opening the output file so that a prediction
    # failure does not leave a truncated, half-written HDF5 file behind.
    if mode=="TEST":
        labels_to_save = labels_valid
        outputs_to_save = model.predict(X_valid)
    else:
        labels_to_save = labels_train
        outputs_to_save = model.predict(X_train)

    with h5py.File(output_file, 'w') as hf:
        hf.create_dataset("labels", data=labels_to_save)
        hf.create_dataset("outputs", data=outputs_to_save)
        hf.create_dataset("labels_names", data=labels_names)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


last_shared_layer_transformations/origGE/MTL/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.100000_20/25.h5
last_shared_layer_transformations/origGE/MTL/200_relu_[500, 100]_[50, 10]_0.100000_0.001000_0.001000_[1, 1]_0.100000_20/26.h5
last_shared_layer_transformations/origGE/MTL/200_relu_[500, 100]_[50, 10]_0.100000_0.001000_0.001000_[1, 1]_0.010000_20/27.h5
last_shared_layer_transformations/origGE/MTL/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.100000_20/28.h5
last_shared_layer_transformations/origGE/MTL/200_relu_[500, 100]_[50, 10]_0.100000_0.001000_0.001000_[1, 1]_0.100000_20/29.h5


### Transform data with models (MLP baselines)

In [21]:
# Write, for every fold and phenotype, the activations of the truncated MLP
# baseline model together with the matching labels. Any `mode` other than
# "TEST" is treated as the training case.
if mode == "TEST":
    path_to_new_files = "last_shared_layer_transformations/%s/MLP_baselines/"%SPECIFIC_FOLDER
else:
    path_to_new_files = "last_shared_layer_transformations_TRAIN/%s/MLP_baselines/"%SPECIFIC_FOLDER

# Does not depend on the fold or phenotype, so build it once.
MLP_final_path = path_to_models + "MLP_baselines/" + split_pca_dataset + "/"

for fold_idx in range(25,30):

    with h5py.File(path_to_split_data + "/" + str(fold_idx) + ".h5", 'r') as hf:
        X_train = hf["X_train_transformed"][:,:num_components].astype(np.float64)
        X_valid = hf["X_valid_transformed"][:,:num_components].astype(np.float64)
        labels_train = hf["y_train"][:]
        labels_valid = hf["y_valid"][:]
        labels_names = hf["labels_names"][:]

    for phenotype in ["ABETA_IHC", "TAU_IHC", "CERAD", "BRAAK", "PLAQUES", "TANGLES"]:

        # Name of the hyperparameter setting chosen for this fold and phenotype.
        hy_name = MLP_BASELINES_FINAL_MODELS[fold_idx][phenotype]

        # The trailing 4 makes the truncated model end at model.layers[3];
        # 200 is the number embedded in the checkpoint file name.
        model = get_model_layers(MLP_final_path + "%s/%s/%i/%i.hdf5"%(hy_name, phenotype, fold_idx, 200), 4)

        output_dir = path_to_new_files+ hy_name + "/" + phenotype
        output_file = path_to_new_files + "%s/%s/%i.h5"%(hy_name, phenotype, fold_idx)

        print(output_file)
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        # Run the model before opening the output file so that a prediction
        # failure does not leave a truncated, half-written HDF5 file behind.
        if mode=="TEST":
            labels_to_save = labels_valid
            outputs_to_save = model.predict(X_valid)
        else:
            labels_to_save = labels_train
            outputs_to_save = model.predict(X_train)

        with h5py.File(output_file, 'w') as hf:
            hf.create_dataset("labels", data=labels_to_save)
            hf.create_dataset("outputs", data=outputs_to_save)
            hf.create_dataset("labels_names", data=labels_names)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.001000_0.001000_[1, 1]_0.010000_20/ABETA_IHC/25.h5
last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.010000_20/TAU_IHC/25.h5
last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.100000_20/CERAD/25.h5
last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.010000_20/BRAAK/25.h5
last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.010000_20/PLAQUES/25.h5
last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.100000_20/TANGLES/25.h5
last_shared_layer_transformations/origGE/MLP_baselines/200_relu_[500, 100]_[50, 10]_0.100000_0.000010_0.001000_[1, 1]_0.010000_20/ABETA_IH

## Unsupervised Methods

In [22]:
# Baseline representations that use no labels: for every fold, fit KMeans and
# PCA on the training split and write the transformed data of the split
# selected by `mode`. Any `mode` other than "TRAIN" is treated as the test case.
if mode =="TRAIN":
    path_to_new_files = "last_shared_layer_transformations_TRAIN/%s/unsupervised_methods/"%SPECIFIC_FOLDER
else:
    path_to_new_files = "last_shared_layer_transformations/%s/unsupervised_methods/"%SPECIFIC_FOLDER

# Size of the reduced representation (number of clusters / principal components).
num_dims = 100

# KMeans initialisation (and PCA, if a randomized solver is selected) is
# stochastic; a fixed seed makes the written files reproducible across runs.
unsupervised_random_state = 0

for fold_idx in range(25,30):

    with h5py.File(path_to_split_data + "/%i.h5"%fold_idx, 'r') as hf:
        if mode == "TRAIN":
            X = hf["X_train_transformed"][:,:num_components].astype(np.float64)
            labels = hf["y_train"][:]
        else:
            X = hf["X_valid_transformed"][:,:num_components].astype(np.float64)
            labels = hf["y_valid"][:]

        # The transformations are always fitted on the training split.
        X_train =  hf["X_train_transformed"][:,:num_components].astype(np.float64)
        # Not used further in this cell; kept because later cells may read it.
        gene_symbols = hf["gene_symbols"][:]
        labels_names = hf["labels_names"][:]

    for transformation in ["KMeans", "PCA"]:

        output_dir = path_to_new_files + transformation + "/"
        output_file = path_to_new_files + "%s/%i.h5"%(transformation, fold_idx)

        print(output_file)
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        if transformation == "KMeans":
            # Cluster the columns (features) of the training data, then average
            # the columns of X within each cluster: one output value per cluster.
            kmeans = KMeans(n_clusters=num_dims, random_state=unsupervised_random_state).fit(X_train.T)
            X_transformed = kmeans_centroids_for_test(X, kmeans.labels_).T

        elif transformation == "PCA":
            pca = PCA(n_components=num_dims, random_state=unsupervised_random_state)
            pca.fit(X_train)
            # transform() already returns at most num_dims columns.
            X_transformed = pca.transform(X)

        print(labels.shape, X_transformed.shape)
        with h5py.File(output_file, 'w') as hf:
            hf.create_dataset("labels", data=labels)
            hf.create_dataset("outputs", data=X_transformed)
            hf.create_dataset("labels_names", data=labels_names)

last_shared_layer_transformations/origGE/unsupervised_methods/KMeans/25.h5
(339, 11) (339, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/PCA/25.h5
(339, 11) (339, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/KMeans/26.h5
(356, 11) (356, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/PCA/26.h5
(356, 11) (356, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/KMeans/27.h5
(358, 11) (358, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/PCA/27.h5
(358, 11) (358, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/KMeans/28.h5
(345, 11) (345, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/PCA/28.h5
(345, 11) (345, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/KMeans/29.h5
(360, 11) (360, 100)
last_shared_layer_transformations/origGE/unsupervised_methods/PCA/29.h5
(360, 11) (360, 100)
