In [1]:
import gc
import numpy as np
import pandas as pd
from keras import backend as K
from keras.callbacks import CSVLogger, ModelCheckpoint, Callback
from sklearn.model_selection import ParameterGrid
import datetime
import os
import numpy as np
import h5py

from models import MDAD_model, single_MLP_model, single_linear_model
from experiment_helpers import load_data_for_fold, save_MTL_predictions

# Expose only CUDA device "2" to this process. This is set before the backend
# is asked for its devices on the next line so that the restriction applies.
os.environ["CUDA_VISIBLE_DEVICES"]="2"
# Last expression of the cell: displays the GPU devices the Keras backend sees.
# NOTE(review): `_get_available_gpus` is a private helper of the TensorFlow
# backend module — presumably only present in older Keras releases; confirm
# before upgrading Keras.
K.tensorflow_backend._get_available_gpus()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

In [2]:
# Hyperparameter search space: each key maps to the list of candidate values
# that ParameterGrid combines into individual settings.
hyperparams = dict(
    epochs=[200],
    nonlinearity=["relu"],
    hidden_sizes_shared=[[500, 100]],
    hidden_sizes_separate=[[50, 10]],
    dropout=[.1],
    k_reg=[.00001, .001],
    # A single-value alternative for the learning rate was [.001].
    learning_rate=[.0001, .001],
    loss_weights=[[1, 1]],
    grad_clip_norm=[.01, .1],
    batch_size=[20],
)

# Materialise the grid so later cells can index settings by position.
hy_dict_list = [setting for setting in ParameterGrid(hyperparams)]
len(hy_dict_list)

8

### MD-AD

In [7]:
# Train the multi-task MD-AD model for every fold and every hyperparameter
# setting in `hy_dict_list` (defined in the hyperparameter cell above).
# Per (fold, setting) this writes a per-epoch training log, the per-epoch
# validation predictions and, for the last folds only, model checkpoints.
specific_folder = "origGE"
path_to_results = "../../md-ad_public_repo_data/Modeling/%s/results/MTL/"%specific_folder
path_to_preds = "../../md-ad_public_repo_data/Modeling/%s/predictions/MTL/"%specific_folder
path_to_models = "../../md-ad_public_repo_data/Modeling/%s/models/MTL/"%specific_folder
phenotypes = ["CERAD", "PLAQUES", "ABETA_IHC", "BRAAK", "TANGLES", "TAU_IHC"]


# https://stackoverflow.com/questions/36895627/python-keras-creating-a-callback-with-one-prediction-for-each-epoch
class prediction_history(Callback):
    """Keras callback that stores the model's predictions after every epoch.

    Parameters
    ----------
    X_valid : inputs passed to ``model.predict`` at the end of each epoch.

    Attributes
    ----------
    predhis : list with one ``predict`` result per completed epoch, in order.
    """
    def __init__(self, X_valid):
        super(prediction_history, self).__init__()
        self.X_valid = X_valid
        self.predhis = []

    def on_epoch_end(self, epoch, logs=None):
        # `self.model` is attached by Keras when training starts, so the
        # callback no longer depends on a global `model` variable.
        self.predhis.append(self.model.predict(self.X_valid))


for data_form in ['ACT_MSBBRNA_ROSMAP_PCASplit']:

    print(data_form)

    for fold_idx in range(30):

        print("FOLD:",fold_idx)

        # NOTE(review): the fold loader is not given `data_form`; presumably it
        # always loads the data set named above — confirm in experiment_helpers.
        X_train, X_valid, y_train, y_valid = load_data_for_fold(fold_idx)

        # One target per phenotype, keyed "<PHENOTYPE>_out".
        # NOTE(review): presumably these match the model's output layer names — confirm in models.py.
        y_train_dict = {"%s_out"%p: y_train[p] for p in phenotypes}
        y_valid_dict = {"%s_out"%p: y_valid[p] for p in phenotypes}

        for hy_iteration, hy_dict in enumerate(hy_dict_list):

            print(datetime.datetime.now())

            print("HYPERPARAMETER ITERATION: %d"%hy_iteration)

            # Directory name that encodes the full hyperparameter setting.
            title = "%d_%s_%s_%s_%f_%f_%f_%s_%f_%d"%(hy_dict["epochs"], hy_dict["nonlinearity"],
                            str(hy_dict["hidden_sizes_shared"]), str(hy_dict["hidden_sizes_separate"]),
                            hy_dict["dropout"], hy_dict["k_reg"], hy_dict["learning_rate"],
                            str(hy_dict["loss_weights"]),  hy_dict["grad_clip_norm"], hy_dict["batch_size"])
            print(title)

            res_dest = path_to_results + "/" + data_form + "/" + title + "/"
            preds_dest = path_to_preds + "/" + data_form + "/" + title + "/"
            modelpath =  path_to_models  + data_form + "/" + title + "/" + str(fold_idx) + "/"

            for path in [res_dest, preds_dest, modelpath]:
                # exist_ok avoids the check-then-create race of isdir + makedirs.
                os.makedirs(path, exist_ok=True)

            model = MDAD_model(X_train, hy_dict)

            predictions = prediction_history(X_valid)

            # Every run logs per-epoch metrics and records validation predictions.
            callbacks = [CSVLogger(res_dest+'%d.log'%fold_idx), predictions]
            if fold_idx > 24:
                # Only folds after index 24 also save the model (period=100).
                callbacks.append(
                    ModelCheckpoint(modelpath+"{epoch:02d}.hdf5", monitor='val_loss', verbose=0,
                                    save_best_only=False, save_weights_only=False, mode='auto', period=100))

            # `History` is kept so the last run's history stays inspectable
            # after the cell finishes.
            History = model.fit(x={'main_input': X_train}, y=y_train_dict,
                                validation_data=({'main_input': X_valid}, y_valid_dict),
                                verbose=0, epochs=hy_dict["epochs"], batch_size=hy_dict["batch_size"],
                                callbacks=callbacks)

            save_MTL_predictions(predictions, preds_dest, fold_idx, y_valid, phenotypes)

            # Drop the backend graph/session and collect garbage between runs.
            K.clear_session()
            gc.collect()

### MLP and Linear Baselines

In [10]:
# Train single-phenotype baselines (linear and MLP) for every fold,
# hyperparameter setting and phenotype. Per run this writes a per-epoch
# training log, per-epoch validation predictions (HDF5) and, for the last
# folds only, model checkpoints.
#
# NOTE: this cell relies on `hy_dict_list` (hyperparameter cell) and
# `specific_folder` (MD-AD cell) having been defined by earlier cells.
num_components = 500  # not read anywhere in this cell; kept in case later cells use it
phenotypes = ["CERAD", "PLAQUES", "ABETA_IHC", "BRAAK", "TANGLES", "TAU_IHC"]
data_form = 'ACT_MSBBRNA_ROSMAP_PCASplit'


# https://stackoverflow.com/questions/36895627/python-keras-creating-a-callback-with-one-prediction-for-each-epoch
class prediction_history(Callback):
    """Keras callback that stores the model's predictions after every epoch.

    Parameters
    ----------
    X_valid : inputs passed to ``model.predict`` at the end of each epoch.

    Attributes
    ----------
    predhis : list with one ``predict`` result per completed epoch, in order.
    """
    def __init__(self, X_valid):
        super(prediction_history, self).__init__()
        self.X_valid = X_valid
        self.predhis = []

    def on_epoch_end(self, epoch, logs=None):
        # `self.model` is attached by Keras when training starts, so the
        # callback no longer depends on a global `model` variable.
        self.predhis.append(self.model.predict(self.X_valid))


for baseline_method in ["Linear_baselines", "MLP_baselines"]:
    print(baseline_method)

    path_to_results = "../../md-ad_public_repo_data/Modeling/%s/results/%s/"%(specific_folder, baseline_method)
    path_to_preds = "../../md-ad_public_repo_data/Modeling/%s/predictions/%s/"%(specific_folder, baseline_method)
    path_to_models = "../../md-ad_public_repo_data/Modeling/%s/models/%s/"%(specific_folder, baseline_method)

    for fold_idx in range(30):

        print("FOLD:",fold_idx)
        X_train, X_valid, y_train, y_valid = load_data_for_fold(fold_idx)

        for hy_iteration, hy_dict in enumerate(hy_dict_list):

            print(datetime.datetime.now())

            print("HYPERPARAMETER ITERATION: %d"%hy_iteration)

            # Directory name that encodes the full hyperparameter setting.
            title = "%d_%s_%s_%s_%f_%f_%f_%s_%f_%d"%(hy_dict["epochs"], hy_dict["nonlinearity"],
                            str(hy_dict["hidden_sizes_shared"]), str(hy_dict["hidden_sizes_separate"]),
                            hy_dict["dropout"], hy_dict["k_reg"], hy_dict["learning_rate"],
                            str(hy_dict["loss_weights"]),  hy_dict["grad_clip_norm"], hy_dict["batch_size"])
            print(title)

            for phenotype in phenotypes:

                print(phenotype)
                print(datetime.datetime.now())

                # Logs and checkpoints live in a per-phenotype directory; the
                # predictions file is shared by all phenotypes of a fold.
                res_dest = path_to_results + "/" + data_form + "/" + title + "/" + phenotype + "/"
                preds_dest = path_to_preds + "/" + data_form + "/" + title + "/"
                modelpath =  path_to_models  + data_form + "/" + title + "/" + phenotype + "/" + str(fold_idx) + "/"

                for path in [res_dest, preds_dest, modelpath]:
                    # exist_ok avoids the check-then-create race of isdir + makedirs.
                    os.makedirs(path, exist_ok=True)

                if baseline_method == "MLP_baselines":
                    model = single_MLP_model(X_train, hy_dict)
                else:
                    model = single_linear_model(X_train, hy_dict)

                print(X_train.shape, X_valid.shape)
                print(y_train[phenotype].shape, y_valid[phenotype].shape)

                predictions = prediction_history(X_valid)

                # Every run logs per-epoch metrics and records validation
                # predictions. (The original also built an unused CSVLogger
                # pointing at res_dest/<phenotype>/, a directory that is never
                # created; that dead object has been removed.)
                callbacks = [CSVLogger(res_dest+'%d.log'%fold_idx), predictions]
                if fold_idx > 24:
                    # Only folds after index 24 also save the model (period=100).
                    callbacks.append(
                        ModelCheckpoint(modelpath+"{epoch:02d}.hdf5", monitor='val_loss', verbose=0,
                                        save_best_only=False, save_weights_only=False, mode='auto', period=100))

                # `History` is kept so the last run's history stays inspectable
                # after the cell finishes.
                History = model.fit(x={'main_input': X_train}, y=y_train[phenotype],
                                    validation_data=({'main_input': X_valid}, y_valid[phenotype]),
                                    verbose=0, epochs=hy_dict["epochs"], batch_size=hy_dict["batch_size"],
                                    callbacks=callbacks)

                # SAVE PREDICTIONS
                # Opened in append mode so each phenotype adds its datasets to
                # the same per-fold file; existing entries are replaced.
                with h5py.File(preds_dest + "%d.h5"%fold_idx, 'a') as hf:
                    # one group per epoch, one dataset per phenotype inside it
                    for i, ep in enumerate(predictions.predhis):
                        pred_key = "/%s/%s"%(str(i),phenotype)
                        if pred_key in hf:
                            del hf[pred_key]
                        hf.create_dataset(pred_key, data=ep, dtype=np.float32)

                    true_key = "/y_true/"+phenotype
                    if true_key in hf:
                        del hf[true_key]
                    hf.create_dataset(true_key, data=y_valid[phenotype], dtype=np.float32)

                # Drop the backend graph/session and collect garbage between runs.
                K.clear_session()
                gc.collect()