In [1]:
#!/usr/bin/env python

'''
DESCRIPTION
-----------
    Training the proposed neural network
    
USAGE
-----
    [PROJECT_PATH]/$ python notebooks/04-pg-model-training.py -design                   {DESIGN NAME}
                                                              -first_hidden_layer_pbk   {BIOLOGICAL KNOWLEDGE}
                                                              -first_hidden_layer_dense {NUMBER of DENSE LAYER}
                                                              -second_hidden_layer      {SECOND HIDDEN LAYER}
                                                              -optimizer                {OPTIMIZER}
                                                              -ds                       {DATASET}
                                                              -split                    {DATASET SPLIT OPERATION}
                                                              -filtering_gene_space     {GENE SPACE FILTERING}
                                                              
RETURN
------
    {MODEL}.h5 : h5 file
        Trained model
    {MODEL-RESULT}.csv : csv file
        The model result with probabilities, prediction label and ground truth

EXPORTED FILE(s) LOCATION
-------------------------
    ./models/NN/{EXPERIMENT}/{MODEL}.h5
    ./models/NN/{EXPERIMENT}/{MODEL-RESULT}.csv
'''

# to get reproducible results
# NumPy's global RNG is seeded first; np.random.choice further down in this
# notebook draws from that global state.
from numpy.random import seed
seed(91)
# TensorFlow keeps its own global seed, so it is set separately, right after
# the import.
import tensorflow as tf
tf.random.set_seed(91)

# importing default libraries
import os, sys
# Resolve the project root only once per kernel session. The root is derived
# from the current working directory, and this cell then moves the working
# directory to that root; recomputing it on a re-run would therefore climb one
# directory higher each time and break every relative path used below.
if 'ROOT_DIR' not in globals():
    # assumes the kernel starts in a direct sub-folder of the project
    # (e.g. notebooks/) -- TODO confirm for other launch locations
    ROOT_DIR = os.path.dirname(os.getcwd())
os.chdir(ROOT_DIR)
# expose the project packages (e.g. `scripts`) without stacking duplicate
# sys.path entries when the cell is executed again
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

In [2]:

# importing scripts in scripts folder
from scripts import config as src

import numpy as np
import pandas as pd
import datetime as dt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold, train_test_split, LeaveOneGroupOut, LeavePGroupsOut
from sklearn.cluster import KMeans
import glob
from numba import cuda
from tensorflow import keras
import warnings
warnings.filterwarnings('ignore')
# ---------------------------------------------------------------------------
# DEFAULT VALUES
# ---------------------------------------------------------------------------
# leave-P-groups-out experiments: candidate values for the number of groups
# left out, and how many splits are sampled for each of them
n_p_leave_out = [2, 4, 6, 8]
p_out_iteration = 20

# for model design
epochs_default = 100
batch_default = 10
val_split = 0.1

# dataset split
rand_state = 91

# train_test_split
test_size = 0.3


**** scripts/config.py IMPORTED!!!
**** PROJECT FOLDER ,  /home/pgundogdu/projects/signalization_prior_knowledge_based_nn


In [3]:
# Load the processed dataset (a pickled DataFrame shipped with the project).
# Note: unpickling runs arbitrary code, so only load trusted files this way.
df = pd.read_pickle('./data/processed/exper_mouse/mouse_learning_ss.pck')

cell_type_names = df.groupby('cell_type').size().index.values
print('Dataset cell type         , ', cell_type_names)
print('Dataset shape             , ', df.shape)

# Every column except the last one is a feature; the last column carries the
# label (presumably `cell_type` -- verify against the pickle schema).
X = df.iloc[:, :-1].values
y = df.iloc[:, -1:].values        # sliced with -1: so it stays two-dimensional
groups = df.iloc[:, -1].values    # same column as a flat array, used as split groups

# One-hot encoding of the label column
ohe = OneHotEncoder()
y_ohe = ohe.fit_transform(y).toarray()

Dataset cell type         ,  ['16cell' '4cell' '8cell' 'BMDC' 'BXC' 'C57twocell' 'ES' 'PrE'
 'early2cell' 'earlyblast' 'fibroblast' 'late2cell' 'lateblast' 'mid2cell'
 'midblast' 'zy']
Dataset shape             ,  (402, 9438)


In [9]:
# Name fragments of the network designs; each one is embedded in a glob
# pattern to locate the matching trained model file.
list_designs = [
    '1_layer_dense100_',
    '1_layer_metabolic_signaling_',
    '1_layer_metabolic_signaling+100dense_',
    '1_layer_ppi100_',
    '1_layer_ppitf100_',
    '1_layer_signaling_',
    '1_layer_signaling+100dense_',
    '2_layer_metabolic_signaling_',
    '2_layer_signaling_',
]

In [26]:
# Sanity check: re-draw the leave-P-groups-out splits, load the model file that
# belongs to each (design, split index) pair and show the first training indices.
model_folder_path = './models/exper_mouse/LeavePGroupsOut/cell_out_'
split = 'LeavePGroupsOut'
seed(91)  # reset NumPy's global RNG so the draw below is repeatable

for i_p_out in n_p_leave_out[1:2]:
    folder_co = model_folder_path + str(i_p_out)
    print(folder_co)

    df_clustering_per_cell_out = pd.DataFrame()

    # The split enumeration depends only on (X, y, groups, i_p_out) and uses no
    # randomness, so it is materialised once here instead of being rebuilt for
    # every design and again for every sampled index.
    lpgo = LeavePGroupsOut(n_groups=i_p_out)
    all_splits = list(lpgo.split(X, y, groups))

    for i_design in list_designs[:2]:
        print(i_design)

        # draw p_out_iteration split indices (np.random.choice samples with
        # replacement by default, so the same split can appear more than once)
        ids = np.random.choice(len(all_splits), p_out_iteration).tolist()
        lpgo_split_random_selection = [all_splits[idx] for idx in ids]

        for i, indexes in enumerate(lpgo_split_random_selection):
            # locate the trained model for this design / experiment index
            pattern = os.path.join(folder_co, '*'+i_design+'*_'+str(i)+'_*')
            matches = glob.glob(pattern)
            if not matches:
                # fail with the offending pattern instead of a bare IndexError
                raise FileNotFoundError('No trained model matches ' + pattern)
            model_path = matches[0]
            _, model = src.loading_model(model_path, -1)

            train_index = indexes[0]
            print(train_index[:50])
            

./models/exper_mouse/LeavePGroupsOut/cell_out_4
1_layer_dense100_
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_0_SGD.h5
[18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
 66 67]
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_1_SGD.h5
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 25 27 28 32 33 34 35 36 37 38 40 42 43 44 45 48 50 52 53 54 55 56 57 59
 60 61]
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_2_SGD.h5
[18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
 66 67]
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_3_SGD.h5
[ 0  1  2  3  4 

In [27]:
# For every design and every sampled leave-P-groups-out split: load the trained
# model, fit KMeans on the model output of the training cells, assign the
# held-out cells to clusters, and export predictions plus ground truth as CSV.
model_folder_path = './models/exper_mouse/LeavePGroupsOut/cell_out_'
split = 'LeavePGroupsOut'
seed(91)  # reset NumPy's global RNG so the split draw below is repeatable

for i_p_out in n_p_leave_out[1:2]:
    folder_co = model_folder_path + str(i_p_out)
    print(folder_co)

    # The split enumeration depends only on (X, y, groups, i_p_out) and uses no
    # randomness, so it is materialised once here instead of being rebuilt for
    # every design and again for every sampled index.
    lpgo = LeavePGroupsOut(n_groups=i_p_out)
    all_splits = list(lpgo.split(X, y, groups))

    # per-split result frames; concatenated once after the loops
    results_per_cell_out = []

    for i_design in list_designs:
        print(i_design)

        # NOTE(review): KMeans below is created without random_state, so it
        # presumably draws from the same global NumPy RNG as this choice();
        # the ids of every design after the first then depend on the KMeans
        # runs before it. Confirm these ids match the splits the stored models
        # were trained on.
        ids = np.random.choice(len(all_splits), p_out_iteration).tolist()
        lpgo_split_random_selection = [all_splits[idx] for idx in ids]

        for i, (train_index, test_index) in enumerate(lpgo_split_random_selection):
            # locate the trained model for this design / experiment index
            pattern = os.path.join(folder_co, '*'+i_design+'*_'+str(i)+'_*')
            matches = glob.glob(pattern)
            if not matches:
                # fail with the offending pattern instead of a bare IndexError
                raise FileNotFoundError('No trained model matches ' + pattern)
            model_path = matches[0]
            # presumably -1 selects the last layer as model output -- TODO confirm
            _, model = src.loading_model(model_path, -1)

            df_nn = df.iloc[train_index, :]
            # explicit copy: columns are added below and must never write
            # through to (or warn about) the original frame
            df_clustering = df.iloc[test_index, :].copy()

            # training features: everything except the trailing label column
            X_nn = df_nn.iloc[:, :-1].values

            # integer-encode the held-out cell types; the codes only cover the
            # categories present in this test subset
            df_clustering['cell_type'] = df_clustering['cell_type'].astype('category')
            df_clustering['cell_type_cat'] = df_clustering['cell_type'].cat.codes

            # drop the two trailing columns (original label + its code)
            X_clustering = df_clustering.iloc[:, :-2].values
            y_clustering = df_clustering.iloc[:, -1].values

            nn_last_layer_training = model.predict(X_nn)
            nn_last_layer_testing = model.predict(X_clustering)

            # one cluster per left-out group, fitted on the training outputs
            kmeans = KMeans(n_clusters=i_p_out).fit(nn_last_layer_training)
            y_pred_clustering = kmeans.predict(nn_last_layer_testing)

            df_result = pd.DataFrame(y_pred_clustering, columns=['prediction'])
            df_result['ground_truth'] = y_clustering
            df_result['cell_out'] = 'cell_out_'+str(i_p_out)
            df_result['experiment_index'] = i
            df_result['design'] = i_design
            results_per_cell_out.append(df_result)

    # single concatenation instead of growing a DataFrame inside the loop
    if results_per_cell_out:
        df_clustering_per_cell_out = pd.concat(results_per_cell_out, axis=0)
    else:
        df_clustering_per_cell_out = pd.DataFrame()
    df_clustering_per_cell_out.to_csv('./models/exper_mouse/LeavePGroupsOut/clustering_cell_out_'+str(i_p_out)+'.csv', index=False)
            

./models/exper_mouse/LeavePGroupsOut/cell_out_4
1_layer_dense100_
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_0_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_1_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_2_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_3_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_4_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_5_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_6_SGD.h5
Loaded model!! ./models/exper_mouse/LeavePGroupsOut/cell_out_4/design_1_layer_dense100_mouse_learning_ss_7_SGD.h5
Loaded model!! ./model