In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import copy
import os
import random
random.seed(555)  

# pathways process and network generation

In [3]:
#Read the pathway feature generated by GCN
pathway_feature = pd.read_csv('./data/Pathways_Feature.csv',index_col =0)

#pathway data loading
data = pd.read_csv('./data/ReactomePathwaysRelation_new_download.txt',sep = '\t',header=None)

data.columns = ['child','parent']
human_hierarchy = data[data['child'].str.contains('HSA')]  

# construct pathway graph
net = nx.from_pandas_edgelist(human_hierarchy, 'child', 'parent', create_using=nx.DiGraph()) 
net.name = 'reactome'

roots = [n for n, d in net.in_degree() if d == 0]  
root_node = 'root'
edges = [(root_node, n) for n in roots] 
net.add_edges_from(edges)  

In [4]:
#Get the node of the current layer
def get_nodes_at_level(net, distance):
    nodes = set(nx.ego_graph(net, 'root', radius=distance))
    if distance >= 1.:
        nodes -= set(nx.ego_graph(net, 'root', radius=distance - 1))
    return list(nodes)

def get_nodes(net,num):
    net_nodes = [] 
    
    for i in range(1,num+1):
        net_nodes.append(get_nodes_at_level(net,i))
        
    return net_nodes

#  define network size. for example,this network is five layer
net_num = 5
net_nodes = get_nodes(net,net_num)

In [5]:
#  Read the connection of a single node to the next layer
def add_node(net,net_nodes):
        
    for i in range(len(net_nodes)-2,-1,-1):
        
        data_temp = copy.deepcopy(net_nodes[i])
        
        for n in net_nodes[i]:
            nexts = net.successors(n)         
            temp = [ nex  for nex in nexts ] 
            if len(temp)==0:
                data_temp.remove(n)  # If the node of the current layer has no successor node, remove the node
            elif len(set(temp).intersection(set(net_nodes[i+1])))==0:   #if the subsequent node of the node of the current layer is not on the next layer, delete the node
                data_temp.remove(n)
            else:
                continue
        net_nodes[i] = data_temp
    return net_nodes

net_nodes  =  add_node(net,net_nodes)

In [6]:

def get_note_relation(net_nodes):
    node_mat = []
   
    for i in range(len(net_nodes)-1):
        dicts = {}
        for n in net_nodes[i]:
            nexts = net.successors(n)  
            x = [ nex   for nex in nexts if nex in net_nodes[i+1] ]
            dicts[n] = x

        mat = np.zeros((len(net_nodes[i]), len(net_nodes[i+1]))) 
        for p, gs in dicts.items():     
            g_inds = [net_nodes[i+1].index(g) for g in gs]
            p_ind = net_nodes[i].index(p)
            mat[p_ind, g_inds] = 1

        df = pd.DataFrame(mat, index=net_nodes[i], columns=net_nodes[i+1])
        node_mat.append(df.T)
    return node_mat

Get_Node_relation = get_note_relation(net_nodes)

In [7]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

In [8]:
#Link prediction only for adjacent pathways
for k in range(0,4):
    for col in Get_Node_relation[k].columns:
        if col in list(pathway_feature.index):
            for row in Get_Node_relation[k].index:
                if row in list(pathway_feature.index):
                    pos_c = pathway_feature.loc[col].values
                    pos_r = pathway_feature.loc[row].values
                    score = sigmoid(pos_c.dot(pos_r))

                    if score >0.9:  #The similarity is greater than 0.9
                        Get_Node_relation[k][col][row] = 1

In [9]:
np.array(Get_Node_relation[3].values.nonzero()).shape

(2, 1745)

In [10]:
# Read gene-pathway annotation relationships
import re
def load_data_dict(filename):

    data_dict_list = []
    dict = {}
    with open( filename) as gmt:
        data_list = gmt.readlines()

        # print data_list[0]
        for row in data_list:
            genes = row.split('\t')
            
            genes = [ i.replace('\n','') for i in genes]
            dict[genes[1]] = genes[3:]

    return dict

gene_data = load_data_dict('./data/ReactomePathways.gmt')


# Raw data preprocessing and multi omics data intergration

In [11]:
# loading snv data
snv_data = pd.read_csv("./data/TCGA-LUAD.varscan2_snv.csv",index_col = 0)


#loading cnv data
cnv_data = pd.read_csv("./data/TCGA-LUAD_cnv.csv",index_col = 0)


#loading label
response  = pd.read_csv('./data/response_paper.csv',index_col=0)

#Disrupted data set
response = response.sample(frac=1)

snv_data = snv_data.sample(frac=1)

cnv_data = cnv_data.sample(frac=1)

In [12]:
#split copy number variation data
import copy
cnv_amp = copy.deepcopy(cnv_data)

#cnv_amp
cnv_amp[cnv_amp <= 0] = 0.
cnv_amp[cnv_amp > 0 ] = 1.

#cnv_del
cnv_data[cnv_data >= 0] = 0.
cnv_data[cnv_data < 0 ] = 1.
cnv_del = cnv_data


In [13]:
print(np.array(cnv_amp.values.nonzero()).shape)
print(np.array(cnv_data.values.nonzero()).shape)
print(np.array(snv_data.values.nonzero()).shape)

(2, 453943)
(2, 317421)
(2, 162637)


In [14]:
#feature selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
#snv
tol_snv = snv_data.join(response,how='inner')
model = SelectKBest(chi2, k=3000)
x_data1 = model.fit_transform(tol_snv.values[:,0:-1],tol_snv.values[:,-1])
fea = model.get_support()
snv_data = snv_data.loc[:,fea]

#cnv_amp
tol_amp = cnv_amp.join(response,how='inner')
model = SelectKBest(chi2, k=3000)
x_data1 = model.fit_transform(tol_amp.values[:,0:-1],tol_amp.values[:,-1])
fea = model.get_support()
cnv_amp = cnv_amp.loc[:,fea]

#cnv_del
tol_del = cnv_del.join(response,how='inner')
model = SelectKBest(chi2, k=3000)
x_data1 = model.fit_transform(tol_del.values[:,0:-1],tol_del.values[:,-1])
fea = model.get_support()
cnv_del = cnv_del.loc[:,fea]

In [15]:
print(snv_data.shape)
print(cnv_amp.shape)
print(cnv_del.shape)

(561, 3000)
(514, 3000)
(514, 3000)


In [16]:
#loading Load pre-training data
response = pd.read_csv('./data/response.csv',index_col = 0)
snv_data = snv_data.loc[response.index]
cnv_amp = cnv_amp.loc[response.index]
cnv_del = cnv_del.loc[response.index]

In [17]:
x_list = []
y_list = []
rows_list = []
cols_list = []

data_type_list =['snv_data','cnv_amp','cnv_del']

for ind in [snv_data,cnv_amp,cnv_del]: 
    get_data = ind.join(response,how='inner')
    del get_data['response']
    
    row = get_data.index      
    col = get_data.columns     
    resp = response.loc[row]   
    
    x_list.append(ind)
    y_list.append(resp)
    rows_list.append(row)
    cols_list.append(col)

In [18]:
#Merge data set
def combine(x_list, y_list, rows_list, cols_list, data_type_list, combine_type, use_coding_genes_only=True):
    
    cols_list_set = [set(list(c)) for c in cols_list]  
    
    print('cols_list_set',len(cols_list_set))

    if combine_type == 'intersection':    
        cols = set.intersection(*cols_list_set)
    else:
        cols = set.union(*cols_list_set) 
    
    print('intersection_cols',len(cols))

    if use_coding_genes_only: #true
        coding_genes_df = pd.read_csv('./data/protein-coding_gene_with_coordinate_minimal.txt', sep='\t', header=None)
        coding_genes_df.columns = ['chr', 'start', 'end', 'name']
        coding_genes = set(coding_genes_df['name'].unique())     
        cols = cols.intersection(coding_genes)  
        print('protein-coding_genes',len(coding_genes))
   
    print('finally_cols',len(cols))
   
    all_cols = list(cols)

    all_cols_df = pd.DataFrame(index=all_cols) 

    df_list = []

    for x, y, r, c in zip(x_list, y_list, rows_list, cols_list):
        df = pd.DataFrame(x, columns=c, index=r)
        df = df.T.join(all_cols_df, how='right')  
        df = df.T
        df = df.fillna(0)
        df_list.append(df)

    all_data = pd.concat(df_list, keys=data_type_list, join='inner', axis=1, )  

   
    all_data = all_data.swaplevel(i=0, j=1, axis=1)


    order = all_data.columns.levels[0] 

    all_data = all_data.reindex(columns=order, level=0)  

    x = all_data

    reordering_df = pd.DataFrame(index=all_data.index)  
    y = reordering_df.join(y, how='left')   

    y = y.values   
    cols = all_data.columns   
    rows = all_data.index      
    print(
        'After combining, loaded data %d samples, %d variables, %d responses ' % (x.shape[0], x.shape[1], y.shape[0]))

    return x, y, rows, cols

In [19]:
x, y, rows, cols = combine(x_list, y_list, rows_list, cols_list, data_type_list, combine_type = 'union')

cols_list_set 3
intersection_cols 7722
protein-coding_genes 19045
finally_cols 7034
After combining, loaded data 505 samples, 21102 variables, 505 responses 


In [20]:
rows

Index(['TCGA-55-8206', 'TCGA-05-4382', 'TCGA-69-7761', 'TCGA-50-5944',
       'TCGA-44-A479', 'TCGA-55-8620', 'TCGA-91-A4BC', 'TCGA-78-7536',
       'TCGA-55-8087', 'TCGA-55-7907',
       ...
       'TCGA-71-6725', 'TCGA-44-8117', 'TCGA-91-8499', 'TCGA-73-4659',
       'TCGA-75-6212', 'TCGA-97-8176', 'TCGA-86-8280', 'TCGA-55-6543',
       'TCGA-50-6594', 'TCGA-38-4632'],
      dtype='object', length=505)

# Annotated relationships between genes and pathways

In [21]:
pathways  = list(gene_data.keys())  
pathway_union = list(set(Get_Node_relation[3].index).intersection(set(pathways)))

print(len(pathway_union))
Get_Node_relation[3] = Get_Node_relation[3].loc[pathway_union]

union_gene = list(cols.levels[0])
len(union_gene)

550


7034

In [22]:
pathways_gene = np.zeros((len(pathway_union), len(union_gene))) 

for p  in pathway_union:
    gs = gene_data[p]      
    g_inds = [union_gene.index(g) for g in gs if g in union_gene]  
    p_ind = pathway_union.index(p)
    pathways_gene[p_ind, g_inds] = 1
gene_pathway_df = pd.DataFrame(pathways_gene, index=pathway_union, columns=union_gene)


#Drop genes that are not in the pathway
gene_pathway_df = gene_pathway_df.loc[:, (gene_pathway_df != 0).any(axis=0)]

In [23]:
print(gene_pathway_df.shape)
gene_pathway_df.head()

(550, 1542)


Unnamed: 0,SMOX,DEFB106A,C1R,ST3GAL2,PDIA3,ESRP1,CTDNEP1,NET1,ACKR4,KIFC2,...,PRKAB1,SLC39A7,LAMTOR1,PLA2G4E,PRKAB2,NR1H3,KPNA7,LIN37,MYL12B,IDI1
R-HSA-2465910,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R-HSA-1483076,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R-HSA-5218920,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R-HSA-844456,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R-HSA-3656243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
Omics_data = x[list(gene_pathway_df.columns)] 
Omics_data.shape

(505, 4626)

In [25]:
cols = gene_pathway_df.columns  
mapp = gene_pathway_df.values          

In [26]:
mapp

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

# Model construction

In [27]:
from keras.layers import Dense, Dropout, Activation, BatchNormalization, multiply
from keras.regularizers import l2
from keras import Input
from keras.engine import Model
from keras import backend as K

Using TensorFlow backend.


In [28]:
import keras
import numpy as np
from keras import regularizers
from keras.engine import Layer
# from keras import initializations
from keras.initializers import glorot_uniform, Initializer
from keras.layers import activations, initializers, constraints
# our layer will take input shape (nb_samples, 1)
from keras.regularizers import Regularizer
import tensorflow as tf

In [36]:
class M_Nets(Layer):   
    def __init__(self, units, activation=None,
                 use_bias=True,
                 kernel_initializer='lecun_uniform',
                 bias_initializer='zeros',
                 W_regularizer=None,
                 b_regularizer=None,
                 **kwargs):
        self.units = units
        self.activation = activation
        self.activation_fn = activations.get(activation)
        self.use_bias = use_bias
        self.bias_initializer = initializers.get(bias_initializer)
        self.bias_regularize = regularizers.get(b_regularizer)
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.kernel_regularizer = regularizers.get(W_regularizer)
        
        super(M_Nets, self).__init__(**kwargs)


    def build(self, input_shape):  

        input_dimension = input_shape[1]
        self.kernel_shape = (input_dimension, self.units)  
        self.n_inputs_per_node = input_dimension / self.units

        rows = np.arange(input_dimension) 
        cols = np.arange(self.units)    
        cols = np.repeat(cols, self.n_inputs_per_node) 
        self.nonzero_ind = np.column_stack((rows, cols)) 

        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_dimension,),  
                                      initializer=self.kernel_initializer,
                                      regularizer=self.kernel_regularizer,
                                      trainable=True)

        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        trainable=True
                                        
                                       )
        else:
            self.bias = None

        super(M_Nets, self).build(input_shape)  

    def call(self, x, mask=None):
        
        n_features = x.shape[1]


        kernel = K.reshape(self.kernel, (1, n_features))
        mult = x * kernel
        mult = K.reshape(mult, (-1, int(self.n_inputs_per_node)))
        mult = K.sum(mult, axis=1)
        output = K.reshape(mult, (-1, self.units))

        if self.use_bias:
            output = K.bias_add(output, self.bias)
        if self.activation_fn is not None:
            output = self.activation_fn(output)
        return output

    def get_config(self):
        config = {
            'units': self.units,
            'activation': self.activation,
            'kernel_initializer' : self.kernel_initializer,
            'bias_initializer' : self.bias_initializer,
            'use_bias': self.use_bias
        }
        base_config = super(M_Nets, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)

In [37]:
class Nets(Layer):
    def __init__(self, units, mapp=None, nonzero_ind=None, kernel_initializer='glorot_uniform', W_regularizer=None,
                 activation='tanh', use_bias=True,bias_initializer='zeros', bias_regularizer=None,
                 bias_constraint=None,**kwargs):
        
        self.units = units
        self.activation = activation
        self.mapp = mapp
        self.nonzero_ind = nonzero_ind
        self.use_bias = use_bias
        
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.kernel_regularizer = regularizers.get(W_regularizer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activation_fn = activations.get(activation)
        super(Nets, self).__init__(**kwargs)

        
    def build(self, input_shape):
        
        input_dim = input_shape[1]
   

        if not self.mapp is None:
            self.mapp = self.mapp.astype(np.float32)

   
        if self.nonzero_ind is None:
            nonzero_ind = np.array(np.nonzero(self.mapp)).T
            self.nonzero_ind = nonzero_ind

        self.kernel_shape = (input_dim, self.units)
        

        nonzero_count = self.nonzero_ind.shape[0]  


        self.kernel_vector = self.add_weight(name='kernel_vector',
                                             shape=(nonzero_count,),
                                             initializer=self.kernel_initializer,
                                             regularizer=self.kernel_regularizer,
                                             trainable=True)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer
                                        )
        else:
            self.bias = None

        super(Nets, self).build(input_shape)  
      

    def call(self, inputs):
        
        
        temp_t = tf.scatter_nd(tf.constant(self.nonzero_ind, tf.int32), self.kernel_vector,
                           tf.constant(list(self.kernel_shape)))
    
        output = K.dot(inputs, temp_t)
        
    
        if self.use_bias:
            output = K.bias_add(output, self.bias)
            
        if self.activation_fn is not None:
            output = self.activation_fn(output)

        return output

    def get_config(self):
        config = {
            'units': self.units,
            'activation': self.activation,
            'use_bias': self.use_bias,
            'nonzero_ind': np.array(self.nonzero_ind),
          
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),

            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'W_regularizer': regularizers.serialize(self.kernel_regularizer),

        }
        base_config = super(Nets, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))



    def compute_output_shape(self, input_shape):
      
        return (input_shape[0], self.units)


In [38]:
def create_model(Omics_data):
    
    S_inputs = Input(shape=(Omics_data.shape[1],), dtype='float32',name= 'inputs')

    h_0 = M_Nets(cols.shape[0], activation='tanh',name='h_0')(S_inputs)
    drop_layer1 =keras.layers.Dropout(0.5)(h_0)

    h0 = Nets(gene_pathway_df.shape[0],mapp =gene_pathway_df.values.T, name = 'h0')(drop_layer1)
    drop0 = keras.layers.Dropout(0.1)(h0)

    h1 = Nets(Get_Node_relation[3].shape[1],mapp =Get_Node_relation[3].values ,name = 'h1')(drop0)
    drop_h1 = keras.layers.Dropout(0.1)(h1)


    h2 = Nets(Get_Node_relation[2].shape[1],mapp =Get_Node_relation[2].values, name = 'h2')(drop_h1)
    drop2 = keras.layers.Dropout(0.1)(h2)


    h3 = Nets(Get_Node_relation[1].shape[1],mapp =Get_Node_relation[1].values, name = 'h3')(drop2)
    drop3 = keras.layers.Dropout(0.1)(h3)

    
    h4 = Nets(Get_Node_relation[0].shape[1],mapp =Get_Node_relation[0].values, name = 'h4')(drop3)
    drop4 = keras.layers.Dropout(0.1)(h4)

    Output = keras.layers.Dense(1,activation='sigmoid')(drop4)

    model = Model(inputs=S_inputs, outputs=Output)

    model.summary()

    opt = keras.optimizers.Adam(lr = 0.001) #,decay=-0.0001 ,decay=0.001
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['acc'])
    return model

In [39]:
# 计算auc
# from lifelines.utils import concordance_index
import sklearn
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score
   

from sklearn.metrics import precision_recall_curve
def evaluates(y_test, y_pred):
    
    auc = metrics.roc_auc_score(y_test,y_pred)
    
    aupr = average_precision_score(y_test, y_pred)
    
    precision, recall, thresholds = precision_recall_curve(y_test, y_pred)    
    auprc  = metrics.auc(recall, precision)
    
    pp = [1 if index>=0.5  else 0 for index in  y_pred ]
    
    pre = metrics.precision_score(y_test,pp)
    
    f1 = metrics.f1_score(y_test,pp)
    
    rec = metrics.recall_score(y_test,pp)
    
    acc = metrics.accuracy_score(y_test,pp)
    
    print(confusion_matrix(y_test,pp))
    
    return pre,acc,rec,f1,auc,aupr,auprc


In [59]:
#梯度信息处理

In [83]:
from deepexplain.model_utils import get_layers, plot_history, get_coef_importance

def get_coef_importances(model, X_train, y_train, target=-1, feature_importance='deepexplain_grad*input'):

    coef_ = get_coef_importance(model, X_train, y_train, target, feature_importance, detailed=False)
    return coef_

In [34]:
from keras.callbacks import LearningRateScheduler
def myScheduler(epoch):

    if epoch % 150 == 0 and epoch != 0:

        lr = K.get_value(model.optimizer.lr)

        K.set_value(model.optimizer.lr, lr * 0.5)
    return K.get_value(model.optimizer.lr)
 
myReduce_lr = LearningRateScheduler(myScheduler)
 

In [86]:
# Omics_data.columns

In [88]:
#交叉验证

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5,shuffle=True,random_state=10590) 


kfscore = []

p = 0
x = Omics_data.values
y = y.reshape(-1)
x_0 =  0.68
x_1 =  1.48

for train_index, test_index in skf.split(x, y):

    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = create_model(x)

    history = model.fit(X_train,y_train,validation_data=[X_test,y_test],epochs=120,class_weight={0:x_0,1:x_1},batch_size = 32)  #epochs=50,class_weight={0:0.5,1:1.35},batch_size = 64


    y_pred = model.predict(X_test)
    kfscore.append(evaluates(y_test, y_pred))
    results = evaluates(y_test, y_pred)
    print("results :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(results[0],3),round(results[1],3),round(results[2],3),round(results[3],3),round(results[4],3),round(results[5],3),round(results[6],3)))

    # feature importance
    explain_x = X_train[np.where(y_train!=0)]
    explain_y = y_train[np.where(y_train!=0)]
    coef_ = get_coef_importance(model,explain_x, explain_y, target=-1,feature_importance='deepexplain_deeplift')
    cof_values = ['inputs','h_0','h0','h1','h2','h3','h4']
    name = [np.array(Omics_data.columns),gene_pathway_df.columns,Get_Node_relation[3].index,Get_Node_relation[2].index,Get_Node_relation[1].index,Get_Node_relation[0].index,Get_Node_relation[0].columns]
#     os.mkdir('./data/coef/h{}/'.format(p))
    for i in range(0,7):
        X = pd.DataFrame()
        X['name'] = name[i]
        X['values'] = coef_[0][cof_values[i]]
        X.to_csv('./data/coef/h{}/{}.csv'.format(p,cof_values[i]),index=False,encoding='UTF-8')
    p =p+1
#avrrage
kfscores = np.array(kfscore).sum(axis= 0)/5.0
print("average value :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(kfscores[0],3),round(kfscores[1],3),round(kfscores[2],3),round(kfscores[3],3),round(kfscores[4],3),round(kfscores[5],3),round(kfscores[6],3)))



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 4626)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              6168      
_________________________________________________________________
dropout_355 (Dropout)        (None, 1542)              0         
_________________________________________________________________
h0 (Nets)                    (None, 550)               4844      
_________________________________________________________________
dropout_356 (Dropout)        (None, 550)               0         
_________________________________________________________________
h1 (Nets)                    (None, 243)               1775      
_________________________________________________________________
dropout_357 (Dropout)        (None, 243)               0         
__________

Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120


Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[67  8]
 [ 7 19]]
[[67  8]
 [ 7 19]]
results :  pre = 0.704,acc = 0.851,rec = 0.731,f1 = 0.717,auc = 0.903,aupr = 0.857,auprc = 0.855
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 4626)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              6168      
_________________________________________________________________
dropout_361 (Dropout)        (None, 1542)              0         
_________________________________________________________________
h0 (Nets)                    (None, 550)               4844      
_________________________________________________________________
dropout_3

Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120


Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[68  6]
 [ 8 19]]
[[68  6]
 [ 8 19]]
results :  pre = 0.76,acc = 0.861,rec = 0.704,f1 = 0.731,auc = 0.847,aupr = 0.768,auprc = 0.764
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 4626)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              6168      
_________________________________________________________________
dropout_367 (Dropout)        (None, 1542)              0   

Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120


Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[56 18]
 [ 7 20]]
[[56 18]
 [ 7 20]]
results :  pre = 0.526,acc = 0.752,rec = 0.741,f1 = 0.615,auc = 0.856,aupr = 0.745,auprc = 0.742
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 4626)              0         
________________________________________________

Train on 404 samples, validate on 101 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120


Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[73  1]
 [14 13]]
[[73  1]
 [14 13]]
results :  pre = 0.929,acc = 0.851,rec = 0.481,f1 = 0.634,auc = 0.933,aupr = 0.828,auprc = 0.823
_________________________________________________________________
Layer (type

Train on 404 samples, validate on 101 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120


Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[68  6]
 [13 14]]
[[68  6]
 [13 14]]
results :  pre = 0.7,acc = 0.812,rec = 0.519,f1 = 0.596,auc = 0.892,aupr = 0.743,auprc = 0.737
average value :  pre = 0.724,acc = 0.826,rec = 0.635,f1 = 0.659,auc = 0.886,aup

In [None]:
#  average  five result
file_name = ['inputs.csv','h_0.csv','h0.csv','h1.csv','h2.csv','h3.csv','h4.csv']
os.mkdir('./data/coef/average')
for j in file_name:
    result0 =pd.DataFrame()
    for i in range(0,5):
         result  = pd.read_csv('./data/coef/h{}/{}'.format(i,j))
         result0 = result0.append(result)
    results = pd.DataFrame(result0.groupby('name')['values'].mean()).reset_index().sort_values('values',ascending=False)
    results.to_csv('./data/coef/average/{}'.format(j),index = False)
    print(results.shape)

# 单一组学

In [51]:
def create_single_model(omics_data):
    S_inputs = Input(shape=(omics_data.shape[1],), dtype='float32',name= 'inputs')
    #gene and pathway

    h0 = SparseTF(gene_pathway_df.shape[0],mapp =gene_pathway_df.values.T, name = 'h0')(S_inputs)
    drop0 = keras.layers.Dropout(0.5)(h0)

    h1 = SparseTF(Get_Node_relation[3].shape[1],mapp =Get_Node_relation[3].values ,name = 'h1')(drop0)
    drop_h1 = keras.layers.Dropout(0.1)(h1)


    h2 = SparseTF(Get_Node_relation[2].shape[1],mapp =Get_Node_relation[2].values, name = 'h2')(drop_h1)
    drop2 = keras.layers.Dropout(0.1)(h2)


    h3 = SparseTF(Get_Node_relation[1].shape[1],mapp =Get_Node_relation[1].values, name = 'h3')(drop2)
    drop3 = keras.layers.Dropout(0.1)(h3)

    
    h4 = SparseTF(Get_Node_relation[0].shape[1],mapp =Get_Node_relation[0].values, name = 'h4')(drop3)
    drop4 = keras.layers.Dropout(0.1)(h4)

    Output = keras.layers.Dense(1,activation='sigmoid')(drop4)

    model = Model(inputs=S_inputs, outputs=Output)

    model.summary()

    opt = keras.optimizers.Adam(lr = 0.001) #,decay=-0.0001
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['acc'])
    return model

In [52]:
#Single omics
#snv_data; cnv_amp; cnv_del
single_snv =Omics_data.swaplevel(i=0, j=1, axis=1)['snv_data']
single_snv = single_snv.join(response,how='inner')
single_snv.shape

single_x = single_snv.values[:,0:-1]
single_y =  single_snv.values[:,-1]

In [56]:
import random
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5,shuffle=True,random_state=10590)

# random.seed(5)
# total_score = []
# for i in range(0,5):
kfscore = []
for train_index, test_index in skf.split(single_x, single_y):
    X_train, X_test = single_x[train_index], single_x[test_index]
    y_train, y_test = single_y[train_index], single_y[test_index]

    single_model = create_model(single_x)

    single_model.fit(X_train,y_train,validation_data=[X_test,y_test],epochs=120,class_weight={0:0.68,1:1.48},batch_size = 32)  #epochs=50,class_weight={0:0.5,1:1.35},batch_size = 64
    y_pred = single_model.predict(X_test)
    kfscore.append(evaluates(y_test, y_pred))
    results = evaluates(y_test, y_pred)
    print("results :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(results[0],3),round(results[1],3),round(results[2],3),round(results[3],3),round(results[4],3),round(results[5],3),round(results[6],3)))


#平均值
kfscores = np.array(kfscore).sum(axis= 0)/5.0
print("average value :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(kfscores[0],3),round(kfscores[1],3),round(kfscores[2],3),round(kfscores[3],3),round(kfscores[4],3),round(kfscores[5],3),round(kfscores[6],3)))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1542)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              3084      
_________________________________________________________________
dropout_211 (Dropout)        (None, 1542)              0         
_________________________________________________________________
h0 (Nets)                    (None, 550)               4844      
_________________________________________________________________
dropout_212 (Dropout)        (None, 550)               0         
_________________________________________________________________
h1 (Nets)                    (None, 243)               1775      
_________________________________________________________________
dropout_213 (Dropout)        (None, 243)               0         
__________

Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120


Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[65 10]
 [ 5 21]]
[[65 10]
 [ 5 21]]
results :  pre = 0.677,acc = 0.851,rec = 0.808,f1 = 0.737,auc = 0.928,aupr = 0.826,auprc = 0.822
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1542)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              3084      
_________________________________________________________________
dropout_217 (Dropout)        (None, 1542)              0         
_________________________________________________________________
h0 (Nets)                    (None, 550)               4844      
_________________________________________________________________
dropout_2

Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120


Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[73  1]
 [18  9]]
[[73  1]
 [18  9]]
results :  pre = 0.9,acc = 0.812,rec = 0.333,f1 = 0.486,auc = 0.896,aupr = 0.791,auprc = 0.787
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1542)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              3084      
_________________________________________________________________
dropout_223 (Dropout)        (None, 1542)      

Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120


Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[65  9]
 [ 8 19]]
[[65  9]
 [ 8 19]]
results :  pre = 0.679,acc = 0.832,rec = 0.704,f1 = 0.691,auc = 0.896,aupr = 0.746,auprc = 0.747
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1542)              0         
________________________________________________

Train on 404 samples, validate on 101 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120


Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[66  8]
 [ 6 21]]
[[66  8]
 [ 6 21]]
results :  pre = 0.724,acc = 0.861,rec = 0.778,f1 = 0.75,auc = 0.875,aupr = 0.775,auprc = 0.772


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 1542)              0         
_________________________________________________________________
h_0 (M_Nets)                 (None, 1542)              3084      
_________________________________________________________________
dropout_235 (Dropout)        (None, 1542)              0         
_________________________________________________________________
h0 (Nets)                    (None, 550)               4844      
_________________________________________________________________
dropout_236 (Dropout)        (None, 550)               0         
_________________________________________________________________
h1 (Nets)                    (None, 243)               1775      
_________________________________________________________________
dropout_237 (Dropout)        (None, 243)               0         
__________

Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120
Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120


Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
[[70  4]
 [12 15]]
[[70  4]
 [12 15]]
results :  pre = 0.789,acc = 0.842,rec = 0.556,f1 = 0.652,auc = 0.896,aupr = 0.766,auprc = 0.764
average value :  pre = 0.754,acc = 0.84,rec = 0.636,f1 = 0.663,auc = 0.898,aupr = 0.781,auprc = 0.779


# 两种组学

In [None]:
# express_data 

In [46]:
snv_amp = Omics_data.swaplevel(i=0, j=1, axis=1)[['snv_data','cnv_amp']].swaplevel(i=0, j=1, axis=1)
snv_amp_order = snv_amp.columns.levels[0]
snv_amp = snv_amp.reindex(columns=snv_amp_order, level=0)
multi_x  = snv_amp.values

In [89]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5,shuffle=True,random_state=10590) 
kfscore = []
for train_index, test_index in skf.split(multi_x, y):

    X_train, X_test = multi_x[train_index], multi_x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = create_model(multi_x)

    model.fit(X_train,y_train,validation_data=[X_test,y_test],epochs=120,class_weight={0:0.68,1:1.48},batch_size = 32)  #epochs=50,class_weight={0:0.5,1:1.35},batch_size = 64
    y_pred = model.predict(X_test)
    kfscore.append(evaluates(y_test, y_pred))
    results = evaluates(y_test, y_pred)
    print("results :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(results[0],3),round(results[1],3),round(results[2],3),round(results[3],3),round(results[4],3),round(results[5],3),round(results[6],3)))

    
kfscores = np.array(kfscore).sum(axis= 0)/5.0
print("average value :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(kfscores[0],3),round(kfscores[1],3),round(kfscores[2],3),round(kfscores[3],3),round(kfscores[4],3),round(kfscores[5],3),round(kfscores[6],3)))

In [76]:
snv_del = Omics_data.swaplevel(i=0, j=1, axis=1)[['snv_data','cnv_del']].swaplevel(i=0, j=1, axis=1)
snv_del_order = snv_del.columns.levels[0]
snv_del = snv_del.reindex(columns=snv_del_order, level=0)

In [77]:
snv_del_x  = snv_del.values

In [None]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5,shuffle=True,random_state=10590) #,shuffle=True


total_score  = []
for i in range(0,5):
    kfscore = []
    for train_index, test_index in skf.split(snv_del_x, y):

        X_train, X_test = snv_del_x[train_index], snv_del_x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model = create__multi_model(snv_del_x)



        model.fit(X_train,y_train,validation_data=[X_test,y_test],epochs=120,class_weight={0:0.68,1:1.48},batch_size = 32)  #epochs=50,class_weight={0:0.5,1:1.35},batch_size = 64
        y_pred = model.predict(X_test)
        kfscore.append(evaluates(y_test, y_pred))
        print(evaluates(y_test, y_pred))

        temp_pd =pd.DataFrame()
        temp_pd['sample'] = sample_pro.loc[test_index]['sample'].values

        temp_pd['values'] = y_pred

        total_pd = pd.concat([total_pd,temp_pd],axis=0)

    kfscore = np.array(kfscore).sum(axis= 0)/5.0
    total_score.append(kfscore)

In [58]:
class_weight = {0:x_0,1:x_1}
compare_models = [
    {
        'type': 'sgd',
        'id': 'L2 Logistic Regression',
        'params': {'loss': 'log', 'penalty': 'l2', 'alpha': 0.01, 'class_weight': class_weight}
    },

    {
        'type': 'svc',
        'id': 'RBF Support Vector Machine ',
        'params': {'kernel': 'rbf', 'gamma': 0.001, 'probability': True, 'class_weight': class_weight}
    },

    {
        'type': 'svc', 'id':
        'Linear Support Vector Machine ',
        'params': { 'kernel': 'linear','C': 0.1, 'probability': True, 'class_weight': class_weight}  
    },

    {
        'type': 'random_forest',
        'id': 'Random Forest',
        'params': {'max_depth': None, 'n_estimators': 50, 'bootstrap': False, 'class_weight': class_weight}
    },

    {
        'type': 'adaboost',
        'id': 'Adaptive Boosting',
        'params': {'learning_rate': 0.1, 'n_estimators': 50}
    },

    {
        'type': 'decision_tree',
        'id': 'Decision Tree',
        'params': {'min_samples_split': 10, 'max_depth': 10}
    },

]

In [66]:
#指标
def get_metrics(true_score,pre_score,pre_probe):
    
  
    fpr, tpr, thresholds = metrics.roc_curve(true_score, pre_probe, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    
    aupr = average_precision_score(true_score, pre_probe)
    
    precision1, recall1, thresholds = precision_recall_curve(true_score, pre_probe)    
    auprc  = metrics.auc(recall1, precision1)
    
    accuracy = accuracy_score(true_score,pre_score)
    
    f1 = metrics.f1_score(true_score, pre_score)
    
    precision = metrics.precision_score(true_score,pre_score)
    
    recall = metrics.recall_score(true_score,pre_score)
    
    return precision,accuracy,recall,f1,auc,aupr,auprc

In [59]:
#===============随机梯度下降法分类===============

from sklearn.linear_model import SGDClassifier

def Creat_SGD(whole_data_x,whole_data_y,train_index,test_index):
    
    model = SGDClassifier(**compare_models[0]['params'] )  #参数
    
    model.fit(whole_data_x[train_index],whole_data_y[train_index] )  # 训练模型
    
    true_score = whole_data_y[test_index] #真实标签
    
    pre_score = model.predict(whole_data_x[test_index]) #预测
    
    pre_probe = model.predict_proba(whole_data_x[test_index])[:, 1] #预测的概率
    

    precision,accuracy,recall,f1,auc,aupr,auprc = get_metrics(true_score,pre_score,pre_probe) #验证模型，获得指标
    
    return precision,accuracy,recall,f1,auc,aupr,auprc

In [60]:
# ===============随机森林分类===============

from sklearn.ensemble import RandomForestClassifier

def Creat_RDF(whole_data_x,whole_data_y,train_index,test_index):
     
    model = RandomForestClassifier(**compare_models[3]['params'])  #定义模型

    model.fit(whole_data_x[train_index],whole_data_y[train_index] )  # 训练模型
    
    true_score = whole_data_y[test_index] #真实标签
    
    pre_score = model.predict(whole_data_x[test_index]) #预测
    
    pre_probe = model.predict_proba(whole_data_x[test_index])[:, 1] #预测的概率
    

    precision,accuracy,recall,f1,auc,aupr,auprc = get_metrics(true_score,pre_score,pre_probe) #验证模型，获得指标
    
    return precision,accuracy,recall,f1,auc,aupr,auprc


In [61]:
#===============逻辑回归分类===============
from sklearn.linear_model import LogisticRegression


def Creat_LR(whole_data_x,whole_data_y,train_index,test_index):
    
    model =  LogisticRegression() 

    model.fit(whole_data_x[train_index],whole_data_y[train_index] )  # 训练模型
    
    true_score = whole_data_y[test_index] #真实标签
    
    pre_score = model.predict(whole_data_x[test_index]) #预测
    
    pre_probe = model.predict_proba(whole_data_x[test_index])[:, 1] #预测的概率
    

    precision,accuracy,recall,f1,auc,aupr,auprc = get_metrics(true_score,pre_score,pre_probe) #验证模型，获得指标
    
    return precision,accuracy,recall,f1,auc,aupr,auprc

In [62]:
#===============决策树分类===============
from sklearn import tree
 
def Creat_DTC(whole_data_x,whole_data_y,train_index,test_index):
    
    DTC_model = tree.DecisionTreeClassifier() #实例化 #max_depth = 10
    
    DTC_model.fit(whole_data_x[train_index],whole_data_y[train_index] )  # 训练模型
    
    true_score = whole_data_y[test_index] #真实标签
    
    pre_score = DTC_model.predict(whole_data_x[test_index]) #预测
    
    pre_probe = DTC_model.predict_proba(whole_data_x[test_index])[:, 1] #预测的概率
    

    precision,accuracy,recall,f1,auc,aupr,auprc = get_metrics(true_score,pre_score,pre_probe) #验证模型，获得指标
    
    return precision,accuracy,recall,f1,auc,aupr,auprc

In [63]:
#===============SVM分类===============
from sklearn.svm import SVC, NuSVC, LinearSVC

def Creat_RBFSVM(whole_data_x,whole_data_y,train_index,test_index):

    RBFSVM = NuSVC(**compare_models[1]['params'])

    RBFSVM.fit(whole_data_x[train_index],whole_data_y[train_index] )  # 训练模型
    
    true_score = whole_data_y[test_index] #真实标签
    
    pre_score = RBFSVM.predict(whole_data_x[test_index]) #预测
    
    pre_probe = RBFSVM.predict_proba(whole_data_x[test_index])[:, 1] #预测的概率
    

    precision,accuracy,recall,f1,auc,aupr,auprc = get_metrics(true_score,pre_score,pre_probe) #验证模型，获得指标
    
    return precision,accuracy,recall,f1,auc,aupr,auprc

In [64]:
from sklearn.svm import SVC, NuSVC, LinearSVC

def Creat_LinearSVC(whole_data_x,whole_data_y,train_index,test_index):

    LSVC = SVC(**compare_models[2]['params'])

    LSVC.fit(whole_data_x[train_index],whole_data_y[train_index] )  # 训练模型
    
    true_score = whole_data_y[test_index] #真实标签
    
    pre_score = LSVC.predict(whole_data_x[test_index]) #预测
    
    pre_probe = LSVC.predict_proba(whole_data_x[test_index])[:, 1] #预测的概率
    

    precision,accuracy,recall,f1,auc,aupr,auprc = get_metrics(true_score,pre_score,pre_probe) #验证模型，获得指标
    
    return precision,accuracy,recall,f1,auc,aupr,auprc

In [73]:
#Comparison algorithm five-fold cross-validation
import random
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5,shuffle=True,random_state=10590) #,random_state=10590

kfscore = []

for train_index, test_index in skf.split(x,y):
         
    score = list(Creat_DTC(x,y,train_index,test_index))
    print(score)                        
    kfscore.append(score)
                                             

kfscores = np.array(kfscore).sum(axis= 0)/5.0     
print("average value :  pre = {},acc = {},rec = {},f1 = {},auc = {},aupr = {},auprc = {}".format(round(kfscores[0],3),round(kfscores[1],3),round(kfscores[2],3),round(kfscores[3],3),round(kfscores[4],3),round(kfscores[5],3),round(kfscores[6],3)))

[0.34375, 0.6435643564356436, 0.4230769230769231, 0.3793103448275862, 0.5592307692307692, 0.29721160646284406, 0.4459100614731803]
[0.3939393939393939, 0.6633663366336634, 0.48148148148148145, 0.43333333333333324, 0.6141141141141142, 0.3409720056973018, 0.5031261296064247]
[0.5555555555555556, 0.7524752475247525, 0.37037037037037035, 0.4444444444444445, 0.6476476476476477, 0.3856196535832921, 0.5598230388340978]
[0.21212121212121213, 0.5445544554455446, 0.25925925925925924, 0.23333333333333334, 0.5007507507507507, 0.2567876628932735, 0.3597149794344514]
[0.375, 0.6732673267326733, 0.3333333333333333, 0.35294117647058826, 0.5985985985985987, 0.3219389921448285, 0.45105234207631295]
average value :  pre = 0.376,acc = 0.655,rec = 0.374,f1 = 0.369,auc = 0.584,aupr = 0.321,auprc = 0.464
