In [1]:
# Kernel = ADomics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import keras
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, Concatenate, Conv1D, GlobalMaxPool1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
from sklearn.decomposition import PCA
import optuna 
from optuna.samplers import TPESampler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# One shared seed, pushed into NumPy, TensorFlow and Keras (in that order)
# so that weight initialisation and dropout are repeatable between runs.
seed = 123
for _seed_fn in (np.random.seed, tf.random.set_seed, keras.utils.set_random_seed):
    _seed_fn(seed)

2024-02-18 01:18:28.117739: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-18 01:18:28.423369: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-18 01:18:28.423424: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-18 01:18:28.452798: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-18 01:18:28.535083: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-18 01:18:28.537275: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
def get_train_test(df, split_col='Split', label_col='Label'):
    """Split one omics table into train/test feature frames and label series.

    Rows whose ``split_col`` value equals 1 form the training set and rows
    whose value equals 0 form the test set; rows holding any other value end
    up in neither. Both ``split_col`` and ``label_col`` are removed from the
    returned feature frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the feature columns plus ``split_col`` and
        ``label_col``.
    split_col : str, default 'Split'
        Name of the column holding the 1 (train) / 0 (test) indicator.
    label_col : str, default 'Label'
        Name of the column holding the target values.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` in that order.

    Raises
    ------
    KeyError
        If ``split_col`` or ``label_col`` is not a column of ``df``.
    """
    train_rows = df[df[split_col] == 1]
    test_rows = df[df[split_col] == 0]

    # ``drop`` returns new frames, so callers may modify the results freely
    # without touching ``df``.
    non_feature_cols = [split_col, label_col]
    X_train = train_rows.drop(columns=non_feature_cols)
    X_test = test_rows.drop(columns=non_feature_cols)
    return X_train, X_test, train_rows[label_col], test_rows[label_col]


In [3]:
def correlation(dataset, threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    A column is reported when the absolute value of its correlation (as
    computed by ``dataset.corr()``) with at least one column positioned
    before it in ``dataset`` is strictly greater than ``threshold``. The
    first column of every correlated pair is therefore never reported on
    account of that pair.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table whose columns are compared pairwise.
    threshold : float
        Absolute-correlation cut-off; the comparison is strict (``>``).

    Returns
    -------
    set
        Column names to drop. Empty when no pair exceeds the threshold.
    """
    corr_matrix = dataset.corr()

    # Evaluate every pair at once instead of looping over cells with
    # ``.iloc``. NaN correlations (e.g. from constant columns) compare as
    # False, so they are never flagged.
    exceeds = np.abs(corr_matrix.to_numpy()) > threshold

    # Keep only the strict lower triangle: entry (i, j) with j < i, which is
    # exactly the set of pairs where column i comes after column j.
    later_vs_earlier = np.tril(exceeds, k=-1)

    flagged_rows = later_vs_earlier.any(axis=1)
    return set(corr_matrix.columns[flagged_rows])

In [4]:
# Locations of the three omics tables under the ROSMAP data directory.
BASE_PATH = '../data/ROSMAP/'
methy_path = os.path.join(BASE_PATH, 'methy.csv')
mirna_path = os.path.join(BASE_PATH, 'mirna.csv')
mrna_path = os.path.join(BASE_PATH, 'mrna.csv')

# Fail before any loading starts and report exactly which inputs are absent.
missing_paths = [p for p in (methy_path, mirna_path, mrna_path) if not os.path.exists(p)]
if missing_paths:
    raise FileNotFoundError('File not found: ' + ', '.join(missing_paths))

# The first CSV column is used as the row index.
methy_df = pd.read_csv(methy_path, index_col=0)
mirna_df = pd.read_csv(mirna_path, index_col=0)
mrna_df = pd.read_csv(mrna_path, index_col=0)

# NOTE(review): later cells feed the three test matrices to one model and
# score against methy_y_test only, which presumes all three tables list the
# same samples in the same row order -- confirm against the CSVs.
methy_train_df, methy_test_df, methy_y_train, methy_y_test = get_train_test(methy_df)
mirna_train_df, mirna_test_df, mirna_y_train, mirna_y_test = get_train_test(mirna_df)
mrna_train_df, mrna_test_df, mrna_y_train, mrna_y_test = get_train_test(mrna_df)

# Absolute-correlation cut-off used to select redundant features.
CORR_THRESHOLD = 0.85

# Redundant features are chosen from the training rows only; the same
# columns are then removed from the test rows below.
methy_corr_features = correlation(methy_train_df, CORR_THRESHOLD)
print('methy correlated features: ', len(methy_corr_features))
mirna_corr_features = correlation(mirna_train_df, CORR_THRESHOLD)
print('mirna correlated features: ', len(mirna_corr_features))
mrna_corr_features = correlation(mrna_train_df, CORR_THRESHOLD)
print('mrna correlated features: ', len(mrna_corr_features))

# Reassign instead of dropping in place so each name is rebound explicitly.
methy_train_df = methy_train_df.drop(columns=list(methy_corr_features))
mirna_train_df = mirna_train_df.drop(columns=list(mirna_corr_features))
mrna_train_df = mrna_train_df.drop(columns=list(mrna_corr_features))

methy_test_df = methy_test_df.drop(columns=list(methy_corr_features))
mirna_test_df = mirna_test_df.drop(columns=list(mirna_corr_features))
mrna_test_df = mrna_test_df.drop(columns=list(mrna_corr_features))

# One scaler per omics type: statistics are fitted on the training rows and
# re-used unchanged to transform the test rows.
scaler_methy = StandardScaler()
scaler_mirna = StandardScaler()
scaler_mrna = StandardScaler()

methy_train_scaled = scaler_methy.fit_transform(methy_train_df)
methy_test_scaled = scaler_methy.transform(methy_test_df)

mirna_train_scaled = scaler_mirna.fit_transform(mirna_train_df)
mirna_test_scaled = scaler_mirna.transform(mirna_test_df)

mrna_train_scaled = scaler_mrna.fit_transform(mrna_train_df)
mrna_test_scaled = scaler_mrna.transform(mrna_test_df)

methy correlated features:  18
mirna correlated features:  6
mrna correlated features:  0


In [5]:
def create_branch(input_layer):
    """Stack the per-omics encoder on top of ``input_layer``.

    The encoder is three Dense(relu) layers of 128, 64 and 32 units, each
    with an L1 kernel penalty of 0.001 and each followed by Dropout with
    rates 0.5, 0.3 and 0.2 respectively.

    Parameters
    ----------
    input_layer
        Keras tensor the first Dense layer is applied to.

    Returns
    -------
    The output tensor of the final Dropout layer.
    """
    # (units, dropout rate) for each stage, applied top to bottom.
    stages = ((128, 0.5), (64, 0.3), (32, 0.2))

    hidden = input_layer
    for units, drop_rate in stages:
        hidden = Dense(units, activation='relu', kernel_regularizer=regularizers.l1(0.001))(hidden)
        hidden = Dropout(drop_rate)(hidden)
    return hidden

In [6]:
# Table of hyperparameter-search trials (one row per trial).
# The directory can be overridden with the MOBI_MEDIA_DIR environment
# variable so the notebook is not tied to one user's home directory; the
# default is the original location.
all_trials_df_path = os.path.join(
    os.environ.get("MOBI_MEDIA_DIR", "/home/dhakal/MoBI/media"),
    "new_trials.csv",
)
all_trials_df = pd.read_csv(all_trials_df_path)
all_trials_df.head()

Unnamed: 0.1,Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_learning_rate,params_merged_1_dropout,params_merged_1_size,params_merged_2_dropout,params_merged_2_size,params_merged_3_dropout,params_merged_3_size,state
0,0,0,0.518868,2024-01-31 12:56:04.730488,2024-01-31 12:56:29.358266,0 days 00:00:24.627778,0.083764,0.320526,198,0.387788,59,0.269243,27,COMPLETE
1,1,1,0.801887,2024-01-31 12:56:29.359104,2024-01-31 12:57:10.503498,0 days 00:00:41.144394,1.7e-05,0.237271,196,0.39162,78,0.275429,35,COMPLETE
2,2,2,0.764151,2024-01-31 12:57:10.504533,2024-01-31 12:57:25.629094,0 days 00:00:15.124561,0.003448,0.170181,140,0.312621,103,0.312731,24,COMPLETE
3,3,3,0.754717,2024-01-31 12:57:25.630214,2024-01-31 12:58:07.609649,0 days 00:00:41.979435,8.2e-05,0.388977,227,0.229184,102,0.244715,45,COMPLETE
4,4,4,0.773585,2024-01-31 12:58:07.610695,2024-01-31 12:58:27.567769,0 days 00:00:19.957074,0.000505,0.27348,120,0.272345,93,0.297474,20,COMPLETE


In [9]:
# Pick the row (trial) whose 'value' is the highest.
best_trial_idx = all_trials_df['value'].idxmax()
best_trial = all_trials_df.loc[best_trial_idx]
print(best_trial)

Unnamed: 0                                        869
number                                            869
value                                        0.877358
datetime_start             2024-01-31 20:50:24.486755
datetime_complete          2024-01-31 20:50:35.066971
duration                       0 days 00:00:10.580216
params_learning_rate                         0.016711
params_merged_1_dropout                      0.308161
params_merged_1_size                              215
params_merged_2_dropout                      0.184207
params_merged_2_size                               76
params_merged_3_dropout                      0.145381
params_merged_3_size                               64
state                                        COMPLETE
Name: 869, dtype: object


In [10]:
def get_model(trial):
    """Build and compile the three-input classifier described by ``trial``.

    Each omics input (methy, mirna, mrna) goes through its own
    ``create_branch`` encoder. The three encodings are concatenated and
    passed through three Dense(relu, L1=0.001) + Dropout stages whose sizes
    and dropout rates come from ``trial``, then a single sigmoid unit.

    Input widths are read from the module-level ``methy_train_scaled``,
    ``mirna_train_scaled`` and ``mrna_train_scaled`` arrays, so those must
    exist before this function is called.

    Parameters
    ----------
    trial
        Object exposing ``params_merged_{1,2,3}_size``,
        ``params_merged_{1,2,3}_dropout`` and ``params_learning_rate`` as
        attributes (e.g. one row of the trials table).

    Returns
    -------
    A Keras ``Model`` compiled with Adam, binary cross-entropy loss and the
    accuracy metric.
    """
    # (layer name, units, dropout rate) for each stage of the fused head.
    head_stages = [
        ("merged_1", trial.params_merged_1_size, trial.params_merged_1_dropout),
        ("merged_2", trial.params_merged_2_size, trial.params_merged_2_dropout),
        ("merged_3", trial.params_merged_3_size, trial.params_merged_3_dropout),
    ]
    learning_rate = trial.params_learning_rate

    # All inputs are created first, then the branches, in methy/mirna/mrna order.
    omics_inputs = [
        Input(shape=(methy_train_scaled.shape[1],), name='methy'),
        Input(shape=(mirna_train_scaled.shape[1],), name='mirna'),
        Input(shape=(mrna_train_scaled.shape[1],), name='mrna'),
    ]
    omics_branches = [create_branch(omics_input) for omics_input in omics_inputs]

    hidden = Concatenate()(omics_branches)
    for layer_name, units, drop_rate in head_stages:
        hidden = Dense(
            units,
            activation='relu',
            kernel_regularizer=regularizers.l1(0.001),
            name=layer_name,
        )(hidden)
        hidden = Dropout(drop_rate)(hidden)

    output = Dense(1, activation='sigmoid', name="merged_out")(hidden)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model = Model(inputs=omics_inputs, outputs=output)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [11]:
# Rebuild the network architecture from the hyperparameters stored in
# best_trial (the row of all_trials_df with the highest 'value').
model = get_model(best_trial)

2024-02-18 01:22:44.615218: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [12]:
# Saved weights to load into the freshly built model.
# The directory can be overridden with the MOBI_MEDIA_DIR environment
# variable so the notebook is not tied to one user's home directory; the
# default is the original location.
weights_path = os.path.join(
    os.environ.get("MOBI_MEDIA_DIR", "/home/dhakal/MoBI/media"),
    "best_model.h5",
)
model.load_weights(weights_path)

In [13]:
# Inputs are passed in the same methy / mirna / mrna order used to build the model.
test_inputs = [methy_test_scaled, mirna_test_scaled, mrna_test_scaled]
y_prob = model.predict(test_inputs)

# Sigmoid outputs strictly above 0.5 are labelled 1, everything else 0.
y_pred = np.where(y_prob > 0.5, 1, 0)

# NOTE(review): scored against the methy labels only; presumably the mirna
# and mrna label series are identical -- confirm.
acc = accuracy_score(methy_y_test, y_pred)
print(acc)

0.8773584905660378
