In [6]:
## IMPORT LIBRARIES
# General libraries
import random
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import seaborn as sns
import pandas as pd
import os
from natsort import natsorted
import sys
import re
from matplotlib import gridspec
import math
from collections import Counter

# Tensorflow / Keras
import tensorflow as tf
from tensorflow import keras # for building Neural Networks
print('Tensorflow/Keras: %s' % keras.__version__) # print version
from tensorflow.keras.models import Sequential # for creating a linear stack of layers for our Neural Network
import tensorflow.python.keras.metrics 
from tensorflow.keras import Input # for instantiating a keras tensor
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense, Activation, Dropout # for creating regular densely-connected NN layers.
from tensorflow.keras.callbacks import History
import keras_tuner as kt 

# Data manipulation
import pandas as pd # for data manipulation
print('pandas: %s' % pd.__version__) # print version
import numpy as np # for data manipulation
print('numpy: %s' % np.__version__) # print version
from imblearn.under_sampling import RandomUnderSampler

# Sklearn
import sklearn # for model evaluation
print('sklearn: %s' % sklearn.__version__) # print version
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix,plot_confusion_matrix
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif , chi2

# Visualization
import plotly 
import plotly.express as px
import plotly.graph_objects as go
print('plotly: %s' % plotly.__version__) # print version
#from pyts.image import RecurrencePlot
#from PIL import Image
#from matplotlib import cm
#from ripser import Rips
#from persim import PersImage
#from persim import PersistenceImager
%matplotlib inline

# Feature importance
import shap
import eli5
from eli5.sklearn import PermutationImportance

# prepare target
def prepare_targets(y_train, y_test):
    le = LabelEncoder();
    le.fit(y_train)
    y_train_enc = le.transform(y_train)
    y_test_enc = le.transform(y_test)
    return y_train_enc, y_test_enc

##seed for reproducibility
seed_value = 1
# Set the `python` built-in pseudo-random generator at a fixed value
random.seed(seed_value)

# Set the `numpy` pseudo-random generator at a fixed value
np.random.seed(seed_value)

# Set the `tensorflow` pseudo-random generator at a fixed value
tf.random.set_seed(seed_value)

Tensorflow/Keras: 2.9.0
pandas: 1.4.4
numpy: 1.22.3
sklearn: 0.23.2
plotly: 5.11.0


In [7]:
dev_num_all = [1,2,3,4,5,6,7]
conditions = ['Control_100mV','CTPR16_100mV']
#dev_num_test =  [6,7,8]

##### Select data for modeling

print(f'dev: {dev_num_all}, classes : {conditions}')
full_data = pd.concat(map(pd.read_csv,[f'./dev_{dev_num_all[0]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[0]}_{conditions[1]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[1]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[1]}_{conditions[1]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[2]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[2]}_{conditions[1]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[3]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[3]}_{conditions[1]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[4]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[4]}_{conditions[1]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[5]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[5]}_{conditions[1]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[6]}_{conditions[0]}_all_extracted_features.csv',
                                       f'./dev_{dev_num_all[6]}_{conditions[1]}_all_extracted_features.csv',
                                       #f'./dev_{dev_num_all[7]}_{conditions[0]}_all_extracted_features.csv',
                                       #f'./dev_{dev_num_all[7]}_{conditions[1]}_all_extracted_features.csv'
                                       #f'./dev_{dev_num_train[8]}_{conditions[0]}_all_extracted_features.csv',
                                       #f'./dev_{dev_num_train[8]}_{conditions[1]}_all_extracted_features.csv'
                                       ]),ignore_index=True).sample(frac=1,random_state=seed_value)

print(len(full_data))

nan_cols = [i for i in full_data.columns if full_data[i].isnull().any()]

print(len(nan_cols))

##### Create training and testing samples
full_data_wo_nan = full_data.drop(nan_cols,axis=1)

X = full_data_wo_nan.drop(['label','dev_label_id'],axis=1).values

y = full_data_wo_nan['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

print('X TRAIN',X_train.shape)
print('X TEST',X_test.shape)

print('Y TRAIN',y_train.shape)
print('Y TEST',y_test.shape)

##### Data Scaling

scaler = MinMaxScaler()  #StandardScaler, MinMaxScaler
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled_df = pd.DataFrame(X_train_scaled) #, columns = X_train.columns.tolist())
X_test_scaled_df = pd.DataFrame(X_test_scaled) #, columns = X_train.columns.tolist())
#print(X_train_scaled_df.head())

# prepare target labels as 0 or 1
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
num_top_feat = 50

# Define feature selection
fs = SelectKBest(score_func=chi2, k=num_top_feat) 
### apply feature selection on scaled data
X_train_selected = fs.fit_transform(X_train_scaled, y_train_enc)
#print('X_train_selected shape :', X_train_selected.shape)
##X_test_selected = fs.transform(X_test_scaled)
##print(X_test_selected.shape)

# Get columns to keep and create new dataframe with those only
cols = fs.get_support(indices=True)
X_train_selected_df = X_train_scaled_df.iloc[:,cols]
X_test_selected_df = X_test_scaled_df.iloc[:,cols]
#print(X_train_selected_df.head())
    
#pca = PCA(n_components=10)
#train_pca = pca.fit_transform(X_train_scaled)
#test_pca = pca.transform(X_test_scaled)
#X_train_pca = pd.DataFrame(data = train_pca, columns = ['pc1','pc2','pc3','pc4','pc5','pc6','pc7','pc8','pc9','pc10'])
#X_test_pca = pd.DataFrame(data = test_pca, columns = ['pc1','pc2','pc3','pc4','pc5','pc6','pc7','pc8','pc9','pc10'])
#
#print('X TRAIN PCA',X_train_pca.info)
#print('X TEST PCA',X_test_pca.info)

dev: [1, 2, 3, 4, 5, 6, 7], classes : ['Control_100mV', 'CTPR16_100mV']
24090
6
X TRAIN (19272, 777)
X TEST (4818, 777)
Y TRAIN (19272,)
Y TEST (4818,)


In [None]:
#def lr_step_decay(epoch, lr):
#    drop_rate = 0.1
#    epochs_drop = 20.0
#    return initial_learning_rate * math.pow(drop_rate, math.floor(epoch/epochs_drop))

In [8]:
class MyHyperModel(kt.HyperModel):
    def build(self, hp):
        model = Sequential()
        # Tune the number of units in the first Dense layer
        # Choose an optimal value between 256-1024
        hp_units = hp.Int('units', min_value=256, max_value=1024, step=32)
        model.add(Input(shape=(50,),name="Input-layer")) #777
        model.add(Dense(units=hp_units, activation='relu',name="layer1"))
        model.add(Dense(units=hp_units, activation='relu',name="layer2"))
        model.add(Dense(units=hp_units, activation='relu',name="layer3"))
        model.add(Dense(1, activation='sigmoid', name='Output-Layer'))
      
        # Tune the learning rate for the optimizer
        # Choose an optimal value from 0.01, 0.001, or 0.0001
        hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 5e-3,1e-3])
      
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                      loss='binary_crossentropy',
                      metrics=['Accuracy', 'Precision', 'Recall'])
        return model
    # Tune the epoch and batch size for the fit
    def fit(self, hp, model, *args, **kwargs):
        return model.fit(
            *args,
            batch_size = hp.Int('batch_size', 32, 256, step=32),
            epochs = hp.Int('epochs', 10, 50, step = 10),
            **kwargs)

tuner = kt.BayesianOptimization(MyHyperModel(), objective="val_Accuracy", max_trials=5,seed=seed_value, overwrite=True, 
                                directory=os.path.normpath('C:/Users/goura/Desktop'), project_name="hp_tune_mm_1")
##use short path names to save the project in dir

tuner.search(X_train_selected_df, y_train_enc,validation_split=0.2)

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in each densely-connected layer is 
{best_hps.get('units')}, the optimal learning rate for the optimizer is {best_hps.get('learning_rate')}, the 
best number of epochs is {best_hps.get('epochs')} and the best batch size is {best_hps.get('batch_size')}.
""")

Trial 5 Complete [00h 02m 19s]
val_Accuracy: 0.9613488912582397

Best val_Accuracy So Far: 0.9693903923034668
Total elapsed time: 00h 06m 38s
INFO:tensorflow:Oracle triggered exit

The hyperparameter search is complete. The optimal number of units in each densely-connected layer is 
1024, the optimal learning rate for the optimizer is 0.001, the 
best number of epochs is 50 and the best batch size is 256.



In [9]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_selected_df,y_train_enc,validation_split=0.2)



In [10]:
val_acc_per_epoch = history.history['val_Accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Best epoch: 1


In [11]:
hypermodel = tuner.hypermodel.build(best_hps)
# Retrain the model
hypermodel.fit(X_train_selected_df, y_train_enc,epochs=best_epoch, validation_split=0.2)



<keras.callbacks.History at 0x170a72d43a0>

In [12]:
eval_result = hypermodel.evaluate(X_test_selected_df, y_test_enc)
print("[test loss, test accuracy,test precision, test recall]:", eval_result)

[test loss, test accuracy,test precision, test recall]: [0.3259658217430115, 0.8686177134513855, 0.8318732976913452, 0.9485209584236145]
