## Architecture

In [None]:
from keras.models import load_model
# Restore the CBLANE model trained on the global dataset from the Colab working directory.
CBLANE = load_model(filepath="/content/CBLANE_global_dataset.keras")

In [None]:
from tensorflow.keras.layers import Input, Dense, Conv1D, BatchNormalization, LSTM, Flatten, MultiHeadAttention, MaxPooling1D
from tensorflow.keras.layers import PReLU, SpatialDropout1D, Bidirectional, Multiply
from tensorflow.keras.layers import ZeroPadding1D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.models import Model

# ---------------------------------------------------------------------------
# CBLANE architecture: four convolutional blocks -> attention -> BiLSTM/LSTM -> sigmoid.
# Each stage is additionally wrapped in a named Sequential/Model, presumably so that
# summary()/plot_model(expand_nested=True) show it as one nested unit.
# ---------------------------------------------------------------------------

# Input: sequences of length 101 with 4 channels per position.
input_shape_conv = (101, 4)
input_layer = Input(shape=input_shape_conv, name='Input_Layer')
# Pad 3 steps on each side (101 -> 107) so the kernel-8 "valid" convolution below yields 100 steps.
padding_layer = ZeroPadding1D(padding=3, input_shape=(101,4),name="zero_padding_layer")(input_layer)

# Block 0: Conv1D(256, k=8) -> PReLU -> SpatialDropout1D -> MaxPooling1D -> BatchNormalization.
convolution_layer_1 = Conv1D(filters = 256,kernel_size=8,padding="valid",name='Conv_0')(padding_layer)
convolution_layer_1 = PReLU(name='PReLU_0')(convolution_layer_1)
convolution_layer_1 = SpatialDropout1D(0.01, name='SpatialDropout_0')(convolution_layer_1)
# pool_size=1 leaves the sequence length unchanged.
convolution_layer_1 = MaxPooling1D(pool_size=1, name='MaxPooling_0')(convolution_layer_1)
convolution_layer_1 = BatchNormalization(name='BatchNormalization_0')(convolution_layer_1)

Convolutional_Block_0 = Sequential([Model(inputs=input_layer,outputs=convolution_layer_1)],name='Convolutional_Block_0')

# Block 1: Conv1D(128, k=4, "same") with the same activation/dropout/pool/norm stack (pool_size=1).
convolution_layer_2 = Conv1D(filters=128, kernel_size=4, padding="same", name='Conv_1')(convolution_layer_1)
convolution_layer_2 = PReLU(name='PReLU_1')(convolution_layer_2)
convolution_layer_2 = SpatialDropout1D(0.01, name='SpatialDropout_1')(convolution_layer_2)
convolution_layer_2 = MaxPooling1D(pool_size=1, name='MaxPooling_1')(convolution_layer_2)
convolution_layer_2 = BatchNormalization(name='BatchNormalization_1')(convolution_layer_2)

Convolutional_Block_1 = Sequential([Model(inputs=convolution_layer_1,outputs=convolution_layer_2)],name='Convolutional_Block_1')

# Block 2: Conv1D(64, k=2, "same"); pool_size=2 halves the sequence length (100 -> 50).
convolution_layer_3 = Conv1D(filters=64, kernel_size=2, padding="same", name='Conv_2')(convolution_layer_2)
convolution_layer_3 = PReLU(name='PReLU_2')(convolution_layer_3)
convolution_layer_3 = SpatialDropout1D(0.01, name='SpatialDropout_2')(convolution_layer_3)
convolution_layer_3 = MaxPooling1D(pool_size=2, name='MaxPooling_2')(convolution_layer_3)
convolution_layer_3 = BatchNormalization(name='BatchNormalization_2')(convolution_layer_3)

Convolutional_Block_2 = Sequential([Model(inputs=convolution_layer_2,outputs=convolution_layer_3)],name='Convolutional_Block_2')

# Block 3: Conv1D(64, k=2, "same"); pool_size=2 halves the length again (50 -> 25),
# giving the 25 x 64 encoding that the attention heatmap cells reshape to.
convolution_layer_4 = Conv1D(filters=64, kernel_size=2, padding="same", name='Conv_3')(convolution_layer_3)
convolution_layer_4 = PReLU(name='PReLU_3')(convolution_layer_4)
convolution_layer_4 = SpatialDropout1D(0.01, name='SpatialDropout_3')(convolution_layer_4)
convolution_layer_4 = MaxPooling1D(pool_size=2, name='MaxPooling_3')(convolution_layer_4)
convolution_layer_4 = BatchNormalization(name='BatchNormalization_3')(convolution_layer_4)

Convolutional_Block_3 = Sequential([Model(inputs=convolution_layer_3,outputs=convolution_layer_4)],name='Convolutional_Block_3')

# The four blocks chained into one convolutional encoder.
Convolutional_Block = Sequential([Convolutional_Block_0,Convolutional_Block_1,Convolutional_Block_2,Convolutional_Block_3],name='Convolutional_Block')

# Attention query: a kernel-8 convolutional projection of the encoder output.
Query = Conv1D(filters=64, padding="same", kernel_size=8, name=f'Query')(convolution_layer_4)

# 8-head attention with the projected query over the encoder output as value
# (no explicit key is passed). The per-head attention scores are returned as well.
heads = 8
self_attention_layer,attention_scores = MultiHeadAttention(num_heads=heads,key_dim=64,name=f'MultiHeadAttention')(query=Query ,value=convolution_layer_4,return_attention_scores=True)

# Element-wise product of the attention output with the encoder features.
self_attention_layer = Multiply()([self_attention_layer,convolution_layer_4])

# Helper sub-models for inspection; neither is part of the final CBLANE assembly below.
Attention_Block = Sequential([Model(inputs=convolution_layer_4,outputs=self_attention_layer)],name='Attention_Layer')
Attention_scores = Sequential([Model(inputs=input_layer,outputs=attention_scores)],name='Attention_scores')

# Recurrent stage: a BiLSTM (forward/backward outputs summed) that keeps the sequence,
# followed by an LSTM that returns only its final output.
bilstm_layer = Bidirectional(LSTM(64, return_sequences=True), merge_mode="sum", name='Bidirectional_LSTM')(self_attention_layer)
lstm_layer = LSTM(64, dropout=0.1, name='LSTM')(bilstm_layer)

# NOTE(review): Recurrent_Block and Ensemble span the same graph (encoder output -> LSTM output,
# i.e. attention + recurrent layers); only Ensemble is used in the assembly below.
Recurrent_Block = Sequential([Model(inputs=convolution_layer_4,outputs=lstm_layer)],name='Recurrent_Block')
Ensemble = Model(inputs=convolution_layer_4,outputs=lstm_layer)
# Full encoder (convolution + attention + recurrent); note the layer name is 'CEBLANE'
# while the Python variable is `Encoder`.
Encoder = Sequential([Convolutional_Block,Ensemble],name='CEBLANE')
# Classification head: a single sigmoid unit on the flattened LSTM output.
output_layer = Dense(1, activation="sigmoid", name='finalOutput')(Flatten(name='flattenOutput')(lstm_layer))

Output_block = Sequential([Model(inputs=lstm_layer,outputs=output_layer)],name='Output_block')
# NOTE(review): this rebinds CBLANE, replacing any model previously loaded under that name
# with a freshly initialised network.
CBLANE = Sequential([Encoder,Output_block],name='CBLANE')

In [None]:
# Layer-by-layer summary with nested sub-models expanded and trainable flags shown.
CBLANE.summary(
    expand_nested=True,
    show_trainable=True,
)

In [None]:
from keras.utils import plot_model
# Render the architecture left-to-right ("LR") and write it to Horizontal_model.png.
horizontal_plot_options = dict(
    to_file="Horizontal_model.png",
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    rankdir="LR",
    expand_nested=True,
    dpi=300,
    show_layer_activations=True,
    show_trainable=True,
)
plot_model(CBLANE, **horizontal_plot_options)

In [None]:
from keras.utils import plot_model
# Render the architecture top-to-bottom ("TB") and write it to Vertical_model.png.
vertical_plot_options = dict(
    to_file="Vertical_model.png",
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=True,
    dpi=300,
    show_layer_activations=True,
    show_trainable=True,
)
plot_model(CBLANE, **vertical_plot_options)

# Training

In [None]:
import numpy as np
def save_or_load_numpy(option,file,labels=None,sequences=None):
  """Save or load a dataset stored as a NumPy ``.npz`` archive.

  Args:
    option: ``"save"`` to write ``labels`` and ``sequences`` to ``file``,
      ``"load"`` to read them back.
    file: Path (or file object) handed to ``np.savez`` / ``np.load``.
    labels: Array stored under the ``labels`` key (only used when saving).
    sequences: Array stored under the ``sequences`` key (only used when saving).

  Returns:
    ``(None, None)`` after saving; ``(sequences, labels)`` -- in that order --
    after loading.

  Raises:
    ValueError: If ``option`` is neither ``"save"`` nor ``"load"``.
  """
  if option == "save":
    np.savez(file, labels=labels, sequences=sequences)
    return None, None
  if option == "load":
    # np.load returns an NpzFile that keeps the archive open; the context manager
    # closes it once both arrays have been read into memory.
    with np.load(file) as archive:
      return archive['sequences'], archive['labels']
  # Previously an unknown option fell through and returned None, which only surfaced
  # later as an unpacking TypeError at the call site.
  raise ValueError(f'option must be "save" or "load", got {option!r}')

# save_or_load_numpy("load", ...) returns (sequences, labels), so the features must be
# unpacked first. The previous order bound the sequences to *_labels and the labels to
# *_features, swapping inputs and targets for every later cell.
train_features,train_labels = save_or_load_numpy("load","train.npz")
test_features,test_labels = save_or_load_numpy("load","test.npz")
validation_features,validation_labels = save_or_load_numpy("load","validation.npz")

In [None]:
# Names used by this cell. In the original notebook they were imported only in later
# cells, so a fresh-kernel top-to-bottom run failed here with NameError.
import tensorflow as tf
from keras.metrics import BinaryAccuracy, Precision, Recall, AUC, SensitivityAtSpecificity, SpecificityAtSensitivity
from keras.optimizers import Adam

# NOTE(review): SaveSubModels is defined in a later cell (cell-line section) and writes
# to model/{epoch}/CBLANE_A549.keras; that cell must be executed before this one.

# Binary classification set-up: cross-entropy loss, Adam at lr=1e-3, and the metric
# list whose order determines the order of the values returned by evaluate().
CBLANE.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=[
        BinaryAccuracy(),
        Precision(),
        Recall(),
        AUC(),
        SensitivityAtSpecificity(0.5),
        SpecificityAtSensitivity(0.5),
    ],
)

# Train for 20 epochs on the global dataset; inputs and labels are passed as boolean tensors.
history = CBLANE.fit(
    tf.constant(train_features, dtype=tf.bool),
    tf.constant(train_labels, dtype=tf.bool),
    batch_size=4096,
    epochs=20,
    verbose=1,
    validation_data=(
        tf.constant(validation_features, dtype=tf.bool),
        tf.constant(validation_labels, dtype=tf.bool),
    ),
    callbacks=([SaveSubModels()]),
    validation_batch_size=4096,
)

In [None]:
# Loss and compiled metrics on the held-out global test set.
CBLANE.evaluate(x=test_features, y=test_labels, batch_size=4096)

# Metrics


In [None]:
# Predicted probabilities for the test set ...
test_prob = CBLANE.predict(x=test_features, batch_size=4096)
# ... and the corresponding hard 0/1 predictions at a 0.5 decision threshold.
test_pred = (test_prob > 0.5).astype(int)

In [None]:
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    matthews_corrcoef, cohen_kappa_score,
    hamming_loss, roc_auc_score,brier_score_loss, jaccard_score,
    average_precision_score,
)
# Calculate evaluation metrics
# Metrics computed from the hard 0/1 predictions.
accuracy = accuracy_score(test_labels, test_pred)
f1 = f1_score(test_labels, test_pred)
precision = precision_score(test_labels, test_pred)
recall = recall_score(test_labels, test_pred)
matthews_corr = matthews_corrcoef(test_labels, test_pred)
hamming = hamming_loss(test_labels, test_pred)
jacc_score = jaccard_score(test_labels, test_pred)

# Metrics computed from the predicted probabilities.
auc = roc_auc_score(test_labels, test_prob)
auc_pr = average_precision_score(test_labels, test_prob)
brier_score = brier_score_loss(test_labels, test_prob)

# Report every metric with four decimals, one per line.
for report_line in (
    f'Accuracy: {accuracy:.4f}',
    f'F1-score: {f1:.4f}',
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'Matthews Correlation Coefficient: {matthews_corr:.4f}',
    f'Hamming Loss: {hamming:.4f}',
    f'ROC AUC: {auc:.4f}',
    f'PR AUC: {auc_pr:.4f}',
    f'Brier Score Loss: {brier_score:.4f}',
    f'Jaccard Score: {jacc_score:.4f}',
):
    print(report_line)

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Display confusion matrix
# Raw confusion-matrix counts (rows: true labels, columns: predicted labels).
confusion_mat = confusion_matrix(test_labels, test_pred)

# Express each cell as a percentage of ALL test samples (grand total, not per row).
total_samples = confusion_mat.sum()
confusion_mat_percentage = confusion_mat / total_samples * 100

# Annotated heatmap of the percentages.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(confusion_mat_percentage, annot=True, fmt=".2f", cmap="Blues", cbar=False, ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
import plotly.express as px
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score
import numpy as np

# Validation-set probabilities and hard predictions (0.5 threshold), mirroring the test set.
validation_prob = CBLANE.predict(validation_features, batch_size=1024)
validation_pred = (validation_prob > 0.5).astype(int)

# Each entry holds [testing score, validation score] for one metric.
metrics = {
    'Accuracy': [accuracy_score(test_labels, test_pred), accuracy_score(validation_labels, validation_pred)],
    'Precision': [precision_score(test_labels, test_pred), precision_score(validation_labels, validation_pred)],
    'Recall': [recall_score(test_labels, test_pred), recall_score(validation_labels, validation_pred)],
    'F1-Score': [f1_score(test_labels, test_pred), f1_score(validation_labels, validation_pred)],
    'ROC AUC': [roc_auc_score(test_labels, test_prob), roc_auc_score(validation_labels, validation_prob)],
}

# Long-format table for Plotly: one row per (metric, data set) pair.
data = {
    'Metric': [metric_name for metric_name in metrics for _ in range(2)],
    'Set': ['Testing', 'Validation'] * len(metrics),
    'Score': [score for score_pair in metrics.values() for score in score_pair],
}

# Grouped bars: one group per metric, one bar per data set.
fig_bar = px.bar(
    data,
    x='Metric',
    y='Score',
    color='Set',
    barmode='group',
    labels={'Metric': 'Metrics', 'Score': 'Score'},
)
fig_bar.update_layout(
    title='Metric Scores for Testing and Validation Sets',
    xaxis={'categoryorder': 'total descending'},
    yaxis=dict(range=[0, 1]),  # scores are plotted on a fixed 0-1 axis
    width=1000,
    height=600,
)
fig_bar.update_yaxes(type='linear')

# Display the chart inline, then export it as a standalone HTML file.
fig_bar.show()
fig_bar.write_html("metric_scores.html")

In [None]:
from sklearn.metrics import precision_recall_curve, auc, roc_curve

# ROC points are computed here as well because the ROC-curve cell reuses fpr/tpr.
fpr, tpr, thresholds = roc_curve(test_labels, test_prob)
# NOTE: this rebinds `precision`/`recall` (scalars in the metrics cell) to curve arrays.
precision, recall, thresholds = precision_recall_curve(test_labels, test_prob)

# Area under the precision-recall curve itself. The title previously showed
# auc(fpr, tpr), i.e. the ROC AUC, under a "Precision-Recall Curve" heading.
pr_auc = auc(recall, precision)

fig = px.area(
    x=recall, y=precision,
    title=f'Precision-Recall Curve (AUC={pr_auc:.4f})',
    labels=dict(x='Recall', y='Precision'),
    width=700, height=700
)
# Dashed anti-diagonal reference line from (0, 1) to (1, 0).
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=1, y1=0
)
# Keep the plot square so both axes share the same scale.
fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig.update_xaxes(constrain='domain')

fig.show()

fig.write_html("PR.html", auto_open=False)

In [None]:
# ROC curve built from the fpr/tpr arrays computed in the precision-recall cell.
roc_auc_value = auc(fpr, tpr)
fig = px.area(
    x=fpr,
    y=tpr,
    title=f'ROC Curve (AUC={roc_auc_value:.4f})',
    labels=dict(x='False Positive Rate', y='True Positive Rate'),
    width=700,
    height=700,
)

# Dashed diagonal from (0, 0) to (1, 1) as the chance-level reference.
chance_line = dict(type='line', line=dict(dash='dash'), x0=0, x1=1, y0=0, y1=1)
fig.add_shape(**chance_line)

# Square aspect ratio so both rates share the same scale.
fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig.update_xaxes(constrain='domain')

fig.show()
fig.write_html("ROC.html", auto_open=False)

# Four Cell Line Datasets

## A549 Cell Line Dataset

In [4]:
import numpy as np

# Load the data
# Open the A549 archive and pull out the train / test / validation splits.
loaded_data = np.load('a549.npz')
train_sequences, train_labels = loaded_data['train_sequences'], loaded_data['train_labels']
test_sequences, test_labels = loaded_data['test_sequences'], loaded_data['test_labels']
validation_sequences, validation_labels = loaded_data['validation_sequences'], loaded_data['validation_labels']

# Sanity check: report the shape of every loaded array.
for shape_caption, split_array in (
    ("Train Sequences Shape:", train_sequences),
    ("Train Labels Shape:", train_labels),
    ("Test Sequences Shape:", test_sequences),
    ("Test Labels Shape:", test_labels),
    ("Validation Sequences Shape:", validation_sequences),
    ("Validation Labels Shape:", validation_labels),
):
    print(shape_caption, split_array.shape)

Train Sequences Shape: (735370, 101, 4)
Train Labels Shape: (735370,)
Test Sequences Shape: (229822, 101, 4)
Test Labels Shape: (229822,)
Validation Sequences Shape: (183842, 101, 4)
Validation Labels Shape: (183842,)


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.callbacks import Callback
import os
from keras.models import save_model
# Multiply the learning rate by 0.1 once val_loss has not improved for 2 epochs;
# min_lr=1e-20 is effectively no lower bound.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode="min",
    factor=0.1,
    patience=2,
    min_lr=1e-20,
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
# NOTE(review): early_stop is not included in the callbacks list of this section's fit call.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode="min",
    patience=10,
    restore_best_weights=True,
)

class SaveSubModels(Callback):
    """Keras callback that checkpoints the model being trained after every epoch.

    Each epoch is written to ``model/<epoch>/CBLANE_A549.keras`` (``epoch`` is the
    zero-based index Keras passes to ``on_epoch_end``).
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save the attached model once the given epoch has finished.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric results for the epoch (unused).
        """
        checkpoint_path = f'model/{epoch}/CBLANE_A549.keras'

        # exist_ok avoids the check-then-create race of an explicit os.path.exists test.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        # Save the model this callback is attached to rather than whatever the global
        # name CBLANE happens to point at when the epoch ends.
        save_model(self.model, checkpoint_path)

In [None]:
from keras.metrics import BinaryAccuracy, Precision, Recall, AUC, SensitivityAtSpecificity, SpecificityAtSensitivity
from keras.optimizers import Adam
import tensorflow as tf

# Fine-tune on A549: start from the weights trained on the global dataset.
CBLANE = load_model("CBLANE_global_dataset.keras")

# Same loss/metrics as the global training cell, but with a 10x smaller learning rate
# (1e-4 instead of 1e-3).
CBLANE.compile(loss='binary_crossentropy',
                             optimizer=Adam(learning_rate=0.0001),
                             metrics=[BinaryAccuracy(),
                                      Precision(),
                                      Recall(),
                                      AUC(),
                                      SensitivityAtSpecificity(0.5),
                                      SpecificityAtSensitivity(0.5),
                                      ]
                             )
# Sequences and labels are passed as boolean tensors.
# NOTE(review): presumably Keras casts them to float internally -- TODO confirm.
history = CBLANE.fit(tf.constant(train_sequences,dtype=tf.bool),
                                   tf.constant(train_labels,dtype=tf.bool),
                                   batch_size=128,
                                   epochs=20,
                                   verbose=1,
                                   validation_data=(tf.constant(validation_sequences,dtype = tf.bool),
                                                    tf.constant(validation_labels,dtype = tf.bool)),
                                  validation_batch_size=4096,
                                  # LR reduction on plateau plus a per-epoch checkpoint; early_stop is not used here.
                                  callbacks=[reduce_lr,
                                             SaveSubModels()])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [5]:
from keras.models import load_model
# Load the A549 fine-tuned checkpoint and score it on the A549 test split.
# NOTE(review): no visible cell writes "/content/global_A549.keras" -- presumably a renamed
# per-epoch checkpoint; confirm its origin.
CBLANE = load_model(filepath="/content/global_A549.keras")
CBLANE.evaluate(x=test_sequences, y=test_labels, batch_size=4096)



[0.2939949035644531,
 0.8760606050491333,
 0.891639232635498,
 0.8558421730995178,
 0.9472177624702454,
 0.9815119504928589,
 0.9939241409301758]

## HUVEC Cell Line Dataset

In [6]:
import numpy as np

# Load the data
# Open the HUVEC archive and pull out the train / test / validation splits.
loaded_data = np.load('huvec.npz')
train_sequences, train_labels = loaded_data['train_sequences'], loaded_data['train_labels']
test_sequences, test_labels = loaded_data['test_sequences'], loaded_data['test_labels']
validation_sequences, validation_labels = loaded_data['validation_sequences'], loaded_data['validation_labels']

# Sanity check: report the shape of every loaded array.
for shape_caption, split_array in (
    ("Train Sequences Shape:", train_sequences),
    ("Train Labels Shape:", train_labels),
    ("Test Sequences Shape:", test_sequences),
    ("Test Labels Shape:", test_labels),
    ("Validation Sequences Shape:", validation_sequences),
    ("Validation Labels Shape:", validation_labels),
):
    print(shape_caption, split_array.shape)

Train Sequences Shape: (409395, 101, 4)
Train Labels Shape: (409395,)
Test Sequences Shape: (127941, 101, 4)
Test Labels Shape: (127941,)
Validation Sequences Shape: (102348, 101, 4)
Validation Labels Shape: (102348,)


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.callbacks import Callback
import os
from keras.models import save_model
# Multiply the learning rate by 0.1 once val_loss has not improved for 2 epochs;
# min_lr=1e-20 is effectively no lower bound.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode="min",
    factor=0.1,
    patience=2,
    min_lr=1e-20,
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
# NOTE(review): early_stop is not included in the callbacks list of this section's fit call.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode="min",
    patience=10,
    restore_best_weights=True,
)

class SaveSubModels(Callback):
    """Keras callback that checkpoints the model being trained after every epoch.

    Each epoch is written to ``model/<epoch>/CBLANE_A549.keras`` (``epoch`` is the
    zero-based index Keras passes to ``on_epoch_end``).

    NOTE(review): the file name still says "A549" although this section trains on
    HUVEC; it is kept unchanged so existing checkpoint paths keep working.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save the attached model once the given epoch has finished.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric results for the epoch (unused).
        """
        checkpoint_path = f'model/{epoch}/CBLANE_A549.keras'

        # exist_ok avoids the check-then-create race of an explicit os.path.exists test.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        # Save the model this callback is attached to rather than whatever the global
        # name CBLANE happens to point at when the epoch ends.
        save_model(self.model, checkpoint_path)

In [None]:
from keras.metrics import BinaryAccuracy, Precision, Recall, AUC, SensitivityAtSpecificity, SpecificityAtSensitivity
from keras.optimizers import Adam
import tensorflow as tf
from keras.models import load_model
# Fine-tune on HUVEC: start from the weights trained on the global dataset.
CBLANE = load_model("CBLANE_global_dataset.keras")
# Same loss/metrics as the global training cell, but with a 10x smaller learning rate
# (1e-4 instead of 1e-3).
CBLANE.compile(loss='binary_crossentropy',
                             optimizer=Adam(learning_rate=0.0001),
                             metrics=[BinaryAccuracy(),
                                      Precision(),
                                      Recall(),
                                      AUC(),
                                      SensitivityAtSpecificity(0.5),
                                      SpecificityAtSensitivity(0.5),
                                      ]
                             )
# Sequences and labels are passed as boolean tensors.
# NOTE(review): presumably Keras casts them to float internally -- TODO confirm.
history = CBLANE.fit(tf.constant(train_sequences,dtype=tf.bool),
                                   tf.constant(train_labels,dtype=tf.bool),
                                   batch_size=128,
                                   epochs=20,
                                   verbose=1,
                                   validation_data=(tf.constant(validation_sequences,dtype = tf.bool),
                                                    tf.constant(validation_labels,dtype = tf.bool)),
                                  validation_batch_size=4096,
                                  # NOTE(review): SaveSubModels in this section writes to model/{epoch}/CBLANE_A549.keras.
                                  callbacks=[reduce_lr,SaveSubModels()],
                     )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
from keras.models import load_model
# Load the HUVEC fine-tuned checkpoint and score it on the HUVEC test split.
# NOTE(review): no visible cell writes "/content/huvecglobal.keras" -- presumably a renamed
# per-epoch checkpoint; confirm its origin.
CBLANE = load_model(filepath="/content/huvecglobal.keras")
CBLANE.evaluate(x=test_sequences, y=test_labels, batch_size=4096)



[0.28220033645629883,
 0.8813672065734863,
 0.8923183679580688,
 0.8671590089797974,
 0.9504679441452026,
 0.9817558526992798,
 0.9950023293495178]

## MCF7 Cell Line Dataset

In [11]:
import numpy as np

# Load the data
# Open the MCF7 archive and pull out the train / test / validation splits.
# NOTE(review): the file is called 'mf7.npz' while the section is MCF7 -- confirm the name.
loaded_data = np.load('mf7.npz')
train_sequences, train_labels = loaded_data['train_sequences'], loaded_data['train_labels']
test_sequences, test_labels = loaded_data['test_sequences'], loaded_data['test_labels']
validation_sequences, validation_labels = loaded_data['validation_sequences'], loaded_data['validation_labels']

# Sanity check: report the shape of every loaded array.
for shape_caption, split_array in (
    ("Train Sequences Shape:", train_sequences),
    ("Train Labels Shape:", train_labels),
    ("Test Sequences Shape:", test_sequences),
    ("Test Labels Shape:", test_labels),
    ("Validation Sequences Shape:", validation_sequences),
    ("Validation Labels Shape:", validation_labels),
):
    print(shape_caption, split_array.shape)

Train Sequences Shape: (694552, 101, 4)
Train Labels Shape: (694552,)
Test Sequences Shape: (217057, 101, 4)
Test Labels Shape: (217057,)
Validation Sequences Shape: (173637, 101, 4)
Validation Labels Shape: (173637,)


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.callbacks import Callback
import os
from keras.models import save_model
# Multiply the learning rate by 0.1 once val_loss has not improved for 2 epochs;
# min_lr=1e-20 is effectively no lower bound.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode="min",
    factor=0.1,
    patience=2,
    min_lr=1e-20,
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
# NOTE(review): early_stop is not included in the callbacks list of this section's fit call.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode="min",
    patience=10,
    restore_best_weights=True,
)

class SaveSubModels(Callback):
    """Keras callback that checkpoints the model being trained after every epoch.

    Each epoch is written to ``model/<epoch>/CBLANE_A549.keras`` (``epoch`` is the
    zero-based index Keras passes to ``on_epoch_end``).

    NOTE(review): the file name still says "A549" although this section trains on
    MCF7; it is kept unchanged so existing checkpoint paths keep working.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save the attached model once the given epoch has finished.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric results for the epoch (unused).
        """
        checkpoint_path = f'model/{epoch}/CBLANE_A549.keras'

        # exist_ok avoids the check-then-create race of an explicit os.path.exists test.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        # Save the model this callback is attached to rather than whatever the global
        # name CBLANE happens to point at when the epoch ends.
        save_model(self.model, checkpoint_path)

In [None]:
from keras.metrics import BinaryAccuracy, Precision, Recall, AUC, SensitivityAtSpecificity, SpecificityAtSensitivity
from keras.optimizers import Adam
import tensorflow as tf
from keras.models import load_model

# Fine-tune on MCF7: start from the weights trained on the global dataset.
CBLANE = load_model(f"CBLANE_global_dataset.keras")

# Same loss/metrics as the global training cell, but with a 10x smaller learning rate
# (1e-4 instead of 1e-3).
CBLANE.compile(loss='binary_crossentropy',
                             optimizer=Adam(learning_rate=0.0001),
                             metrics=[BinaryAccuracy(),
                                      Precision(),
                                      Recall(),
                                      AUC(),
                                      SensitivityAtSpecificity(0.5),
                                      SpecificityAtSensitivity(0.5),
                                      ]
                             )
# Sequences and labels are passed as boolean tensors.
# NOTE(review): presumably Keras casts them to float internally -- TODO confirm.
history = CBLANE.fit(tf.constant(train_sequences,dtype=tf.bool),
                                   tf.constant(train_labels,dtype=tf.bool),
                                   batch_size=128,
                                   epochs=20,
                                   verbose=1,
                                   validation_data=(tf.constant(validation_sequences,dtype = tf.bool),
                                                    tf.constant(validation_labels,dtype = tf.bool)),
                                  validation_batch_size=4096,
                                  # NOTE(review): SaveSubModels in this section writes to model/{epoch}/CBLANE_A549.keras.
                                  callbacks=[reduce_lr,
                                             SaveSubModels()])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
import keras
# Load the MCF7 fine-tuned checkpoint and score it on the MCF7 test split.
# NOTE(review): no visible cell writes "/content/mcf7global.keras" -- presumably a renamed
# per-epoch checkpoint; confirm its origin.
CBLANE = keras.models.load_model(filepath="/content/mcf7global.keras")
CBLANE.evaluate(x=test_sequences, y=test_labels, batch_size=4096)



[0.28302115201950073,
 0.8805567026138306,
 0.8950002193450928,
 0.8629516363143921,
 0.9507551789283752,
 0.9825828671455383,
 0.9957138895988464]

## H1-hESC Cell Line Dataset

In [None]:
import numpy as np

# Load the data
# Open the H1-hESC archive and pull out the train / test / validation splits.
loaded_data = np.load('H1hesc.npz')
train_sequences, train_labels = loaded_data['train_sequences'], loaded_data['train_labels']
test_sequences, test_labels = loaded_data['test_sequences'], loaded_data['test_labels']
validation_sequences, validation_labels = loaded_data['validation_sequences'], loaded_data['validation_labels']

# Sanity check: report the shape of every loaded array.
for shape_caption, split_array in (
    ("Train Sequences Shape:", train_sequences),
    ("Train Labels Shape:", train_labels),
    ("Test Sequences Shape:", test_sequences),
    ("Test Labels Shape:", test_labels),
    ("Validation Sequences Shape:", validation_sequences),
    ("Validation Labels Shape:", validation_labels),
):
    print(shape_caption, split_array.shape)

Train Sequences Shape: (972688, 101, 4)
Train Labels Shape: (972688,)
Test Sequences Shape: (303996, 101, 4)
Test Labels Shape: (303996,)
Validation Sequences Shape: (243172, 101, 4)
Validation Labels Shape: (243172,)


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.callbacks import Callback
import os
from keras.models import save_model
# Multiply the learning rate by 0.1 once val_loss has not improved for 2 epochs;
# min_lr=1e-20 is effectively no lower bound.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode="min",
    factor=0.1,
    patience=2,
    min_lr=1e-20,
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
# NOTE(review): early_stop is not included in the callbacks list of this section's fit call.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode="min",
    patience=10,
    restore_best_weights=True,
)

class SaveSubModels(Callback):
    """Keras callback that checkpoints the model being trained after every epoch.

    Each epoch is written to ``model/<epoch>/CBLANE_A549.keras`` (``epoch`` is the
    zero-based index Keras passes to ``on_epoch_end``).

    NOTE(review): the file name still says "A549" although this section trains on
    H1-hESC; it is kept unchanged because the evaluation cell of this section loads
    ``/content/model/5/CBLANE_A549.keras``.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save the attached model once the given epoch has finished.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric results for the epoch (unused).
        """
        checkpoint_path = f'model/{epoch}/CBLANE_A549.keras'

        # exist_ok avoids the check-then-create race of an explicit os.path.exists test.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        # Save the model this callback is attached to rather than whatever the global
        # name CBLANE happens to point at when the epoch ends.
        save_model(self.model, checkpoint_path)

In [None]:
from keras.metrics import BinaryAccuracy, Precision, Recall, AUC, SensitivityAtSpecificity, SpecificityAtSensitivity
from keras.optimizers import Adam
import tensorflow as tf
from keras.models import load_model
# Fine-tune on H1-hESC: start from the weights trained on the global dataset.
CBLANE = load_model("CBLANE_global_dataset.keras")

# Same loss/metrics as the global training cell, but with a 10x smaller learning rate
# (1e-4 instead of 1e-3).
CBLANE.compile(loss='binary_crossentropy',
                             optimizer=Adam(learning_rate=0.0001),
                             metrics=[BinaryAccuracy(),
                                      Precision(),
                                      Recall(),
                                      AUC(),
                                      SensitivityAtSpecificity(0.5),
                                      SpecificityAtSensitivity(0.5),
                                      ]
                             )
# Sequences and labels are passed as boolean tensors.
# NOTE(review): presumably Keras casts them to float internally -- TODO confirm.
history = CBLANE.fit(tf.constant(train_sequences,dtype=tf.bool),
                                   tf.constant(train_labels,dtype=tf.bool),
                                   batch_size=128,
                                   epochs=20,
                                   verbose=1,
                                   validation_data=(tf.constant(validation_sequences,dtype = tf.bool),
                                                    tf.constant(validation_labels,dtype = tf.bool)),
                                  validation_batch_size=4096,
                                  # NOTE(review): SaveSubModels in this section writes to model/{epoch}/CBLANE_A549.keras.
                                  callbacks=[reduce_lr,
                                             SaveSubModels()])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
from keras.models import load_model
# Load the checkpoint written by SaveSubModels for epoch index 5 of the H1-hESC run
# (the file name says "A549" because the callback hard-codes it) and score it on the
# H1-hESC test split.
CBLANE = load_model(filepath=f"/content/model/5/CBLANE_A549.keras")
CBLANE.evaluate(x=test_sequences, y=test_labels, batch_size=4096)



[0.30429914593696594,
 0.868942379951477,
 0.8808803558349609,
 0.8535835146903992,
 0.9422830939292908,
 0.977332353591919,
 0.9936051368713379]

# Attention

In [None]:
import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import load_model
# Summary with nested sub-models expanded, used to look up the layer names queried below.
CBLANE.summary(
    expand_nested=True,
)

## Attention Result

In [None]:
# Heatmaps of the encoded sequence before and after attention for test sample 0.
# NOTE(review): `CEBLANE` is never assigned in the visible notebook; the architecture cell
# binds the sub-model named 'CEBLANE' to the variable `Encoder`. Confirm where it comes from.
# NOTE(review): `test_features` is the global-dataset test set, while CBLANE was last
# reloaded from a cell-line checkpoint -- verify this is the intended model/data pairing.
Convolutional_Block = CEBLANE.get_layer('Convolutional_Block')
Convolutional_Block_output = Convolutional_Block.predict(test_features[0:1]) #False Labelled Sequence
# A single sample's encoder output is viewed as a 25 x 64 grid.
sns.heatmap(Convolutional_Block_output.reshape((25,64)), cmap='Blues')
plt.title('Heatmap of Encoded Sequence Before Attention')
plt.show()

# NOTE(review): 'model_213' looks like an auto-generated Keras layer name and will
# likely differ between sessions/checkpoints -- verify against the summary above.
BiLSTM_Attention_block = CEBLANE.get_layer('model_213')
# layers[-4] is presumably the MultiHeadAttention layer (whose outputs include the
# attention output and the scores) -- TODO confirm against the summary.
attention_model = Model(inputs=BiLSTM_Attention_block.input, outputs=BiLSTM_Attention_block.layers[-4].output)
attention_scores = attention_model.predict(Convolutional_Block_output)
# The first returned output is plotted as the post-attention 25 x 64 encoding.
sns.heatmap(attention_scores[0].reshape((25,64)), cmap='Blues')
plt.title('Heatmap of Encoded Sequence After Attention')
plt.show()

In [None]:
# Heatmaps of the encoded sequence before and after attention for test sample 2.
# NOTE(review): `CEBLANE` is never assigned in the visible notebook; the architecture cell
# binds the sub-model named 'CEBLANE' to the variable `Encoder`. Confirm where it comes from.
Convolutional_Block = CEBLANE.get_layer('Convolutional_Block')
Convolutional_Block_output = Convolutional_Block.predict(test_features[2:3]) #True Labelled Sequence
# A single sample's encoder output is viewed as a 25 x 64 grid.
sns.heatmap(Convolutional_Block_output.reshape((25,64)), cmap='Blues')
plt.title('Heatmap of Encoded Sequence Before Attention')
plt.show()

# NOTE(review): 'model_213' looks like an auto-generated Keras layer name and will
# likely differ between sessions/checkpoints -- verify against the model summary.
BiLSTM_Attention_block = CEBLANE.get_layer('model_213')
# layers[-4] is presumably the MultiHeadAttention layer (whose outputs include the
# attention output and the scores) -- TODO confirm against the summary.
attention_model = Model(inputs=BiLSTM_Attention_block.input, outputs=BiLSTM_Attention_block.layers[-4].output)
attention_scores = attention_model.predict(Convolutional_Block_output)
# The first returned output is plotted as the post-attention 25 x 64 encoding.
sns.heatmap(attention_scores[0].reshape((25,64)), cmap='Blues')
plt.title('Heatmap of Encoded Sequence After Attention')
plt.show()

## Attention Head Scores

In [None]:
# One 25 x 25 score matrix per attention head (8 heads).
# NOTE(review): `att` is not defined in any visible cell -- presumably the output of an
# attention sub-model's predict call; confirm.
attention_scores = att[1].reshape((8,25,25))
for head_number, head_scores in enumerate(attention_scores, start=1):
  sns.heatmap(head_scores, cmap='Blues')
  plt.title(f'Heatmap of Attention Scores of Attention Head {head_number}')
  plt.show()

## Attention Output

In [None]:
# NOTE(review): when the notebook is run top to bottom, test_labels holds the last
# cell-line split loaded, while test_features/test_prob come from the global dataset.
print(test_labels[:10])

fig, ax = plt.subplots(figsize=(8, 6))
# Encode test samples 0 and 2 -- the same samples the earlier cells mark as the
# false- and true-labelled sequences, and whose scores are used as row labels below.
# (The previous slice [1:3] encoded samples 1 and 2, so the first row was mislabelled.)
# NOTE(review): `CEBLANE` is not assigned in any visible cell; confirm where it comes from.
Encoder_output = CEBLANE.predict(test_features[[0, 2]]) #False and True Labeled Sequence

false_score = test_prob[0].astype(float)
true_score = test_prob[2].astype(float)

# One heatmap row per sample, labelled with its predicted probability.
sns.heatmap(Encoder_output, cmap='Blues', yticklabels=[f'False {false_score}',f'True {true_score}'], ax=ax, fmt=".2f", linewidths=.5, annot_kws={"size": 10})

plt.title('Heatmap of Encoded Sequence')
plt.show()