In [2]:
import os
import numpy as np
import tensorflow as tf
from time import time
from sklearn import metrics
from tensorflow.keras import layers, Model
from tensorflow.keras.utils import Sequence


# =========================
#  GPU MEMORY CONFIGURATION
# =========================
# Query the visible GPUs once; the list is empty on CPU-only machines.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU detected, running on CPU.")
else:
    try:
        # Request on-demand memory growth for every GPU that was found.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Best effort: report the failure and carry on with default settings.
        print("Could not set GPU memory growth:", e)
    else:
        print("Enabled memory growth on GPUs")



# =========================
#  PARAMETERS
# =========================
# Model / training hyper-parameters.
NUM_DEPENDENT = 7
MAXSEQ        = 2 * NUM_DEPENDENT + 1  # 15
NUM_FEATURE   = 1024
NUM_FILTER    = 128
NUM_HIDDEN    = 1000
BATCH_SIZE    = 32
WINDOW_SIZES  = [4, 6, 8, 10, 12]
NUM_CLASSES   = 2
EPOCHS        = 20

# Paths (alternative Colab location kept for reference):
# DATA_DIR     = '/content/drive/MyDrive/s1116049'
DATA_DIR     = 'dataset'
ROC_SAVE_DIR = os.path.join(DATA_DIR, 'Roc')
# Create the ROC output folder up front; a pre-existing folder is fine.
os.makedirs(ROC_SAVE_DIR, exist_ok=True)

# =========================
#  DATA LOADING
# =========================
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# these files must come from a trusted source. If the arrays are plain numeric
# data, allow_pickle=False would be the safer setting — confirm.
x_train, y_train, x_test, y_test = (
    np.load(os.path.join(DATA_DIR, fname), allow_pickle=True)
    for fname in (
        'TR573_rag_common_data.npy',
        'TR573_label.npy',
        'TE181_rag_data.npy',
        'TE181_label.npy',
    )
)

print(f"Training set: X={x_train.shape}, y={y_train.shape}")
print(f"   Testing set: X={x_test.shape},  y={y_test.shape}")

# =========================
#  DATA GENERATOR
# =========================
class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves ``(X, y)`` mini-batches from in-memory arrays.

    Args:
        X: Feature container supporting ``len()`` and indexing with an
            integer index array (e.g. a NumPy array).
        y: Labels aligned with ``X`` along the first axis.
        batch_size: Samples per batch; the final batch may be smaller.
        shuffle: When True, the sample order is permuted at construction
            and again in ``on_epoch_end``. Defaults to False, which keeps
            the fixed order ``0..len(X)-1``.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``X`` and ``y``
            differ in length.
    """

    def __init__(self, X, y, batch_size, shuffle=False):
        # Let the Keras base class set up its own state before ours.
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
        self.bs = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(X))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of batches per epoch (the last may be partial)."""
        return int(np.ceil(len(self.X) / self.bs))

    def __getitem__(self, idx):
        """Return the ``idx``-th batch as an ``(X_batch, y_batch)`` tuple."""
        batch_idxs = self.indexes[idx * self.bs:(idx + 1) * self.bs]
        return self.X[batch_idxs], self.y[batch_idxs]

    def on_epoch_end(self):
        """Re-permute the sample order between epochs when shuffling is on.

        This is the hook the Keras ``Sequence`` API documents for
        per-epoch dataset changes.
        """
        if self.shuffle:
            np.random.shuffle(self.indexes)

# =========================
#  MODEL DEFINITION
# =========================
class DeepScan(Model):
    """Multi-window separable-CNN classifier with a softmax output.

    One branch is built per window size ``w``: a ``SeparableConv2D`` with
    kernel ``(1, w)`` and 'valid' padding, followed by a ``MaxPooling2D``
    whose pool width ``seq_len - w + 1`` equals the convolution's output
    width, so each filter is reduced to a single value. Branch outputs are
    flattened, concatenated, passed through dropout and a hidden dense
    layer, then projected to ``NUM_CLASSES`` softmax probabilities.

    Args:
        input_shape: Per-sample shape; ``input_shape[1]`` is used as the
            sequence length when sizing the pooling layers. Assumes the
            channels-last layout ``(1, seq_len, features)`` — TODO confirm
            if a non-default Keras data_format is configured.
        window_sizes: Convolution window widths, one branch each.
        num_filters: Output filters per convolution branch.
        num_hidden: Units in the hidden dense layer.

    Raises:
        ValueError: If a window size lies outside ``1..seq_len``.
    """

    def __init__(self,
                 input_shape=(1, MAXSEQ, NUM_FEATURE),
                 window_sizes=WINDOW_SIZES,
                 num_filters=NUM_FILTER,
                 num_hidden=NUM_HIDDEN):
        super().__init__()

        # Size the pooling layers from the requested input shape instead of
        # the module-level MAXSEQ, so a non-default input_shape is honoured.
        seq_len = input_shape[1]

        self.convs = []
        self.pools = []
        for w in window_sizes:
            if not 1 <= w <= seq_len:
                raise ValueError(
                    f"window size {w} must be between 1 and the sequence "
                    f"length {seq_len}"
                )
            self.convs.append(
                layers.SeparableConv2D(filters=num_filters,
                                       kernel_size=(1, w),
                                       activation='relu',
                                       padding='valid')
            )
            # Pool across the whole 'valid' conv output (seq_len - w + 1
            # positions), leaving one value per filter.
            self.pools.append(
                layers.MaxPooling2D(
                    pool_size=(1, seq_len - w + 1),
                    strides=(1, seq_len)
                )
            )

        self.flatten = layers.Flatten()
        self.dropout = layers.Dropout(0.7)
        self.dense1  = layers.Dense(num_hidden, activation='relu')
        self.dense2  = layers.Dense(NUM_CLASSES, activation='softmax',
                                    kernel_regularizer=tf.keras.regularizers.l2(1e-3))

    def call(self, x, training=False):
        """Run the forward pass and return per-class softmax probabilities.

        Args:
            x: Input batch fed to every convolution branch.
            training: Forwarded to the dropout layer so it is only active
                during training.
        """
        features = [
            self.flatten(pool(conv(x)))
            for conv, pool in zip(self.convs, self.pools)
        ]
        x = tf.concat(features, axis=1)
        x = self.dropout(x, training=training)
        x = self.dense1(x)
        return self.dense2(x)

# =========================
#  ROC SAVE & EVALUATION
# =========================
def save_roc_npz(fpr, tpr, thresholds, auc, model_name="DeepScan"):
    """Store ROC curve data in a timestamped ``.npz`` file under ROC_SAVE_DIR.

    Args:
        fpr: False-positive rates of the ROC curve.
        tpr: True-positive rates of the ROC curve.
        thresholds: Decision thresholds matching ``fpr``/``tpr``.
        auc: Area under the ROC curve.
        model_name: Prefix of the output file name.

    The file is named ``<model_name>_<unix-seconds>.npz``; its path is
    printed once the archive has been written.
    """
    roc_payload = {
        "fpr": fpr,
        "tpr": tpr,
        "thresholds": thresholds,
        "auc": auc,
    }
    # Whole-second resolution: two saves with the same model_name within the
    # same second resolve to the same path.
    timestamp = int(time())
    filename = os.path.join(ROC_SAVE_DIR, f"{model_name}_{timestamp}.npz")
    np.savez(filename, **roc_payload)
    print(f"Saved ROC data to: {filename}")


def model_test(model, X, y, model_name="DeepScan"):
    """Evaluate ``model`` on ``(X, y)``, save its ROC data and print metrics.

    Column 1 of both ``y`` and the model's predictions is treated as the
    positive class. The decision threshold is the ROC point with the
    largest G-mean ``sqrt(TPR * (1 - FPR))``.

    NOTE(review): the threshold is selected on the same data that is being
    scored, so every threshold-dependent metric reported here (all except
    AUC) is optimistic for that data.

    Args:
        model: Object exposing ``predict(X, batch_size=...)`` that returns
            an array with at least two columns.
        X: Evaluation inputs passed to ``model.predict``.
        y: Two-column label array; ``y[:, 1]`` is the positive indicator.
        model_name: Label used in the printed report and the ROC file name.

    Returns:
        dict with keys TP, FP, TN, FN, Sensitivity, Specificity, Accuracy,
        Precision, F1, MCC and AUC.
    """
    y_true = y[:, 1]
    preds = model.predict(X, batch_size=BATCH_SIZE)
    scores = preds[:, 1]

    fpr, tpr, thresholds = metrics.roc_curve(y_true, scores)
    auc_value = metrics.auc(fpr, tpr)
    save_roc_npz(fpr, tpr, thresholds, auc_value, model_name)

    # Pick the threshold that maximises the geometric mean of TPR and TNR.
    gmeans = np.sqrt(tpr * (1 - fpr))
    ix = np.argmax(gmeans)
    best_thresh, best_g = thresholds[ix], gmeans[ix]
    y_pred = (scores >= best_thresh).astype(int)

    # labels=[0, 1] pins the matrix to 2x2 even when only one class shows up
    # in y_true/y_pred; without it the four-way unpacking below would fail.
    TN, FP, FN, TP = metrics.confusion_matrix(
        y_true, y_pred, labels=[0, 1]
    ).ravel()

    total = TP + TN + FP + FN
    Sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    Spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0
    Acc  = (TP + TN) / total if total > 0 else 0.0
    Prec = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    MCC  = metrics.matthews_corrcoef(y_true, y_pred)
    F1   = metrics.f1_score(y_true, y_pred)

    print(f"\n=== {model_name} Evaluation ===")
    print(f"Best thresh: {best_thresh:.4f} (G-Mean={best_g:.4f}), AUC={auc_value:.4f}")
    print(f"TP={TP}, FP={FP}, TN={TN}, FN={FN}")
    print(f"Sensitivity={Sens:.4f}, Specificity={Spec:.4f}")
    print(f"Accuracy={Acc:.4f}, Precision={Prec:.4f}, F1={F1:.4f}, MCC={MCC:.4f}\n")

    return {
        "TP": TP, "FP": FP, "TN": TN, "FN": FN,
        "Sensitivity": Sens, "Specificity": Spec,
        "Accuracy": Acc, "Precision": Prec,
        "F1": F1, "MCC": MCC, "AUC": auc_value
    }

# =========================
#  MAIN: TRAIN & EVALUATE
# =========================
if __name__ == "__main__":
    import gc

    # Assemble and compile the network first; this does not touch the data.
    model = DeepScan()
    model.build(input_shape=(None, 1, MAXSEQ, NUM_FEATURE))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Feed the training arrays through the batch generator for EPOCHS passes.
    train_gen = DataGenerator(x_train, y_train, BATCH_SIZE)
    model.fit(train_gen, epochs=EPOCHS, shuffle=True)

    # Drop the training data before evaluation to lower peak memory.
    del train_gen, x_train, y_train
    gc.collect()

    # Score the test arrays and show the resulting metric dictionary.
    results = model_test(model, x_test, y_test, model_name="DeepScan_RAG")
    print("Final results:", results)


Enabled memory growth on GPUs
Training set: X=(159883, 1, 15, 1024), y=(159883, 2)
   Testing set: X=(75258, 1, 15, 1024),  y=(75258, 2)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Saved ROC data to: dataset\Roc\DeepScan_RAG_1772262976.npz

=== DeepScan_RAG Evaluation ===
Best thresh: 0.0171 (G-Mean=0.8066), AUC=0.8864
TP=2586, FP=13903, TN=58147, FN=622
Sensitivity=0.8061, Specificity=0.8070
Accuracy=0.8070, Precision=0.1568, F1=0.2626, MCC=0.2995

Final results: {'TP': 2586, 'FP': 13903, 'TN': 58147, 'FN': 622, 'Sensitivity': 0.8061097256857855, 'Specificity': 0.8070367800138792, 'Accuracy': 0.8069972627494751, 'Precision': 0.1568318272787919, 'F1': 0.26257805757221914, 'MCC': 0.2994519327513624, 'AUC': 0.8863966752964917}


In [5]:
import os
import numpy as np
import tensorflow as tf
from time import time
from sklearn import metrics
from tensorflow.keras import layers, Model
from tensorflow.keras.utils import Sequence


# =========================
#  GPU MEMORY CONFIGURATION
# =========================
# Query the visible GPUs once; the list is empty on CPU-only machines.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU detected, running on CPU.")
else:
    try:
        # Request on-demand memory growth for every GPU that was found.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Best effort: report the failure and carry on with default settings.
        print("Could not set GPU memory growth:", e)
    else:
        print("Enabled memory growth on GPUs")



# =========================
#  PARAMETERS
# =========================
# Model / training hyper-parameters.
NUM_DEPENDENT = 7
MAXSEQ        = 2 * NUM_DEPENDENT + 1  # 15
NUM_FEATURE   = 1024
NUM_FILTER    = 128
NUM_HIDDEN    = 1000
BATCH_SIZE    = 32
WINDOW_SIZES  = [4, 6, 8, 10, 12]
NUM_CLASSES   = 2
EPOCHS        = 20

# Paths (alternative Colab location kept for reference):
# DATA_DIR     = '/content/drive/MyDrive/s1116049'
DATA_DIR     = 'dataset'
ROC_SAVE_DIR = os.path.join(DATA_DIR, 'Roc')
# Create the ROC output folder up front; a pre-existing folder is fine.
os.makedirs(ROC_SAVE_DIR, exist_ok=True)

# =========================
#  DATA LOADING
# =========================
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# these files must come from a trusted source. If the arrays are plain numeric
# data, allow_pickle=False would be the safer setting — confirm.
x_train, y_train, x_test, y_test = (
    np.load(os.path.join(DATA_DIR, fname), allow_pickle=True)
    for fname in (
        'TR573_rag_common_data.npy',
        'TR573_label.npy',
        'TE129_rag_data.npy',
        'TE129_label.npy',
    )
)

print(f"Training set: X={x_train.shape}, y={y_train.shape}")
print(f"   Testing set: X={x_test.shape},  y={y_test.shape}")

# =========================
#  DATA GENERATOR
# =========================
class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves ``(X, y)`` mini-batches from in-memory arrays.

    Args:
        X: Feature container supporting ``len()`` and indexing with an
            integer index array (e.g. a NumPy array).
        y: Labels aligned with ``X`` along the first axis.
        batch_size: Samples per batch; the final batch may be smaller.
        shuffle: When True, the sample order is permuted at construction
            and again in ``on_epoch_end``. Defaults to False, which keeps
            the fixed order ``0..len(X)-1``.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``X`` and ``y``
            differ in length.
    """

    def __init__(self, X, y, batch_size, shuffle=False):
        # Let the Keras base class set up its own state before ours.
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
        self.bs = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(X))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of batches per epoch (the last may be partial)."""
        return int(np.ceil(len(self.X) / self.bs))

    def __getitem__(self, idx):
        """Return the ``idx``-th batch as an ``(X_batch, y_batch)`` tuple."""
        batch_idxs = self.indexes[idx * self.bs:(idx + 1) * self.bs]
        return self.X[batch_idxs], self.y[batch_idxs]

    def on_epoch_end(self):
        """Re-permute the sample order between epochs when shuffling is on.

        This is the hook the Keras ``Sequence`` API documents for
        per-epoch dataset changes.
        """
        if self.shuffle:
            np.random.shuffle(self.indexes)

# =========================
#  MODEL DEFINITION
# =========================
class DeepScan(Model):
    """Multi-window separable-CNN classifier with a softmax output.

    One branch is built per window size ``w``: a ``SeparableConv2D`` with
    kernel ``(1, w)`` and 'valid' padding, followed by a ``MaxPooling2D``
    whose pool width ``seq_len - w + 1`` equals the convolution's output
    width, so each filter is reduced to a single value. Branch outputs are
    flattened, concatenated, passed through dropout and a hidden dense
    layer, then projected to ``NUM_CLASSES`` softmax probabilities.

    Args:
        input_shape: Per-sample shape; ``input_shape[1]`` is used as the
            sequence length when sizing the pooling layers. Assumes the
            channels-last layout ``(1, seq_len, features)`` — TODO confirm
            if a non-default Keras data_format is configured.
        window_sizes: Convolution window widths, one branch each.
        num_filters: Output filters per convolution branch.
        num_hidden: Units in the hidden dense layer.

    Raises:
        ValueError: If a window size lies outside ``1..seq_len``.
    """

    def __init__(self,
                 input_shape=(1, MAXSEQ, NUM_FEATURE),
                 window_sizes=WINDOW_SIZES,
                 num_filters=NUM_FILTER,
                 num_hidden=NUM_HIDDEN):
        super().__init__()

        # Size the pooling layers from the requested input shape instead of
        # the module-level MAXSEQ, so a non-default input_shape is honoured.
        seq_len = input_shape[1]

        self.convs = []
        self.pools = []
        for w in window_sizes:
            if not 1 <= w <= seq_len:
                raise ValueError(
                    f"window size {w} must be between 1 and the sequence "
                    f"length {seq_len}"
                )
            self.convs.append(
                layers.SeparableConv2D(filters=num_filters,
                                       kernel_size=(1, w),
                                       activation='relu',
                                       padding='valid')
            )
            # Pool across the whole 'valid' conv output (seq_len - w + 1
            # positions), leaving one value per filter.
            self.pools.append(
                layers.MaxPooling2D(
                    pool_size=(1, seq_len - w + 1),
                    strides=(1, seq_len)
                )
            )

        self.flatten = layers.Flatten()
        self.dropout = layers.Dropout(0.7)
        self.dense1  = layers.Dense(num_hidden, activation='relu')
        self.dense2  = layers.Dense(NUM_CLASSES, activation='softmax',
                                    kernel_regularizer=tf.keras.regularizers.l2(1e-3))

    def call(self, x, training=False):
        """Run the forward pass and return per-class softmax probabilities.

        Args:
            x: Input batch fed to every convolution branch.
            training: Forwarded to the dropout layer so it is only active
                during training.
        """
        features = [
            self.flatten(pool(conv(x)))
            for conv, pool in zip(self.convs, self.pools)
        ]
        x = tf.concat(features, axis=1)
        x = self.dropout(x, training=training)
        x = self.dense1(x)
        return self.dense2(x)

# =========================
#  ROC SAVE & EVALUATION
# =========================
def save_roc_npz(fpr, tpr, thresholds, auc, model_name="DeepScan"):
    """Store ROC curve data in a timestamped ``.npz`` file under ROC_SAVE_DIR.

    Args:
        fpr: False-positive rates of the ROC curve.
        tpr: True-positive rates of the ROC curve.
        thresholds: Decision thresholds matching ``fpr``/``tpr``.
        auc: Area under the ROC curve.
        model_name: Prefix of the output file name.

    The file is named ``<model_name>_<unix-seconds>.npz``; its path is
    printed once the archive has been written.
    """
    roc_payload = {
        "fpr": fpr,
        "tpr": tpr,
        "thresholds": thresholds,
        "auc": auc,
    }
    # Whole-second resolution: two saves with the same model_name within the
    # same second resolve to the same path.
    timestamp = int(time())
    filename = os.path.join(ROC_SAVE_DIR, f"{model_name}_{timestamp}.npz")
    np.savez(filename, **roc_payload)
    print(f"Saved ROC data to: {filename}")


def model_test(model, X, y, model_name="DeepScan"):
    """Evaluate ``model`` on ``(X, y)``, save its ROC data and print metrics.

    Column 1 of both ``y`` and the model's predictions is treated as the
    positive class. The decision threshold is the ROC point with the
    largest G-mean ``sqrt(TPR * (1 - FPR))``.

    NOTE(review): the threshold is selected on the same data that is being
    scored, so every threshold-dependent metric reported here (all except
    AUC) is optimistic for that data.

    Args:
        model: Object exposing ``predict(X, batch_size=...)`` that returns
            an array with at least two columns.
        X: Evaluation inputs passed to ``model.predict``.
        y: Two-column label array; ``y[:, 1]`` is the positive indicator.
        model_name: Label used in the printed report and the ROC file name.

    Returns:
        dict with keys TP, FP, TN, FN, Sensitivity, Specificity, Accuracy,
        Precision, F1, MCC and AUC.
    """
    y_true = y[:, 1]
    preds = model.predict(X, batch_size=BATCH_SIZE)
    scores = preds[:, 1]

    fpr, tpr, thresholds = metrics.roc_curve(y_true, scores)
    auc_value = metrics.auc(fpr, tpr)
    save_roc_npz(fpr, tpr, thresholds, auc_value, model_name)

    # Pick the threshold that maximises the geometric mean of TPR and TNR.
    gmeans = np.sqrt(tpr * (1 - fpr))
    ix = np.argmax(gmeans)
    best_thresh, best_g = thresholds[ix], gmeans[ix]
    y_pred = (scores >= best_thresh).astype(int)

    # labels=[0, 1] pins the matrix to 2x2 even when only one class shows up
    # in y_true/y_pred; without it the four-way unpacking below would fail.
    TN, FP, FN, TP = metrics.confusion_matrix(
        y_true, y_pred, labels=[0, 1]
    ).ravel()

    total = TP + TN + FP + FN
    Sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    Spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0
    Acc  = (TP + TN) / total if total > 0 else 0.0
    Prec = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    MCC  = metrics.matthews_corrcoef(y_true, y_pred)
    F1   = metrics.f1_score(y_true, y_pred)

    print(f"\n=== {model_name} Evaluation ===")
    print(f"Best thresh: {best_thresh:.4f} (G-Mean={best_g:.4f}), AUC={auc_value:.4f}")
    print(f"TP={TP}, FP={FP}, TN={TN}, FN={FN}")
    print(f"Sensitivity={Sens:.4f}, Specificity={Spec:.4f}")
    print(f"Accuracy={Acc:.4f}, Precision={Prec:.4f}, F1={F1:.4f}, MCC={MCC:.4f}\n")

    return {
        "TP": TP, "FP": FP, "TN": TN, "FN": FN,
        "Sensitivity": Sens, "Specificity": Spec,
        "Accuracy": Acc, "Precision": Prec,
        "F1": F1, "MCC": MCC, "AUC": auc_value
    }

# =========================
#  MAIN: TRAIN & EVALUATE
# =========================
if __name__ == "__main__":
    import gc

    # Assemble and compile the network first; this does not touch the data.
    model = DeepScan()
    model.build(input_shape=(None, 1, MAXSEQ, NUM_FEATURE))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Feed the training arrays through the batch generator for EPOCHS passes.
    train_gen = DataGenerator(x_train, y_train, BATCH_SIZE)
    model.fit(train_gen, epochs=EPOCHS, shuffle=True)

    # Drop the training data before evaluation to lower peak memory.
    del train_gen, x_train, y_train
    gc.collect()

    # Score the test arrays and show the resulting metric dictionary.
    results = model_test(model, x_test, y_test, model_name="DeepScan_RAG")
    print("Final results:", results)


Enabled memory growth on GPUs
Training set: X=(159883, 1, 15, 1024), y=(159883, 2)
   Testing set: X=(37515, 1, 15, 1024),  y=(37515, 2)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Saved ROC data to: dataset\Roc\DeepScan_RAG_1772265238.npz

=== DeepScan_RAG Evaluation ===
Best thresh: 0.0151 (G-Mean=0.8383), AUC=0.9131
TP=1882, FP=5769, TN=29506, FN=358
Sensitivity=0.8402, Specificity=0.8365
Accuracy=0.8367, Precision=0.2460, F1=0.3805, MCC=0.3979

Final results: {'TP': 1882, 'FP': 5769, 'TN': 29506, 'FN': 358, 'Sensitivity': 0.8401785714285714, 'Specificity': 0.8364564138908576, 'Accuracy': 0.8366786618685859, 'Precision': 0.24598091752712065, 'F1': 0.3805479729046608, 'MCC': 0.39790452692835954, 'AUC': 0.9130950693530425}


In [2]:
import os
import numpy as np
import tensorflow as tf
from time import time
from sklearn import metrics
from tensorflow.keras import layers, Model
from tensorflow.keras.utils import Sequence


# =========================
#  GPU MEMORY CONFIGURATION
# =========================
# Query the visible GPUs once; the list is empty on CPU-only machines.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU detected, running on CPU.")
else:
    try:
        # Request on-demand memory growth for every GPU that was found.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Best effort: report the failure and carry on with default settings.
        print("Could not set GPU memory growth:", e)
    else:
        print("Enabled memory growth on GPUs")



# =========================
#  PARAMETERS
# =========================
# Model / training hyper-parameters.
NUM_DEPENDENT = 7
MAXSEQ        = 2 * NUM_DEPENDENT + 1  # 15
NUM_FEATURE   = 1024
NUM_FILTER    = 128
NUM_HIDDEN    = 1000
BATCH_SIZE    = 32
WINDOW_SIZES  = [4, 6, 8, 10, 12]
NUM_CLASSES   = 2
EPOCHS        = 20

# Paths (alternative Colab location kept for reference):
# DATA_DIR     = '/content/drive/MyDrive/s1116049'
DATA_DIR     = 'dataset'
ROC_SAVE_DIR = os.path.join(DATA_DIR, 'Roc')
# Create the ROC output folder up front; a pre-existing folder is fine.
os.makedirs(ROC_SAVE_DIR, exist_ok=True)

# =========================
#  DATA LOADING
# =========================
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# these files must come from a trusted source. If the arrays are plain numeric
# data, allow_pickle=False would be the safer setting — confirm.
x_train, y_train, x_test, y_test = (
    np.load(os.path.join(DATA_DIR, fname), allow_pickle=True)
    for fname in (
        'TR646_rag_common_data.npy',
        'TR646_label.npy',
        'TE46_rag_data.npy',
        'TE46_label.npy',
    )
)

print(f"Training set: X={x_train.shape}, y={y_train.shape}")
print(f"   Testing set: X={x_test.shape},  y={y_test.shape}")

# =========================
#  DATA GENERATOR
# =========================
class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves ``(X, y)`` mini-batches from in-memory arrays.

    Args:
        X: Feature container supporting ``len()`` and indexing with an
            integer index array (e.g. a NumPy array).
        y: Labels aligned with ``X`` along the first axis.
        batch_size: Samples per batch; the final batch may be smaller.
        shuffle: When True, the sample order is permuted at construction
            and again in ``on_epoch_end``. Defaults to False, which keeps
            the fixed order ``0..len(X)-1``.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``X`` and ``y``
            differ in length.
    """

    def __init__(self, X, y, batch_size, shuffle=False):
        # Let the Keras base class set up its own state before ours.
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
        self.bs = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(X))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of batches per epoch (the last may be partial)."""
        return int(np.ceil(len(self.X) / self.bs))

    def __getitem__(self, idx):
        """Return the ``idx``-th batch as an ``(X_batch, y_batch)`` tuple."""
        batch_idxs = self.indexes[idx * self.bs:(idx + 1) * self.bs]
        return self.X[batch_idxs], self.y[batch_idxs]

    def on_epoch_end(self):
        """Re-permute the sample order between epochs when shuffling is on.

        This is the hook the Keras ``Sequence`` API documents for
        per-epoch dataset changes.
        """
        if self.shuffle:
            np.random.shuffle(self.indexes)

# =========================
#  MODEL DEFINITION
# =========================
class DeepScan(Model):
    """Multi-window separable-CNN classifier with a softmax output.

    One branch is built per window size ``w``: a ``SeparableConv2D`` with
    kernel ``(1, w)`` and 'valid' padding, followed by a ``MaxPooling2D``
    whose pool width ``seq_len - w + 1`` equals the convolution's output
    width, so each filter is reduced to a single value. Branch outputs are
    flattened, concatenated, passed through dropout and a hidden dense
    layer, then projected to ``NUM_CLASSES`` softmax probabilities.

    Args:
        input_shape: Per-sample shape; ``input_shape[1]`` is used as the
            sequence length when sizing the pooling layers. Assumes the
            channels-last layout ``(1, seq_len, features)`` — TODO confirm
            if a non-default Keras data_format is configured.
        window_sizes: Convolution window widths, one branch each.
        num_filters: Output filters per convolution branch.
        num_hidden: Units in the hidden dense layer.

    Raises:
        ValueError: If a window size lies outside ``1..seq_len``.
    """

    def __init__(self,
                 input_shape=(1, MAXSEQ, NUM_FEATURE),
                 window_sizes=WINDOW_SIZES,
                 num_filters=NUM_FILTER,
                 num_hidden=NUM_HIDDEN):
        super().__init__()

        # Size the pooling layers from the requested input shape instead of
        # the module-level MAXSEQ, so a non-default input_shape is honoured.
        seq_len = input_shape[1]

        self.convs = []
        self.pools = []
        for w in window_sizes:
            if not 1 <= w <= seq_len:
                raise ValueError(
                    f"window size {w} must be between 1 and the sequence "
                    f"length {seq_len}"
                )
            self.convs.append(
                layers.SeparableConv2D(filters=num_filters,
                                       kernel_size=(1, w),
                                       activation='relu',
                                       padding='valid')
            )
            # Pool across the whole 'valid' conv output (seq_len - w + 1
            # positions), leaving one value per filter.
            self.pools.append(
                layers.MaxPooling2D(
                    pool_size=(1, seq_len - w + 1),
                    strides=(1, seq_len)
                )
            )

        self.flatten = layers.Flatten()
        self.dropout = layers.Dropout(0.7)
        self.dense1  = layers.Dense(num_hidden, activation='relu')
        self.dense2  = layers.Dense(NUM_CLASSES, activation='softmax',
                                    kernel_regularizer=tf.keras.regularizers.l2(1e-3))

    def call(self, x, training=False):
        """Run the forward pass and return per-class softmax probabilities.

        Args:
            x: Input batch fed to every convolution branch.
            training: Forwarded to the dropout layer so it is only active
                during training.
        """
        features = [
            self.flatten(pool(conv(x)))
            for conv, pool in zip(self.convs, self.pools)
        ]
        x = tf.concat(features, axis=1)
        x = self.dropout(x, training=training)
        x = self.dense1(x)
        return self.dense2(x)

# =========================
#  ROC SAVE & EVALUATION
# =========================
def save_roc_npz(fpr, tpr, thresholds, auc, model_name="DeepScan"):
    """Store ROC curve data in a timestamped ``.npz`` file under ROC_SAVE_DIR.

    Args:
        fpr: False-positive rates of the ROC curve.
        tpr: True-positive rates of the ROC curve.
        thresholds: Decision thresholds matching ``fpr``/``tpr``.
        auc: Area under the ROC curve.
        model_name: Prefix of the output file name.

    The file is named ``<model_name>_<unix-seconds>.npz``; its path is
    printed once the archive has been written.
    """
    roc_payload = {
        "fpr": fpr,
        "tpr": tpr,
        "thresholds": thresholds,
        "auc": auc,
    }
    # Whole-second resolution: two saves with the same model_name within the
    # same second resolve to the same path.
    timestamp = int(time())
    filename = os.path.join(ROC_SAVE_DIR, f"{model_name}_{timestamp}.npz")
    np.savez(filename, **roc_payload)
    print(f"Saved ROC data to: {filename}")


def model_test(model, X, y, model_name="DeepScan"):
    """Evaluate ``model`` on ``(X, y)``, save its ROC data and print metrics.

    Column 1 of both ``y`` and the model's predictions is treated as the
    positive class. The decision threshold is the ROC point with the
    largest G-mean ``sqrt(TPR * (1 - FPR))``.

    NOTE(review): the threshold is selected on the same data that is being
    scored, so every threshold-dependent metric reported here (all except
    AUC) is optimistic for that data.

    Args:
        model: Object exposing ``predict(X, batch_size=...)`` that returns
            an array with at least two columns.
        X: Evaluation inputs passed to ``model.predict``.
        y: Two-column label array; ``y[:, 1]`` is the positive indicator.
        model_name: Label used in the printed report and the ROC file name.

    Returns:
        dict with keys TP, FP, TN, FN, Sensitivity, Specificity, Accuracy,
        Precision, F1, MCC and AUC.
    """
    y_true = y[:, 1]
    preds = model.predict(X, batch_size=BATCH_SIZE)
    scores = preds[:, 1]

    fpr, tpr, thresholds = metrics.roc_curve(y_true, scores)
    auc_value = metrics.auc(fpr, tpr)
    save_roc_npz(fpr, tpr, thresholds, auc_value, model_name)

    # Pick the threshold that maximises the geometric mean of TPR and TNR.
    gmeans = np.sqrt(tpr * (1 - fpr))
    ix = np.argmax(gmeans)
    best_thresh, best_g = thresholds[ix], gmeans[ix]
    y_pred = (scores >= best_thresh).astype(int)

    # labels=[0, 1] pins the matrix to 2x2 even when only one class shows up
    # in y_true/y_pred; without it the four-way unpacking below would fail.
    TN, FP, FN, TP = metrics.confusion_matrix(
        y_true, y_pred, labels=[0, 1]
    ).ravel()

    total = TP + TN + FP + FN
    Sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    Spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0
    Acc  = (TP + TN) / total if total > 0 else 0.0
    Prec = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    MCC  = metrics.matthews_corrcoef(y_true, y_pred)
    F1   = metrics.f1_score(y_true, y_pred)

    print(f"\n=== {model_name} Evaluation ===")
    print(f"Best thresh: {best_thresh:.4f} (G-Mean={best_g:.4f}), AUC={auc_value:.4f}")
    print(f"TP={TP}, FP={FP}, TN={TN}, FN={FN}")
    print(f"Sensitivity={Sens:.4f}, Specificity={Spec:.4f}")
    print(f"Accuracy={Acc:.4f}, Precision={Prec:.4f}, F1={F1:.4f}, MCC={MCC:.4f}\n")

    return {
        "TP": TP, "FP": FP, "TN": TN, "FN": FN,
        "Sensitivity": Sens, "Specificity": Spec,
        "Accuracy": Acc, "Precision": Prec,
        "F1": F1, "MCC": MCC, "AUC": auc_value
    }

# =========================
#  MAIN: TRAIN & EVALUATE
# =========================
if __name__ == "__main__":
    import gc

    # Assemble and compile the network first; this does not touch the data.
    model = DeepScan()
    model.build(input_shape=(None, 1, MAXSEQ, NUM_FEATURE))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Feed the training arrays through the batch generator for EPOCHS passes.
    train_gen = DataGenerator(x_train, y_train, BATCH_SIZE)
    model.fit(train_gen, epochs=EPOCHS, shuffle=True)

    # Drop the training data before evaluation to lower peak memory.
    del train_gen, x_train, y_train
    gc.collect()

    # Score the test arrays and show the resulting metric dictionary.
    results = model_test(model, x_test, y_test, model_name="DeepScan_RAG")
    print("Final results:", results)


Enabled memory growth on GPUs
Training set: X=(314139, 1, 15, 1024), y=(314139, 2)
   Testing set: X=(10876, 1, 15, 1024),  y=(10876, 2)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Saved ROC data to: dataset\Roc\DeepScan_RAG_1772255552.npz

=== DeepScan_RAG Evaluation ===
Best thresh: 0.0446 (G-Mean=0.8093), AUC=0.8833
TP=778, FP=1859, TN=8052, FN=187
Sensitivity=0.8062, Specificity=0.8124
Accuracy=0.8119, Precision=0.2950, F1=0.4320, MCC=0.4105

Final results: {'TP': 778, 'FP': 1859, 'TN': 8052, 'FN': 187, 'Sensitivity': 0.8062176165803109, 'Specificity': 0.8124306326304107, 'Accuracy': 0.8118793674144906, 'Precision': 0.2950322335987865, 'F1': 0.4319822320932815, 'MCC': 0.41046285975768865, 'AUC': 0.8832644212245461}
