In [1]:
import pickle
import random
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import collections
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten, Input, LeakyReLU, GlobalAveragePooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import BatchNormalization
from sklearn.metrics import precision_recall_curve, roc_curve, auc, average_precision_score
from sklearn.model_selection import StratifiedKFold

In [2]:
with open('TCGA_new_pre_second.pckl', 'rb') as file_second:
    (
        dropped_genes_final,
        dropped_gene_name,
        dropped_Ens_id,
        samp_id_new,
        diag_name_new,
        project_ids_new
    ) = pd.compat.pickle_compat.load(file_second)

with open('TCGA_new_pre_first.pckl', 'rb') as file_first:
    _, _, _, _, remain_cancer_ids_ind, remain_normal_ids_ind = pickle.load(file_first)

# 2. Encode Labels
# Integer Encoding
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(project_ids_new)
# One-Hot Encoding
onehot_encoder = OneHotEncoder(sparse_output=False)
integer_encoded_reshaped = integer_encoded.reshape(-1, 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded_reshaped)
# 3. Prepare Cancer Samples
X_cancer_samples = dropped_genes_final.iloc[:, remain_cancer_ids_ind].T.values
onehot_encoded_cancer_samples = onehot_encoded[remain_cancer_ids_ind]
# Add Nine Zeros to Each Sample
X_cancer_samples_mat = np.concatenate(
    (X_cancer_samples, np.zeros((X_cancer_samples.shape[0], 9))),
    axis=1
)
# Ensure the total number of features is divisible by 71 and 100
assert X_cancer_samples_mat.shape[1] % (71 * 100) == 0, "Reshape not possible with current dimensions."
# Reshape to (num_samples, 71, 100)
X_cancer_samples_mat = X_cancer_samples_mat.reshape(-1, 71, 100)

# 4. Split Data into Training and Testing Sets
# Use integer labels for stratification

y_labels = integer_encoded[remain_cancer_ids_ind]
x_train, x_test, y_train, y_test = train_test_split(
    X_cancer_samples_mat,
    onehot_encoded_cancer_samples,
    stratify=y_labels,  # Corrected stratify parameter
    test_size=0.25,
    random_state=42
)


# 5. Define Parameters
img_rows, img_cols = x_test.shape[1], x_test.shape[2]
num_classes = y_train.shape[1]
batch_size = 128
epochs = 10
seed = 7
np.random.seed(seed)

# 6. Reshape Data for CNN
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1).astype('float32')
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1).astype('float32')

# 7. Build the CNN Model
def convolutional_model():
    model = Sequential()
    # First Convolutional Layer
    model.add(
        Conv2D(
            filters=32,
            kernel_size=(1, 71),
            strides=(1, 1),
            input_shape=(img_rows, img_cols, 1)  # Ensure input_shape is correctly set
        )
    )
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(1, 2)))

    # Flatten and Dense Layers for Classification
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model
# 8. Compile the Model
model = convolutional_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy']
)

# Display Model Architecture
model.summary()
# 9. Set Up Early Stopping
callbacks = [EarlyStopping(monitor='categorical_accuracy', patience=3, verbose=0)]
# 10. Train the Model
history = model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs,verbose=1,callbacks=callbacks,validation_data=(x_test, y_test))

# 11. Evaluate the Model
scores = model.evaluate(x_test, y_test, verbose=0)
print(f"Categorical Accuracy: {scores[1] * 100:.2f}%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 55ms/step - categorical_accuracy: 0.3586 - loss: 3.9936 - val_categorical_accuracy: 0.8836 - val_loss: 0.3923
Epoch 2/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 55ms/step - categorical_accuracy: 0.9088 - loss: 0.3035 - val_categorical_accuracy: 0.9133 - val_loss: 0.2549
Epoch 3/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 52ms/step - categorical_accuracy: 0.9376 - loss: 0.2027 - val_categorical_accuracy: 0.9168 - val_loss: 0.2393
Epoch 4/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 52ms/step - categorical_accuracy: 0.9538 - loss: 0.1532 - val_categorical_accuracy: 0.9455 - val_loss: 0.1828
Epoch 5/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - categorical_accuracy: 0.9667 - loss: 0.1048 - val_categorical_accuracy: 0.9420 - val_loss: 0.1700
Epoch 6/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 

In [3]:
epochs = 10
def small_convolutional_model():
    model = Sequential()
    # First Convolutional Layer
    model.add(
        Conv2D(
            filters=1,
            kernel_size= (1, 17),
            strides=(1, 1),
            input_shape=(img_rows, img_cols, 1)  # Ensure input_shape is correctly set
        )
    )
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(1, 2)))

    # Flatten and Dense Layers for Classification
    model.add(Flatten())
    model.add(Dense(33, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model
# 8. Compile the Model
small_model = small_convolutional_model()
small_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy']
)

# (Optional) Explicitly build the model
# model.build(input_shape=(None, img_rows, img_cols, 1))

# Display Model Architecture
small_model.summary()
# 9. Set Up Early Stopping
callbacks = [EarlyStopping(monitor='categorical_accuracy', patience=3, verbose=0)]
# 10. Train the Model
small_history = small_model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs,verbose=1,callbacks=callbacks,validation_data=(x_test, y_test))

# 11. Evaluate the Model
small_scores = small_model.evaluate(x_test, y_test, verbose=0)
print(f"Categorical Accuracy: {small_scores[1] * 100:.2f}%")

Epoch 1/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - categorical_accuracy: 0.0697 - loss: 4.5520 - val_categorical_accuracy: 0.1509 - val_loss: 2.9322
Epoch 2/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - categorical_accuracy: 0.1858 - loss: 2.8521 - val_categorical_accuracy: 0.2213 - val_loss: 2.6705
Epoch 3/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - categorical_accuracy: 0.2128 - loss: 2.6301 - val_categorical_accuracy: 0.2120 - val_loss: 2.5173
Epoch 4/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - categorical_accuracy: 0.2196 - loss: 2.4793 - val_categorical_accuracy: 0.2190 - val_loss: 2.4237
Epoch 5/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - categorical_accuracy: 0.2280 - loss: 2.4028 - val_categorical_accuracy: 0.2561 - val_loss: 2.3418
Epoch 6/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 

In [4]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(128)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)

In [15]:
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)
random.seed(seed)

@tf.function

def attention_transfer_loss_cosine_similarity(teacher_saliency_map, student_saliency_map):
    # Ensure tensors are float32
    teacher_saliency_map = tf.cast(teacher_saliency_map, tf.float32)
    student_saliency_map = tf.cast(student_saliency_map, tf.float32)
    # Compute dot product and magnitudes
    dot_product = tf.reduce_sum(teacher_saliency_map * student_saliency_map, axis=[1,2,3])
    magnitude_teacher = tf.sqrt(tf.reduce_sum(tf.square(teacher_saliency_map), axis=[1,2,3]))
    magnitude_student = tf.sqrt(tf.reduce_sum(tf.square(student_saliency_map), axis=[1,2,3]))
    # Compute cosine similarity
    cosine_similarity = dot_product / (magnitude_teacher * magnitude_student + 1e-8)  # Adding epsilon for numerical stability
    return -tf.reduce_mean(cosine_similarity)

@tf.function

def attention_transfer_loss_top10_euclidean_distance_both(teacher_saliency_map, student_saliency_map):
    # Ensure tensors are float32
    teacher_saliency_map = tf.cast(teacher_saliency_map, tf.float32)
    student_saliency_map = tf.cast(student_saliency_map, tf.float32)
    # Flatten the spatial dimensions
    batch_size = tf.shape(teacher_saliency_map)[0]
    teacher_flat = tf.reshape(teacher_saliency_map, [batch_size, -1])
    student_flat = tf.reshape(student_saliency_map, [batch_size, -1])
    num_pixels = tf.shape(teacher_flat)[1]
    num_top_values = tf.cast(0.99 * tf.cast(num_pixels, tf.float32), tf.int32)
    num_bottom_values = tf.cast(0.01 * tf.cast(num_pixels, tf.float32), tf.int32)
    # Use top_k for top values
    top_values, _ = tf.math.top_k(teacher_flat, k=num_top_values, sorted=True)
    # For bottom values, get the smallest k values by negating
    bottom_values, _ = tf.math.top_k(-teacher_flat, k=num_bottom_values, sorted=True)
    bottom_values = -bottom_values  # Revert to original values
    # Gather corresponding student values
    # To find indices, use argsort
    sorted_indices = tf.argsort(teacher_flat, axis=1, direction='ASCENDING')
    top_indices = sorted_indices[:, -num_top_values:]
    bottom_indices = sorted_indices[:, :num_bottom_values]
    # Use batch gather
    batch_indices = tf.reshape(tf.range(batch_size), [-1, 1])
    batch_indices = tf.tile(batch_indices, [1, num_top_values])
    top_gather_indices = tf.stack([batch_indices, top_indices], axis=-1)
    student_top_values = tf.gather_nd(student_flat, top_gather_indices)
    batch_indices_bottom = tf.reshape(tf.range(batch_size), [-1, 1])
    batch_indices_bottom = tf.tile(batch_indices_bottom, [1, num_bottom_values])
    bottom_gather_indices = tf.stack([batch_indices_bottom, bottom_indices], axis=-1)
    student_bottom_values = tf.gather_nd(student_flat, bottom_gather_indices)
    # Calculate Euclidean distance for top and bottom values
    euclidean_top = tf.sqrt(tf.reduce_sum(tf.square(top_values - student_top_values), axis=1))
    euclidean_bottom = tf.sqrt(tf.reduce_sum(tf.square(bottom_values - student_bottom_values), axis=1))
    # Total loss
    total_euclidean_distance = tf.reduce_mean(euclidean_top + euclidean_bottom)
    return total_euclidean_distance

# Precompute teacher saliency maps using vectorized operations

teacher_saliency_map = []
for batch_inputs, batch_labels in train_dataset:
    with tf.GradientTape() as tape:
        tape.watch(batch_inputs)
        teacher_outputs = model(batch_inputs, training=True)
        teacher_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(batch_labels, teacher_outputs))
    teacher_grads = tape.gradient(teacher_loss, batch_inputs)
    teacher_saliency_map.append(teacher_grads)

# Define and compile the student model
conv_model2 = convolutional_model()
conv_model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy']
)
# Optimizer
optimizer = tf.keras.optimizers.Adam()
# Training loop with @tf.function for graph execution

@tf.function

def train_step(conv_model, batch_inputs, batch_labels, teacher_saliency):
    with tf.GradientTape() as tape:
        # Forward pass for student
        student_outputs = conv_model(batch_inputs, training=True)
        student_loss = tf.reduce_mean(
            tf.keras.losses.categorical_crossentropy(batch_labels, student_outputs)
        )

        # Compute student saliency maps
        with tf.GradientTape() as tape_student:
            tape_student.watch(batch_inputs)
            student_outputs = conv_model(batch_inputs, training=True)
            student_loss_inner = tf.reduce_mean(
                tf.keras.losses.categorical_crossentropy(batch_labels, student_outputs)
            )
        student_grads = tape_student.gradient(student_loss_inner, batch_inputs)

        # Compute attention loss
        att_loss = attention_transfer_loss_top10_euclidean_distance_both(teacher_saliency, student_grads)*1000

        # Total loss
        total_loss = student_loss + att_loss

    # Compute gradients and apply
    gradients = tape.gradient(total_loss, conv_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, conv_model.trainable_variables))
    return total_loss, student_loss, att_loss


# Training loop
EPOCHS = 50
for epoch in range(EPOCHS):
    print(f"Starting epoch {epoch+1}/{EPOCHS}")
    batch_index = 0
    for batch_inputs, batch_labels in train_dataset:
        # Fetch precomputed teacher saliency map
        teacher_saliency = teacher_saliency_map[batch_index]
        # Perform a training step
        total_loss, student_loss, att_loss = train_step(conv_model2, batch_inputs, batch_labels, teacher_saliency)
        if batch_index % 100 == 0:
            print(f"Batch {batch_index}: Total Loss: {total_loss.numpy()}, "
                  f"Student Loss: {student_loss.numpy()}, Attention Loss: {att_loss.numpy()}")
        batch_index += 1
    # Evaluation on test dataset
    test_loss = 0.0
    test_accuracy = 0.0
    num_batches = 0
    for test_inputs, test_labels in test_dataset:
        test_outputs = conv_model2(test_inputs, training=False)
        loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(test_labels, test_outputs))
        accuracy = tf.reduce_mean(
            tf.keras.metrics.categorical_accuracy(test_labels, test_outputs)
        )
        test_loss += loss
        test_accuracy += accuracy
        num_batches += 1
    test_loss /= num_batches
    test_accuracy /= num_batches
    print(f'Epoch {epoch + 1}, Test Loss: {test_loss.numpy():.4f}, Test Accuracy: {test_accuracy.numpy():.2%}')
    '''if test_accuracy.numpy() > 0.85:
        print("Reached target accuracy. Stopping training.")
        break'''

Starting epoch 1/50
Batch 0: Total Loss: 5.847992420196533, Student Loss: 3.6622066497802734, Attention Loss: 2.1857857704162598
Epoch 1, Test Loss: 0.4360, Test Accuracy: 88.57%
Starting epoch 2/50
Batch 0: Total Loss: 1.1259129047393799, Student Loss: 0.5283489227294922, Attention Loss: 0.5975640416145325
Epoch 2, Test Loss: 0.2710, Test Accuracy: 91.27%
Starting epoch 3/50
Batch 0: Total Loss: 0.8776256442070007, Student Loss: 0.32863134145736694, Attention Loss: 0.5489943027496338
Epoch 3, Test Loss: 0.2230, Test Accuracy: 92.82%
Starting epoch 4/50
Batch 0: Total Loss: 0.8457457423210144, Student Loss: 0.2890107035636902, Attention Loss: 0.5567350387573242
Epoch 4, Test Loss: 0.1944, Test Accuracy: 93.67%
Starting epoch 5/50
Batch 0: Total Loss: 0.7785842418670654, Student Loss: 0.24774879217147827, Attention Loss: 0.5308354496955872
Epoch 5, Test Loss: 0.1968, Test Accuracy: 93.59%
Starting epoch 6/50
Batch 0: Total Loss: 0.6658088564872742, Student Loss: 0.1741873174905777, Atte

2024-11-14 17:00:01.056838: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 25, Test Loss: 0.1602, Test Accuracy: 95.38%
Starting epoch 26/50
Batch 0: Total Loss: 0.37355947494506836, Student Loss: 0.04627083241939545, Attention Loss: 0.3272886276245117
Epoch 26, Test Loss: 0.1852, Test Accuracy: 95.27%
Starting epoch 27/50
Batch 0: Total Loss: 0.3659989833831787, Student Loss: 0.03867688402533531, Attention Loss: 0.3273220956325531
Epoch 27, Test Loss: 0.1873, Test Accuracy: 94.89%
Starting epoch 28/50
Batch 0: Total Loss: 0.3727739751338959, Student Loss: 0.040163762867450714, Attention Loss: 0.33261021971702576
Epoch 28, Test Loss: 0.1873, Test Accuracy: 95.05%
Starting epoch 29/50
Batch 0: Total Loss: 0.3967060446739197, Student Loss: 0.05508936569094658, Attention Loss: 0.341616690158844
Epoch 29, Test Loss: 0.1846, Test Accuracy: 94.82%
Starting epoch 30/50
Batch 0: Total Loss: 0.39831361174583435, Student Loss: 0.045671701431274414, Attention Loss: 0.35264191031455994
Epoch 30, Test Loss: 0.1956, Test Accuracy: 95.04%
Starting epoch 31/50
Batch 0: