In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, backend as K
from sklearn.model_selection import train_test_split
import pandas as pd
import os
import re

2024-07-18 13:32:59.335922: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-18 13:32:59.338667: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-18 13:32:59.434952: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-18 13:32:59.809053: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def recall_m(y_true, y_pred):
    """Batch-wise recall, usable as a Keras ``compile(metrics=...)`` function.

    Args:
        y_true: Ground-truth labels. Either one-hot / multi-hot with the same
            shape as ``y_pred``, or sparse integer class ids of shape
            ``(batch,)`` or ``(batch, 1)`` (the encoding expected by
            ``sparse_categorical_crossentropy``).
        y_pred: Per-class scores of shape ``(batch, num_classes)``.

    Returns:
        Scalar tensor ``TP / (actual positives + epsilon)``. A score counts as
        a positive prediction when, after clipping to [0, 1], it rounds to 1.
    """
    y_true = tf.convert_to_tensor(y_true)
    y_pred = tf.convert_to_tensor(y_pred)

    # Sparse class ids cannot be multiplied element-wise with per-class scores
    # (the product would scale probabilities by the class index), so expand
    # them to one-hot rows before counting.
    labels_are_sparse = (
        y_true.shape.rank != y_pred.shape.rank
        or (y_true.shape[-1] == 1 and y_pred.shape[-1] != 1)
    )
    if labels_are_sparse:
        class_ids = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
        y_true = tf.one_hot(class_ids, depth=tf.shape(y_pred)[-1], dtype=y_pred.dtype)
    else:
        y_true = tf.cast(y_true, y_pred.dtype)

    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # epsilon keeps the ratio finite when the batch has no positives
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    """Batch-wise precision, usable as a Keras ``compile(metrics=...)`` function.

    Args:
        y_true: Ground-truth labels. Either one-hot / multi-hot with the same
            shape as ``y_pred``, or sparse integer class ids of shape
            ``(batch,)`` or ``(batch, 1)`` (the encoding expected by
            ``sparse_categorical_crossentropy``).
        y_pred: Per-class scores of shape ``(batch, num_classes)``.

    Returns:
        Scalar tensor ``TP / (predicted positives + epsilon)``. A score counts
        as a positive prediction when, after clipping to [0, 1], it rounds to 1.
    """
    y_true = tf.convert_to_tensor(y_true)
    y_pred = tf.convert_to_tensor(y_pred)

    # Sparse class ids cannot be multiplied element-wise with per-class scores
    # (the product would scale probabilities by the class index), so expand
    # them to one-hot rows before counting true positives.
    labels_are_sparse = (
        y_true.shape.rank != y_pred.shape.rank
        or (y_true.shape[-1] == 1 and y_pred.shape[-1] != 1)
    )
    if labels_are_sparse:
        class_ids = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
        y_true = tf.one_hot(class_ids, depth=tf.shape(y_pred)[-1], dtype=y_pred.dtype)
    else:
        y_true = tf.cast(y_true, y_pred.dtype)

    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # epsilon keeps the ratio finite when nothing is predicted positive
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    """Batch-wise F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    Both arguments are forwarded unchanged to the two helper metrics. The
    backend epsilon in the denominator keeps the result finite when precision
    and recall are both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    product = p * r
    total = p + r + K.epsilon()
    return 2 * (product / total)

In [3]:
# Directory (relative to the working directory) that is scanned for the
# per-class .npz archives; later cells build file names as `path + <file>`,
# so the trailing slash is required.
path = 'data/'

# Function to extract numbers from filenames
def extract_number(filename):
    """Return the first run of decimal digits found in ``filename`` as an int.

    Raises:
        AttributeError: if ``filename`` contains no digit (the regex search
            returns ``None``).
    """
    first_digits = re.search(r'\d+', filename)
    return int(first_digits.group(0))

# Keep only the .npz archives, then order them by the number embedded in the
# file name so that c10 sorts after c9 (plain string order would not).
class_files = [name for name in os.listdir(path) if name.endswith(".npz")]
class_files.sort(key=extract_number)
class_files

['combi-cells_c0.npz',
 'combi-cells_c1.npz',
 'combi-cells_c2.npz',
 'combi-cells_c3.npz',
 'combi-cells_c4.npz',
 'combi-cells_c5.npz',
 'combi-cells_c6.npz',
 'combi-cells_c7.npz',
 'combi-cells_c8.npz',
 'combi-cells_c9.npz',
 'combi-cells_c10.npz']

In [4]:
# Per-class arrays are collected here and concatenated after the loop
data = []
labels = []

# Load every class file; the label of a sample is the position of its file
# in `class_files`
for i, class_file in enumerate(class_files):
    # An .npz archive is returned as an NpzFile that keeps a file handle open;
    # the context manager closes it as soon as the arrays have been copied
    # into memory by np.stack.
    with np.load(path + class_file) as loaded:
        class_data = np.stack([loaded[key] for key in loaded.files])
    # One label per stacked array (kept as float, exactly as np.ones produces)
    class_labels = np.ones(class_data.shape[0]) * i

    data.append(class_data)
    labels.append(class_labels)

    print("Loading done: ", i)

# Concatenate data and labels along the sample axis
data = np.concatenate(data, axis=0)
labels = np.concatenate(labels, axis=0)
# Replace missing values in place so they do not propagate through the network
data[np.isnan(data)] = 0

print(f"Total data shape: {data.shape}")
print(f"Total labels shape: {labels.shape}")

Loading done:  0
Loading done:  1
Loading done:  2
Loading done:  3
Loading done:  4
Loading done:  5
Loading done:  6
Loading done:  7
Loading done:  8
Loading done:  9
Loading done:  10
Total data shape: (5500, 100, 13048, 2)
Total labels shape: (5500,)


In [5]:
# Sizes needed to build the network: length of axis 2 of `data` and the
# number of class files
num_lrpair = data.shape[2]
num_classes = len(class_files)

# Hold out 20% of all samples as the test set ...
X_tmp, test_data, Y_tmp, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=42
)
# ... then 20% of the remainder as the validation set
train_data, val_data, train_labels, val_labels = train_test_split(
    X_tmp, Y_tmp, test_size=0.2, random_state=44
)

print(num_lrpair, num_classes)

13048 11


In [6]:
# Create the CNN model.
# The input is declared with an explicit Input layer: passing `input_shape=`
# to the first Conv2D makes Keras emit a "Do not pass an input_shape/input_dim
# argument to a layer" UserWarning.
# NOTE(review): the leading 100 is hard-coded and must equal data.shape[1].
model = tf.keras.Sequential([
    layers.Input(shape=(100, num_lrpair, 2)),
    # 1x1 convolution: mixes the 2 input channels into 8 feature maps
    layers.Conv2D(8, (1, 1), activation='relu', kernel_initializer='he_normal'),
    # Non-overlapping (10, 1) windows along axis 1: 100 -> 10
    layers.Conv2D(16, (10, 1), strides=(10, 1), activation='relu', kernel_initializer='he_normal'),
    layers.BatchNormalization(),
    # Second (10, 1) reduction along axis 1: 10 -> 1
    layers.Conv2D(16, (10, 1), strides=(10, 1), activation='relu', kernel_initializer='he_normal'),
    # From here on only axis 2 is pooled / convolved
    layers.MaxPooling2D((1, 4)),
    layers.Conv2D(32, (1, 4), activation='relu', kernel_initializer='he_normal'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((1, 2)),
    # Collapse the remaining spatial positions into one 32-d vector
    layers.GlobalAveragePooling2D(),
    # Fully connected classifier head
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model; labels are integer class ids, hence the sparse loss
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy', f1_m, precision_m, recall_m])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-07-18 13:47:11.284501: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [None]:
# Fit on the training split and evaluate on the validation split after
# every epoch; `history` keeps the per-epoch metrics.
history = model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=12,
    validation_data=(val_data, val_labels),
)

In [None]:
# Score the held-out test split. The five values are unpacked as the loss
# followed by the metrics passed to model.compile
# (accuracy, f1_m, precision_m, recall_m).
loss, accuracy, f1_score, precision, recall = model.evaluate(
    x=test_data,
    y=test_labels,
    verbose=1,
)

In [None]:
# Save the trained model.
# exist_ok=True creates the directory only when needed, without the separate
# exists() check (which can race with another process creating it).
save_path = "cnn_model/"
os.makedirs(save_path, exist_ok=True)

# HDF5 file; the GradCAM++ section below picks up every *.h5 in this folder
model.save(save_path + 'data_cnn-model_v01.h5')

## GradCAM++

In [None]:
from tf_keras_vis.gradcam import GradcamPlusPlus
from tf_keras_vis.utils.scores import CategoricalScore
from tf_keras_vis.utils.model_modifiers import ReplaceToLinear

# Collect the saved models (*.h5) to be explained, in alphabetical order
model_paths = "cnn_model/"
model_names = [name for name in os.listdir(model_paths) if name.endswith('.h5')]
model_names.sort()

# Display name of each class; position i is used as class index i in the
# GradCAM++ loop
class_names = [
    'Tip_Cells',
    'activated capillary',
    'Immature_Phenotype',
    'capillary_I',
    'capillary_II',
    'Activated_EC',
    'TandNK',
    'Epithelial',
    'Myeloid',
    'Fibro_Peri',
    'B',
]

# Load L-R interaction gene data: two parallel lists taken from the
# 'TumorGene' and 'OtherGene' columns of the CSV
file_path = 'DB/'
gene_list_df = pd.read_csv(file_path + 'CCIdb.csv')
genes, genes1 = (gene_list_df[column].tolist() for column in ('TumorGene', 'OtherGene'))

In [11]:
# Ensure the save path exists (no-op when it is already there)
save_path = 'res/'
os.makedirs(save_path, exist_ok=True)

for model_name in model_names:
    # Last '_'-separated token of the file name without its extension,
    # e.g. 'data_cnn-model_v01.h5' -> 'v01'
    version_suffix = model_name.split('_')[-1].replace('.h5', '')
    # The custom metric functions must be supplied so the compiled model
    # can be deserialized
    model = tf.keras.models.load_model(model_paths + model_name,
                                       custom_objects={'f1_m': f1_m,
                                                       'precision_m': precision_m,
                                                       'recall_m': recall_m})

    # Create GradCAM++ object
    gradcam = GradcamPlusPlus(model, model_modifier=ReplaceToLinear(), clone=True)

    for class_index, class_name in enumerate(class_names):
        # Select the samples of this class by label instead of assuming a
        # fixed block of 500 consecutive samples per class. Indexing `data`
        # one row at a time avoids copying the whole class subset.
        sample_indices = np.flatnonzero(labels == class_index)
        if sample_indices.size == 0:
            print(f"Skipped (no samples): {class_name}")
            continue

        # The score target is the same for every sample of the class
        score = CategoricalScore(class_index)

        # Generate one heatmap per sample and average them
        cam_tot = np.mean([
            gradcam(score, np.expand_dims(data[idx], axis=0), penultimate_layer=-1)
            for idx in sample_indices
        ], axis=0)

        # Collapse the first axis of the (single-sample) heatmap, then
        # min-max normalize to [0, 1]
        cam_sum = np.mean(cam_tot[0], axis=0)
        cam_max, cam_min = cam_sum.max(), cam_sum.min()
        cam_range = cam_max - cam_min
        if cam_range == 0:
            # Constant heatmap: avoid 0/0 and report zero weight everywhere
            cam_mod = np.zeros_like(cam_sum)
        else:
            cam_mod = (cam_sum - cam_min) / cam_range

        # Save results to file: one tab-separated row per gene pair
        line_1st = f'TumorCell\t{class_name}\tNormalized_Weight'
        line_ext = [f'{genes[i]}\t{genes1[i]}\t{str(cam_mod[i])}' for i in range(len(genes))]

        file_name = f"gcamplus_result_{class_name}_{version_suffix}.txt"
        full_path = os.path.join(save_path, file_name)
        with open(full_path, "w") as f_out:
            f_out.write(line_1st + '\n')
            f_out.write('\n'.join(line_ext))

        print(f"Completed: {file_name}")

Completed: gcamplus_result_Tip_Cells_v08.txt
Completed: gcamplus_result_activated capillary_v08.txt
Completed: gcamplus_result_Immature_Phenotype_v08.txt
Completed: gcamplus_result_capillary_I_v08.txt
Completed: gcamplus_result_capillary_II_v08.txt
Completed: gcamplus_result_Activated_EC_v08.txt
Completed: gcamplus_result_TandNK_v08.txt
Completed: gcamplus_result_Epithelial_v08.txt
Completed: gcamplus_result_Myeloid_v08.txt
Completed: gcamplus_result_Fibro_Peri_v08.txt
Completed: gcamplus_result_B_v08.txt
