In [None]:
import numpy as np
import pandas as pd 

import os
import matplotlib as mpl
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from skimage.feature import graycomatrix, graycoprops

from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetB0
import tensorflow as tf
print(f'The tensorflow version is {tf.__version__}')
from tensorflow import keras
from tensorflow.keras import layers, mixed_precision
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.callbacks import ModelCheckpoint

import warnings
warnings.filterwarnings("ignore")


In [None]:
# Root directory of the dataset. The loops below expect two directory levels
# between this root and the image files: <root>/<group>/<class folder>/<image>.
data_dir = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set'

# Class-folder name -> human-readable label stored in the dataframe.
label_by_folder = {
    'colon_aca': 'Colon Adenocarcinoma',
    'colon_n': 'Colon Benign Tissue',
    'lung_aca': 'Lung Adenocarcinoma',
    'lung_n': 'Lung Benign Tissue',
    'lung_scc': 'Lung Squamous Cell Carcinoma',
}

filepaths = []
labels = []

# Directory listings are sorted because os.listdir returns entries in
# arbitrary order, and the seeded train/test split depends on row order.
for fold in sorted(os.listdir(data_dir)):
    foldpath = os.path.join(data_dir, fold)
    if not os.path.isdir(foldpath):
        continue
    for f in sorted(os.listdir(foldpath)):
        f_paths = os.path.join(foldpath, f)
        label = label_by_folder.get(f)
        # Skip anything that is not a known class folder. A path and its label
        # are always appended together so the two lists cannot get out of step.
        if label is None or not os.path.isdir(f_paths):
            continue
        for file in sorted(os.listdir(f_paths)):
            filepaths.append(os.path.join(f_paths, file))
            labels.append(label)

Fseries = pd.Series(filepaths, name = 'filepaths')
Lseries = pd.Series(labels, name = 'labels')
df = pd.concat([Fseries, Lseries], axis = 1)

print(df.labels.value_counts())
# Last expression of the cell, so the preview is actually rendered.
df.head()

In [None]:
# Render the path/label dataframe as this cell's output.
df

In [None]:
# Read the first image to learn the raw image dimensions and channel count.
# cv2.imread returns the pixels in BGR channel order.
first_image_path = df.filepaths.iloc[0]
img = cv2.imread(first_image_path)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {first_image_path}")

height, width, channels = img.shape
print(f"图像尺寸: {width}x{height}, 通道数: {channels}")

In [None]:
# Options common to both stratified splits.
split_options = dict(shuffle=True, random_state=123)

# Stage 1: keep 80% of all rows for training and hold out the other 20%.
train_df, temp_df = train_test_split(
    df, test_size=0.2, stratify=df['labels'], **split_options
)

# Stage 2: of the held-out rows, 40% become the test set and the remaining
# 60% the validation set.
valid_df, test_df = train_test_split(
    temp_df, test_size=0.4, stratify=temp_df['labels'], **split_options
)

print(f'the train dataset size is {len(train_df)}')
print(f'the validation dataset size is {len(valid_df)}')
print(f'the test dataset size is {len(test_df)}')

In [None]:
batch_size = 64
image_size = (224, 224)
image_shape = (*image_size, channels)

# A single generator object serves all three splits; its only transform is
# multiplying pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1./255)


def _flow_from_split(split_df, shuffle):
    """Return a batch iterator over the images listed in `split_df`.

    Images are read from the 'filepaths' column, resized to `image_size`,
    and paired with one-hot labels built from the 'labels' column.
    `shuffle` controls whether the iterator shuffles its samples.
    """
    return datagen.flow_from_dataframe(
        dataframe=split_df,
        x_col='filepaths',
        y_col='labels',
        target_size=image_size,
        class_mode='categorical',
        shuffle=shuffle,
        batch_size=batch_size,
    )


# Training and validation iterators shuffle; the test iterator keeps row order.
train_gen = _flow_from_split(train_df, shuffle=True)
valid_gen = _flow_from_split(valid_df, shuffle=True)
test_gen = _flow_from_split(test_df, shuffle=False)

In [None]:
g_dict = train_gen.class_indices      # dictionary {'class name': index}
classes = list(g_dict.keys())         # class names taken from the dictionary keys

# Draw one batch from the training iterator. Dedicated names are used so the
# `labels` list built while scanning the dataset is not overwritten by the
# one-hot label array of this batch.
batch_images, batch_labels = next(train_gen)

# Show at most 16 images (4x4 grid); a batch may contain fewer than 16.
n_shown = min(16, len(batch_images))

plt.figure(figsize=(20, 20))

for i in range(n_shown):
    plt.subplot(4, 4, i + 1)
    # Pixels were already rescaled by the generator, so they can be drawn as-is.
    plt.imshow(batch_images[i])
    index = np.argmax(batch_labels[i])  # one-hot vector -> class index
    class_name = classes[index]         # class index -> class name
    plt.title(class_name, color='blue', fontsize=12)
    plt.axis('off')  # hide the axes for a cleaner image grid

plt.show()

In [None]:
# Print the class-name -> index mapping taken from the training generator.
print(g_dict)

# Build the model

In [None]:
# Model, Input, Conv2D, MaxPooling2D, Flatten and Dense come from the
# notebook's top import cell.

# Network input: 224x224 images with 3 channels.
input_shape = (224, 224, 3)
inputs = Input(shape=input_shape)

# Convolutional stack described as (filters, number of 3x3 conv layers) per
# block. Every block ends with a 2x2 max-pooling layer.
conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

x = inputs
for filters, n_convs in conv_blocks:
    for _ in range(n_convs):
        x = Conv2D(filters, (3, 3), padding="same", activation="relu")(x)
    x = MaxPooling2D((2, 2))(x)

# Fully connected classifier head.
x = Flatten()(x)
x = Dense(256, activation="relu")(x)
x = Dense(64, activation="relu")(x)

# One output unit per class. The output layer is pinned to float32 so the
# softmax stays in full precision even when this cell is (re-)run while a
# mixed-precision global policy is active; under the default float32 policy
# this is a no-op.
num_classes = 5
outputs = Dense(num_classes, activation="softmax", dtype="float32")(x)

# Assemble the functional model.
model = Model(inputs=inputs, outputs=outputs)


# Train and Evaluate

In [None]:
# NOTE(review): a global dtype policy is picked up by layers when they are
# constructed, and `model` has already been built before this cell runs —
# confirm whether this call has the intended effect on training.
mixed_precision.set_global_policy('mixed_float16')
epochs = 20

model.compile(
    optimizer = Adamax(learning_rate = 0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Write the model to best_model.h5 whenever validation accuracy reaches a new
# maximum.
checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

history = model.fit(
    train_gen,
    epochs=epochs,
    verbose=1,
    validation_data = valid_gen,
    # `callbacks` is documented as a list of callback instances.
    callbacks = [checkpoint]
)

In [None]:
# To evaluate a previously saved model instead of the one in memory, load it
# first:
# model = load_model('/kaggle/input/best-model/best_model.h5')
# NOTE(review): with the line above commented out, the evaluation uses
# whatever weights `model` currently holds, not the best_model.h5 checkpoint.

test_loss, test_acc = model.evaluate(test_gen, verbose=1)
print(f"测试集准确率：{test_acc:.4f}")

# The curves below are the training and validation accuracy per epoch, so the
# caption names the training set (the original text said "test set").
print('训练过程训练集和验证集Accuracy变化：')
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='train acc')
ax.plot(history.history['val_accuracy'], label='val acc')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
ax.legend()
plt.show()

# Prediction

In [None]:
correct_preds = 0
total_preds = 10  # number of images to spot-check

# Index -> class-name lookup, built once instead of on every iteration.
class_indices = {v: k for k, v in train_gen.class_indices.items()}

# Sample from the held-out test split (not the full dataframe, which is mostly
# training rows) and without replacement, so the reported accuracy is computed
# on distinct images the model was not trained on.
sample_rows = test_df.sample(n=min(total_preds, len(test_df)))
total_preds = len(sample_rows)

for _, row in sample_rows.iterrows():
    img_path = row['filepaths']
    true_label = row['labels']

    # ======================
    # Read and preprocess the image. The context manager closes the file
    # handle; convert('RGB') guarantees the 3 channels the model expects.
    with Image.open(img_path) as image:
        img = image.convert('RGB').resize((224, 224))  # resize to the model input size
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = img_array / 255.0  # same rescaling as the data generators
    img_array = tf.expand_dims(img_array, 0)  # add a batch dimension of size 1

    # ======================
    # Predict; verbose=0 keeps the per-image report below readable.
    preds = model.predict(img_array, verbose=0)
    pred_class = np.argmax(preds, axis=1)[0]  # index of the most probable class
    pred_label = class_indices[pred_class]

    # Compare the true label with the predicted label.
    if true_label == pred_label:
        correct_preds += 1

    print(f"真实标签: {true_label}, 预测标签: {pred_label}, 概率: {preds[0][pred_class]:.4f}")

# Accuracy over the sampled images (0.0 if there was nothing to sample).
accuracy = correct_preds / total_preds if total_preds else 0.0
print(f"\n总的准确率: {accuracy * 100:.2f}%")

# Grad-CAM Heatmap

In [None]:
# Image size used for the Grad-CAM input: 224x224.
img_size = (224, 224)

# Draw one random row from the dataframe and unpack its path and label.
row = df.sample(n=1).iloc[0]
img_path, true_label = row['filepaths'], row['labels']

# Show the selected image inline.
import IPython
selected_image = IPython.display.Image(img_path)
IPython.display.display(selected_image)

## Grad-CAM algorithm

In [None]:
def get_img_array(img_path, img_size):
    """Load an image file as a model-ready batch of one.

    Args:
        img_path: Path of the image file to read.
        img_size: Target size as (height, width), the same order Keras uses
            for `target_size`.

    Returns:
        A float array of shape (1, height, width, 3) with pixel values
        divided by 255, matching the `rescale=1./255` used for training.
    """
    target_height, target_width = img_size
    # The context manager closes the file handle once the pixels are read.
    with Image.open(img_path) as image:
        # convert('RGB') guarantees 3 channels; PIL's resize takes
        # (width, height), hence the swapped order.
        img = image.convert('RGB').resize((target_width, target_height))
    img_array = tf.keras.preprocessing.image.img_to_array(img)

    # The model was trained on inputs rescaled by 1/255, so do the same here.
    img_array = img_array / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    return img_array


def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one input image.

    Args:
        img_array: Preprocessed input batch passed to the model as-is; the
            heatmap is built from the first sample.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose activations are
            weighted to form the heatmap.
        pred_index: Index of the class to explain. Defaults to the class with
            the highest predicted score for the first sample.

    Returns:
        A 2-D NumPy array with values in [0, 1]. It is all zeros when no
        location has a positive class-weighted activation.
    """
    # First, we create a model that maps the input image to the activations
    # of the last conv layer as well as the output predictions
    grad_model = keras.models.Model(
        inputs=model.inputs,
        outputs=[model.get_layer(last_conv_layer_name).output, model.output]
    )

    # Then, we compute the gradient of the top predicted class for our input image
    # with respect to the activations of the last conv layer
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # This is the gradient of the output neuron (top predicted or chosen)
    # with regard to the output feature map of the last conv layer
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    # then sum all the channels to obtain the heatmap class activation
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # For visualization purpose, we will also normalize the heatmap between 0 & 1.
    # Negative values are clamped first; divide_no_nan then yields zeros
    # (instead of NaN) when the clamped maximum is 0.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()


In [None]:
# Names of all Conv2D layers in model order; the last entry is the final
# convolutional layer of the model.
conv_layer_names = [
    layer.name
    for layer in model.layers
    if isinstance(layer, tf.keras.layers.Conv2D)
]
last_conv_layer_name = conv_layer_names[-1]

# Generate the heatmap for the selected image.
img_array = get_img_array(img_path, img_size)
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)

# Show the raw heatmap as a matrix plot.
plt.matshow(heatmap)
plt.show()

In [None]:
def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Overlay a Grad-CAM heatmap on an image, save the result and display it.

    Args:
        img_path: Path of the original image file.
        heatmap: 2-D array of heatmap intensities, expected in [0, 1].
            NaN values are treated as 0 and out-of-range values are clipped.
        cam_path: File path the superimposed image is written to.
        alpha: Weight applied to the colorized heatmap before it is added to
            the original image.
    """
    # Imported locally so the function does not depend on another cell
    # having executed `import IPython` first.
    from IPython.display import Image as IPythonImage, display

    # Load the original image
    img = keras.utils.load_img(img_path)
    img = keras.utils.img_to_array(img)

    # Sanitize, then rescale the heatmap to integer indices in 0-255 so they
    # are always valid positions in the 256-entry colormap table below.
    heatmap = np.nan_to_num(
        np.asarray(heatmap, dtype="float32"), nan=0.0, posinf=1.0, neginf=0.0
    )
    heatmap = np.uint8(255 * np.clip(heatmap, 0.0, 1.0))

    # Use jet colormap to colorize heatmap
    jet = mpl.colormaps["jet"]

    # Use RGB values of the colormap
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Create an image with RGB colorized heatmap, resized to the original
    # image's width and height
    jet_heatmap = keras.utils.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.utils.img_to_array(jet_heatmap)

    # Superimpose the heatmap on original image
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = keras.utils.array_to_img(superimposed_img)

    # Save the superimposed image
    superimposed_img.save(cam_path)

    # Display Grad CAM
    display(IPythonImage(cam_path))


# Overlay the heatmap on the selected image, save it to the default cam.jpg
# path and display the result.
save_and_display_gradcam(img_path, heatmap)
