In [2]:
'''
Experiment Description: 复现黄军豪在2020年发表的关于BCMS的论文中的深度学习模型
DATA：本地医院的数据
'''
import tensorflow as tf 
import os
from tensorflow.keras import layers, Sequential
batch_size = 200
learning_rate = 1e-3
class_names = ['Her2', 'luminal_A', 'luminal_B', 'TN']#每个分类的名称

In [3]:
#获取train data文件路径
train_data_dir = '/media/ly/liangyi/研究课题——乳腺癌分子分型分类/Data/MRI_minbox/train/'
Her2_train_dir = train_data_dir + 'Her_2/'
luminal_A_train_dir = train_data_dir + 'luminal_A/'
luminal_B_train_dir = train_data_dir + 'luminal_B/'
TN_train_dir = train_data_dir + 'TN/'

Her2_train_names = [Her2_train_dir + filename for filename in os.listdir(Her2_train_dir)]
luminal_A_train_names = [luminal_A_train_dir + filename for filename in os.listdir(luminal_A_train_dir)]
luminal_B_train_names = [luminal_B_train_dir + filename for filename in os.listdir(luminal_B_train_dir)]
TN_train_names = [TN_train_dir + filename for filename in os.listdir(TN_train_dir)]
#训练集中各个分型的数量
num_Her2_train = len(Her2_train_names)
num_luminal_A_train = len(luminal_A_train_names)
num_luminal_B_train = len(luminal_B_train_names)
num_TN_train = len(TN_train_names)

print(num_Her2_train, num_luminal_A_train, num_luminal_B_train, num_TN_train)

189 125 148 121


In [4]:
train_image_names = Her2_train_names + luminal_A_train_names + luminal_B_train_names + TN_train_names
train_labels = [0] * num_Her2_train + [1] * num_luminal_A_train + [2] * num_luminal_B_train + [3] * num_TN_train

In [5]:
#map
def _map_loadImage(imagename, label):
    image_string = tf.io.read_file(imagename)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image_resized = tf.image.resize(image_decoded, [32,32])/255.0
    label = tf.cast(label, dtype=tf.int32)
    return image_resized, label

In [6]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_image_names, train_labels))
train_dataset = train_dataset.map(
    map_func=_map_loadImage,
    num_parallel_calls=tf.data.experimental.AUTOTUNE
)
train_dataset = train_dataset.shuffle(1000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

In [7]:
print(train_dataset)

<PrefetchDataset shapes: ((None, 32, 32, 3), (None,)), types: (tf.float32, tf.int32)>


In [8]:
#准备验证集
val_data_dir = '/media/ly/liangyi/研究课题——乳腺癌分子分型分类/Data/MRI_minbox/validation/'
Her2_val_dir = val_data_dir + 'Her_2/'
luminal_A_val_dir = val_data_dir + 'luminal_A/'
luminal_B_val_dir = val_data_dir + 'luminal_B/'
TN_val_dir = val_data_dir + 'TN/'

Her2_val_names = [Her2_val_dir + imagename for imagename in os.listdir(Her2_val_dir)]
luminal_A_val_names = [luminal_A_val_dir + imagename for imagename in os.listdir(luminal_A_val_dir)]
luminal_B_val_names = [luminal_B_val_dir + imagename for imagename in os.listdir(luminal_B_val_dir)]
TN_val_names = [TN_val_dir + imagename for imagename in os.listdir(TN_val_dir)]

num_Her2_val = len(Her2_val_names)
num_luminal_A_val = len(luminal_A_val_names)
num_luminal_B_val = len(luminal_B_val_names)
num_TN_val = len(TN_val_names)

print(num_Her2_val, num_luminal_A_val, num_luminal_B_val, num_TN_val)

81 54 63 52


In [9]:
val_image_names = Her2_val_names + luminal_A_val_names + luminal_B_val_names + TN_val_names
val_labels = [0] * num_Her2_val + [1] * num_luminal_A_val + [2] * num_luminal_B_val + [3] * num_TN_val

In [10]:
print(len(val_image_names), len(val_labels))

250 250


In [11]:
val_dataset = tf.data.Dataset.from_tensor_slices((val_image_names, val_labels))
val_dataset = val_dataset.map(_map_loadImage).batch(batch_size)

In [12]:
print(val_dataset)

<BatchDataset shapes: ((None, 32, 32, 3), (None,)), types: (tf.float32, tf.int32)>


In [13]:
conv_base = Sequential([
    #unit1
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.MaxPooling2D( pool_size=[2, 2], strides=2, padding="same"),

    #unit2
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.MaxPooling2D( pool_size=[2, 2], strides=2, padding="same"),

    #unit3
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.MaxPooling2D( pool_size=[2, 2], strides=2, padding="same"),

    #unit4
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.MaxPooling2D( pool_size=[2, 2], strides=2, padding="same"),

    #unit5
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu ),
    layers.MaxPooling2D( pool_size=[2, 2], strides=2, padding="same"),

])

In [14]:
conv_base.build(input_shape = [None, 32, 32, 3])
conv_base.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 16, 16, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 8, 8, 256)         2

In [15]:
x = tf.random.normal([4,32,32,3])

In [16]:
out = conv_base(x)
print(out.shape)

(4, 1, 1, 512)


In [17]:
out = tf.squeeze(out)
print(out.shape)

(4, 512)


In [18]:
#构建全连接层
fc_net = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(64, activation=tf.nn.relu),
    tf.keras.layers.Dense(4, activation=tf.nn.relu)
])

In [19]:
fc_net.build(input_shape=[None, 512])

In [22]:
variables = conv_base.trainable_variables + fc_net.trainable_variables
optimizer = tf.keras.optimizers.Adam(lr = learning_rate)

In [23]:
for epoch in range(30):
    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # [None, 32, 32, 3] --> [None, 1, 1, 512]
            out = conv_base(x)
            # [None, 1, 1, 512] --> [None, 512]
            out = tf.squeeze(out)
            # [None, 512] --> [None, 4]
            logits = fc_net(out)
            #comput loss
            y_onehot = tf.one_hot(y, depth=4)
            loss = tf.losses.binary_crossentropy(y_onehot, logits, from_logits=True)
            loss = tf.reduce_mean(loss)
        grads = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(grads, variabels))
        print(epoch, step, 'loss: ', float(loss))
    total_num = 0
    total_correct = 0
    for x, y in val_dataset:
        out = conv_base(x)
        out = tf.squeeze(out)
        logits = fc_net(out)
        prob = tf.nn.softmax(logits)
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype = tf.int32)

        correct = tf.cast(tf.equal(pred, y), dtype = tf.int32)

        correct = tf.reduce_sum(correct)
        total_num += x.shape[0]
        total_correct += int(correct)

    acc = total_correct/total_num
    print(epoch, 'acc: ',acc)



0 0 loss:  0.6933338046073914
0 1 loss:  0.693194568157196
0 2 loss:  0.6931471824645996
0 acc:  0.324
1 0 loss:  0.6931471824645996
1 1 loss:  0.6931471824645996
1 2 loss:  0.6939392685890198
1 acc:  0.324
2 0 loss:  0.6931471824645996
2 1 loss:  0.6931471824645996
2 2 loss:  0.6931471824645996
2 acc:  0.324
3 0 loss:  0.6931471824645996
3 1 loss:  0.6931471824645996
3 2 loss:  0.6931471824645996
3 acc:  0.324
4 0 loss:  0.6931471824645996
4 1 loss:  0.6931471824645996
4 2 loss:  0.6931471824645996
4 acc:  0.324
5 0 loss:  0.6931471824645996
5 1 loss:  0.6931471824645996
5 2 loss:  0.6931471824645996
5 acc:  0.324
6 0 loss:  0.6931471824645996
6 1 loss:  0.6931471824645996
6 2 loss:  0.6931471824645996
6 acc:  0.324
7 0 loss:  0.6931471824645996
7 1 loss:  0.6931471824645996
7 2 loss:  0.6931471824645996
7 acc:  0.324
8 0 loss:  0.6931471824645996
8 1 loss:  0.6931471824645996
8 2 loss:  0.6931471824645996
8 acc:  0.324
9 0 loss:  0.6931471824645996
9 1 loss:  0.6931471824645996
9 2 l