In [3]:
'''
Function Description：使用现有的VGG16搭建一个神经网络以实现乳腺癌分子分型的分类
'''
import tensorflow as tf 
import os
import numpy as np 
#加载训练数据
#以下是train和test的图片集合
batch_size = 20
learning_rate = 1e-3
class_names = ['Her2', 'luminal_A', 'luminal_B', 'TN']#每个分类的名称

In [4]:
#获取train的文件路径
train_data_dir = '/media/ly/liangyi/分子分型/X线MRI共有/MRI_minbox/train'
validation_data_dir = '/media/ly/liangyi/分子分型/X线MRI共有/MRI_minbox/validation'
Her2_train_data_dir = train_data_dir + '/Her_2/'
luminal_A_train_data_dir = train_data_dir + '/luminal_A/'
luminal_B_train_data_dir = train_data_dir + '/luminal_B/'
TN_train_data_dir = train_data_dir + '/TN/'

#将train的图片名依次读取
Her2_train_data_name = [Her2_train_data_dir + filename for filename in os.listdir(Her2_train_data_dir)]
luminal_A_train_data_name = [luminal_A_train_data_dir + filename for filename in os.listdir(luminal_A_train_data_dir)]
luminal_B_train_data_name = [luminal_B_train_data_dir + filename for filename in os.listdir(luminal_B_train_data_dir)]
TN_train_data_name = [TN_train_data_dir + filename for filename in os.listdir(TN_train_data_dir)]

train_data_name = Her2_train_data_name + luminal_A_train_data_name + luminal_B_train_data_name + TN_train_data_name
train_data_name = tf.convert_to_tensor(train_data_name)

#用0,1,2,3分别代表四种类型
train_data_labels = [0]* len(Her2_train_data_name) + [1]*len(luminal_A_train_data_name) + [2]*len(luminal_B_train_data_name) + [3] * len(TN_train_data_name)
train_data_labels = tf.convert_to_tensor(train_data_labels, dtype=tf.int64)
#train_data_name = np.array(train_data_name)
#train_data_labels = np.array(train_data_labels)


In [5]:
print(train_data_name.shape)
print(train_data_labels.shape)

(583,)
(583,)


In [37]:
print(len(Her2_train_data_name), len(luminal_A_train_data_name), len(luminal_B_train_data_name), len(TN_train_data_name))

189 125 148 121


In [6]:
#获取验证图像的文家路径
Her2_val_data_dir = validation_data_dir + '/Her_2/'
luminal_A_val_data_dir = validation_data_dir + '/luminal_A/'
luminal_B_val_data_dir = validation_data_dir + '/luminal_B/'
TN_val_data_dir = validation_data_dir + '/TN/'
#获取验证图像的文件名称
Her2_val_data_name = [Her2_val_data_dir + filename for filename in os.listdir(Her2_val_data_dir)]
luminal_A_val_data_name = [luminal_A_val_data_dir + filename for filename in os.listdir(luminal_A_val_data_dir)]
luminal_B_val_data_name = [luminal_B_val_data_dir + filename for filename in os.listdir(luminal_B_val_data_dir)]
TN_val_data_name = [TN_val_data_dir + filename for filename in os.listdir(TN_val_data_dir)]
val_data_name = Her2_val_data_name + luminal_A_val_data_name + luminal_B_val_data_name + TN_val_data_name

val_data_labels = [0] * len(Her2_val_data_name) + [1] * len(luminal_A_val_data_name) + [2] * len(luminal_B_val_data_name) + [3] * len(TN_val_data_name)
#val_data_name = np.array(val_data_name)
#val_data_labels = np.array(val_data_labels)
val_data_name = tf.convert_to_tensor(val_data_name)
val_data_labels = tf.convert_to_tensor(val_data_labels)

In [7]:
print(val_data_name.shape)
print(val_data_labels.shape)

(250,)
(250,)


In [8]:
#创建映射函数
def _map(imagename, label):
    image_string = tf.io.read_file(imagename)
    image_decoded = tf.image.decode_png(image_string, channels = 1)
    image_resize = tf.image.resize(image_decoded, [28,28])/255.0
    return image_resize, label


In [9]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_data_name, train_data_labels))


In [10]:
train_dataset = train_dataset.map(
    map_func = _map,
    num_parallel_calls= tf.data.experimental.AUTOTUNE
)

In [11]:
#对训练集合做出设定， 
train_dataset = train_dataset.shuffle(buffer_size = 1000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

In [12]:
val_dataset = tf.data.Dataset.from_tensor_slices((val_data_name, val_data_labels))
val_dataset = val_dataset.map(_map).batch(batch_size)

In [13]:
print(train_dataset, val_dataset)# dataset 本身无法方便地获取元素

<PrefetchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)> <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int32)>


In [14]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation = tf.nn.relu, input_shape = (28,28,1)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 5, activation = tf.nn.relu),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation = tf.nn.relu),
    tf.keras.layers.Dense(4, activation = tf.nn.sigmoid)
])

In [15]:
num_epochs = 30

In [16]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate = learning_rate),
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    metrics=[tf.keras.metrics.sparse_categorical_accuracy]
)

In [17]:
model.fit(train_dataset, epochs = num_epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fe018ec3d50>

In [18]:
model.evaluate(val_dataset)



[1.128541350364685, 0.5199999809265137]

In [32]:
print(val_dataset)

<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int32)>


In [33]:
model1 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(64, 3, activation = tf.nn.relu, input_shape = (28,28,1)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation = tf.nn.relu),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation = tf.nn.relu),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation = tf.nn.relu),
    tf.keras.layers.Dense(64, activation = tf.nn.relu),
    tf.keras.layers.Dense(4, activation = tf.nn.sigmoid)
])
model1.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate = learning_rate),
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    metrics=[tf.keras.metrics.sparse_categorical_accuracy]
)

In [36]:
model1.fit(train_dataset, validation_data = val_dataset, epochs = 40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7fdf244ae450>