In [1]:
import time
import warnings
warnings.filterwarnings("ignore", message=r"Passing", category=FutureWarning)
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import keras
import numpy as np
from nets.unet import Unet
from nets.unet_xception import UnetXception
from nets.unet_training import Generator, dice_loss_with_CE, CE
from nets.losses import tversky_loss_CE
from keras.utils.data_utils import get_file
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import losses
from PIL import Image
from utils.metrics import Iou_score, f_score
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session  
# Create the TensorFlow session with GPU `allow_growth` enabled and register it
# as the session Keras uses.
# The alternative of capping usage with per_process_gpu_memory_fraction = 0.5
# is left disabled.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
set_session(tf.Session(config=config))

Using TensorFlow backend.


# 从头训练 (Train from scratch)

In [2]:
# ---------------------------------------------------------------------#
#   Run configuration: every value a reader may want to tune lives here.
# ---------------------------------------------------------------------#

# Network input shape (presumably [height, width, channels] -- confirm against the model).
inputs_size = [512,512,3]

# Output directory for this run: checkpoints, TensorBoard events and the CSV
# loss log are all written below it by the callback cells.
log_dir = "logs_512/20230605"
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and the cell can be re-run safely.
os.makedirs(log_dir, exist_ok=True)

#---------------------#
#   Number of classes + 1
#   (presumably the extra one is the background class -- confirm)
#---------------------#
num_classes = 2

#--------------------------------------------------------------------#
#   Suggested setting:
#   few classes (a handful)                                  -> True
#   many classes (a dozen or more), batch_size above 10      -> True
#   many classes (a dozen or more), batch_size below 10      -> False
#   Read by the later training cell that chooses between
#   dice_loss_with_CE() and CE().
#--------------------------------------------------------------------#
dice_loss = True

# Dataset locations: split lists, input images and segmentation masks.
train_file = 'VOCdevkit/train_20230601/ImageSets/Segmentation/train.txt'
val_file = 'VOCdevkit/train_20230601/ImageSets/Segmentation/val.txt'
img_dir = 'VOCdevkit/train_20230601/JPEGImages'
mask_dir = 'VOCdevkit/train_20230601/SegmentationClass/'

In [3]:
# Build the model.
# Statement order matters here: reset Keras state, fix the learning phase,
# then construct the network.
K.clear_session()
# Fix the Keras learning phase to 1 before the model is constructed.
# NOTE(review): with the phase fixed globally, validation presumably also runs
# in training mode (e.g. dropout still active), which would affect val_loss and
# the checkpoints selected on it -- confirm this is intended.
K.set_learning_phase(1)
# NOTE(review): whether drop_rate=0.8 is a drop probability or a keep
# probability depends on UnetXception's implementation -- confirm.
model = UnetXception(inputs_size,num_classes, drop_rate=0.8)
# model.summary()



Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [4]:
def _read_sample_list(list_path):
    """Return the non-blank lines of a dataset split file.

    Args:
        list_path: Path to a text file with one sample entry per line.

    Returns:
        A list of the file's lines in file order, line endings preserved,
        with whitespace-only lines removed so they neither reach the batch
        generator nor inflate the sample count used for step calculation.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(list_path, "r") as f:
        return [line for line in f if line.strip()]


# Sample lists for the training and validation splits.
train_lines = _read_sample_list(train_file)
val_lines = _read_sample_list(val_file)

In [5]:
# Checkpointing: considered every 2 epochs (period=2); the full model (not just
# the weights) is written, and only when val_loss is the best seen so far.
checkpoint_period = ModelCheckpoint(
    os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
    monitor='val_loss',
    save_weights_only=False,
    save_best_only=True,
    period=2,
)

# Learning-rate schedule: multiply the rate by 0.5 once val_loss has gone
# 10 epochs without improving.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    verbose=1,
)

# Optional early stopping: stop once val_loss has gone 20 epochs without
# improving. It is only constructed here; the training cells in this notebook
# do not include it in their callback lists.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=20,
    verbose=1,
)

# TensorBoard event files go into the run directory.
tensorboard = TensorBoard(log_dir=log_dir)

# Per-epoch metrics are appended to loss.csv in the run directory.
csvlogger = CSVLogger(os.path.join(log_dir, 'loss.csv'), append=True)

In [6]:
# Hyper-parameters for the from-scratch run.
lr = 1e-5

initial_epoch = 0
end_epoch  = 150
Batch_size = 8

# Compile with the project's tversky_loss_CE (alpha=0.5) and track f_score.
# (The original comment here said "cross-entropy"; the loss actually passed is
# tversky_loss_CE.)
model.compile(loss = tversky_loss_CE(alpha=0.5),
        optimizer = Adam(lr=lr),
        metrics = [f_score()])
print('Train on {} samples, val on {} samples, with batch size {}.'.format(len(train_lines), len(val_lines), Batch_size))

# Batch generators for the two splits; the boolean passed to generate() differs
# between them (presumably it toggles training-time augmentation -- confirm in
# nets/unet_training.py).
gen = Generator(Batch_size, train_lines, inputs_size, num_classes, img_dir, mask_dir).generate(True)
gen_val = Generator(Batch_size, val_lines, inputs_size, num_classes, img_dir, mask_dir).generate(False)

# NOTE(review): the saved run of this cell stopped in epoch 1 with
# "ValueError: cannot reshape array of size 2359296 into shape (512,512,3)".
# 2359296 is exactly 3 x (512*512*3), so presumably a sample reached a reshape
# without having been brought to inputs_size -- check Generator and the dataset.

# Start training.
# Floor division gives 0 steps when a split holds fewer samples than one batch;
# clamp to 1 so fit_generator always receives a positive step count.
model.fit_generator(gen,
                    steps_per_epoch=max(1, len(train_lines)//Batch_size),
                    validation_data=gen_val,
                    validation_steps=max(1, len(val_lines)//Batch_size),
                    epochs=end_epoch,
                    initial_epoch=initial_epoch,
                    workers=1,
                    use_multiprocessing=False,
                    callbacks=[checkpoint_period, reduce_lr,tensorboard, csvlogger])

Train on 1419 samples, val on 355 samples, with batch size 8.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/150


ValueError: cannot reshape array of size 2359296 into shape (512,512,3)

In [None]:
# NOTE(review): `cv` is not defined in any visible cell, so executing this cell
# raises NameError and halts a "Run All" here. It may be a leftover, or a
# deliberate barrier in front of the fine-tuning cells -- confirm, then remove
# it or replace it with an explicit, documented stop.
cv

# fine-tune

In [None]:
# Load previously saved weights into the existing `model` as the starting point
# for fine-tuning (the file name indicates an epoch-50 checkpoint).
# NOTE(review): the path is hard-coded and points to a different run directory
# than log_dir; `model` must already have been built by the model cell.
model.load_weights('logs_512/20230424_LC/ep050-loss0.199-val_loss0.209.h5')

In [None]:
# Re-compile for fine-tuning: tversky_loss_CE with alpha=0.7 and Adam at 5e-6
# (a lower rate than the 1e-5 used by the from-scratch cell); f_score is the
# tracked metric.
model.compile(
    loss=tversky_loss_CE(alpha=0.7),
    optimizer=Adam(lr=5e-6),
    metrics=[f_score()],
)

In [None]:
# Batch size for this fine-tuning run. It was previously the literal 4 repeated
# in both generators and both step counts; naming it keeps them in sync.
finetune_batch_size = 4

# Batch generators for the two splits; the boolean passed to generate() differs
# between them (presumably it toggles training-time augmentation -- confirm in
# nets/unet_training.py).
gen = Generator(finetune_batch_size, train_lines, inputs_size, num_classes, img_dir, mask_dir).generate(True)
gen_val = Generator(finetune_batch_size, val_lines, inputs_size, num_classes, img_dir, mask_dir).generate(False)

# Start training.
# Epoch numbering runs from 50 to 150, continuing from the epoch-50 checkpoint
# loaded in the preceding load_weights cell.
# Floor division gives 0 steps when a split holds fewer samples than one batch;
# clamp to 1 so fit_generator always receives a positive step count.
model.fit_generator(gen,
                    steps_per_epoch=max(1, len(train_lines)//finetune_batch_size),
                    validation_data=gen_val,
                    validation_steps=max(1, len(val_lines)//finetune_batch_size),
                    epochs=150,
                    initial_epoch=50,
                    workers=1,
                    use_multiprocessing=False,
                    callbacks=[checkpoint_period,tensorboard, csvlogger])

## Fine-tune (ExponentDecayScheduler learning-rate schedule)

In [None]:
from callbacks import WarmUpCosineDecayScheduler, ExponentDecayScheduler

In [None]:
# Checkpointing: considered every epoch (period=1); the full model (not just
# the weights) is written, and only when val_loss is the best seen so far.
checkpoint_period = ModelCheckpoint(
    os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
    monitor='val_loss',
    save_weights_only=False,
    save_best_only=True,
    period=1,
)

# Learning-rate schedule: the project's ExponentDecayScheduler with
# decay_rate=0.92 replaces the plateau-based schedule
# (ReduceLROnPlateau on val_loss, factor=0.5, patience=3), which is disabled.
# NOTE(review): presumably the rate is multiplied by 0.92 per epoch -- confirm
# in callbacks.py.
reduce_lr = ExponentDecayScheduler(
    decay_rate=0.92,
    verbose=1,
)

# Optional early stopping: stop once val_loss has gone 20 epochs without
# improving. It is only constructed here; the training cells in this notebook
# do not include it in their callback lists.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=20,
    verbose=1,
)

# TensorBoard event files go into the run directory.
tensorboard = TensorBoard(log_dir=log_dir)

# Per-epoch metrics are appended to loss.csv in the run directory.
csvlogger = CSVLogger(os.path.join(log_dir, 'loss.csv'), append=True)

In [None]:
# Hyper-parameters for the second training stage.
lr = 1e-6
# Despite their names, these two values only delimit the epoch range passed to
# fit_generator (100 -> 200); no layer is frozen or unfrozen in the visible cells.
Freeze_Epoch = 100
Unfreeze_Epoch = 200
Batch_size = 8

# The loss is selected by the `dice_loss` flag from the configuration cell:
# dice_loss_with_CE() when it is set, plain CE() otherwise. f_score is tracked.
model.compile(loss = dice_loss_with_CE() if dice_loss else CE(),
        optimizer = Adam(lr=lr),
        metrics = [f_score()])
print('Train on {} samples, val on {} samples, with batch size {}.'.format(len(train_lines), len(val_lines), Batch_size))

# Batch generators for the two splits; the boolean passed to generate() differs
# between them (presumably it toggles training-time augmentation -- confirm in
# nets/unet_training.py).
gen = Generator(Batch_size, train_lines, inputs_size, num_classes, img_dir, mask_dir).generate(True)
gen_val = Generator(Batch_size, val_lines, inputs_size, num_classes, img_dir, mask_dir).generate(False)

# Start training.
# Floor division gives 0 steps when a split holds fewer samples than one batch;
# clamp to 1 so fit_generator always receives a positive step count.
model.fit_generator(gen,
                    steps_per_epoch=max(1, len(train_lines)//Batch_size),
                    validation_data=gen_val,
                    validation_steps=max(1, len(val_lines)//Batch_size),
                    epochs=Unfreeze_Epoch,
                    initial_epoch=Freeze_Epoch,
                    workers=1,
                    use_multiprocessing=False,
                    callbacks=[checkpoint_period, reduce_lr,tensorboard, csvlogger])

# Loss visualization

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load a run's per-epoch log from CSV for plotting (presumably a file written
# by CSVLogger -- confirm).
# NOTE(review): this rebinds `losses`, shadowing the `keras.losses` module
# imported in the first cell; the following cells read this name, so renaming
# must be done across all of them together.
# NOTE(review): the path is hard-coded and is not the current log_dir.
losses = pd.read_csv('logs_512/20220629_ESCA/loss.csv')

In [None]:
# Display the loaded log table as this cell's output.
losses

In [None]:
# Pull the series for the loss figure out of the log table, in this order:
# training loss, validation loss, epoch index.
loss_train, loss_val, epochs = (
    losses[column] for column in ('loss', 'val_loss', 'epoch')
)

In [None]:
# Training vs. validation loss per epoch on one 8x8-inch figure; the figure is
# written to ./losses.png before being shown.
fig, ax = plt.subplots(figsize=(8, 8))
for curve, curve_label in ((loss_train, 'Training loss'), (loss_val, 'validation loss')):
    ax.plot(epochs, curve, label=curve_label)
ax.set_title('Training and Validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
fig.savefig('./losses.png')
plt.show()

In [None]:
# Pull the series for the f-score figure out of the log table, in this order:
# training f-score, validation f-score, epoch index.
# The `loss_train` / `loss_val` names are reused on purpose: the plotting cell
# that follows reads exactly these names.
loss_train, loss_val, epochs = (
    losses[column] for column in ('_f_score', 'val__f_score', 'epoch')
)

In [None]:
# Training vs. validation f-score per epoch on one 8x8-inch figure; the figure
# is written to ./f_score.png before being shown.
fig, ax = plt.subplots(figsize=(8, 8))
for curve, curve_label in ((loss_train, 'Training f score'), (loss_val, 'validation f score')):
    ax.plot(epochs, curve, label=curve_label)
ax.set_title('Training and Validation f score')
ax.set_xlabel('Epochs')
ax.set_ylabel('f score')
ax.legend()
fig.savefig('./f_score.png')
plt.show()