In [1]:
import tensorflow as tf
import numpy as np
import os
from tqdm import tqdm
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
from config_seg import cfg
from dataset import Dataset
from unet_pp.network import network as network_pp
from unet.network import network as network
from refinenet.network import network as network_refine
from Dupsample.network import network as network_dusample
from PSPnet.network import network as network_psp
from tensorflow.python.framework import graph_util
from loss import dice_loss, CrossEntropy_Loss
%matplotlib inline

class Classitrain(object):
    """Build and run a TensorFlow 1.x training graph for a segmentation network.

    ``__init__`` reads hyper-parameters from ``cfg.TRAIN``, creates the train
    and test ``Dataset`` objects, builds the network selected by
    ``cfg.TRAIN.BACKBONE``, the losses, the learning-rate schedule, the
    training op and the checkpoint savers.  ``main`` initialises all variables
    and then calls ``train``.
    """

    # First epoch index (0-based) at which a frozen graph may be exported.
    EXPORT_START_EPOCH = 15
    # Mean test loss that an epoch has to beat before the first export.
    INITIAL_BEST_TEST_LOSS = 0.9
    # Directory that receives the frozen ``.pb`` graphs.
    EXPORT_DIR = '/hdd/sd5/tlc/PDL1/Model/Model_pb_ce/'
    # Graph node(s) kept as outputs when freezing the graph.
    EXPORT_OUTPUT_NODES = ('decoder/op_to_store',)

    def __init__(self):
        # --- configuration -------------------------------------------------
        self.Batch_Size = cfg.TRAIN.BATCHSIZE
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.num_classes = cfg.TRAIN.NUMCLASS
        self.trainset = Dataset(self.num_classes, 'train')
        self.testset = Dataset(self.num_classes, 'test')
        self.sess = tf.Session()
        self.model_type = cfg.TRAIN.BACKBONE
        self.input_size = cfg.TRAIN.INPUTSIZE
        self.interval = cfg.TRAIN.SAVE
        self.initial_weights = cfg.TRAIN.INITIAL_WEIGHT
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        self.pretrain_mode = cfg.TRAIN.PRETRAIN_MODE
        self.epoch = cfg.TRAIN.EPOCH
        self.warmup_periods = 2
        # One "period" is one pass over the training set.
        self.steps_per_period = len(self.trainset)
        self.pretrain_model = cfg.TRAIN.BACKBONE_PRETRAIN
        self.moving_ave_decay = 0.9995

        # --- graph inputs --------------------------------------------------
        self.input_data = tf.placeholder(dtype=tf.float32, name='input')
        self.input_labels = tf.placeholder(dtype=tf.float32, name='label')
        self.trainable = tf.placeholder(dtype=tf.bool, name='trainable')

        # --- network -------------------------------------------------------
        if self.model_type == 'unetpp':
            self.preds = network_pp(input_=self.input_data, trainable=self.trainable)
        else:
            self.preds = network(input_=self.input_data, trainable=self.trainable)

        # --- losses --------------------------------------------------------
        self.loss_seg = dice_loss(label=self.input_labels, pred=self.preds)
        self.loss_ce = CrossEntropy_Loss(label=self.input_labels, pred=self.preds)
        # ``self.loss`` is the value reported during training and used to
        # decide when to export a model.
        self.loss = self.loss_ce

        # Snapshot taken before the schedule/optimizer add their own variables.
        self.net_var = tf.global_variables()
        self.varaibles_to_restore = [var for var in self.net_var if 'backbone' in var.name]

        # --- learning-rate schedule: linear warm-up, then cosine decay -------
        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False,
                                           name='global_step')
            warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
                                       dtype=tf.float64, name='warmup_steps')
            train_steps = tf.constant(self.epoch * self.steps_per_period,
                                      dtype=tf.float64, name='train_steps')
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                false_fn=lambda: self.learn_rate_end
                + 0.5 * (self.learn_rate_init - self.learn_rate_end)
                * (1 + tf.cos((self.global_step - warmup_steps)
                              / (train_steps - warmup_steps) * np.pi))
            )
            self.global_step_update = tf.assign_add(self.global_step, 1.0)

        # --- optimizer / train op -------------------------------------------
        # NOTE(review): the optimizer minimises ``loss_seg`` while ``self.loss``
        # (reported and used for model selection) is ``loss_ce`` -- confirm
        # that this is intended.
        # NOTE(review): ``var_list`` is filtered from tf.global_variables(),
        # not tf.trainable_variables() -- confirm that this is intended.
        self.optimizer_seg = tf.train.AdamOptimizer(self.learn_rate).minimize(
            self.loss_seg,
            var_list=[var for var in self.net_var if 'fc_layer' not in var.name])
        # Running ``train_op`` triggers everything registered in UPDATE_OPS,
        # one optimizer step and one global-step increment.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([self.optimizer_seg, self.global_step_update]):
                self.train_op = tf.no_op()

        # --- savers ----------------------------------------------------------
        self.loader_backbone = tf.train.Saver(self.varaibles_to_restore)
        self.loader_whole = tf.train.Saver(tf.global_variables())
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    def _restore_weights(self, saver, checkpoint_path):
        """Try to restore ``checkpoint_path`` with ``saver``; return True on success.

        A failed restore is reported together with the underlying error (the
        checkpoint may be missing, or may not match the variables in the
        graph) and training simply continues with the current variable values.
        Only TensorFlow op errors and ``ValueError`` are handled, so
        ``KeyboardInterrupt`` and programming errors still propagate.
        """
        print('=>Restore weights from %s' % checkpoint_path)
        try:
            saver.restore(self.sess, checkpoint_path)
        except (tf.errors.OpError, ValueError) as err:
            print('=>Could not restore weights from %s: %s' % (checkpoint_path, err))
            print('=>starts training from scratch ...')
            return False
        return True

    def _export_frozen_graph(self, epoch):
        """Freeze the current session graph and write it to ``EXPORT_DIR`` as a .pb file."""
        frozen_graph = graph_util.convert_variables_to_constants(
            self.sess, self.sess.graph_def, list(self.EXPORT_OUTPUT_NODES))
        # Make sure the target directory exists so a long run is not lost to a
        # missing folder at export time.
        tf.gfile.MakeDirs(self.EXPORT_DIR)
        pb_path = os.path.join(self.EXPORT_DIR,
                               '%s_epoch=%d.pb' % (self.model_type, epoch))
        with tf.gfile.GFile(pb_path, mode='wb') as f:
            f.write(frozen_graph.SerializeToString())

    def train(self):
        """Run the training loop for ``self.epoch`` epochs.

        Weights are first restored from ``cfg.TRAIN.INITIAL_WEIGHT`` (when
        ``pretrain_mode == 'whole'``) or from ``cfg.TRAIN.BACKBONE_PRETRAIN``
        (backbone variables only, otherwise).  Each epoch runs one pass over
        the training set followed by one evaluation pass over the test set and
        prints the mean of ``self.loss`` for both.  From epoch
        ``EXPORT_START_EPOCH`` on, a frozen graph is exported every time the
        mean test loss improves on the best value seen so far.
        """
        if self.pretrain_mode == 'whole':
            self._restore_weights(self.loader_whole, self.initial_weights)
        else:
            self._restore_weights(self.loader_backbone, self.pretrain_model)

        best_test_loss = self.INITIAL_BEST_TEST_LOSS
        for epoch in range(self.epoch):
            progress = tqdm(self.trainset)
            train_losses = []
            for train_data in progress:
                _, step_loss = self.sess.run(
                    [self.train_op, self.loss],
                    feed_dict={self.input_data: train_data[0],
                               self.input_labels: train_data[1],
                               self.trainable: True})
                train_losses.append(step_loss)
                progress.set_description("train loss: %.2f" % step_loss)

            test_losses = [
                self.sess.run(self.loss,
                              feed_dict={self.input_data: test_data[0],
                                         self.input_labels: test_data[1],
                                         self.trainable: False})
                for test_data in self.testset
            ]

            train_epoch_loss = np.mean(train_losses)
            test_epoch_loss = np.mean(test_losses)
            print('Epoch: %2d Train loss: %.2f' % (epoch, train_epoch_loss))
            print('Test loss: %.2f' % test_epoch_loss)

            if epoch >= self.EXPORT_START_EPOCH and test_epoch_loss < best_test_loss:
                best_test_loss = test_epoch_loss
                self._export_frozen_graph(epoch)

    def main(self):
        """Initialise all global variables, then start training."""
        self.sess.run(tf.global_variables_initializer())
        self.train()
        
# Script entry point: build the trainer, then initialise variables and train.
if __name__ == '__main__':
    trainer = Classitrain()
    trainer.main()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.conv2d_transpose instead.
[None, None, None, 3]
Instructions for updating:
Use tf.cast instead.
=>Restore weights from /ssd2/tlc/pretrain_model/U-net/res18_epoch=29_test_loss=0.6633.ckpt-29
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /ssd2/tlc/pretrain_model/U-net/res18_epoch=29_test_loss=0.6633.ckpt-29


  0%|          | 0/365 [00:00<?, ?it/s]

/ssd2/tlc/pretrain_model/U-net/res18_epoch=29_test_loss=0.6633.ckpt-29does not exist!
=>starts training from scratch ...


train loss: 0.63: 100%|██████████| 365/365 [05:11<00:00,  1.17it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  0 Train loss: 0.71
Test loss: 0.82


train loss: 0.64: 100%|██████████| 365/365 [05:06<00:00,  1.19it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  1 Train loss: 0.64
Test loss: 0.61


train loss: 0.62: 100%|██████████| 365/365 [04:48<00:00,  1.26it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  2 Train loss: 0.63
Test loss: 0.62


train loss: 0.62: 100%|██████████| 365/365 [04:41<00:00,  1.30it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  3 Train loss: 0.63
Test loss: 0.61


train loss: 0.62: 100%|██████████| 365/365 [04:44<00:00,  1.28it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  4 Train loss: 0.62
Test loss: 0.67


train loss: 0.60: 100%|██████████| 365/365 [04:42<00:00,  1.29it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  5 Train loss: 0.62
Test loss: 0.62


train loss: 0.61: 100%|██████████| 365/365 [04:54<00:00,  1.24it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  6 Train loss: 0.61
Test loss: 0.61


train loss: 0.62: 100%|██████████| 365/365 [04:47<00:00,  1.27it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  7 Train loss: 0.61
Test loss: 0.60


train loss: 0.61: 100%|██████████| 365/365 [05:02<00:00,  1.21it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  8 Train loss: 0.61
Test loss: 0.61


train loss: 0.60: 100%|██████████| 365/365 [05:03<00:00,  1.20it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch:  9 Train loss: 0.61
Test loss: 0.61


train loss: 0.61: 100%|██████████| 365/365 [04:53<00:00,  1.24it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch: 10 Train loss: 0.61
Test loss: 0.60


train loss: 0.61: 100%|██████████| 365/365 [04:40<00:00,  1.30it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch: 11 Train loss: 0.61
Test loss: 0.60


train loss: 0.60: 100%|██████████| 365/365 [04:32<00:00,  1.34it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch: 12 Train loss: 0.61
Test loss: 0.60


train loss: 0.61: 100%|██████████| 365/365 [04:45<00:00,  1.28it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch: 13 Train loss: 0.60
Test loss: 0.61


train loss: 0.60: 100%|██████████| 365/365 [04:22<00:00,  1.39it/s]
  0%|          | 0/365 [00:00<?, ?it/s]

Epoch: 14 Train loss: 0.60
Test loss: 0.60


train loss: 0.61: 100%|██████████| 365/365 [04:09<00:00,  1.46it/s]


Epoch: 15 Train loss: 0.60
Test loss: 0.60
Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 276 variables.
INFO:tensorflow:Converted 276 variables to const ops.
Instructions for updating:
Use tf.gfile.GFile.


train loss: 0.61: 100%|██████████| 365/365 [04:14<00:00,  1.43it/s]


Epoch: 16 Train loss: 0.60
Test loss: 0.60
INFO:tensorflow:Froze 276 variables.
INFO:tensorflow:Converted 276 variables to const ops.


train loss: 0.62:  27%|██▋       | 98/365 [01:05<03:01,  1.47it/s]

KeyboardInterrupt: 