In [1]:
import tensorflow as tf
import numpy as np
import os
from tqdm import tqdm
os.environ['CUDA_VISIBLE_DEVICES'] = '5'
from config_reg import cfg
from dataset import Dataset
from factory_reg import backbone
from random import randint
from tensorflow.python.framework import graph_util
import pandas as pd

class Regression(object):
    """Build a TF1 regression graph from ``cfg`` and train it.

    The constructor reads hyper-parameters from ``cfg.TRAIN``, creates the
    train/test ``Dataset`` iterables, the input placeholders, the backbone
    model, its loss, an Adam training op and the savers.  ``main`` initialises
    all variables and then calls ``train``.
    """

    def __init__(self):
        # --- hyper-parameters and paths taken from the project config ---
        self.Batch_Size = cfg.TRAIN.BATCHSIZE
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.num_classes = cfg.TRAIN.REG_NUM
        self.trainset = Dataset(self.num_classes, 'train')
        self.testset = Dataset(self.num_classes, 'test')
        self.network = cfg.TRAIN.NETWORK
        self.train_txt = cfg.TRAIN.ANNO_PATH
        self.sess = tf.Session()
        # Same config value as self.network; used for naming exported models.
        self.model_type = cfg.TRAIN.NETWORK
        self.input_size = cfg.TRAIN.INPUTSIZE
        self.interval = cfg.TRAIN.SAVE
        self.initial_weights = cfg.TRAIN.INITIAL_WEIGHT
        self.pretrain_mode = cfg.TRAIN.PRETRAIN_MODE
        self.epoch = cfg.TRAIN.EPOCH
        self.pretrain_model = cfg.TRAIN.BACKBONE_PRETRAIN
        self.save_path_ckpt = cfg.TRAIN.SAVE_PATH_CKPT
        self.save_path_pb = cfg.TRAIN.SAVE_PATH_PB

        # Number of lines in the annotation file.  The context manager makes
        # sure the file handle is closed again.
        with open(self.train_txt, 'r') as anno_file:
            self.quantity = sum(1 for _ in anno_file)
        self.source = ''

        # NOTE(review): this placeholder is created but not passed to the
        # backbone below (which receives a literal keep_prob=1.0) -- confirm
        # whether dropout is meant to be fed at run time.
        self.keep_prob = tf.placeholder(dtype=tf.float32, name='dropout')

        # NOTE(review): cfg.TRAIN.MOMENTUM used to be read into this attribute
        # and was then immediately overwritten by this constant; the effective
        # value (0.995) is kept.  It is not used by the graph built here.
        self.moving_ave_decay = 0.995

        # --- graph inputs ---
        self.input_data = tf.placeholder(
            shape=[None, self.input_size, self.input_size, 3],
            dtype=tf.float32, name='input')
        self.input_labels = tf.placeholder(
            shape=[None, self.num_classes], dtype=tf.float32, name='label')
        self.input_scale = tf.placeholder(
            shape=[None, 1], dtype=tf.float32, name='scale')
        self.trainable = tf.placeholder(dtype=tf.bool, name='trainable')

        # --- model and loss ---
        self.model = backbone(model=self.network,
                              input_data=self.input_data,
                              trainable=self.trainable,
                              classes=self.num_classes,
                              keep_prob=1.0,
                              scale=self.input_scale)
        self.loss = self.model.compute_loss(labels=self.input_labels)
        self.net_var = tf.trainable_variables()
        # Only variables whose name contains 'backbone' are restored from the
        # backbone pre-training checkpoint.  (Attribute name kept as-is,
        # including its spelling, so external users keep working.)
        self.varaibles_to_restore = [
            var for var in self.net_var if 'backbone' in var.name]

        # --- optimisation ---
        self.global_step = tf.Variable(
            1.0, dtype=tf.float32, trainable=False, name='global_step')
        self.global_step_update = tf.assign_add(self.global_step, 1.0)
        self.optimizer = tf.train.AdamOptimizer(self.learn_rate_init).minimize(
            self.loss, var_list=self.net_var)
        # Running train_op triggers the ops in the UPDATE_OPS collection, the
        # optimizer step and the global-step increment.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([self.optimizer, self.global_step_update]):
                self.train_op = tf.no_op()

        # --- savers ---
        self.loader_backbone = tf.train.Saver(self.varaibles_to_restore)
        self.loader_whole = tf.train.Saver(tf.global_variables())
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    def _restore_weights(self, loader, path):
        """Try to restore ``path`` into ``self.sess`` using ``loader``.

        Restoring is best-effort: on failure the reason is printed and
        training carries on with whatever values the variables already hold.
        Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt``
        and ``SystemExit`` are no longer swallowed.

        Returns:
            True if the restore succeeded, False otherwise.
        """
        print('=>Restore weights from ' + path)
        try:
            loader.restore(self.sess, path)
        except Exception as err:
            # Report the real cause (missing file, variable/shape mismatch,
            # ...) instead of always claiming the checkpoint does not exist.
            print('=>Failed to restore weights from %s: %s' % (path, err))
            print('=>starts training from scratch ...')
            return False
        return True

    def _export_frozen_graph(self, epoch):
        """Freeze the current graph and write it as ``<model>_epoch=<n>.pb``.

        The output node name 'fc_layer/op_to_store' is hard-coded; the file
        is written to ``self.save_path_pb`` + model name + '.pb'.
        """
        constant_graph = graph_util.convert_variables_to_constants(
            self.sess, self.sess.graph_def, ['fc_layer/op_to_store'])
        model_name = self.model_type + '_epoch=%d' % epoch
        # tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
        with tf.gfile.GFile(self.save_path_pb + model_name + '.pb', mode='wb') as f:
            f.write(constant_graph.SerializeToString())

    def train(self):
        """Run the training loop for ``self.epoch`` epochs.

        Weights are first restored from ``self.initial_weights`` (when
        ``self.pretrain_mode == 'whole'``) or from ``self.pretrain_model``
        (backbone variables only).  Each epoch trains on ``self.trainset``,
        evaluates on ``self.testset``, prints the mean losses and, from
        epoch 5 on, exports a frozen graph whenever both the mean train loss
        and the mean test loss beat their best values so far.

        Variables are not initialised here; ``main`` does that before
        calling this method.
        """
        if self.pretrain_mode == 'whole':
            self._restore_weights(self.loader_whole, self.initial_weights)
        else:
            self._restore_weights(self.loader_backbone, self.pretrain_model)

        # Initial thresholds: a model is only exported once both mean losses
        # drop below 20.
        min_loss_val = 20
        min_loss_train = 20
        for epoch in range(self.epoch):
            progress = tqdm(self.trainset)
            train_epoch_loss, test_epoch_loss = [], []

            for train_data in progress:
                _, train_step_loss = self.sess.run(
                    [self.train_op, self.loss],
                    feed_dict={self.input_data: train_data[0],
                               self.input_labels: train_data[1],
                               self.input_scale: train_data[2],
                               self.trainable: True})
                train_epoch_loss.append(train_step_loss)
                progress.set_description("train loss: %.2f" % train_step_loss)

            for test_data in self.testset:
                # Fetch the tensor directly (not a one-element list) so the
                # result has the same form as the training loss above.
                test_step_loss = self.sess.run(
                    self.loss,
                    feed_dict={self.input_data: test_data[0],
                               self.input_labels: test_data[1],
                               self.input_scale: test_data[2],
                               self.trainable: False})
                test_epoch_loss.append(test_step_loss)

            train_epoch_loss = np.mean(train_epoch_loss)
            test_epoch_loss = np.mean(test_epoch_loss)
            print('Epoch: %2d Train loss: %.2f' % (epoch, train_epoch_loss))
            print('Test loss: %.2f ' % (test_epoch_loss))

            improved = (test_epoch_loss < min_loss_val
                        and train_epoch_loss < min_loss_train)
            if epoch >= 5 and improved:
                min_loss_val = test_epoch_loss
                min_loss_train = train_epoch_loss
                self._export_frozen_graph(epoch)

    def main(self):
        """Initialise all global variables, then start training."""
        self.sess.run(tf.global_variables_initializer())
        self.train()
        
# Script entry point: build the graph and run the full training procedure.
if __name__ == "__main__":
    trainer = Regression()
    trainer.main()





Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
=>Restore weights from /hdd/sd5/tlc/pretrain_model/darknet53/yolov3_test_loss=136.5372.ckpt-80
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /hdd/sd5/tlc/pretrain_model/darknet53/yolov3_test_loss=136.5372.ckpt-80


  0%|          | 0/494 [00:00<?, ?it/s]

/hdd/sd5/tlc/pretrain_model/darknet53/yolov3_test_loss=136.5372.ckpt-80does not exist!
=>starts training from scratch ...


train loss: 5.79: 100%|██████████| 494/494 [01:20<00:00,  6.10it/s] 
train loss: 1.82:   0%|          | 1/494 [00:00<01:11,  6.89it/s]

Epoch:  0 Train loss: 4.79
Test loss: 20.66 


train loss: 2.60: 100%|██████████| 494/494 [01:11<00:00,  6.87it/s] 
train loss: 5.94:   0%|          | 1/494 [00:00<01:17,  6.35it/s]

Epoch:  1 Train loss: 3.54
Test loss: 10.75 


train loss: 3.75: 100%|██████████| 494/494 [01:12<00:00,  6.84it/s] 
train loss: 1.48:   0%|          | 1/494 [00:00<01:22,  5.96it/s]

Epoch:  2 Train loss: 3.10
Test loss: 4.58 


train loss: 3.57: 100%|██████████| 494/494 [01:11<00:00,  6.86it/s] 
train loss: 2.61:   0%|          | 1/494 [00:00<01:20,  6.10it/s]

Epoch:  3 Train loss: 2.93
Test loss: 6.21 


train loss: 1.47: 100%|██████████| 494/494 [01:11<00:00,  6.90it/s] 
train loss: 5.79:   0%|          | 1/494 [00:00<01:23,  5.93it/s]

Epoch:  4 Train loss: 3.12
Test loss: 2.69 


train loss: 2.48: 100%|██████████| 494/494 [01:11<00:00,  6.88it/s] 


Epoch:  5 Train loss: 2.66
Test loss: 3.21 
Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 89 variables.
INFO:tensorflow:Converted 89 variables to const ops.
Instructions for updating:
Use tf.gfile.GFile.


train loss: 0.79: 100%|██████████| 494/494 [01:18<00:00,  6.29it/s] 
train loss: 0.88:   0%|          | 1/494 [00:00<01:21,  6.04it/s]

Epoch:  6 Train loss: 2.58
Test loss: 11.11 


train loss: 2.37: 100%|██████████| 494/494 [01:11<00:00,  6.93it/s] 


Epoch:  7 Train loss: 2.34
Test loss: 2.82 
INFO:tensorflow:Froze 89 variables.
INFO:tensorflow:Converted 89 variables to const ops.


train loss: 3.86: 100%|██████████| 494/494 [01:10<00:00,  6.99it/s] 
train loss: 3.01:   0%|          | 1/494 [00:00<01:13,  6.72it/s]

Epoch:  8 Train loss: 2.38
Test loss: 2.70 


train loss: 1.95: 100%|██████████| 494/494 [01:09<00:00,  7.09it/s] 
train loss: 3.45:   0%|          | 1/494 [00:00<01:05,  7.55it/s]

Epoch:  9 Train loss: 2.23
Test loss: 3.09 


train loss: 2.14: 100%|██████████| 494/494 [01:08<00:00,  7.26it/s] 
train loss: 1.16:   0%|          | 1/494 [00:00<01:13,  6.73it/s]

Epoch: 10 Train loss: 2.12
Test loss: 2.89 


train loss: 0.88: 100%|██████████| 494/494 [01:07<00:00,  7.32it/s] 
train loss: 0.65:   0%|          | 1/494 [00:00<01:05,  7.56it/s]

Epoch: 11 Train loss: 2.12
Test loss: 2.94 


train loss: 1.52: 100%|██████████| 494/494 [01:07<00:00,  7.31it/s]
train loss: 2.42:   0%|          | 1/494 [00:00<01:16,  6.46it/s]

Epoch: 12 Train loss: 1.99
Test loss: 10.63 


train loss: 1.21: 100%|██████████| 494/494 [01:07<00:00,  7.29it/s] 


Epoch: 13 Train loss: 1.95
Test loss: 2.55 
INFO:tensorflow:Froze 89 variables.
INFO:tensorflow:Converted 89 variables to const ops.


train loss: 2.03: 100%|██████████| 494/494 [01:06<00:00,  7.48it/s] 
train loss: 2.75:   0%|          | 1/494 [00:00<00:59,  8.33it/s]

Epoch: 14 Train loss: 1.90
Test loss: 3.60 


train loss: 1.59: 100%|██████████| 494/494 [01:07<00:00,  7.29it/s] 
train loss: 1.98:   0%|          | 1/494 [00:00<01:03,  7.82it/s]

Epoch: 15 Train loss: 1.84
Test loss: 2.92 


train loss: 1.10: 100%|██████████| 494/494 [01:07<00:00,  7.31it/s] 
train loss: 1.00:   0%|          | 1/494 [00:00<01:13,  6.72it/s]

Epoch: 16 Train loss: 1.80
Test loss: 3.79 


train loss: 1.37: 100%|██████████| 494/494 [01:07<00:00,  7.31it/s]
train loss: 1.07:   0%|          | 1/494 [00:00<01:13,  6.68it/s]

Epoch: 17 Train loss: 1.77
Test loss: 5.74 


train loss: 2.09: 100%|██████████| 494/494 [01:07<00:00,  7.33it/s] 
train loss: 2.51:   0%|          | 1/494 [00:00<00:59,  8.27it/s]

Epoch: 18 Train loss: 1.72
Test loss: 9.32 


train loss: 1.26: 100%|██████████| 494/494 [01:07<00:00,  7.37it/s]


Epoch: 19 Train loss: 1.82
Test loss: 2.15 
INFO:tensorflow:Froze 89 variables.
INFO:tensorflow:Converted 89 variables to const ops.


train loss: 3.07: 100%|██████████| 494/494 [01:07<00:00,  7.28it/s]
train loss: 0.85:   0%|          | 1/494 [00:00<01:06,  7.36it/s]

Epoch: 20 Train loss: 1.70
Test loss: 4.08 


train loss: 1.28: 100%|██████████| 494/494 [01:07<00:00,  7.34it/s]


Epoch: 21 Train loss: 1.62
Test loss: 2.15 
INFO:tensorflow:Froze 89 variables.
INFO:tensorflow:Converted 89 variables to const ops.


train loss: 1.29: 100%|██████████| 494/494 [01:08<00:00,  7.23it/s]
train loss: 3.44:   0%|          | 1/494 [00:00<01:02,  7.87it/s]

Epoch: 22 Train loss: 1.59
Test loss: 2.22 


train loss: 3.03: 100%|██████████| 494/494 [01:06<00:00,  7.41it/s]
train loss: 1.47:   0%|          | 1/494 [00:00<01:01,  8.00it/s]

Epoch: 23 Train loss: 1.53
Test loss: 2.44 


train loss: 0.97: 100%|██████████| 494/494 [01:06<00:00,  7.40it/s]
train loss: 0.69:   0%|          | 1/494 [00:00<01:00,  8.09it/s]

Epoch: 24 Train loss: 1.51
Test loss: 2.40 


train loss: 0.54: 100%|██████████| 494/494 [01:07<00:00,  7.35it/s]
train loss: 0.99:   0%|          | 1/494 [00:00<01:25,  5.73it/s]

Epoch: 25 Train loss: 1.52
Test loss: 4.99 


train loss: 0.81: 100%|██████████| 494/494 [01:07<00:00,  7.34it/s]
train loss: 2.08:   0%|          | 1/494 [00:00<01:00,  8.20it/s]

Epoch: 26 Train loss: 1.47
Test loss: 2.63 


train loss: 2.26: 100%|██████████| 494/494 [01:06<00:00,  7.40it/s]
train loss: 2.04:   0%|          | 1/494 [00:00<01:15,  6.54it/s]

Epoch: 27 Train loss: 1.57
Test loss: 6.62 


train loss: 1.67:  26%|██▌       | 128/494 [00:17<00:51,  7.05it/s]

KeyboardInterrupt: 