In [1]:
import tensorflow as tf
import numpy as np
import os
from tqdm import tqdm
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
from config_cls import cfg
from dataset_triplet import Dataset
from factory_cls import backbone
from random import randint
from tensorflow.python.framework import graph_util
from classi_nets import resnet_18_triplet
import pandas as pd

class Classitrain(object):
    """Build and train the three-input (anchor / positive / negative) ResNet-18.

    The constructor reads every hyper-parameter from ``cfg.TRAIN``, creates
    the train/test ``Dataset`` iterators, builds the TensorFlow 1.x graph
    (placeholders, model, loss, learning-rate schedule, optimiser, savers)
    and opens a ``tf.Session``.  ``main()`` initialises the variables and
    runs ``train()``.
    """

    # Directory that receives the frozen ``.pb`` graphs written by train().
    EXPORT_DIR = '/hdd/sd5/tlc/TCT/Model_pb/comb'
    # Graph node(s) kept when the graph is frozen for export.
    OUTPUT_NODE_NAMES = ['fc_layer/op_to_store']
    # No export happens before this (0-based) epoch index.
    MIN_EXPORT_EPOCH = 5
    # Both epoch losses must be below this value for the first export.
    INITIAL_LOSS_CEILING = 0.8

    def __init__(self):
        """Read the configuration and build the whole training graph."""
        self.Batch_Size = cfg.TRAIN.BATCHSIZE
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.num_classes = cfg.TRAIN.NUMCLASS
        self.trainset = Dataset(self.num_classes, 'train')
        self.testset = Dataset(self.num_classes, 'test')
        self.network = cfg.TRAIN.NETWORK
        self.train_txt = cfg.TRAIN.ANNO_PATH
        self.sess = tf.Session()
        self.model_type = cfg.TRAIN.NETWORK
        self.input_size = cfg.TRAIN.INPUTSIZE
        self.interval = cfg.TRAIN.SAVE
        self.initial_weights = cfg.TRAIN.INITIAL_WEIGHT
        self.pretrain_mode = cfg.TRAIN.PRETRAIN_MODE
        self.epoch = cfg.TRAIN.EPOCH
        self.pretrain_model = cfg.TRAIN.BACKBONE_PRETRAIN
        # len(trainset) is used as the number of optimiser steps per epoch.
        self.steps_per_period = len(self.trainset)
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        # Count annotation lines without leaking the file handle.
        with open(self.train_txt, 'r') as anno_file:
            self.quantity = sum(1 for _ in anno_file)
        self.keep_prob = tf.placeholder(dtype=tf.float32, name='dropout')
        # NOTE(review): this used to be read from cfg.TRAIN.MOMENTUM and then
        # immediately overwritten; only the effective value is kept.  It is
        # currently unused because the moving-average op is disabled.
        self.moving_ave_decay = 0.995
        self.warmup_periods = 1

        image_shape = [None, self.input_size, self.input_size, 3]
        self.input_anchor = tf.placeholder(shape=image_shape, dtype=tf.float32, name='input_anchor')
        self.input_pos = tf.placeholder(shape=image_shape, dtype=tf.float32, name='input_pos')
        self.input_neg = tf.placeholder(shape=image_shape, dtype=tf.float32, name='input_neg')
        self.input_labels = tf.placeholder(dtype=tf.float32, name='label')
        self.trainable = tf.placeholder(dtype=tf.bool, name='trainable')

        self.model = resnet_18_triplet.Resnet18(input_anchor=self.input_anchor,
                                                input_pos=self.input_pos,
                                                input_neg=self.input_neg,
                                                trainable=self.trainable,
                                                classes=self.num_classes,
                                                keep_prob=self.keep_prob)
        self.loss = self.model.compute_loss()
        # Snapshot taken before the schedule/optimiser variables exist, so it
        # only contains the variables created by the model.
        self.net_var = tf.global_variables()
        self.varaibles_to_restore = [var for var in self.net_var if 'backbone' in var.name]

        # Linear warm-up for `warmup_periods` epochs, then cosine decay from
        # learn_rate_init down to learn_rate_end over the remaining steps.
        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
            warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
                                       dtype=tf.float64, name='warmup_steps')
            train_steps = tf.constant(self.epoch * self.steps_per_period,
                                      dtype=tf.float64, name='train_steps')
            # Clamp to >= 1 so that epoch == warmup_periods cannot produce a
            # 0/0 (NaN) learning rate at the end of warm-up.
            decay_span = max((self.epoch - self.warmup_periods) * self.steps_per_period, 1)
            decay_steps = tf.constant(decay_span, dtype=tf.float64, name='decay_steps')
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) *
                                 (1 + tf.cos((self.global_step - warmup_steps) / decay_steps * np.pi))
            )
            global_step_update = tf.assign_add(self.global_step, 1.0)

        self.optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss, var_list=self.net_var)
        # train_op is a no-op that depends on the UPDATE_OPS collection, the
        # optimiser step and the global-step increment.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([self.optimizer, global_step_update]):
                self.train_op = tf.no_op()

        self.loader_backbone = tf.train.Saver(self.varaibles_to_restore)
        self.loader_whole = tf.train.Saver(tf.global_variables())
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    def compute_keep_prob(self, now_step=0, start_value=1.0, stop_value=0.75, nr_steps=100000, trainable=False):
        """Return the keep probability of a linear schedule at ``now_step``.

        The schedule has ``nr_steps`` evenly spaced values running from
        ``start_value`` (step 0) to ``stop_value`` (step ``nr_steps - 1``).
        Steps outside that range are clamped, so the schedule saturates
        instead of raising ``IndexError``.

        Args:
            now_step: index into the schedule.
            start_value: value at step 0.
            stop_value: value at the last step.
            nr_steps: number of points in the schedule; must be >= 1.
            trainable: unused; kept for interface compatibility.

        Returns:
            The keep probability as a float.

        Raises:
            ValueError: if ``nr_steps`` is smaller than 1.
        """
        if nr_steps < 1:
            raise ValueError('nr_steps must be >= 1, got %r' % (nr_steps,))
        last_step = nr_steps - 1
        if last_step == 0:
            # A one-point schedule only contains the start value.
            return float(start_value)
        step = min(max(int(now_step), 0), last_step)
        if step == last_step:
            # Return the end point exactly, free of rounding error.
            return float(stop_value)
        # Closed form: avoids allocating an nr_steps-long array per call.
        return start_value + (stop_value - start_value) * (step / float(last_step))

    def _restore_weights(self, loader, checkpoint_path):
        """Try to restore ``checkpoint_path`` with ``loader``; report failure and continue."""
        print ('=>Restore weights from %s' % (checkpoint_path,))
        try:
            loader.restore(self.sess, checkpoint_path)
        except Exception as err:
            # Best effort by design: training continues from the freshly
            # initialised variables.  KeyboardInterrupt is no longer swallowed.
            print ('%s could not be restored: %s' % (checkpoint_path, err))
            print ('=>starts training from scratch ...')

    def _feed(self, batch, is_training):
        """Build the feed dict for one batch (items 0/1/2 = anchor/pos/neg)."""
        return {self.input_anchor: batch[0],
                self.input_pos: batch[1],
                self.input_neg: batch[2],
                self.trainable: is_training,
                # NOTE(review): dropout is held at 1.0 for training and
                # evaluation; the compute_keep_prob() schedule was computed
                # but never fed.  Confirm whether that is intentional.
                self.keep_prob: 1.0}

    def _export_frozen_graph(self, epoch):
        """Freeze the current variables into constants and write a ``.pb`` file."""
        constant_graph = graph_util.convert_variables_to_constants(
            self.sess, self.sess.graph_def, self.OUTPUT_NODE_NAMES)
        model_name = self.model_type + '_epoch=%d' % epoch
        if not os.path.isdir(self.EXPORT_DIR):
            os.makedirs(self.EXPORT_DIR)
        pb_path = os.path.join(self.EXPORT_DIR, model_name + '.pb')
        # tf.gfile.GFile replaces the deprecated FastGFile.
        with tf.gfile.GFile(pb_path, mode='wb') as f:
            f.write(constant_graph.SerializeToString())

    def train(self):
        """Restore weights, then train and evaluate for ``self.epoch`` epochs.

        With ``pretrain_mode == 'whole'`` the full checkpoint
        ``initial_weights`` is restored, otherwise only the 'backbone'
        variables from ``pretrain_model``.  After each epoch the mean train
        and test losses are printed.  From epoch ``MIN_EXPORT_EPOCH`` on, a
        frozen graph is exported whenever both means are below their best
        exported values so far.
        """
        if self.pretrain_mode == 'whole':
            self._restore_weights(self.loader_whole, self.initial_weights)
        else:
            self._restore_weights(self.loader_backbone, self.pretrain_model)

        min_loss_val = self.INITIAL_LOSS_CEILING
        min_loss_train = self.INITIAL_LOSS_CEILING
        for epoch in range(self.epoch):
            pabr = tqdm(self.trainset)
            train_epoch_loss, test_epoch_loss = [], []
            for train_data in pabr:
                _, train_step_loss = self.sess.run([self.train_op, self.loss],
                                                   feed_dict=self._feed(train_data, True))
                train_epoch_loss.append(train_step_loss)
                pabr.set_description("train loss: %.2f" % train_step_loss)

            for test_data in self.testset:
                test_step_loss = self.sess.run(self.loss,
                                               feed_dict=self._feed(test_data, False))
                test_epoch_loss.append(test_step_loss)

            train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss)
            print ('Epoch: %2d Train loss: %.2f'
                   % (epoch, train_epoch_loss))
            print ('Test loss: %.2f'
                   % (test_epoch_loss))

            improved = test_epoch_loss < min_loss_val and train_epoch_loss < min_loss_train
            if epoch >= self.MIN_EXPORT_EPOCH and improved:
                min_loss_val = test_epoch_loss
                min_loss_train = train_epoch_loss
                self._export_frozen_graph(epoch)

    def main(self):
        """Initialise all global variables and start training."""
        self.sess.run(tf.global_variables_initializer())
        self.train()
        
if __name__ == '__main__':
    # Script entry point: build the trainer, then initialise and train.
    trainer = Classitrain()
    trainer.main()





Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use tf.cast instead.
=>Restore weights from /ssd2/tlc/pretrain_model/res18/res18_epoch=16_test_loss=0.3583.ckpt-16
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /ssd2/tlc/pretrain_model/res18/res18_epoch=16_test_loss=0.3583.ckpt-16


  0%|          | 0/1688 [00:00<?, ?it/s]

Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 90 variables.
INFO:tensorflow:Converted 90 variables to const ops.
Instructions for updating:
Use tf.gfile.GFile.


train loss: 0.19:   0%|          | 1/1688 [00:19<9:06:45, 19.45s/it]ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/opt/miniconda3/envs/py2/lib/python2.7/site-packages/IPython/core/ultratb.py", line 1132, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/opt/miniconda3/envs/py2/lib/python2.7/site-packages/IPython/core/ultratb.py", line 313, in wrapped
    return f(*args, **kwargs)
  File "/opt/miniconda3/envs/py2/lib/python2.7/site-packages/IPython/core/ultratb.py", line 358, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/opt/miniconda3/envs/py2/lib/python2.7/inspect.py", line 1058, in getinnerframes
    framelist.append((tb.tb_frame,) + getframeinfo(tb, context))
  File "/opt/miniconda3/envs/py2/lib/python2.7/inspect.py", line 1018, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/opt/miniconda3/envs/py2/lib/python2.7/inspect.py", line 453, in getsourcefile
    if hasattr(getmodule(object, filename), '__loader

IndexError: string index out of range