In [1]:

import six
import sys
import tensorflow as tf
from utils import *
from deeplab import common
from deeplab import model
from deeplab.datasets import segmentation_dataset
from deeplab.utils import input_generator
from deeplab.utils import train_utils
from deployment import model_deploy

num_clones= 1 

clone_on_cpu= False 

num_replicas= 1 

startup_delay_steps= 15 

num_ps_tasks= 0 

#master=

task= 0 

train_logdir= None 

log_steps= 10 

save_interval_secs= 1200 

save_summaries_secs= 600 

save_summaries_images= False 

learning_policy= 'poly'

base_learning_rate= .0001 

learning_rate_decay_factor= 0.1 

learning_rate_decay_step= 2000 

learning_power= 0.9 

training_number_of_steps= 30000 

momentum= 0.9

train_batch_size= 8 

weight_decay= 0.00004 

train_crop_size= 523 #[513  513] 

last_layer_gradient_multiplier= 1.0 

upsample_logits= True 

tf_initial_checkpoint= None 

initialize_last_layer= True 

last_layers_contain_logits_only= False 

slow_start_step= 0 

slow_start_learning_rate= 1e-4 

fine_tune_batch_norm= True 

min_scale_factor= 0.5 

max_scale_factor= 2. 

scale_factor_step_size= 0.25 

atrous_rates= None 

output_stride= 16 

master=''

dataset= 'pascal_voc_seg'

train_split= 'train'

dataset_dir= "deeplab/datasets/pascal_voc_seg/tfrecord/cal_train_aug/model.ckpt"

train_logdir="deeplab/trainlog"

# --train_logdir=deeplab/      --dataset_dir=deeplab/datasets/pascal_voc_seg/tfrecord/cal_train_aug/model.ckpt     --train_logdir=deeplab/tra  




### 输入参数
model_variant="xception_65"
train_crop_size=513
clone_batch_size=train_batch_size//num_clones
min_resize_value=None

slim = tf.contrib.slim
prefetch_queue = slim.prefetch_queue

"""
具体的模型建立依赖:
common.ModelOptions
model.multi_scale_logits
train_utils.add_softmax_cross_entropy_loss_for_each_scale
"""

def _build_deeplab(inputs_queue,outputs_to_num_classes,ignore_labels):
    """构建deeplab网络
    inputs_queue:
            输入sample
    outputs_num_classes:
            当前是几分类的网络
    
    return:
            返回deeplab网络
    """
    samples=inputs_queue.dequeue() # 从队列中取出样本
    # 添加一些助记名字
    samples[common.IMAGE]=tf.identity(samples[common.IMAGE],name=common.IMAGE)
    samples[common.LABEL]=tf.identity(samples[common.LABEL],name=common.LABEL)
    
    # setup
    model_options=common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=train_crop_size,
        atrous_rates=atrous_rates,
        output_stride=output_stride)
    
    # 几率表达式(其实是softmax的输出,可认为是概率)
    outputs_to_scales_to_logits=model.multi_scale_logits(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=image_pyramid,# image_pyramid=NULL
        weight_decay=weight_decay,# 4e-05
        is_training=True,
        fine_tune_batch_norm=fine_tune_batch_norm)
    # 添加一些助记名字
    output_type_dict=outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    output_type_dict[model.MERGED_LOGITS_SCOPE]=tf.identity(
        output_type_dict[model.MERGED_LOGITS_SCOPE],
        name=common.OUTPUT_TYPE)
    
    for output,num_classes in six.iteritems(outputs_to_num_classes):
        # softmax
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            outputs_to_scales_to_logits[output],
            samples[common.LABEL],
            ignore_labels,
            loss_weight=1.0,
            upsampling_logits=upsample_logits, #Upsample logits during training
            scope=output)
        
    return outputs_to_scales_to_logits
        

def train():

    config=model_deploy.DeploymentConfig(num_clones=num_clones,
                                          clone_on_cpu=clone_on_cpu, # bool,是否使用cpu
                                          replica_id=task, # task id
                                          num_replicas=num_replicas,
                                          num_ps_tasks=num_ps_tasks)
    assert train_batch_size % num_clones ==0,("train batch size SHOULD be divisble by num of cores")
    
    # 取出train的数据集
    dataSet=segmentation_dataset.get_dataset(dataset,train_split,dataset_dir)
    
    tf.logging.info("this is %s SET",train_split)
    
    with tf.Graph().as_default() as graph:
        # step 1 把data从磁盘中取出,需要什么,这部分属于input_device.可以使用cpu.因此提供一个tf.devices办法
        with tf.device(config.input_device()):
            sample=input_generator.get(dataset,
                                       train_crop_size,
                                       clone_batch_size,
                                       min_resize_value=min_resize_value,
                                       max_resize_value=max_resize_value,
                                       resize_factor=resize_factor,
                                       min_scale_factor=min_scale_factor,
                                       max_scale_factor=max_scale_factor,
                                       scale_factor_step_size=scale_factor_step_size,
                                       dataset_split=train_split,
                                       is_training=True,
                                       model_variant=model_variant)        
            prefetch_queue.prefetch_queue(sample,capacity=128*config.num_clones)
            
        # step 2 对于变量,需要使用gpu.
        with tf.device(config.variables_device()):
            global_step=tf.train.get_or_create_global_step()
            # 定义网络
            model_fn=_build_deeplab
            model_args=(input_queue,{
                common.OUTPUT_TYPE:dataSet.num_classes
            },
                        dataSet.ignore_label)
            # 把deeplab网络图构建起来,构建多个克隆体.
            clones=model_deploy.creat_clones(config,model_fn,args=model_args)
            
            first_clone_scope=config.clone_scope(0) # 是个字符串"clone_0"
            update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS,first_clone_scope) # 找打标记有"clone_0"的并且是UPDATE_OPS描述的均值和方差
            
            
        # step 3 在图上构建优化算法
        with tf.device(config.optimizer_device()):
            learning_rate=train_utils.get_model_learning_rate(learning_policy,
                                                              base_learning_rate,
                                                              learning_rate_decay_step,
                                                              learning_rate_decay_factor,
                                                              training_number_of_steps,
                                                              learning_power,
                                                              slow_start_step,
                                                              slow_start_learning_rate
                                                             )
            optimizer=tf.train.MomentumOptimizer(learning_rate,momentum)
        
        # step 4 利用optimizer 计算给定clones的loss
        with tf.device(config.variables_device()):
            total_loss,gradient_and_var=model_deploy.optimize_clones(clones,optimizer)
            total_loss=tf.check_numerics(total_loss,'Loss is inf or nan.') # sanity check
            
            # 更新最后一层的梯度,对于我们可能会对最后一层做fine-tune. 多分类的fine-tune
            # 1. 先取出最后一层
            last_layers=model.get_extra_layer_scopes(last_layers_contain_logits_only)
            grand_mult=train_utils.get_model_gradient_multipliers(last_layers,last_layer_gradient_multiplier)
            if grand_mult:
                gradient_and_var=slim.learing.multiply_gradients(gradient_and_var,grand_mult)
            grand_updates=optimizer.apply_gradients(gradient_and_var,grand_mult)
            update_ops.append(grand_updates)
            update_ops=tf.group(*update_ops)
            with tf.control_dependencies([update_ops]):
                train_tensor=tf.identity(total_loss,name='train_op') # train_tensor就是train_op的代名词
            
        # 允许用cpu
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        # step 5 开始train
        slim.learing.train(train_tensor,
                           logdir=train_logdir,
                           log_every_steps=log_steps,
                           master=master,
                           number_of_steps=training_number_of_steps,
                           is_chief=(task==0),
                           session_config=session_config,
                           init_fn=train_utils.get_model_init_fn(
                               train_logdir,
                               tf_initial_checkpoint,
                               initialize_last_layer,
                               last_layers,
                               ignore_missing_vars=True,
                           ),
                           
        )

train()

"""
if __name__=='__main__':
    if len(sys.argv) < 2:
        print ("NO action specified.")
        sys.exit()

    if sys.argv[1].startswith('--'):
        option = sys.argv[1][2:]
        if option == 'version':
            print ("version 1.2 ")
        elif option == 'help':
            print ("This program prints files to the standard output.\
                 Any number of files can be specified.\
                 Options include:\
                 --version : Prints the version number\
                 --train: traing segnet\
                 --test: test segnet\
                 --help     : Display this help")
            
        elif option == 'train':
            print("start training")
            training(trainfilepath="/home/julyedu_433249/work/tf_base/segNet/SegNet/CamVid/train.txt",
             valfilepath="/home/julyedu_433249/work/tf_base/segNet/SegNet/CamVid/val.txt",
             batch_size=5,
             image_width=480,
             image_height=360,
             image_ch=3,
             max_steps=20000)
        elif option == 'test':
            print("start testing")
            test(testfilename="/home/julyedu_433249/work/tf_base/segNet/SegNet/CamVid/test.txt",
             batch_size=5,
             image_width=480,
             image_height=360,
             image_ch=3)

        else:
            print("Unknow option.")
"""

  from ._conv import register_converters as _register_converters


ImportError: cannot import name 'common'

In [25]:
!export PYTHONPATH=$PYTHONPATH:$PWD:$PWD/slim

In [26]:
!echo $PYTHONPATH




In [27]:
!echo $PWD

/home/deeplearning/work/tf_base/deepLab


In [28]:
!echo $PWD/slim

/home/deeplearning/work/tf_base/deepLab/slim


#### demo

In [6]:
outputs_to_num_classes=11
outputs_to_scales_to_logits = {
      k: {}
      for k in range(outputs_to_num_classes)
  }
print(outputs_to_scales_to_logits)

{0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 9: {}, 10: {}}
