In [1]:
import tensorflow as tf
import os
import time
import numpy as np
from tensorflow.python.keras import layers
print(tf.__version__)

1.8.0


In [2]:
# Convolution Block

def _conv(x,kernel,name,log=False):
    with tf.variable_scope(name):
        W = tf.get_variable(initializer=tf.truncated_normal(shape=kernel,stddev=0.01),name='W')
        b = tf.get_variable(initializer=tf.constant(0.0,shape=[kernel[3]]),name='b')
        conv = tf.nn.conv2d(x, W, strides=[1,1,1,1],padding='SAME')
        activation = tf.nn.relu(tf.add(conv,b))
        pool = tf.nn.max_pool(activation,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        if log==True:
            tf.summary.histogram("weights",W)
            tf.summary.histogram("biases",b)
            tf.summary.histogram("activations",activation)
        return pool

# Dense Block

def _dense(x,size_in,size_out,name,relu=False,log=False):
    with tf.variable_scope(name):
        flat = tf.reshape(x,[-1,size_in])
        W = tf.get_variable(initializer=tf.truncated_normal([size_in,size_out],stddev=0.1),name='W')
        b = tf.get_variable(initializer=tf.constant(0.0,shape=[size_out]),name='b')
        activation = tf.add(tf.matmul(flat,W),b)
        if relu==True:
            activation = tf.nn.relu(activation)
        if log==True:
            tf.summary.histogram("weights",W)
            tf.summary.histogram("biases",b)
            tf.summary.histogram("activations",activation)
        return activation

In [3]:
def tinyimg_fn(features, labels, mode, params):
    
    #### 1 INFERNCE MODEL
    
    input_layer = tf.reshape(features, [-1, 64, 64, 3])
    conv1 = _conv(input_layer,kernel=[7,7,3,128],name='conv1',log=params['log'])
    conv2 = _conv(conv1,kernel=[5,5,128,128],name='conv2',log=params['log'])
    conv3 = _conv(conv2,kernel=[5,5,128,256],name='conv3',log=params['log'])
    conv4 = _conv(conv3,kernel=[3,3,256,512],name='conv4',log=params['log'])
    dense = _dense(conv4,size_in=4*4*512,size_out=params['dense_units'],
                   name='Dense',relu=True,log=params['log'])
    if mode==tf.estimator.ModeKeys.TRAIN:
        dense = tf.nn.dropout(dense,params['drop_out'])
    logits = _dense(dense,size_in=params['dense_units'],
                    size_out=200,name='Output',relu=False,log=params['log'])
    
    #### 2 CALCULATIONS AND METRICS
    
    predictions = {"classes": tf.argmax(input=logits,axis=1),
                   "logits": logits,
                   "probabilities": tf.nn.softmax(logits,name='softmax')}
    export_outputs = {'predictions': tf.estimator.export.PredictOutput(predictions)}
    if (mode==tf.estimator.ModeKeys.TRAIN or mode==tf.estimator.ModeKeys.EVAL):
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,logits=logits)
        #accuracy = tf.metrics.accuracy(
        #    labels=labels, predictions=tf.argmax(logits,axis=1))
        #metrics = {'accuracy':accuracy}
        
    #### 3 MODE = PREDICT
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=predictions, export_outputs=export_outputs)

    #### 4 MODE = TRAIN

    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.train.exponential_decay(
            params['learning_rate'],tf.train.get_global_step(),
            decay_steps=100000,decay_rate=0.96)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        if params['replicate']==True:
            optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
        train_op = optimizer.minimize(loss=loss,global_step=tf.train.get_global_step())
        tf.summary.scalar('learning_rate', learning_rate)
        #tf.summary.scalar('accuracy',accuracy[1])
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)
    
    #### 5 MODE = EVAL
    
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            #mode=mode,loss=loss,eval_metric_ops=metrics)
            mode=mode,loss=loss)

In [4]:
def parse_tfrecord(example):
    feature={'idx'     : tf.FixedLenFeature((), tf.int64),
             'label'   : tf.FixedLenFeature((), tf.int64),
             'image'   : tf.FixedLenFeature((), tf.string, default_value='')}
    parsed = tf.parse_single_example(example, feature)
    image = tf.decode_raw(parsed['image'],tf.float32)
    image = tf.reshape(image,[64,64,3])
    return image, parsed['label']

In [5]:
def image_scaling(x):
    return tf.image.per_image_standardization(x)

In [6]:
def distort(x):
    x = tf.image.resize_image_with_crop_or_pad(x, 80, 80)
    x = tf.random_crop(x, [64, 64, 3])
    x = tf.image.random_flip_left_right(x)
    return x

In [7]:
def dataset_input_fn(params):
    dataset = tf.data.TFRecordDataset(
        params['filenames'],num_parallel_reads=params['threads'])
    dataset = dataset.map(parse_tfrecord, num_parallel_calls=params['threads'])
    dataset = dataset.map(
    lambda x,y: (image_scaling(x),y),num_parallel_calls=params['threads'])
    if params['mode']==tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.map(lambda x,y: (distort(x),y),num_parallel_calls=params['threads'])
        dataset = dataset.shuffle(buffer_size=params['shuffle_buff'])
    dataset = dataset.repeat()
    dataset = dataset.batch(params['batch'])
    dataset = dataset.prefetch(2*params['batch'])

    return dataset


In [9]:
model_params  = {'drop_out'      : 0.3,
                 'dense_units'   : 1024,
                 'learning_rate' : 1e-3,
                 'log'           : True,
                 'replicate'     : False
                }
'''
distribution = tf.contrib.distribute.MirroredStrategy(num_gpus=8)
config = tf.estimator.RunConfig(save_checkpoints_secs = 30,
                                keep_checkpoint_max = 5,
                                session_config=tf.ConfigProto(
                                    allow_soft_placement=True, log_device_placement=True),
                                train_distribute = distribution)
'''
config = tf.estimator.RunConfig(save_checkpoints_secs = 30,keep_checkpoint_max = 5)


In [10]:
!pwd

/home/tsaikevin/zzz/tinyimagenet/tiny-imagenet-200


In [11]:
if model_params['replicate']==True:
    model_fn = tf.contrib.estimator.replicate_model_fn(
        tinyimg_fn, loss_reduction=tf.losses.Reduction.MEAN)
else:
    model_fn = tinyimg_fn

name = 'cnn_model/cnn_model_'
if model_params['replicate']==True:
    name = 'cnn_model_dist/cnn_model_'
name = name + 'dense(' + str(model_params['dense_units']) + ')_'
name = name + 'drop(' + str(model_params['drop_out']) + ')_'
name = name + 'lr(' + str(model_params['learning_rate']) + ')_'
name = name + time.strftime("%Y%m%d%H%M%S")
cnn_dir  = os.path.join('./',name)

print(cnn_dir)

cnn_classifier = tf.estimator.Estimator(
    model_fn=model_fn,model_dir=cnn_dir,params=model_params,config=config)



./cnn_model/cnn_model_dense(1024)_drop(0.3)_lr(0.001)_20180527021730
INFO:tensorflow:Using config: {'_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_task_id': 0, '_global_id_in_cluster': 0, '_session_config': None, '_tf_random_seed': None, '_num_worker_replicas': 1, '_save_checkpoints_secs': 30, '_num_ps_replicas': 0, '_task_type': 'worker', '_service': None, '_model_dir': './cnn_model/cnn_model_dense(1024)_drop(0.3)_lr(0.001)_20180527021730', '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4b1cd670b8>, '_log_step_count_steps': 100, '_master': '', '_train_distribute': None, '_evaluation_master': '', '_save_checkpoints_steps': None}


In [12]:
tinyimg_estimator = tf.estimator.Estimator(
    model_fn=model_fn,model_dir=cnn_dir,params=model_params,config=config)

INFO:tensorflow:Using config: {'_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_task_id': 0, '_global_id_in_cluster': 0, '_session_config': None, '_tf_random_seed': None, '_num_worker_replicas': 1, '_save_checkpoints_secs': 30, '_num_ps_replicas': 0, '_task_type': 'worker', '_service': None, '_model_dir': './cnn_model/cnn_model_dense(1024)_drop(0.3)_lr(0.001)_20180527021730', '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4b273c11d0>, '_log_step_count_steps': 100, '_master': '', '_train_distribute': None, '_evaluation_master': '', '_save_checkpoints_steps': None}


In [13]:
#train_files = !gsutil ls gs://tsaikevin-data/tiny_imagenet/tiny_imagenet_00*.tfrecords
#val_files   = !gsutil ls gs://tsaikevin-data/tiny_imagenet/tiny_imagenet_01*.tfrecords
train_files = !ls tiny_imagenet_2_00*.tfrecords
val_files   = !ls tiny_imagenet_2_01*.tfrecords

train_params = {'filenames'    : train_files,
                'mode'         : tf.estimator.ModeKeys.TRAIN,
                'threads'      : 16,
                'shuffle_buff' : 100000,
                'batch'        : 100
               }

eval_params  = {'filenames'    : val_files,
                'mode'         : tf.estimator.ModeKeys.EVAL,
                'threads'      : 8,
                'batch'        : 200
               }

train_spec = tf.estimator.TrainSpec(input_fn=lambda: dataset_input_fn(train_params),max_steps=2000)
eval_spec  = tf.estimator.EvalSpec(input_fn=lambda: dataset_input_fn(eval_params),steps=10,throttle_secs=600)



In [14]:
!gsutil rm -rf $model_dir
tf.estimator.train_and_evaluate(tinyimg_estimator, train_spec, eval_spec)

CommandException: The rm command (without -I) expects at least one URL.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into ./cnn_model/cnn_model_dense(1024)_drop(0.3)_lr(0.001)_20180527021730/model.ckpt.
INFO:tensorflow:step = 0, loss = 5.356467
INFO:tensorflow:global_step/sec: 6.07185
INFO:tensorflow:step = 100, loss = 5.310028 (16.471 sec)
INFO:tensorflow:Saving checkpoints for 193 into ./cnn_model/cnn_model_dense(1024)_drop(0.3)_lr(0.001)_20180527021730/model.ckpt.
INFO:tensorflow:global_step/sec: 6.10852
INFO:tensorflow:step = 200, loss = 5.3041

In [16]:
!ls tiny_imagenet_00*.tfrecords

ls: cannot access 'tiny_imagenet_00*.tfrecords': No such file or directory


In [17]:
!ls

cnn_model			      tiny_imagenet_2_007.tfrecords
cnn_model_dist			      tiny_imagenet_2_008.tfrecords
create_tfrecords.ipynb		      tiny_imagenet_2_009.tfrecords
model.py			      tiny_imagenet_2_010.tfrecords
__pycache__			      tiny_imagenet_2_011.tfrecords
resnet_v2			      tinyimg2.ipynb
resnet_v2_imagenet_savedmodel.tar.gz  tinyimg-distributed.ipynb
test				      tinyimg.ipynb
tiny_imagenet_2_000.tfrecords	      tinyimg-tf18-distributed.ipynb
tiny_imagenet_2_001.tfrecords	      tinyimg-tf18.ipynb
tiny_imagenet_2_002.tfrecords	      train
tiny_imagenet_2_003.tfrecords	      val
tiny_imagenet_2_004.tfrecords	      wnids.txt
tiny_imagenet_2_005.tfrecords	      words.txt
tiny_imagenet_2_006.tfrecords
