In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
import _pickle
import csv
import subprocess

In [2]:
# reading cifar dataset

for i in range(5):
    with open('cifar-10-batches-py/data_batch_'+str(i+1), 'rb') as f1:
        dict = _pickle.load( f1 , encoding='latin1')
        if i == 0:
            data = dict['data']
            labels = dict['labels']
        else :
            data = np.concatenate((data , dict['data']))
            labels.extend(dict['labels'])

with open('cifar-10-batches-py/test_batch', 'rb') as f2:
    dict = _pickle.load( f2 , encoding='latin1')
    val_data = dict['data'][:5000]
    val_labels = dict['labels'][:5000]
    test_data = dict['data'][5000:]
    test_labels = dict['labels'][5000:]


del dict

In [3]:
# feature scaling and one hot encoding
def fit(arr):
    p = arr.reshape((-1, 3, 32 ,32))
    p = p.swapaxes(1,3)
    p = p.swapaxes(1,2)
    return p.astype(np.float32) / 127.5 -1

def onehot(list):
    return ((np.array(list)[:,None] == np.arange(10)).astype(np.float32))

data = fit(data)
val_data = fit(val_data)
test_data = fit(test_data)

labels = onehot(labels)
val_labels = onehot(val_labels)
test_labels = onehot(test_labels)

In [4]:
# reading model size value

model=[]
with open('model.csv', newline='') as csvfile:
    csvfile = csv.reader(csvfile, delimiter=',', quotechar='"')
    for row in csvfile :
        del row
        break
    for row in csvfile :
        del row[2]
        model.append(list(map(int, row)))


del csvfile
for i in range(len(model)):print(model[i])

[32, 16, 6, 8, 12, 3, 3, 6, 6]
[32, 30, 6, 6, 10, 5, 5, 8, 6]
[32, 30, 0, 11, 18, 8, 8, 12, 30]
[16, 60, 15, 10, 21, 3, 3, 8, 8]
[16, 52, 16, 12, 24, 5, 5, 10, 10]
[16, 60, 0, 24, 36, 15, 15, 24, 60]
[8, 120, 40, 20, 40, 8, 8, 20, 20]
[8, 120, 0, 36, 80, 24, 24, 40, 120]


In [5]:
'''
index for model
0 'input len',
1 'input depth',
delete 'name',
2 '1x1',
3 '3x3 reduce',
4 '3x3',
5 '5x5 reduce',
6 'part1',
7 'part2',
8 'pool'   
'''
# fn to parameters for each inception/ inception-pool module
def createWB(shape,name):
    return [tf.get_variable(name+"W", shape=shape, initializer=tf.contrib.layers.xavier_initializer()),
                tf.Variable(tf.constant(0.2, shape=[shape[3]]), name=name+"B")]

def makepara(array, cate, no):
    parameter = []
    
    with tf.name_scope(cate+str(no)):
        # weights for 1x1
        if array[2] != 0 :
            parameter.append(createWB([1, 1, array[1], array[2]], cate+str(no)+'1x1'))

        # weights for 3x3 reduce
        parameter.append(createWB([1, 1, array[1], array[3]], cate+str(no)+'3x3red'))
        # weights for 3x3 
        parameter.append(createWB([3, 3, array[3], array[4]], cate+str(no)+'3x3'))
        # weights for 5x5 reduce
        parameter.append(createWB([1, 1, array[1], array[5]], cate+str(no)+'5x5red'))
        # weights for part1
        parameter.append(createWB([3, 3, array[5], array[6]], cate+str(no)+'5x5pt1'))
        # weights for part2
        parameter.append(createWB([3, 3, array[6], array[7]], cate+str(no)+'5x5pt2'))

        # weights for pool
        if array[2] != 0 :
            parameter.append(createWB([1, 1, array[1], array[8]], cate+str(no)+'pool'))
    return parameter

In [6]:
# creaing all required weights
n_classes = 10
img_side = 32

# weights for 5x5 conv 
with tf.name_scope('initial_conv'):
    convW = tf.Variable(tf.random_normal([5, 5, 3, model[0][1]]))
    convB = tf.Variable(tf.random_normal([model[0][1]]))
    
# fully connected linear
with tf.name_scope('linear_fully_connected'):
    lenW = tf.Variable(tf.random_normal([model[-1][2] + model[-1][4] + model[-1][7] + model[-1][8], n_classes]))
    lenB = tf.Variable(tf.random_normal([n_classes]))

# generating weights according to model
para = []
noins, nopool = 0, 0
for i in range(len(model)):
    
    if model[i][2] == 0 :
        cate = 'pool'
        nopool += 1
        notopass = nopool
    else :
        cate = 'ins'
        noins += 1
        notopass = noins
    
    para.append(makepara(model[i], cate, notopass))
    
for i in para:
    for j in i:
        for k in j:
            print(k)

Tensor("ins11x1W/read:0", shape=(1, 1, 16, 6), dtype=float32)
Tensor("ins1/ins11x1B/read:0", shape=(6,), dtype=float32)
Tensor("ins13x3redW/read:0", shape=(1, 1, 16, 8), dtype=float32)
Tensor("ins1/ins13x3redB/read:0", shape=(8,), dtype=float32)
Tensor("ins13x3W/read:0", shape=(3, 3, 8, 12), dtype=float32)
Tensor("ins1/ins13x3B/read:0", shape=(12,), dtype=float32)
Tensor("ins15x5redW/read:0", shape=(1, 1, 16, 3), dtype=float32)
Tensor("ins1/ins15x5redB/read:0", shape=(3,), dtype=float32)
Tensor("ins15x5pt1W/read:0", shape=(3, 3, 3, 3), dtype=float32)
Tensor("ins1/ins15x5pt1B/read:0", shape=(3,), dtype=float32)
Tensor("ins15x5pt2W/read:0", shape=(3, 3, 3, 6), dtype=float32)
Tensor("ins1/ins15x5pt2B/read:0", shape=(6,), dtype=float32)
Tensor("ins1poolW/read:0", shape=(1, 1, 16, 6), dtype=float32)
Tensor("ins1/ins1poolB/read:0", shape=(6,), dtype=float32)
Tensor("ins21x1W/read:0", shape=(1, 1, 30, 6), dtype=float32)
Tensor("ins2/ins21x1B/read:0", shape=(6,), dtype=float32)
Tensor("ins23x3

In [7]:
# wrapper for inception module and pool
'''
index for para
norm;pool 
0   '1x1',
1 0 '3x3 reduce',
2 1 '3x3',
3 2 '5x5 reduce',
4 3 'part1',      strides, strides
5 4 'part2',
6   'pool'   
'''
def insWrapper(x, parameter):
    strides=1
    #1x1 
    con1 = tf.nn.conv2d(x, parameter[0][0], strides=[1, 1, 1, 1], padding='SAME')
    con1 = tf.nn.relu(con1 + parameter[0][1])
    
    #3x3red
    con3 = tf.nn.conv2d(x, parameter[1][0], strides=[1, 1, 1, 1], padding='SAME')
    con3 = tf.nn.relu(con3 + parameter[1][1])
    
    #3x3
    con3 = tf.nn.conv2d(con3, parameter[2][0], strides=[1, strides, strides, 1], padding='SAME')
    con3 = tf.nn.relu(con3 + parameter[2][1])
    
    #5x5red
    con5 = tf.nn.conv2d(x, parameter[3][0], strides=[1, 1, 1, 1], padding='SAME')
    con5 = tf.nn.relu(con5 + parameter[3][1])
    
    #part1
    con5 = tf.nn.conv2d(con5, parameter[4][0], strides=[1, 1, 1, 1], padding='SAME')
    con5 = tf.nn.relu(con5 + parameter[4][1])
    
    #part2
    con5 = tf.nn.conv2d(con5, parameter[5][0], strides=[1, strides, strides, 1], padding='SAME')
    con5 = tf.nn.relu(con5 + parameter[5][1])
    
    #pool
    pool = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')
    pool = tf.nn.relu(pool)
    
    #poolred
    pool = tf.nn.conv2d(pool, parameter[6][0], strides=[1, 1, 1, 1], padding='SAME')
    pool = tf.nn.relu(pool + parameter[6][1])
    
    return tf.concat(3, [con1, con3, con5, pool])

def poolWrapper(x, parameter):
    strides=2
    
    #3x3red
    con3 = tf.nn.conv2d(x, parameter[0][0], strides=[1, 1, 1, 1], padding='SAME')
    con3 = tf.nn.relu(con3 + parameter[0][1])
    
    #3x3
    con3 = tf.nn.conv2d(con3, parameter[1][0], strides=[1, strides, strides, 1], padding='SAME')
    con3 = tf.nn.relu(con3 + parameter[1][1])
    
    #5x5red
    con5 = tf.nn.conv2d(x, parameter[2][0], strides=[1, 1, 1, 1], padding='SAME')
    con5 = tf.nn.relu(con5 + parameter[2][1])
    
    #part1
    con5 = tf.nn.conv2d(con5, parameter[3][0], strides=[1, 1, 1, 1], padding='SAME')
    con5 = tf.nn.relu(con5 + parameter[3][1])
    
    #part2
    con5 = tf.nn.conv2d(con5, parameter[4][0], strides=[1, strides, strides, 1], padding='SAME')
    con5 = tf.nn.relu(con5 + parameter[4][1])
    
    #pool
    pool = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, strides, strides, 1], padding='SAME')
    pool = tf.nn.relu(pool)
    
    return tf.concat(3, [con3, con5, pool])

In [8]:
lRate = 0.02
batch = 100
drop = 0.75 # 40% dropout

with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, [None, 32, 32, 3], name = 'x_placeholder')
    y = tf.placeholder(tf.float32, [None, n_classes], name = 'y_placeholder')
    dropout = tf.placeholder(tf.float32, name = 'drop_placeholder')

def final(x, para, dropout):

    # first 5x5 conv
    x = tf.nn.conv2d(x, convW, strides=[1, 1, 1, 1], padding='SAME')
    x = tf.nn.relu(x + convB)
    
    # inception and pool
    for row in para:
        if len(row) == 7:
            x = insWrapper(x, row)
        else :
            x = poolWrapper(x, row)
    
    # avgpool and dropout
    x = tf.nn.avg_pool(x, ksize=[1, 4, 4, 1], strides=[1, 1, 1, 1], padding='VALID')
    x = tf.nn.dropout(x, dropout)
    
    global model
    return tf.matmul(tf.reshape(x, (-1, model[-1][2] + model[-1][4] + model[-1][7] + model[-1][8])), lenW) + lenB

# Construct model
pred = final(x, para, dropout)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
#optimizer = tf.train.GradientDescentOptimizer(lRate).minimize(cost)
# del : lRate = tf.placeholder(tf.float32, name = 'learning_rate')
optimizer = tf.train.AdamOptimizer(learning_rate=lRate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# create summary for our cost and accuracy
tf.summary.scalar("cost", tf.cast(cost, tf.float32))
tf.summary.scalar("accuracy", accuracy)

summary_op = tf.summary.merge_all()

check = tf.add_check_numerics_ops()


subprocess.run('rm -r log; mkdir log', shell=True)

CompletedProcess(args='rm -r log; mkdir log', returncode=0)

In [9]:

with tf.Session() as sess:

    
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('log', graph=tf.get_default_graph())
    for i in range(0,len(data)+1,batch):
        
        pare = sess.run(para[0][0])
        #print(pare[0][0][0][8],pare[1])
        
        summary, _ = sess.run([summary_op, optimizer],
                              feed_dict={x: data[i:i+batch], y: labels[i:i+batch], dropout: drop})
        
        writer.add_summary(summary, i)
        
        if i % 500 == 0 :
            acc, cos, chec = sess.run([accuracy, cost, check], feed_dict={x: data[i:i+batch], y: labels[i:i+batch], dropout: 1.})
            
            print(str(i)+'  acc = {:.3f}'.format(acc)  )
            print('loss = {:.3f}'.format(cos))
            print(chec)
            #print(' = {:.3f}'.format(para))

            
    
    val_acc = sess.run(accuracy, feed_dict={x: val_data[:2000], y: val_labels[:2000], dropout: 1.})
    print('accuracy at '+str(i)+' = {:.5f}'.format(val_acc) )
    
    test_acc = sess.run(accuracy, feed_dict={x: test_data[:2000], y: test_labels[:2000], dropout: 1.})
    print('accuracy at '+str(i)+' = {:.5f}'.format(test_acc) )

0  acc = 0.130
loss = 96.942
None
500  acc = 0.090
loss = 3.121
None
1000  acc = 0.100
loss = 2.367
None
1500  acc = 0.140
loss = 2.349
None
2000  acc = 0.100
loss = 2.424
None
2500  acc = 0.070
loss = 2.321
None
3000  acc = 0.110
loss = 2.334
None
3500  acc = 0.120
loss = 2.316
None
4000  acc = 0.120
loss = 2.377
None
4500  acc = 0.150
loss = 2.297
None
5000  acc = 0.070
loss = 2.360
None
5500  acc = 0.100
loss = 2.289
None
6000  acc = 0.150
loss = 2.255
None
6500  acc = 0.160
loss = 2.228
None
7000  acc = 0.190
loss = 2.257
None
7500  acc = 0.080
loss = 2.417
None
8000  acc = 0.120
loss = 2.261
None
8500  acc = 0.110
loss = 2.303
None
9000  acc = 0.140
loss = 2.308
None
9500  acc = 0.160
loss = 2.263
None
10000  acc = 0.170
loss = 2.218
None
10500  acc = 0.180
loss = 2.172
None
11000  acc = 0.160
loss = 2.195
None
11500  acc = 0.190
loss = 2.211
None
12000  acc = 0.180
loss = 2.114
None
12500  acc = 0.210
loss = 2.186
None
13000  acc = 0.280
loss = 2.142
None
13500  acc = 0.160
loss 

InvalidArgumentError: Mean:0 : Tensor had NaN values
	 [[Node: CheckNumerics_673 = CheckNumerics[T=DT_FLOAT, message="Mean:0", _device="/job:localhost/replica:0/task:0/cpu:0"](Mean, ^CheckNumerics_672)]]

Caused by op 'CheckNumerics_673', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/mendu/.local/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/home/mendu/.local/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/mendu/.local/lib/python3.5/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/home/mendu/.local/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/mendu/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/mendu/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/mendu/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/mendu/.local/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/mendu/.local/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/mendu/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/mendu/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/mendu/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-079119ab5f8a>", line 49, in <module>
    check = tf.add_check_numerics_ops()
  File "/home/mendu/.local/lib/python3.5/site-packages/tensorflow/python/ops/numerics.py", line 68, in add_check_numerics_ops
    check_op = [array_ops.check_numerics(output, message=message)]
  File "/home/mendu/.local/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 412, in check_numerics
    message=message, name=name)
  File "/home/mendu/.local/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "/home/mendu/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/mendu/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Mean:0 : Tensor had NaN values
	 [[Node: CheckNumerics_673 = CheckNumerics[T=DT_FLOAT, message="Mean:0", _device="/job:localhost/replica:0/task:0/cpu:0"](Mean, ^CheckNumerics_672)]]


cd cifar && tensorboard --logdir=run1:log --port 6006