prepare nasnet large training
titu1994 committed Jan 10, 2018
1 parent 5295d44 commit 476e38b
Showing 3 changed files with 255 additions and 0 deletions.
76 changes: 76 additions & 0 deletions extract_nasnet_large_features.py
@@ -0,0 +1,76 @@
import tensorflow as tf
from keras import backend as K

from utils.nasnet import NASNetLarge
from utils.data_loader import train_generator, val_generator

sess = tf.Session()
K.set_session(sess)

image_size = 224

def _float32_feature_list(floats):
    return tf.train.Feature(float_list=tf.train.FloatList(value=floats))

model = NASNetLarge((image_size, image_size, 3), include_top=False, pooling='avg')
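# with pooling='avg', each image yields a single 4032-dim feature vector
# (4032 being NASNet Large's penultimate filter count)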
model.summary()

''' TRAIN SET '''
nb_samples = 250000 * 2
batchsize = 200

with sess.as_default():
    generator = train_generator(batchsize, shuffle=False)
    writer = tf.python_io.TFRecordWriter('weights/nasnet_large_train.tfrecord')

    count = 0
    for _ in range(nb_samples // batchsize):
        x_batch, y_batch = next(generator)

        with sess.as_default():
            x_batch = model.predict(x_batch, batchsize, verbose=1)

        for i, (x, y) in enumerate(zip(x_batch, y_batch)):
            examples = {
                'features': _float32_feature_list(x.flatten()),
                'scores': _float32_feature_list(y.flatten()),
            }
            features = tf.train.Features(feature=examples)
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())

        count += batchsize

        print("Finished storing %0.2f%% of the dataset" % (count * 100 / float(nb_samples)))

    writer.close()

''' VAL SET '''
nb_samples = 5000
batchsize = 200

with sess.as_default():
    generator = val_generator(batchsize)
    writer = tf.python_io.TFRecordWriter('weights/nasnet_large_val.tfrecord')

    count = 0
    for _ in range(nb_samples // batchsize):
        x_batch, y_batch = next(generator)

        with sess.as_default():
            x_batch = model.predict(x_batch, batchsize, verbose=1)

        for i, (x, y) in enumerate(zip(x_batch, y_batch)):
            examples = {
                'features': _float32_feature_list(x.flatten()),
                'scores': _float32_feature_list(y.flatten()),
            }
            features = tf.train.Features(feature=examples)
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())

        count += batchsize

        print("Finished storing %0.2f%% of the dataset" % (count * 100 / float(nb_samples)))

    writer.close()
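
For context, the pretraining script below reads these records back through `features_generator` from utils/data_loader.py, which is not part of this commit. A minimal sketch of what such a parser might look like (an assumption, not the repository's actual implementation):

import numpy as np
import tensorflow as tf

def features_generator(record_path, batchsize, shuffle=True):
    # Hypothetical reader for the TFRecords written above: yields
    # (features, scores) batches suitable for Keras' fit_generator.
    while True:
        features, scores = [], []
        for record in tf.python_io.tf_record_iterator(record_path):
            example = tf.train.Example()
            example.ParseFromString(record)
            f = example.features.feature
            features.append(np.array(f['features'].float_list.value, dtype=np.float32))
            scores.append(np.array(f['scores'].float_list.value, dtype=np.float32))
            if len(features) == batchsize:
                x, y = np.stack(features), np.stack(scores)
                if shuffle:  # note: shuffles within a batch only, a simplification
                    idx = np.random.permutation(batchsize)
                    x, y = x[idx], y[idx]
                yield x, y
                features, scores = [], []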
86 changes: 86 additions & 0 deletions pretrain_nasnet_large.py
@@ -0,0 +1,86 @@
import os

from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import Adam
from keras import backend as K

from utils.data_loader import features_generator

'''
Below is a modification of the TensorBoard callback that writes
summaries to TensorBoard after every batch, instead of only at the
end of each epoch.
'''
class TensorBoardBatch(TensorBoard):
    def __init__(self, *args, **kwargs):
        super(TensorBoardBatch, self).__init__(*args, **kwargs)

        # import tensorflow lazily, only when a TensorBoardBatch is created
        self.tf = __import__('tensorflow')

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}

        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = self.tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value.item()
            summary_value.tag = name
            self.writer.add_summary(summary, batch)

        self.writer.flush()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = self.tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value.item()
            summary_value.tag = name
            self.writer.add_summary(summary, epoch * self.batch_size)

        self.writer.flush()

def earth_mover_loss(y_true, y_pred):
    # sample-wise EMD (r=2) between the CDFs of the true and predicted
    # score distributions, as used by NIMA
    cdf_ytrue = K.cumsum(y_true, axis=-1)
    cdf_ypred = K.cumsum(y_pred, axis=-1)
    samplewise_emd = K.sqrt(K.mean(K.square(K.abs(cdf_ytrue - cdf_ypred)), axis=-1))
    return K.mean(samplewise_emd)

image_size = 224
ip = Input(shape=(4032,))  # NASNet Large avg-pooled features are 4032-dim (1056 is the NASNet Mobile size)
x = Dropout(0.75)(ip)
x = Dense(10, activation='softmax')(x)

model = Model(ip, x)
model.summary()
optimizer = Adam(lr=1e-4)
model.compile(optimizer, loss=earth_mover_loss)

# load weights from trained model if it exists
if os.path.exists('weights/nasnet_large_pretrained_weights.h5'):
    model.load_weights('weights/nasnet_large_pretrained_weights.h5')

checkpoint = ModelCheckpoint('weights/nasnet_large_pretrained_weights.h5', monitor='val_loss', verbose=1,
                             save_weights_only=True, save_best_only=True, mode='min')
tensorboard = TensorBoardBatch(log_dir='./nasnet_logs/')
callbacks = [checkpoint, tensorboard]

batchsize = 200
epochs = 20

TRAIN_RECORD_PATH = 'weights/nasnet_large_train.tfrecord'
VAL_RECORD_PATH = 'weights/nasnet_large_val.tfrecord'

model.fit_generator(features_generator(TRAIN_RECORD_PATH, batchsize=batchsize, shuffle=True),
                    steps_per_epoch=(500000. // batchsize),
                    epochs=epochs, verbose=1, callbacks=callbacks,
                    validation_data=features_generator(VAL_RECORD_PATH, batchsize=batchsize, shuffle=False),
                    validation_steps=(5000. // batchsize))
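
As a quick sanity check (not part of this commit), earth_mover_loss can be evaluated on toy distributions, here assuming the TensorFlow 1.x Keras backend used throughout these scripts:

import numpy as np

# one-hot "true" rating in bin 3 vs. a prediction in bin 2, shape (1, 10):
y_true = K.constant(np.eye(10, dtype=np.float32)[[2]])
y_pred = K.constant(np.eye(10, dtype=np.float32)[[1]])
print(K.eval(earth_mover_loss(y_true, y_pred)))  # ~0.3162, i.e. sqrt(1/10)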
93 changes: 93 additions & 0 deletions train_nasnet_large.py
@@ -0,0 +1,93 @@
import os

from keras.models import Model
from keras.layers import Dense, Dropout
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.optimizers import Adam
from keras import backend as K

from utils.nasnet import NASNetLarge

from utils.data_loader import train_generator, val_generator

'''
Below is a modification of the TensorBoard callback that writes
summaries to TensorBoard after every batch, instead of only at the
end of each epoch.
'''
class TensorBoardBatch(TensorBoard):
    def __init__(self, *args, **kwargs):
        super(TensorBoardBatch, self).__init__(*args, **kwargs)

        # import tensorflow lazily, only when a TensorBoardBatch is created
        self.tf = __import__('tensorflow')

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}

        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = self.tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value.item()
            summary_value.tag = name
            self.writer.add_summary(summary, batch)

        self.writer.flush()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            summary = self.tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value.item()
            summary_value.tag = name
            self.writer.add_summary(summary, epoch * self.batch_size)

        self.writer.flush()

def earth_mover_loss(y_true, y_pred):
    # sample-wise EMD (r=2) between the CDFs of the true and predicted
    # score distributions, as used by NIMA
    cdf_ytrue = K.cumsum(y_true, axis=-1)
    cdf_ypred = K.cumsum(y_pred, axis=-1)
    samplewise_emd = K.sqrt(K.mean(K.square(K.abs(cdf_ytrue - cdf_ypred)), axis=-1))
    return K.mean(samplewise_emd)

image_size = 224

base_model = NASNetLarge((image_size, image_size, 3), include_top=False, pooling='avg', weight_decay=0, dropout=0)
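# freeze the convolutional backbone so only the new classifier head below is trained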
for layer in base_model.layers:
    layer.trainable = False

x = Dropout(0.75)(base_model.output)
x = Dense(10, activation='softmax')(x)

model = Model(base_model.input, x)
model.summary()
optimizer = Adam(lr=1e-4)
model.compile(optimizer, loss=earth_mover_loss)

# load weights from trained model if it exists
if os.path.exists('weights/nasnet_large_weights.h5'):
    model.load_weights('weights/nasnet_large_weights.h5')

# load pre-trained NIMA (NASNet Large) classifier weights
if os.path.exists('weights/nasnet_large_pretrained_weights.h5'):
    model.load_weights('weights/nasnet_large_pretrained_weights.h5', by_name=True)

checkpoint = ModelCheckpoint('weights/nasnet_large_weights.h5', monitor='val_loss', verbose=1,
                             save_weights_only=True, save_best_only=True, mode='min')
tensorboard = TensorBoardBatch(log_dir='./nasnet_logs/')
callbacks = [checkpoint, tensorboard]

batchsize = 200
epochs = 20

model.fit_generator(train_generator(batchsize=batchsize),
                    steps_per_epoch=(250000. // batchsize),
                    epochs=epochs, verbose=1, callbacks=callbacks,
                    validation_data=val_generator(batchsize=batchsize),
                    validation_steps=(5000. // batchsize))
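
Once trained, the model scores images directly; a hypothetical inference sketch (not part of this commit; it assumes inputs are scaled to [0, 1], whereas the actual preprocessing lives in utils/data_loader.py):

import numpy as np
from keras.preprocessing.image import load_img, img_to_array

img = load_img('path/to/image.jpg', target_size=(image_size, image_size))
x = np.expand_dims(img_to_array(img) / 255., axis=0)
scores = model.predict(x)[0]  # predicted 10-bin score distribution
mean_score = float(np.sum(scores * np.arange(1, 11)))  # NIMA mean opinion score
print('mean aesthetic score: %0.3f' % mean_score)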
