In [1]:
import argparse
import math
import h5py
import numpy as np
import tensorflow as tf
import socket
import importlib
import os
import sys

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Make the working directory and its 'models' / 'utils' sub-folders importable,
# so that later cells can import project modules by bare name.
BASE_DIR = os.getcwd()
sys.path.extend([
    BASE_DIR,
    os.path.join(BASE_DIR, 'models'),
    os.path.join(BASE_DIR, 'utils'),
])
print(BASE_DIR)

/home/kuramin/Diploma/repos/DGCNN_tf/dgcnn-1/tensorflow


In [3]:
import provider
from utils import tf_util

In [4]:
# --- Training hyper-parameters (read by the graph-building and loop code) ---
BATCH_SIZE = 24              # clouds per batch; also fixes the placeholder batch dimension
NUM_POINT = 1024             # points kept from every cloud (data is sliced to [0:NUM_POINT])
MAX_EPOCH = 50               # number of train+eval passes (a longer run used 250)
BASE_LEARNING_RATE = 1e-3    # starting value of the exponentially decayed learning rate
GPU_INDEX = 0                # device used as '/gpu:<GPU_INDEX>'
MOMENTUM = 0.9               # only used when OPTIMIZER == 'momentum'
OPTIMIZER = 'adam'           # 'adam' or 'momentum'
DECAY_STEP = 200000          # decay period, measured in samples seen (batch * BATCH_SIZE)
DECAY_RATE = 0.7             # multiplicative learning-rate decay per DECAY_STEP

In [5]:
# Network definition: the 'dgcnn' module is resolved through sys.path
# (the 'models' folder was appended to it in an earlier cell).
MODEL = importlib.import_module('dgcnn')  # import network module
MODEL_FILE = os.path.join(BASE_DIR, 'models', 'dgcnn.py')

# Directory receiving the text log, TensorBoard summaries and checkpoints.
LOG_DIR = 'log'
os.makedirs(LOG_DIR, exist_ok=True)  # no race between the existence check and creation
# os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def
# os.system('cp train.py %s' % (LOG_DIR)) # bkp of train procedure
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
# Write the header from the live configuration values rather than a hand-typed
# copy, so the log can never disagree with the hyper-parameters actually used.
LOG_FOUT.write(
    "BATCH_SIZE = %d, NUM_POINT = %d, MAX_EPOCH = %d, BASE_LEARNING_RATE = %s, "
    "GPU_INDEX = %d, MOMENTUM = %s, OPTIMIZER = %r, DECAY_STEP = %d, DECAY_RATE = %s\n"
    % (BATCH_SIZE, NUM_POINT, MAX_EPOCH, BASE_LEARNING_RATE,
       GPU_INDEX, MOMENTUM, OPTIMIZER, DECAY_STEP, DECAY_RATE))
LOG_FOUT.flush()  # make the header visible even if training is interrupted early

MAX_NUM_POINT = 4096 #2048
NUM_CLASSES = 40  # size of the per-class counters used during evaluation

# Batch-norm decay schedule: 1 - (BN_INIT_DECAY decayed by BN_DECAY_DECAY_RATE
# every BN_DECAY_DECAY_STEP samples), clipped from above at BN_DECAY_CLIP.
BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

HOSTNAME = socket.gethostname()

In [6]:
# ModelNet40 official train/test split.
# Each list file is handed to provider.get_data_files; the results are later
# iterated and passed one by one to provider.load_h5.
TRAIN_FILES, TEST_FILES = (
    provider.get_data_files(os.path.join(BASE_DIR, split_list))
    for split_list in ('data/modelnet40_ply_hdf5_2048/train_files.txt',
                       'data/modelnet40_ply_hdf5_2048/test_files.txt')
)

In [7]:
def log_string(log_file, out_str):
    """Append one line to the log file, flush it, and echo it to stdout.

    Args:
        log_file: writable text file object (needs ``write`` and ``flush``).
        out_str: message text without a trailing newline.
    """
    line = ''.join((out_str, '\n'))
    log_file.write(line)
    log_file.flush()  # keep the on-disk log current during long runs
    print(out_str)


def get_learning_rate(batch):
    """Return the learning-rate tensor for the current global step.

    The rate starts at BASE_LEARNING_RATE and is multiplied by DECAY_RATE once
    per DECAY_STEP samples seen (staircase decay), then clipped from below.

    Args:
        batch: global-step variable counting optimizer updates; multiplied by
            BATCH_SIZE to obtain the number of samples seen.

    Returns:
        Tensor holding the decayed learning rate, never smaller than 0.00001.
    """
    decayed_rate = tf.train.exponential_decay(
        learning_rate=BASE_LEARNING_RATE,
        global_step=batch * BATCH_SIZE,  # samples seen so far
        decay_steps=DECAY_STEP,
        decay_rate=DECAY_RATE,
        staircase=True)
    # Lower bound: the learning rate is not allowed to fall below 0.00001.
    return tf.maximum(decayed_rate, 0.00001)


def get_bn_decay(batch):
    """Return the batch-norm decay tensor for the current global step.

    A momentum value starts at BN_INIT_DECAY and is multiplied by
    BN_DECAY_DECAY_RATE once per BN_DECAY_DECAY_STEP samples seen (staircase).
    The returned decay is ``1 - momentum``, capped at BN_DECAY_CLIP.

    Args:
        batch: global-step variable counting optimizer updates; multiplied by
            BATCH_SIZE to obtain the number of samples seen.

    Returns:
        Tensor equal to ``min(BN_DECAY_CLIP, 1 - momentum)``.
    """
    momentum = tf.train.exponential_decay(
        learning_rate=BN_INIT_DECAY,
        global_step=batch * BATCH_SIZE,  # samples seen so far
        decay_steps=BN_DECAY_DECAY_STEP,
        decay_rate=BN_DECAY_DECAY_RATE,
        staircase=True)
    return tf.minimum(BN_DECAY_CLIP, 1 - momentum)


def train():
    """Build the training graph and run the train/eval loop for MAX_EPOCH epochs.

    The graph (placeholders, model, loss, accuracy, optimizer, saver) is built
    on '/gpu:<GPU_INDEX>' with soft placement enabled. Every epoch calls
    train_one_epoch and eval_one_epoch; a checkpoint is written to
    LOG_DIR/model.ckpt every 10th epoch and after the final epoch.

    The session and both summary writers are closed on exit, including when
    training is interrupted by an exception.

    Raises:
        ValueError: if OPTIMIZER is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            # Input placeholders are created by the model module for a fixed
            # batch size and number of points per cloud.
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            # `batch` is the global step: it is passed as global_step to
            # minimize() below, so the optimizer increments it on every update.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Prediction and loss built on top of the placeholders.
            # presumably `pred` holds per-class scores of shape (BATCH_SIZE, NUM_CLASSES) — confirm in MODEL
            pred, end_points = MODEL.get_model(pointclouds_pl, is_training_pl,
                                               bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, end_points)
            tf.summary.scalar('loss', loss)

            # Per-sample correctness (argmax over axis 1 vs. label) and the
            # fraction of correct predictions within the batch.
            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            # Training operator.
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Without this branch `optimizer` would be unbound below and the
                # failure would surface as an obscure UnboundLocalError.
                raise ValueError(
                    "Unsupported OPTIMIZER %r: expected 'momentum' or 'adam'" % (OPTIMIZER,))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Op to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        train_writer = None
        test_writer = None
        try:
            # Summary writers.
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

            # Initialise variables. The placeholder is fed to work around the bug
            # introduced in TF 0.12.1, see
            # http://stackoverflow.com/questions/41543774/invalidargumenterror-for-tensor-bool-tensorflow-0-12-1
            init = tf.global_variables_initializer()
            sess.run(init, {is_training_pl: True})

            ops = {'pointclouds_pl': pointclouds_pl,
                   'labels_pl': labels_pl,
                   'is_training_pl': is_training_pl,
                   'pred': pred,
                   'loss': loss,
                   'train_op': train_op,
                   'merged': merged,
                   'step': batch}

            # For every epoch: train, evaluate, and periodically checkpoint.
            for epoch in range(MAX_EPOCH):
                log_string(LOG_FOUT, '**** EPOCH %03d ****' % (epoch))
                sys.stdout.flush()

                train_one_epoch(sess, ops, train_writer)
                eval_one_epoch(sess, ops, test_writer)

                # Save every 10th epoch and always after the last one, so the
                # final weights are never lost (e.g. epochs 41-49 of 50).
                if epoch % 10 == 0 or epoch == MAX_EPOCH - 1:
                    save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                    log_string(LOG_FOUT, "Model saved in file: %s" % save_path)
        finally:
            # Flush pending summaries and release the session (and GPU memory)
            # even when training is interrupted.
            for writer in (train_writer, test_writer):
                if writer is not None:
                    writer.close()
            sess.close()


# define function which creates data batches and feeds them to a built graph for training
def train_one_epoch(sess, ops, train_writer):
    """Run one training pass over every file listed in TRAIN_FILES.

    Files are visited in random order. Each file is loaded, truncated to
    NUM_POINT points per cloud, shuffled, and split into full batches of
    BATCH_SIZE clouds (a trailing partial batch is dropped because the
    placeholders were created for a fixed batch size). Every batch is
    augmented, fed through the train op, and its summary is written.
    Mean loss and accuracy are logged once per file.

    Args:
        sess: session holding the built graph.
        ops: dict mapping from string to tf ops / tensors; the keys used here
            are 'pointclouds_pl', 'labels_pl', 'is_training_pl', 'merged',
            'step', 'train_op', 'loss' and 'pred'.
        train_writer: summary writer receiving one summary per training step.
    """
    is_training = True

    # Random visiting order of the train files.
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)
    print("train_file_idxs is", train_file_idxs)

    for fn in range(len(TRAIN_FILES)):
        log_string(LOG_FOUT, 'train file ----' + str(fn) + '-----')
        # presumably current_data is (num_clouds, num_points, 3) and current_label one label per cloud — confirm in provider.load_h5
        current_data, current_label = provider.load_h5(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:, 0:NUM_POINT, :]  # keep only the first NUM_POINT points of each cloud
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        if num_batches == 0:
            # Fewer clouds than one batch: nothing can be fed, and the averages
            # below would divide by zero.
            log_string(LOG_FOUT, 'skipped: %d clouds is less than one batch' % file_size)
            continue

        total_correct = 0
        total_seen = 0
        loss_sum = 0

        # Clouds of the file are consumed in consecutive groups of BATCH_SIZE.
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_labels = current_label[start_idx:end_idx]

            # Augmentation chain provided by `provider` (by name: rotation,
            # jitter, random scale, rotation perturbation, shift).
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)
            jittered_data = provider.random_scale_point_cloud(jittered_data)
            jittered_data = provider.rotate_perturbation_point_cloud(jittered_data)
            jittered_data = provider.shift_point_cloud(jittered_data)

            # Feed the batch and perform one optimizer update.
            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: batch_labels,
                         ops['is_training_pl']: is_training}
            summary, step, _, loss_value, pred_value = sess.run([ops['merged'], ops['step'],
                                                                 ops['train_op'], ops['loss'], ops['pred']],
                                                                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_value = np.argmax(pred_value, 1)  # predicted class = argmax over axis 1
            correct = np.sum(pred_value == batch_labels)  # number of correct predictions in the batch
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_value

        # Statistics of the current file (not of the whole epoch).
        log_string(LOG_FOUT, 'mean loss: %f' % (loss_sum / float(num_batches)))
        log_string(LOG_FOUT, 'accuracy: %f' % (total_correct / float(total_seen)))


# define function which creates data batches and feeds them to a built graph for testing
def eval_one_epoch(sess, ops, test_writer):
    """Evaluate the current model on every file listed in TEST_FILES.

    Each file is loaded, truncated to NUM_POINT points per cloud and split
    into full batches of BATCH_SIZE clouds (a trailing partial batch is
    dropped because the placeholders were created for a fixed batch size).
    No augmentation or shuffling is applied. Overall mean loss, overall
    accuracy and the mean of per-class accuracies are logged once at the end.

    Args:
        sess: session holding the built graph.
        ops: dict mapping from string to tf ops / tensors; the keys used here
            are 'pointclouds_pl', 'labels_pl', 'is_training_pl', 'merged',
            'step', 'loss' and 'pred'.
        test_writer: summary writer for evaluation.
            NOTE(review): it is currently never written to — confirm whether
            evaluation summaries are intended to be recorded.
    """
    is_training = False
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    total_seen_class = [0 for _ in range(NUM_CLASSES)]
    total_correct_class = [0 for _ in range(NUM_CLASSES)]

    # Evaluation uses every test file, in listed order.
    for fn in range(len(TEST_FILES)):
        log_string(LOG_FOUT, 'eval file ----' + str(fn) + '-----')
        current_data, current_label = provider.load_h5(TEST_FILES[fn])  # load data and labels from file
        current_data = current_data[:, 0:NUM_POINT, :]  # keep only the first NUM_POINT points of each cloud
        current_label = np.squeeze(current_label)  # no shuffling needed for evaluation

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        # Clouds of the file are consumed in consecutive groups of BATCH_SIZE.
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_labels = current_label[start_idx:end_idx]

            feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                         ops['labels_pl']: batch_labels,
                         ops['is_training_pl']: is_training}
            summary, step, loss_value, pred_value = sess.run([ops['merged'], ops['step'],
                                                              ops['loss'], ops['pred']], feed_dict=feed_dict)
            pred_value = np.argmax(pred_value, 1)  # predicted class = argmax over axis 1
            correct = np.sum(pred_value == batch_labels)  # number of correct predictions in the batch
            total_correct += correct
            total_seen += BATCH_SIZE
            # Weight the batch loss by the batch size so that dividing by
            # total_seen below yields a per-cloud mean.
            loss_sum += (loss_value * BATCH_SIZE)
            for offset, label in enumerate(batch_labels):
                total_seen_class[label] += 1
                total_correct_class[label] += (pred_value[offset] == label)

    if total_seen == 0:
        # No full batch was evaluated; every average below would divide by zero.
        log_string(LOG_FOUT, 'eval skipped: no full batch of %d clouds available' % BATCH_SIZE)
        return

    log_string(LOG_FOUT, 'eval mean loss: %f' % (loss_sum / float(total_seen)))
    log_string(LOG_FOUT, 'eval accuracy: %f' % (total_correct / float(total_seen)))

    # np.float was removed in NumPy 1.24, so use the explicit np.float64.
    # Classes without any evaluated sample are left out of the average; a 0/0
    # for such a class would otherwise turn the whole mean into NaN.
    seen_per_class = np.array(total_seen_class, dtype=np.float64)
    correct_per_class = np.array(total_correct_class, dtype=np.float64)
    has_samples = seen_per_class > 0
    log_string(LOG_FOUT, 'eval avg class acc: %f' % (
        np.mean(correct_per_class[has_samples] / seen_per_class[has_samples])))

In [8]:
if __name__ == "__main__":
    # Close the log file even when training stops early (for example on a
    # KeyboardInterrupt), so the buffered log is not left open.
    try:
        train()
    finally:
        LOG_FOUT.close()

Tensor("Placeholder_2:0", shape=(), dtype=bool, device=/device:GPU:0)
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
**** EPOCH 000 ****
train_file_idxs is [3 2 4 0 1]
train file ----0-----


  f = h5py.File(h5_filename)


mean loss: 3.564339
accuracy: 0.188235
train file ----1-----
mean loss: 3.176361
accuracy: 0.278431
train file ----2-----
mean loss: 2.881954
accuracy: 0.365196
train file ----3-----
mean loss: 2.695330
accuracy: 0.430882
train file ----4-----
mean loss: 2.529665
accuracy: 0.491667
eval file ----0-----
eval file ----1-----
eval mean loss: 2.225706
eval accuracy: 0.590686
eval avg class acc: 0.516813
Model saved in file: log/model.ckpt
**** EPOCH 001 ****
train_file_idxs is [1 3 4 2 0]
train file ----0-----
mean loss: 2.409746
accuracy: 0.545588
train file ----1-----
mean loss: 2.319252
accuracy: 0.582843
train file ----2-----
mean loss: 2.255344
accuracy: 0.599265
train file ----3-----
mean loss: 2.224069
accuracy: 0.635784
train file ----4-----
mean loss: 2.216511
accuracy: 0.624510
eval file ----0-----
eval file ----1-----
eval mean loss: 1.990084
eval accuracy: 0.697712
eval avg class acc: 0.606529
**** EPOCH 002 ****
train_file_idxs is [2 3 0 4 1]
train file ----0-----
mean loss: 2

mean loss: 1.656243
accuracy: 0.854412
train file ----2-----
mean loss: 1.667702
accuracy: 0.857843
train file ----3-----
mean loss: 1.656534
accuracy: 0.853922
train file ----4-----
mean loss: 1.670900
accuracy: 0.854412
eval file ----0-----
eval file ----1-----
eval mean loss: 1.555554
eval accuracy: 0.882761
eval avg class acc: 0.830617
**** EPOCH 018 ****
train_file_idxs is [2 1 3 4 0]
train file ----0-----
mean loss: 1.640860
accuracy: 0.858824
train file ----1-----
mean loss: 1.680612
accuracy: 0.838725
train file ----2-----
mean loss: 1.658591
accuracy: 0.856373
train file ----3-----
mean loss: 1.634266
accuracy: 0.867647
train file ----4-----
mean loss: 1.661254
accuracy: 0.848039
eval file ----0-----
eval file ----1-----
eval mean loss: 1.555325
eval accuracy: 0.877859
eval avg class acc: 0.817047
**** EPOCH 019 ****
train_file_idxs is [1 2 0 3 4]
train file ----0-----
mean loss: 1.644556
accuracy: 0.862255
train file ----1-----
mean loss: 1.650434
accuracy: 0.863725
train fil

mean loss: 1.573040
accuracy: 0.887255
train file ----3-----
mean loss: 1.557981
accuracy: 0.893137
train file ----4-----
mean loss: 1.562428
accuracy: 0.891667
eval file ----0-----
eval file ----1-----
eval mean loss: 1.503339
eval accuracy: 0.888889
eval avg class acc: 0.848052
**** EPOCH 035 ****
train_file_idxs is [4 0 1 3 2]
train file ----0-----
mean loss: 1.540425
accuracy: 0.903799
train file ----1-----
mean loss: 1.544385
accuracy: 0.903922
train file ----2-----
mean loss: 1.567756
accuracy: 0.893137
train file ----3-----
mean loss: 1.553367
accuracy: 0.902941
train file ----4-----
mean loss: 1.554902
accuracy: 0.902941
eval file ----0-----
eval file ----1-----
eval mean loss: 1.498260
eval accuracy: 0.892565
eval avg class acc: 0.851755
**** EPOCH 036 ****
train_file_idxs is [0 1 4 2 3]
train file ----0-----
mean loss: 1.543691
accuracy: 0.907843
train file ----1-----
mean loss: 1.552477
accuracy: 0.898529
train file ----2-----
mean loss: 1.548424
accuracy: 0.905637
train fil

mean loss: 1.506767
accuracy: 0.920588
train file ----4-----
mean loss: 1.493347
accuracy: 0.931373
eval file ----0-----
eval file ----1-----
eval mean loss: 1.478546
eval accuracy: 0.894199
eval avg class acc: 0.864893
**** EPOCH 052 ****
train_file_idxs is [3 4 2 1 0]
train file ----0-----
mean loss: 1.497125
accuracy: 0.925980
train file ----1-----
mean loss: 1.475272
accuracy: 0.931985
train file ----2-----
mean loss: 1.507135
accuracy: 0.918137
train file ----3-----
mean loss: 1.494829
accuracy: 0.922059
train file ----4-----
mean loss: 1.501017
accuracy: 0.920098
eval file ----0-----
eval file ----1-----
eval mean loss: 1.476872
eval accuracy: 0.892974
eval avg class acc: 0.855515
**** EPOCH 053 ****
train_file_idxs is [0 1 4 2 3]
train file ----0-----
mean loss: 1.499498
accuracy: 0.918137
train file ----1-----
mean loss: 1.489464
accuracy: 0.925490
train file ----2-----
mean loss: 1.489710
accuracy: 0.924020
train file ----3-----
mean loss: 1.501398
accuracy: 0.915686
train fil

eval file ----1-----
eval mean loss: 1.468258
eval accuracy: 0.901552
eval avg class acc: 0.868440
**** EPOCH 069 ****
train_file_idxs is [1 4 3 2 0]
train file ----0-----
mean loss: 1.460698
accuracy: 0.940196
train file ----1-----
mean loss: 1.469759
accuracy: 0.925245
train file ----2-----
mean loss: 1.459329
accuracy: 0.940686
train file ----3-----
mean loss: 1.469024
accuracy: 0.929412
train file ----4-----
mean loss: 1.466122
accuracy: 0.934314
eval file ----0-----
eval file ----1-----
eval mean loss: 1.462903
eval accuracy: 0.892974
eval avg class acc: 0.858594
**** EPOCH 070 ****
train_file_idxs is [2 4 1 3 0]
train file ----0-----
mean loss: 1.463942
accuracy: 0.941667
train file ----1-----
mean loss: 1.458223
accuracy: 0.941176
train file ----2-----
mean loss: 1.463415
accuracy: 0.936275
train file ----3-----
mean loss: 1.467188
accuracy: 0.934314
train file ----4-----
mean loss: 1.458283
accuracy: 0.939216
eval file ----0-----
eval file ----1-----
eval mean loss: 1.461835
ev

train_file_idxs is [1 2 4 0 3]
train file ----0-----
mean loss: 1.430314
accuracy: 0.949510
train file ----1-----
mean loss: 1.431855
accuracy: 0.946569
train file ----2-----
mean loss: 1.433027
accuracy: 0.946691
train file ----3-----
mean loss: 1.438038
accuracy: 0.945098
train file ----4-----
mean loss: 1.431334
accuracy: 0.946569
eval file ----0-----
eval file ----1-----
eval mean loss: 1.459293
eval accuracy: 0.899918
eval avg class acc: 0.867530
**** EPOCH 087 ****
train_file_idxs is [2 3 0 4 1]
train file ----0-----
mean loss: 1.440706
accuracy: 0.942647
train file ----1-----
mean loss: 1.434006
accuracy: 0.943137
train file ----2-----
mean loss: 1.436254
accuracy: 0.945588
train file ----3-----
mean loss: 1.427743
accuracy: 0.952819
train file ----4-----
mean loss: 1.434121
accuracy: 0.948039
eval file ----0-----
eval file ----1-----
eval mean loss: 1.455938
eval accuracy: 0.899918
eval avg class acc: 0.869736
**** EPOCH 088 ****
train_file_idxs is [2 3 1 4 0]
train file ----0-

mean loss: 1.411418
accuracy: 0.955882
train file ----1-----
mean loss: 1.419726
accuracy: 0.953431
train file ----2-----
mean loss: 1.420709
accuracy: 0.951961
train file ----3-----
mean loss: 1.416069
accuracy: 0.958824
train file ----4-----
mean loss: 1.415779
accuracy: 0.953922
eval file ----0-----
eval file ----1-----
eval mean loss: 1.454033
eval accuracy: 0.899510
eval avg class acc: 0.867382
**** EPOCH 104 ****
train_file_idxs is [0 1 3 4 2]
train file ----0-----
mean loss: 1.410317
accuracy: 0.957843
train file ----1-----
mean loss: 1.411306
accuracy: 0.957843
train file ----2-----
mean loss: 1.417745
accuracy: 0.952451
train file ----3-----
mean loss: 1.414611
accuracy: 0.952819
train file ----4-----
mean loss: 1.423514
accuracy: 0.950490
eval file ----0-----
eval file ----1-----
eval mean loss: 1.450573
eval accuracy: 0.900735
eval avg class acc: 0.868197
**** EPOCH 105 ****
train_file_idxs is [3 4 1 2 0]
train file ----0-----
mean loss: 1.417572
accuracy: 0.951961
train fil

KeyboardInterrupt: 