In [8]:
#      0=================================0
#      |    Kernel Point Convolutions    |
#      0=================================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Callable script to start a training on S3DIS dataset
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Hugues THOMAS - 06/03/2020
#


# ----------------------------------------------------------------------------------------------------------------------
#
#           Imports and global variables
#       \**********************************/
#

# Common libs
import os
import signal
import subprocess
import sys
import time

import numpy as np

# Dataset
from datasets.S3DIS import *
from torch.utils.data import DataLoader

from utils.config import Config
from utils.trainer import ModelTrainer
from models.architectures import KPFCNN

In [9]:
# ----------------------------------------------------------------------------------------------------------------------
#
#           Config Class
#       \******************/
#
class S3DISConfig(Config):
    """
    Training configuration for this dataset: each attribute below overrides the parameter of the same name
    inherited from Config.
    """

    ####################
    # Dataset parameters
    ####################

    # Name of the dataset
    dataset = 'S3DIS'

    # Number of classes; left empty here because the dataset class overwrites it when the dataset is initialized
    num_classes = None

    # Type of task performed on this dataset; also overwritten by the dataset class
    dataset_task = ''

    # CPU threads used by the input pipeline (was 10, changed by kuramin)
    input_threads = 0

    #########################
    # Architecture definition
    #########################

    # Ordered list of network blocks, grouped here by stage: four strided transitions in the encoding part,
    # then four (upsample, unary) pairs
    architecture = (
        ['simple', 'resnetb']
        + ['resnetb_strided', 'resnetb', 'resnetb']
        + ['resnetb_strided', 'resnetb_deformable', 'resnetb_deformable']
        + ['resnetb_deformable_strided', 'resnetb_deformable', 'resnetb_deformable']
        + ['resnetb_deformable_strided', 'resnetb_deformable', 'resnetb_deformable']
        + ['nearest_upsample', 'unary'] * 4
    )

    ###################
    # KPConv parameters
    ###################

    # Radius of the input sphere (1.5 for s3dis, changed by kuramin for ahn)
    in_radius = 15

    # Number of kernel points (kuramin changed back from 9)
    num_kernel_points = 15

    # Size of the first subsampling grid in meter (0.03 for s3dis, changed by kuramin for ahn)
    first_subsampling_dl = 2.0

    # Convolution radius, expressed in "number grid cell" (2.5 is the standard value)
    conv_radius = 2.5

    # Deformable convolution radius in "number grid cell"; larger so that the deformed kernel can spread out
    deform_radius = 6.0

    # Radius of the area of influence of each kernel point in "number grid cell" (1.0 is the standard value)
    KP_extent = 1.2

    # Behavior of convolutions, one of ('constant', 'linear', 'gaussian')
    KP_influence = 'linear'

    # Aggregation function of KPConv, one of ('closest', 'sum')
    aggregation_mode = 'sum'

    # Choice of input features
    first_features_dim = 128  # kuramin changed back from 8
    in_features_dim = 5  # kuramin changed back from 4

    # Whether the network can learn modulations
    modulated = False

    # Batch normalization parameters
    use_batch_norm = True
    batch_norm_momentum = 0.02

    # Deformable offset loss
    #   > 'point2point': fit geometry by penalizing distance from deform point to input points
    #   > 'point2plane': fit geometry by penalizing distance from deform point to input point triplet
    #                    (not implemented)
    deform_fitting_mode = 'point2point'
    deform_fitting_power = 1.0  # Multiplier for the fitting/repulsive loss
    deform_lr_factor = 0.1  # Multiplier for learning rate applied to the deformations
    repulse_extent = 1.2  # Distance of repulsion for deformed kernel points

    #####################
    # Training parameters
    #####################

    # Maximal number of epochs (was 500, changed by kuramin)
    max_epoch = 10

    # Learning rate management: the same decay factor is registered for every epoch from 1 to max_epoch - 1
    learning_rate = 1e-2
    momentum = 0.98
    lr_decays = dict.fromkeys(range(1, max_epoch), 0.1 ** (1 / 150))
    grad_clip_norm = 100.0

    # Number of batch (target_aver_batch_size will be set equal to it)
    batch_num = 6

    # Steps per epoch, i.e. how many batches enumerate(dataloader) creates (kuramin changed back from 100)
    steps_per_epoch = 50

    # Number of validation examples per epoch
    validation_size = 50

    # Number of epoch between each checkpoint
    checkpoint_gap = 50

    # Augmentations
    augment_scale_anisotropic = True
    augment_symmetries = [True, False, False]
    augment_rotation = 'vertical'
    augment_scale_min = 0.8
    augment_scale_max = 1.2
    augment_noise = 0.001
    augment_color = 0.8

    # How the segmentation loss is balanced
    #   > 'none': Each point in the whole batch has the same contribution.
    #   > 'class': Each class has the same contribution (points are weighted according to class balance)
    #   > 'batch': Each cloud in the batch has the same contribution (points are weighted according cloud sizes)
    segloss_balance = 'none'

    # Whether convergence is saved, and where (None leaves the path unset here)
    saving = True
    saving_path = None

In [10]:
# ----------------------------------------------------------------------------------------------------------------------
#
#           Main Call
#       \***************/
#
#if __name__ == '__main__':

############################
# Initialize the environment
############################

# Set which gpu is going to be used
PREFERRED_GPU = 3  # index used whenever the machine has at least that many GPUs


def select_gpu_id(gpu_count, preferred=PREFERRED_GPU):
    """
    Pick the GPU index to expose through CUDA_VISIBLE_DEVICES.

    The preferred index is clamped to the last available GPU, so the returned index always exists on
    machines with fewer GPUs than preferred + 1 (e.g. 2 GPUs -> '1' instead of a non-existent '3').

    :param gpu_count: number of GPUs detected on this machine
    :param preferred: index to use when it exists on this machine
    :return: the chosen index as a string ('0' when gpu_count is 0 or 1)
    """
    last_index = max(gpu_count - 1, 0)
    return str(min(preferred, last_index))


try:
    gpu_listing = subprocess.check_output(["nvidia-smi", "-L"]).decode(errors='replace')
except (OSError, subprocess.CalledProcessError):
    # nvidia-smi is missing or failed: report zero GPUs instead of aborting the notebook here
    gpu_listing = ''

# The number of GPUs is taken as the number of 'UUID' occurrences in the nvidia-smi listing
number_of_gpus = gpu_listing.count('UUID')
print('Number of GPUs is', number_of_gpus)

GPU_ID = select_gpu_id(number_of_gpus)
print('GPU_ID is', GPU_ID)

# Set GPU visible device
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID

Number of GPUs is 4
GPU_ID is 3


In [11]:
###############
# Previous chkp
###############

# Folder name (inside 'results') of a previous training to resume from; leave empty to start a new training
# previous_training_path = 'Log_2020-03-19_19-53-27'
previous_training_path = ''

# Index of the checkpoint to start from. With None, 'current_chkp.tar' is used
chkp_idx = None

if not previous_training_path:
    # New training: nothing to restore
    chosen_chkp = None
else:
    # List every snapshot stored in the chosen training folder
    chkp_path = os.path.join('results', previous_training_path, 'checkpoints')
    chkps = [f for f in os.listdir(chkp_path) if f.startswith('chkp')]

    # Pick the snapshot to restore and turn it into a full path
    chosen_chkp = 'current_chkp.tar' if chkp_idx is None else np.sort(chkps)[chkp_idx]
    chosen_chkp = os.path.join('results', previous_training_path, 'checkpoints', chosen_chkp)

In [12]:
##############
# Prepare Data (several cells)
##############

print('', 'Data Preparation', '****************', sep='\n')

# Build the configuration; when resuming, load the one stored in the previous training folder
# and clear its saving path
config = S3DISConfig()
if previous_training_path:
    previous_run_dir = os.path.join('results', previous_training_path)
    config.load(previous_run_dir)
    config.saving_path = None


Data Preparation
****************
self.deform_layers set to [False, False, True, True, True]


In [13]:
# When at least one extra argument is present, reset the saving path (sys.argv[1] itself is no longer used)
if sys.argv[1:]:
    config.saving_path = None  #sys.argv[1]
    print('config.saving_path is', config.saving_path)

config.saving_path is None


In [14]:
# Build the training and validation datasets, both with use_potentials enabled
training_dataset = S3DISDataset(config,
                                set='training',
                                use_potentials=True)  # kuramin commented
test_dataset = S3DISDataset(config,
                            set='validation',
                            use_potentials=True)

self.deform_layers set to []
Ply-files are already created based on txt-files

Preparing KDTree ../datasets/Stanford3dDataset_v1.2/input_2.000/1_rgb_KDTree.pkl for cloud 1_rgb with path ../datasets/Stanford3dDataset_v1.2/input_2.000/1_rgb.ply, subsampled at 2.000
field_list[0].shape[0] is 199807
5.6 MB loaded in 4.1s

Preparing potentials
Lets find coarse poiints with pot_dl = 1.5
Done in 0.4s

self.deform_layers set to []
Ply-files are already created based on txt-files

Preparing KDTree ../datasets/Stanford3dDataset_v1.2/input_2.000/2_rgb_KDTree.pkl for cloud 2_rgb with path ../datasets/Stanford3dDataset_v1.2/input_2.000/2_rgb.ply, subsampled at 2.000
field_list[0].shape[0] is 211785
5.9 MB loaded in 4.8s

Preparing potentials
Lets find coarse poiints with pot_dl = 1.5
Done in 0.4s

Preparing reprojection indices for testing
Transforming labels of whole cloud to int in order to fill self.test_proj. Number of processed members is 100000
Transforming labels of whole cloud to int in ord

In [15]:
# One sampler per dataset; a sampler defines the strategy to draw samples from its dataset
training_sampler = S3DISSampler(training_dataset)
test_sampler = S3DISSampler(test_dataset)

In [16]:
# Initialize the dataloader
r"""
    dataset (Dataset): dataset from which to load the data.
    batch_size (int, optional): how many samples per batch to load
        (default: ``1``).
    shuffle (bool, optional): set to ``True`` to have the data reshuffled
        at every epoch (default: ``False``).
    sampler (Sampler, optional): defines the strategy to draw samples from
        the dataset. If specified, :attr:`shuffle` must be ``False``.
    batch_sampler (Sampler, optional): like :attr:`sampler`, but returns a batch of
        indices at a time. Mutually exclusive with :attr:`batch_size`,
        :attr:`shuffle`, :attr:`sampler`, and :attr:`drop_last`.
    num_workers (int, optional): how many subprocesses to use for data
        loading. ``0`` means that the data will be loaded in the main process.
        (default: ``0``)
    collate_fn (callable, optional): merges a list of samples to form a
        mini-batch of Tensor(s).  Used when using batched loading from a
        map-style dataset.
    pin_memory (bool, optional): If ``True``, the data loader will copy Tensors
        into CUDA pinned memory before returning them.  If your data elements
        are a custom type, or your :attr:`collate_fn` returns a batch that is a custom type,
        see the example below.
    drop_last (bool, optional): set to ``True`` to drop the last incomplete batch,
        if the dataset size is not divisible by the batch size. If ``False`` and
        the size of dataset is not divisible by the batch size, then the last batch
        will be smaller. (default: ``False``)
    timeout (numeric, optional): if positive, the timeout value for collecting a batch
        from workers. Should always be non-negative. (default: ``0``)
    worker_init_fn (callable, optional): If not ``None``, this will be called on each
        worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as
        input, after seeding and before data loading. (default: ``None``)
"""
# Loader over the training dataset, drawing through training_sampler and merging samples with S3DISCollate
training_loader = DataLoader(
    training_dataset,
    batch_size=1,
    sampler=training_sampler,
    collate_fn=S3DISCollate,
    num_workers=config.input_threads,
    pin_memory=True,
)

In [17]:
# Loader over the validation dataset, drawing through test_sampler and merging samples with S3DISCollate
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    sampler=test_sampler,
    collate_fn=S3DISCollate,
    num_workers=config.input_threads,
    pin_memory=True,
)

In [18]:
# Calibrate each sampler against its own loader (training first, then validation)
for _sampler, _loader in ((training_sampler, training_loader),
                          (test_sampler, test_loader)):
    _sampler.calibration(_loader, verbose=True)

# Optional debug functions
# debug_timing(training_dataset, training_loader)
# debug_timing(test_dataset, test_loader)
# debug_upsampling(training_dataset, training_loader)
# debug_upsampling(training_dataset, training_loader)


Starting Calibration (use verbose=True for more details)

Previous calibration found:
Check batch limit dictionary
[91m"potentials_15.000_2.000_6": ?[0m
Check neighbors limit dictionary
[91m"2.000_5.000": ?[0m
[91m"4.000_10.000": ?[0m
[91m"8.000_48.000": ?[0m
[91m"16.000_96.000": ?[0m
[91m"32.000_192.000": ?[0m
Before range10
Begin iter o range10. Before enumerate(dataloader)
neighb_mat.numpy()) [[  0 252  47 ... 639 639 639]
 [  1  15   2 ... 639 639 639]
 [  2   3   1 ... 639 639 639]
 ...
 [636 637 554 ... 639 639 639]
 [637 638 636 ... 639 639 639]
 [638 637 595 ... 639 639 639]]
neighb_mat.shape[0]) 639
neighb_mat.numpy()) [[  0   1   2 ... 161 161 161]
 [  1   0  28 ... 161 161 161]
 [  2   3   1 ... 161 161 161]
 ...
 [158  48  74 ... 161 161 161]
 [159 138 153 ... 161 161 161]
 [160  75  49 ... 161 161 161]]
neighb_mat.shape[0]) 161
neighb_mat.numpy()) [[ 0 29  1 ... 23 34 22]
 [ 1  2 12 ... 28 18 25]
 [ 2  1 32 ... 20 18 25]
 ...
 [33 27  3 ... 25 21 18]
 [34 10  

In [19]:
print('\nModel Preparation', '*****************', sep='\n')

# Build the network; t1 marks the start of model preparation and is read again once the trainer is created
t1 = time.time()
net = KPFCNN(config,
             training_dataset.label_values,
             training_dataset.ignored_labels)

# Disabled debug output (prints the network, its trainable parameter shapes and the model size)
# debug = False
# if debug:
#     print('\n*************************************\n')
#     print(net)
#     print('\n*************************************\n')
#     for param in net.parameters():
#         if param.requires_grad:
#             print(param.shape)
#     print('\n*************************************\n')
#     print("Model size %i" % sum(param.numel() for param in net.parameters() if param.requires_grad))
#     print('\n*************************************\n')


Model Preparation
*****************
encoder_blocks is calculated as ModuleList(
  (0): SimpleBlock(
    (KPConv): KPConv(radius: 5.00, in_feat: 5, out_feat: 64)
    (batch_norm): BatchNormBlock(in_feat: 64, momentum: 0.020, only_bias: False)
    (leaky_relu): LeakyReLU(negative_slope=0.1)
  )
  (1): ResnetBottleneckBlock(
    (unary1): UnaryBlock(in_feat: 64, out_feat: 32, BN: True, ReLU: True)
    (KPConv): KPConv(radius: 5.00, in_feat: 32, out_feat: 32)
    (batch_norm_conv): BatchNormBlock(in_feat: 32, momentum: 0.020, only_bias: False)
    (unary2): UnaryBlock(in_feat: 32, out_feat: 128, BN: True, ReLU: False)
    (unary_shortcut): UnaryBlock(in_feat: 64, out_feat: 128, BN: True, ReLU: False)
    (leaky_relu): LeakyReLU(negative_slope=0.1)
  )
  (2): ResnetBottleneckBlock(
    (unary1): UnaryBlock(in_feat: 128, out_feat: 32, BN: True, ReLU: True)
    (KPConv): KPConv(radius: 5.00, in_feat: 32, out_feat: 32)
    (batch_norm_conv): BatchNormBlock(in_feat: 32, momentum: 0.020, only_b

In [20]:
# Create the trainer, handing it the checkpoint path selected earlier (None for a new training)
trainer = ModelTrainer(net, config, chkp_path=chosen_chkp)

# Report the time elapsed since t1 was recorded
elapsed = time.time() - t1
print('Done in {:.1f}s\n'.format(elapsed))

Done in 0.2s



In [21]:
print('\nStart training', '**************', sep='\n')

# Run the training loop on the training loader, validating on the test loader
# NOTE(review): the recorded run of this cell stopped with "KeyError: 26"; the cause is not visible
# from this notebook and needs to be tracked down in the trainer/dataset code
trainer.train(net, training_loader, test_loader, config)


Start training
**************
e000-i0000 => L=22.110 acc= 10% / t(ms):  44.6 925.4  95.1)
e000-i0005 => L=22.034 acc= 63% / t(ms):  55.1  60.3  83.1)
e000-i0011 => L=17.752 acc= 64% / t(ms):  55.0  60.0  81.7)
e000-i0017 => L=17.863 acc= 59% / t(ms):  53.6  60.0  83.5)
e000-i0023 => L=17.393 acc= 55% / t(ms):  53.6  60.0  83.7)
e000-i0029 => L=15.611 acc= 69% / t(ms):  54.6  60.1  83.2)
e000-i0034 => L=17.179 acc= 71% / t(ms):  54.9  60.0  85.0)
e000-i0040 => L=16.907 acc= 71% / t(ms):  55.4  60.2  85.3)
e000-i0046 => L=32.621 acc= 79% / t(ms):  55.9  60.1  84.1)
Validation : 26.0% (timings : 26.43 10.72)
Validation : 54.0% (timings : 39.29 15.68)
Validation : 84.0% (timings : 44.92 18.21)
field_list[0].shape[0] is 198605
S3DIS mean IoU = 17.0%
e001-i0000 => L=28.417 acc= 69% / t(ms): 7573.5  60.1  91.9)
e001-i0006 => L=72.993 acc= 72% / t(ms):  61.5  59.5  75.4)
e001-i0012 => L=142.146 acc= 66% / t(ms):  58.4  59.9  78.6)
e001-i0018 => L=436.962 acc= 87% / t(ms):  56.6  59.8  78.5)
e

KeyError: 26

In [None]:
#print('Forcing exit now')
#os.kill(os.getpid(), signal.SIGINT)