In [12]:
#      0=================================0
#      |    Kernel Point Convolutions    |
#      0=================================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Callable script to start a training on S3DIS dataset
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Hugues THOMAS - 06/03/2020
#


# ----------------------------------------------------------------------------------------------------------------------
#
#           Imports and global variables
#       \**********************************/
#

# Common libs
import signal
import os

# Dataset
from datasets.S3DIS import *
from torch.utils.data import DataLoader

from utils.config import Config
from utils.trainer import ModelTrainer
from models.architectures import KPFCNN
import subprocess

In [13]:
# ----------------------------------------------------------------------------------------------------------------------
#
#           Config Class
#       \******************/
#
class S3DISConfig(Config):
    """
    Training configuration for the S3DIS dataset.

    Only the parameters that should differ for this dataset are set here, as
    class attributes on top of the base Config class.
    """

    # ------------------------------------------------------------------
    # Dataset
    # ------------------------------------------------------------------

    # Name of the dataset
    dataset = 'S3DIS'

    # Number of classes; placeholder, overwritten by the dataset class when the dataset is initialized
    num_classes = None

    # Task performed on this dataset; placeholder, also overwritten by the dataset class
    dataset_task = ''

    # CPU threads used by the input pipeline (value annotated as changed by kuramin)
    input_threads = 10

    # ------------------------------------------------------------------
    # Architecture
    # ------------------------------------------------------------------

    # Ordered list of network blocks. The first fourteen entries are convolution blocks,
    # written one group per line, each of the first four groups ending with a '*_strided' block;
    # the last eight entries alternate 'nearest_upsample' and 'unary'.
    architecture = [
        'simple', 'resnetb', 'resnetb_strided',
        'resnetb', 'resnetb', 'resnetb_strided',
        'resnetb_deformable', 'resnetb_deformable', 'resnetb_deformable_strided',
        'resnetb_deformable', 'resnetb_deformable', 'resnetb_deformable_strided',
        'resnetb_deformable', 'resnetb_deformable',
        'nearest_upsample', 'unary',
        'nearest_upsample', 'unary',
        'nearest_upsample', 'unary',
        'nearest_upsample', 'unary',
    ]

    # ------------------------------------------------------------------
    # KPConv
    # ------------------------------------------------------------------

    # Radius of the input sphere (kuramin: back to the S3DIS value, the AHN experiment used 15)
    in_radius = 1.5

    # Number of kernel points (kuramin: back from 9)
    num_kernel_points = 15

    # Size in meters of the first subsampling grid (kuramin: back to the S3DIS value, AHN used 2.0)
    first_subsampling_dl = 0.03

    # Convolution radius, expressed in grid cells (2.5 is the standard value)
    conv_radius = 2.5

    # Deformable convolution radius, expressed in grid cells; larger so the deformed kernel can spread out
    deform_radius = 6.0

    # Radius of the area of influence of each kernel point, in grid cells (1.0 is the standard value)
    KP_extent = 1.2

    # Convolution behavior, one of ('constant', 'linear', 'gaussian')
    KP_influence = 'linear'

    # KPConv aggregation function, one of ('closest', 'sum')
    aggregation_mode = 'sum'

    # Input feature choice (kuramin: back from 8 and 4 respectively)
    first_features_dim = 128
    in_features_dim = 5

    # Whether the network may learn modulations
    modulated = False

    # Batch normalization
    use_batch_norm = True
    batch_norm_momentum = 0.02

    # Deformable offset loss:
    #   'point2point' fits geometry by penalizing the distance from deformed points to input points
    #   'point2plane' would penalize the distance to an input point triplet (not implemented)
    deform_fitting_mode = 'point2point'
    # Multiplier for the fitting/repulsive loss
    deform_fitting_power = 1.0
    # Multiplier for the learning rate applied to the deformations
    deform_lr_factor = 0.1
    # Distance of repulsion for deformed kernel points
    repulse_extent = 1.2

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------

    # Maximal number of epochs (kuramin changed; the original note mentions 500)
    max_epoch = 10

    # Learning rate management.
    # lr_decays maps every epoch index in [1, max_epoch) to the same factor 0.1 ** (1 / 150).
    learning_rate = 0.01
    momentum = 0.98
    lr_decays = dict.fromkeys(range(1, max_epoch), 0.1 ** (1 / 150))
    grad_clip_norm = 100.0

    # Number of batch (target_aver_batch_size will be set equal to it)
    batch_num = 6

    # Steps per epoch, i.e. how many batches enumerate(dataloader) yields (kuramin: back from 100)
    steps_per_epoch = 50

    # Number of validation examples per epoch
    validation_size = 50

    # Number of epochs between two checkpoints
    checkpoint_gap = 50

    # Augmentations
    augment_scale_anisotropic = True
    augment_symmetries = [True, False, False]
    augment_rotation = 'vertical'
    augment_scale_min = 0.8
    augment_scale_max = 1.2
    augment_noise = 0.001
    augment_color = 0.8

    # Segmentation loss balancing:
    #   > 'none':  every point of the whole batch contributes equally
    #   > 'class': every class contributes equally (points weighted according to class balance)
    #   > 'batch': every cloud of the batch contributes equally (points weighted according to cloud sizes)
    segloss_balance = 'none'

    # Whether convergence is saved, and where (None leaves the path unset here)
    saving = True
    saving_path = None

In [14]:
# ----------------------------------------------------------------------------------------------------------------------
#
#           Main Call
#       \***************/
#
#if __name__ == '__main__':

############################
# Initialize the environment
############################

# Set which gpu is going to be used.
# The number of GPUs is taken as the number of 'UUID' occurrences in the output of
# `nvidia-smi -L`. If nvidia-smi is missing or fails, the count falls back to 0 so the
# cell still completes; the message below makes that situation visible.
PREFERRED_GPU_INDEX = 3  # GPU used on the 4-GPU machine this notebook was written for

try:
    gpu_listing = subprocess.check_output(["nvidia-smi", "-L"]).decode(errors='replace')
except (OSError, subprocess.CalledProcessError) as gpu_query_error:
    print('Could not query nvidia-smi:', gpu_query_error)
    gpu_listing = ''

number_of_gpus = gpu_listing.count('UUID')
print('Number of GPUs is', number_of_gpus)

# Clamp the preferred index to the last available device so that the selected id is
# always valid: 1 GPU -> '0', 2 GPUs -> '1', 4 or more GPUs -> '3'.
# The previous hard-coded '3' pointed to a non-existent device on 2- or 3-GPU machines.
GPU_ID = str(max(0, min(PREFERRED_GPU_INDEX, number_of_gpus - 1)))
print('GPU_ID is', GPU_ID)

# Set GPU visible device
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID

Number of GPUs is 4
GPU_ID is 3


In [15]:
###############
# Previous chkp
###############

# Name of the log folder (inside 'results') of a previous training to resume from.
# Leave it empty to start a new training.
# previous_training_path = 'Log_2020-03-19_19-53-27'
previous_training_path = ''

# Index of the checkpoint to restart from among the sorted 'chkp*' files.
# None selects 'current_chkp.tar'.
chkp_idx = None

if not previous_training_path:
    # New training: nothing to restore
    chosen_chkp = None
else:
    # List every snapshot stored in the chosen training folder
    chkp_path = os.path.join('results', previous_training_path, 'checkpoints')
    chkps = [f for f in os.listdir(chkp_path) if f.startswith('chkp')]

    # Pick the snapshot file name, then turn it into a full path
    chosen_chkp = 'current_chkp.tar' if chkp_idx is None else np.sort(chkps)[chkp_idx]
    chosen_chkp = os.path.join(chkp_path, chosen_chkp)

In [16]:
##############
# Prepare Data (several cells)
##############

# Banner: an empty line, the title, then its underline
print('', 'Data Preparation', '****************', sep='\n')

# Build the configuration object. When resuming a previous training, its stored
# parameters are loaded from the 'results' folder and the saving path is reset to None.
config = S3DISConfig()
if previous_training_path:
    config.load(os.path.join('results', previous_training_path))
    config.saving_path = None


Data Preparation
****************
self.deform_layers set to [False, False, True, True, True]


In [17]:
# When at least one command-line argument is present, the saving path is forced to None
# (the commented-out remainder shows it was once taken from the first argument).
# In the recorded run of this notebook the branch executed and printed None.
# NOTE(review): `sys` is not imported explicitly in the visible cells; presumably it is
# provided by the wildcard import from datasets.S3DIS — confirm.
if sys.argv[1:]:
    config.saving_path = None  # sys.argv[1]
    print('config.saving_path is', config.saving_path)

config.saving_path is None


In [18]:
# Build one dataset object per split, both with use_potentials enabled
training_dataset = S3DISDataset(
    config,
    set='training',
    use_potentials=True,
)
test_dataset = S3DISDataset(
    config,
    set='validation',
    use_potentials=True,
)

self.deform_layers set to []
Process S3DIS dataset
Ply-files are already created based on txt-files

Found KDTree ../datasets/Stanford3dDataset_v1.2/input_0.030/Area_1_KDTree.pkl for cloud Area_1 with path ../datasets/Stanford3dDataset_v1.2/input_0.030/Area_1.ply, subsampled at 0.030
146.6 MB loaded in 0.2s

Preparing potentials
Done in 0.0s

self.deform_layers set to []
Process S3DIS dataset
Ply-files are already created based on txt-files

Found KDTree ../datasets/Stanford3dDataset_v1.2/input_0.030/Area_3_KDTree.pkl for cloud Area_3 with path ../datasets/Stanford3dDataset_v1.2/input_0.030/Area_3.ply, subsampled at 0.030
62.6 MB loaded in 0.1s

Preparing potentials
Done in 0.0s

Preparing reprojection indices for testing
Area_3 done in 0.3s



In [19]:
# One sampler per dataset; a sampler defines the strategy used to draw samples from its dataset
training_sampler = S3DISSampler(
    training_dataset,
)
test_sampler = S3DISSampler(
    test_dataset,
)

In [20]:
# Training dataloader.
# The raw string below is a reference note on DataLoader arguments (it reads like an excerpt
# of the DataLoader documentation); it is a bare expression and has no effect at runtime.
r"""
    dataset (Dataset): dataset from which to load the data.
    batch_size (int, optional): how many samples per batch to load
        (default: ``1``).
    shuffle (bool, optional): set to ``True`` to have the data reshuffled
        at every epoch (default: ``False``).
    sampler (Sampler, optional): defines the strategy to draw samples from
        the dataset. If specified, :attr:`shuffle` must be ``False``.
    batch_sampler (Sampler, optional): like :attr:`sampler`, but returns a batch of
        indices at a time. Mutually exclusive with :attr:`batch_size`,
        :attr:`shuffle`, :attr:`sampler`, and :attr:`drop_last`.
    num_workers (int, optional): how many subprocesses to use for data
        loading. ``0`` means that the data will be loaded in the main process.
        (default: ``0``)
    collate_fn (callable, optional): merges a list of samples to form a
        mini-batch of Tensor(s).  Used when using batched loading from a
        map-style dataset.
    pin_memory (bool, optional): If ``True``, the data loader will copy Tensors
        into CUDA pinned memory before returning them.  If your data elements
        are a custom type, or your :attr:`collate_fn` returns a batch that is a custom type,
        see the example below.
    drop_last (bool, optional): set to ``True`` to drop the last incomplete batch,
        if the dataset size is not divisible by the batch size. If ``False`` and
        the size of dataset is not divisible by the batch size, then the last batch
        will be smaller. (default: ``False``)
    timeout (numeric, optional): if positive, the timeout value for collecting a batch
        from workers. Should always be non-negative. (default: ``0``)
    worker_init_fn (callable, optional): If not ``None``, this will be called on each
        worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as
        input, after seeding and before data loading. (default: ``None``)
"""

# Samples are drawn by the training sampler and merged by S3DISCollate;
# the number of worker processes comes from the configuration.
training_loader = DataLoader(
    training_dataset,
    batch_size=1,
    sampler=training_sampler,
    collate_fn=S3DISCollate,
    num_workers=config.input_threads,
    pin_memory=True,
)

In [21]:
# Validation dataloader: same settings as the training one, fed by the test dataset and its sampler
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    sampler=test_sampler,
    collate_fn=S3DISCollate,
    num_workers=config.input_threads,
    pin_memory=True,
)

In [22]:
# Run the calibration of each sampler on its own dataloader, with detailed output enabled
training_sampler.calibration(
    training_loader,
    verbose=True,
)
test_sampler.calibration(
    test_loader,
    verbose=True,
)

# Debug helpers, disabled by default; uncomment the one you need
# debug_timing(training_dataset, training_loader)
# debug_timing(test_dataset, test_loader)
# debug_upsampling(training_dataset, training_loader)
# debug_upsampling(training_dataset, training_loader)


Starting Calibration (use verbose=True for more details)

Previous calibration found:
Check batch limit dictionary
[91m"potentials_1.500_0.030_6": ?[0m
Check neighbors limit dictionary
[91m"0.030_0.075": ?[0m
[91m"0.060_0.150": ?[0m
[91m"0.120_0.720": ?[0m
[91m"0.240_1.440": ?[0m
[91m"0.480_2.880": ?[0m
Before range10
Begin iter o range10. Before enumerate(dataloader)
neighb_mat.numpy()) [[    0     3  5028 ... 23340 23340 23340]
 [    1     4    32 ... 23340 23340 23340]
 [    2     3    51 ... 23340 23340 23340]
 ...
 [23337 23338 23307 ... 23340 23340 23340]
 [23338 23299 23337 ... 23340 23340 23340]
 [23339 23336 23305 ... 23340 23340 23340]]
neighb_mat.shape[0]) 23340
neighb_mat.numpy()) [[   0 5349    8 ... 6875 6875 6875]
 [   1 4176  104 ... 6875 6875 6875]
 [   2 6367 6373 ... 6875 6875 6875]
 ...
 [6872 2402 3974 ... 6875 6875 6875]
 [6873 5837 3829 ... 6875 6875 6875]
 [6874 2856 6541 ... 6875 6875 6875]]
neighb_mat.shape[0]) 6875
neighb_mat.numpy()) [[   0 1067

In [23]:
# Banner: an empty line, the title, then its underline
print('\nModel Preparation', '*****************', sep='\n')

# Start of the timer; the trainer cell reports the time elapsed since t1.
# NOTE(review): `time` is not imported explicitly in the visible cells; presumably it is
# provided by the wildcard import from datasets.S3DIS — confirm.
t1 = time.time()

# Build the network from the configuration and the label information of the training dataset
net = KPFCNN(
    config,
    training_dataset.label_values,
    training_dataset.ignored_labels,
)

# Optional inspection of the model (disabled):
# debug = False
# if debug:
#     print('\n*************************************\n')
#     print(net)
#     print('\n*************************************\n')
#     for param in net.parameters():
#         if param.requires_grad:
#             print(param.shape)
#     print('\n*************************************\n')
#     print("Model size %i" % sum(param.numel() for param in net.parameters() if param.requires_grad))
#     print('\n*************************************\n')


Model Preparation
*****************
encoder_blocks is calculated as ModuleList(
  (0): SimpleBlock(
    (KPConv): KPConv(radius: 0.07, in_feat: 5, out_feat: 64)
    (batch_norm): BatchNormBlock(in_feat: 64, momentum: 0.020, only_bias: False)
    (leaky_relu): LeakyReLU(negative_slope=0.1)
  )
  (1): ResnetBottleneckBlock(
    (unary1): UnaryBlock(in_feat: 64, out_feat: 32, BN: True, ReLU: True)
    (KPConv): KPConv(radius: 0.07, in_feat: 32, out_feat: 32)
    (batch_norm_conv): BatchNormBlock(in_feat: 32, momentum: 0.020, only_bias: False)
    (unary2): UnaryBlock(in_feat: 32, out_feat: 128, BN: True, ReLU: False)
    (unary_shortcut): UnaryBlock(in_feat: 64, out_feat: 128, BN: True, ReLU: False)
    (leaky_relu): LeakyReLU(negative_slope=0.1)
  )
  (2): ResnetBottleneckBlock(
    (unary1): UnaryBlock(in_feat: 128, out_feat: 32, BN: True, ReLU: True)
    (KPConv): KPConv(radius: 0.07, in_feat: 32, out_feat: 32)
    (batch_norm_conv): BatchNormBlock(in_feat: 32, momentum: 0.020, only_b

In [24]:
# Create the trainer; chosen_chkp is None for a new training, otherwise the checkpoint path to restore
trainer = ModelTrainer(
    net,
    config,
    chkp_path=chosen_chkp,
)

# Report the time elapsed since t1 was taken in the model preparation cell
print(
    'Done in {:.1f}s\n'.format(time.time() - t1)
)

Done in 0.2s



In [25]:
# Banner: an empty line, the title, then its underline
print('\nStart training', '**************', sep='\n')

# Launch the training with the training and validation dataloaders
trainer.train(
    net,
    training_loader,
    test_loader,
    config,
)


Start training
**************
repulsive_loss tensor(0.0835, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(0.7129, device='cuda:0', grad_fn=<AddBackward0>)
repulsive_loss tensor(0.1573, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(1.4125, device='cuda:0', grad_fn=<AddBackward0>)
repulsive_loss tensor(0.2384, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(2.1318, device='cuda:0', grad_fn=<AddBackward0>)
repulsive_loss tensor(0.3288, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(2.8422, device='cuda:0', grad_fn=<AddBackward0>)
repulsive_loss tensor(0.4104, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(3.5504, device='cuda:0', grad_fn=<AddBackward0>)
repulsive_loss tensor(0.4871, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(4.2970, device='cuda:0', grad_fn=<AddBackward0>)
repulsive_loss tensor(0.5626, device='cuda:0', grad_fn=<AddBackward0>) fitting_loss tensor(5.0573, device='cuda:0', grad_fn=<AddB

KeyboardInterrupt: 

In [None]:
#print('Forcing exit now')
#os.kill(os.getpid(), signal.SIGINT)