In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

In [2]:
import os
from pathlib import Path

#import cv2

import pandas as pd
import numpy as np

#from sklearn.model_selection import train_test_split
#from pdb import set_trace as st

#import pretrainedmodels as pm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms

#from albumentations import Compose, JpegCompression, CLAHE, RandomRotate90, Transpose, ShiftScaleRotate, \
#        Blur, OpticalDistortion, GridDistortion, HueSaturationValue, Flip, VerticalFlip

from kekas import Keker, DataOwner, DataKek
from kekas.transformations import Transformer, to_torch, normalize
from kekas.metrics import accuracy
from kekas.modules import Flatten, AdaptiveConcatPool2d
from kekas.callbacks import Callback, Callbacks, DebuggerCallback



In [3]:
import fastai_sparse # 3D 

In [4]:
import sparseconvnet as scn

from fastai_sparse import utils, visualize
from fastai_sparse.utils import log
#from fastai_sparse.data import DataSourceConfig, MeshesDataset, SparseDataBunch
#from fastai_sparse.learner import SparseModelConfig, Learner
#from fastai_sparse.callbacks import TimeLogger, SaveModelCallback, CSVLogger
from fastai_sparse.transforms import Transform, Compose

from data import merge_fn

# Experiment environment and system metrics

In [5]:
#import neptune
#from neptune_callbacks import NeptuneMonitor

In [6]:
# Training hyper-parameters used by the optimizer setup and the one-cycle fit further down.
params = dict(
    n_epoch=384,  # passed as `cycle_len` to keker.kek_one_cycle
    max_lr=2.0,   # passed as `max_lr` to keker.kek_one_cycle
    wd=0.0001,    # passed as the optimizer's `weight_decay`
)
params

{'n_epoch': 384, 'max_lr': 2.0, 'wd': 0.0001}

In [7]:
#with open('NEPTUNE_API_TOKEN.txt','r') as f:
#    NEPTUNE_API_TOKEN = f.readline().splitlines()[0]
#    
#neptune.init(api_token=NEPTUNE_API_TOKEN,
#             project_qualified_name='roma-goodok/fastai-sparse-scannet')
#
## create experiment in the project defined above
#exp = neptune.create_experiment(params=params)
#print(exp.id)
#exp.append_tag('study')
#exp.append_tag('kekas')
#exp.append_tag('unet24')
#exp.append_tag('1cycle')

In [8]:
# Use the id of a tracking experiment when `exp` exists in this kernel; otherwise
# (e.g. the experiment-creation cell above is commented out, so `exp` is undefined)
# fall back to a fixed local run name.
try:
    experiment_name = exp.id
except Exception:
    experiment_name = "kekas_04"

print("Experiment:", experiment_name)

# All artefacts of this run go under logdir/<experiment_name>; create it if missing.
logdir = os.path.join('logdir', experiment_name)
os.makedirs(logdir, exist_ok=True)
print("Logdir:", logdir)

Experiment: kekas_04
Logdir: logdir/kekas_04


In [9]:
utils.watermark(pandas=True, kekas=True)

virtualenv:     (aseg_torch1) 
python:         3.6.8
nvidia driver:  b'384.130'
nvidia cuda:    9.0, V9.0.176
cudnn:          7.1.4
torch:          1.0.0
pandas:         0.24.2
kekas:          0.1.17
fastai:         1.0.48
fastai_sparse:  0.0.4.dev0


In [10]:
!git log1 -n3

[33m3cfbe93[m mIoU callbacks implemented (reproducing of FSCSCN-23 started)
[33m5502368[m mIoU callbacks implemented (draft)
[33md5c7ab0[m kekas: train to reproduce FASSCN-23 :completed


In [11]:
!nvidia-smi

Thu Apr 25 00:11:47 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:28:00.0  On |                  N/A |
| 29%   57C    P0    69W / 250W |    722MiB / 11163MiB |      1%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:29:00.0 Off |                  N/A |
|  0%   50C    P2    54W / 250W |     11MiB / 11172MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                            

In [12]:
!lscpu | grep "Model"

Model name:            AMD Ryzen 7 1700 Eight-Core Processor


## Jupyter notebook display options

In [13]:
utils.wide_notebook()
# Change `if False` to `if True` to switch off interactive mode and save visualisations as screenshots.
# For rendering, run this command in a terminal:    `chromium-browser --remote-debugging-port=9222`
if False:
    visualize.options.interactive = False
    visualize.options.save_images = True
    visualize.options.verbose = True
    visualize.options.filename_pattern_image = Path('images', experiment_name, 'fig_{fig_number}')
else:
    # Reset every dataclass field of the options object to its declared default,
    # so a previous run with screenshots enabled does not leak into this one.
    for key, o in visualize.options.__dataclass_fields__.items():
        setattr(visualize.options, key, o.default)

# Dataset creation

## Downloading

See how to download and preprocess the data at the following link from the fastai_sparse library: https://github.com/goodok/fastai_sparse/tree/master/examples/scannet/data

In [14]:
SOURCE_DIR = Path('data', 'scannet_merged_ply')
assert SOURCE_DIR.exists(), "Run prepare_data.ipynb"

definition_of_spliting_dir = Path('data', 'ScanNet_Tasks_Benchmark')
assert definition_of_spliting_dir.exists()


os.listdir(SOURCE_DIR / 'scene0000_01')

['scene0000_01.merged.ply']

In [15]:
def find_files(path, ext='merged.ply'):
    """Return the files one directory level below `path` whose names end with `ext`.

    Args:
        path: `pathlib.Path` of the root directory; every immediate
            sub-directory is searched (pattern ``path/*/*<ext>``).
        ext: file-name suffix to match.

    Returns:
        Sorted list of matching file names as strings (empty if nothing matches).
    """
    # `glob` is not imported by the visible import cells, so calling this
    # function used to fail with NameError; import it locally.
    import glob

    pattern = str(path / '*' / ('*' + ext))
    # Sort so the result does not depend on the file-system listing order.
    return sorted(glob.glob(pattern))

def get_df_list(verbose=0):
    """Load the train / valid / test split definitions as DataFrames.

    Each split file under `definition_of_spliting_dir` is read as a headerless,
    single-column CSV of example ids.

    Args:
        verbose: when truthy, print the row count of every split and the total.

    Returns:
        dict mapping 'train', 'valid' and 'test' to a DataFrame with the columns
        `example_id`, `datatype` (the split name) and `subdir` (a copy of
        `example_id`), sorted by `example_id`.
    """
    splits = ('train', 'valid', 'test')
    split_files = {
        'train': definition_of_spliting_dir / 'scannetv1_train.txt',
        'valid': definition_of_spliting_dir / 'scannetv1_val.txt',
        'test': definition_of_spliting_dir / 'scannetv1_test.txt',
    }

    # Check that every split file exists before reading any of them;
    # the assertion message names the missing split.
    for datatype in splits:
        assert split_files[datatype].exists(), datatype

    dfs = {}
    for datatype in splits:
        frame = pd.read_csv(split_files[datatype], header=None, names=['example_id'])
        frame = frame.assign(datatype=datatype)
        frame = frame.assign(subdir=frame.example_id).sort_values('example_id')
        dfs[datatype] = frame

        if verbose:
            print(f"{datatype:5} counts: {len(frame):>4}")

    if verbose:
        total = sum(len(dfs[datatype]) for datatype in splits)
        print(f"total:     {total}")
    return dfs

In [16]:
df_list = get_df_list(verbose=1)

train counts: 1045
valid counts:  156
test  counts:  312
total:     1513


In [17]:
df_list['train'].head()

Unnamed: 0,example_id,datatype,subdir
827,scene0000_00,train,scene0000_00
828,scene0000_01,train,scene0000_01
829,scene0000_02,train,scene0000_02
496,scene0001_00,train,scene0001_00
497,scene0001_01,train,scene0001_01


In [18]:
os.listdir(os.path.join(SOURCE_DIR, 'scene0000_00'))

['scene0000_00.merged.ply']

# Augmentation

In [19]:
from fastai_sparse.data_items  import MeshItem, PointsItem
from fastai_sparse.learner import SparseModelConfig
import transforms as T

In [20]:
# First we need a reader function that defines how one example (here a mesh file, not an image) is opened
def reader_fn(i, row):
    """Load the merged mesh that belongs to one row of a split DataFrame.

    Args:
        i: position of the row, supplied by the caller; not used here.
        row: record providing the `subdir` and `example_id` fields.

    Returns:
        The `MeshItem` read from `SOURCE_DIR/<subdir>/<example_id>.merged.ply`,
        with labels taken from the file's `label` field.
    """
    mesh_path = SOURCE_DIR / row['subdir'] / f'{row["example_id"]}.merged.ply'
    return MeshItem.from_file(mesh_path, label_field='label')

In [21]:
m = reader_fn(0, df_list['train'].iloc[0])
m.describe()

MeshItem (scene0000_00.merged.ply)
vertices:                shape: (81369, 3)            dtype: float64        min:   -0.01657,  max:    8.74040,  mean:    3.19051
faces:                   shape: (153587, 3)           dtype: int64          min:          0,  max:      81368,  mean: 40549.68796
colors:                  shape: (81369, 4)            dtype: uint8          min:    1.00000,  max:  255.00000,  mean:  145.80430
labels:                  shape: (81369,)              dtype: uint16         min:    0.00000,  max:  230.00000,  mean:   12.97057
Colors from vertices
Labels from vertices


In [22]:
# Lookup table indexed by raw label id: the 20 relevant raw ids are mapped to
# 0..19 in the order listed below, every other id (0..2999) is mapped to -100.
remapper = np.full(3000, -100, dtype=np.int32)
for i, x in enumerate((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39)):
    remapper[x] = i

In [23]:
# Preprocessing shared by both the train and the validation pipeline
# (get_transforms puts it in front of the augmentations).
PRE_TFMS = [T.to_points_cloud(method='vertices', normals=False), 
            T.remap_labels(remapper=remapper, inplace=False),
            T.colors_normalize(),
            T.normalize_spatial(),
           ]

# Spatial scale factor; also used to derive the `gran` / `mag` arguments of the elastic transforms.
_scale = 20

# Augmentations of the training pipeline.
# 4096 is the same value that is passed as `spatial_size` to SparseModelConfig.
AUGS_TRAIN = [
    T.noise_affine(amplitude=0.1),
    T.flip_x(p=0.5),
    T.scale(scale=_scale),
    T.rotate_XY(),
    
    T.elastic(gran=6 * _scale // 50, mag=40 * _scale / 50),
    T.elastic(gran=20 * _scale // 50, mag=160 * _scale / 50),
    
    T.specific_translate(full_scale=4096),
    T.crop_points(low=0, high=4096),
    T.colors_noise(amplitude=0.1),
]

# Augmentations of the validation pipeline: no elastic distortion, but an
# extra translate / rand_translate pair.
# NOTE(review): noise_affine, flip_x(p=0.5), rotate_XY, rand_translate and colors_noise
# look randomised, so validation metrics presumably vary between runs - confirm this is intended.
AUGS_VALID = [
    T.noise_affine(amplitude=0.1),
    T.flip_x(p=0.5),
    T.scale(scale=_scale),
    T.rotate_XY(),

    T.translate(offset=4096 / 2),
    T.rand_translate(offset=(-2, 2, 3)),  # low, high, dimension
    
    T.specific_translate(full_scale=4096),
    T.crop_points(low=0, high=4096),
    T.colors_noise(amplitude=0.1),
        
    ]

# Final stage of both pipelines: build the feature matrix (colors only) and voxelise.
SPARSE_TFMS = [
    T.merge_features(ones=False, colors=True, normals=False),
    T.to_sparse_voxels(),
]


# reimplement to_torch
def _to_torch(x):
    x.coords 
    x.features
    x.labels
    
    return x

# to_torch = Transform(_to_torch)



In [24]:
from data import merge_fn
merge_fn

functools.partial(<function SparseDataBunch.merge_fn at 0x7f819b1fe400>, keys_lists=['id', 'labels_raw', 'filtred_mask', 'random_seed', 'num_points'], separate_labels=False)

In [25]:
def get_transforms(dataset_key):
    """Build the transform pipelines for training and validation.

    Args:
        dataset_key: accepted for the caller's convenience; not used here.

    Returns:
        Tuple `(train, valid)` of `Compose` objects, built from
        `PRE_TFMS + AUGS_TRAIN + SPARSE_TFMS` and
        `PRE_TFMS + AUGS_VALID + SPARSE_TFMS` respectively.
    """
    train_pipeline = Compose(PRE_TFMS + AUGS_TRAIN + SPARSE_TFMS)
    valid_pipeline = Compose(PRE_TFMS + AUGS_VALID + SPARSE_TFMS)
    return train_pipeline, valid_pipeline

## DataKeks creation

In [26]:
train_df = df_list['train']#.head(96)
val_df = df_list['valid']#.head(96)

# now let's create DataKeks
train_tfms, val_tfms = get_transforms("mesh")

train_dk = DataKek(df=train_df, reader_fn=reader_fn, transforms=train_tfms)
val_dk = DataKek(df=val_df, reader_fn=reader_fn, transforms=val_tfms)

In [27]:
b = train_dk[0]
b.describe()
b.show()

id: scene0000_00.merged
coords                   shape: (81369, 3)            dtype: int64          min:        693,  max:       2814,  mean: 1834.13710
features                 shape: (81369, 3)            dtype: float32        min:   -1.05092,  max:    1.06730,  mean:   -0.15182
x                        shape: (81369,)              dtype: int64          min:       2636,  max:       2814,  mean: 2722.57990
y                        shape: (81369,)              dtype: int64          min:        693,  max:        867,  mean:  776.14952
z                        shape: (81369,)              dtype: int64          min:       1966,  max:       2049,  mean: 2003.68189
labels                   shape: (81369,)              dtype: int64          min:       -100,  max:         17,  mean:  -48.51506
voxels: 49725
points / voxels: 1.6363800904977375


VBox(children=(Figure(camera=PerspectiveCamera(fov=46.0, position=(0.0, 0.0, 2.0), quaternion=(0.0, 0.0, 0.0, …

## DataLoaders

In [28]:
# and DataLoaders
#batch_size = 32
#workers = 

train_dl = DataLoader(train_dk, batch_size=32, num_workers=8, shuffle=True, drop_last=True, collate_fn=merge_fn, pin_memory=False)
val_dl = DataLoader(val_dk, batch_size=2, num_workers=2, shuffle=False, collate_fn=merge_fn, pin_memory=False)

#train_dl = DataLoader(train_dk, batch_size=12, num_workers=8, shuffle=True, drop_last=True, collate_fn=merge_fn, pin_memory=False)
#val_dl = DataLoader(val_dk, batch_size=2, num_workers=2, shuffle=False, collate_fn=merge_fn, pin_memory=False)

In [29]:
train_dl.pin_memory

False

In [30]:
val_dl.pin_memory

False

In [28]:
for i, batch in enumerate(train_dl):
    break

In [29]:
type(batch)

dict

In [30]:
batch

{'coords': tensor([[3103, 1525, 1590,    0],
         [3103, 1525, 1591,    0],
         [3103, 1526, 1591,    0],
         ...,
         [  44, 3057, 3422,   31],
         [  44, 3057, 3422,   31],
         [  44, 3057, 3422,   31]]),
 'features': tensor([[-0.0393, -0.1166, -0.2235],
         [-0.0549, -0.1323, -0.2549],
         [-0.0706, -0.1479, -0.3020],
         ...,
         [-0.4921, -0.5751, -0.4440],
         [-0.4999, -0.5751, -0.4597],
         [-0.2411, -0.3242, -0.1930]]),
 'labels': tensor([0, 0, 0,  ..., 0, 0, 0]),
 'id': ['scene0470_00.merged',
  'scene0662_00.merged',
  'scene0235_00.merged',
  'scene0654_01.merged',
  'scene0576_02.merged',
  'scene0368_00.merged',
  'scene0360_00.merged',
  'scene0099_01.merged',
  'scene0254_00.merged',
  'scene0031_01.merged',
  'scene0511_01.merged',
  'scene0150_01.merged',
  'scene0121_02.merged',
  'scene0501_01.merged',
  'scene0128_00.merged',
  'scene0369_02.merged',
  'scene0649_00.merged',
  'scene0569_00.merged',
  'scen

In [31]:
len(batch['coords']), sum(batch['num_points'])

(4186851, 4186851)

# Model

In [31]:
# spatial_size  is full_scale
model_config = SparseModelConfig(spatial_size=4096, num_classes=20, num_input_features=3, mode=4,
                                 m=16, num_planes_coeffs=[1, 2, 3, 4, 5, 6, 7])
model_config

SparseModelConfig;
   spatial_size: 4096
   dimension: 3
   block_reps: 1
   m: 16
   num_planes: [16, 32, 48, 64, 80, 96, 112]
   residual_blocks: False
   num_classes: 20
   num_input_features: 3
   mode: 4
   downsample: [2, 2]
   bias: False

In [32]:
class Model(nn.Module):
    """SparseConvNet U-Net followed by a linear classification layer.

    Args:
        cfg: configuration object providing `dimension`, `spatial_size`, `mode`,
            `num_input_features`, `m`, `block_reps`, `num_planes`,
            `residual_blocks`, `downsample`, `bias` and `num_classes`.
    """

    def __init__(self, cfg):
        super().__init__()
        dim = cfg.dimension
        layers = [
            scn.InputLayer(dim, cfg.spatial_size, mode=cfg.mode),
            scn.SubmanifoldConvolution(dim, nIn=cfg.num_input_features, nOut=cfg.m, filter_size=3, bias=cfg.bias),
            scn.UNet(dim, cfg.block_reps, cfg.num_planes, residual_blocks=cfg.residual_blocks, downsample=cfg.downsample),
            scn.BatchNormReLU(cfg.m),
            scn.OutputLayer(dim),
        ]
        self.sparseModel = scn.Sequential(*layers)
        # Maps the `m` features produced by the sparse network to class scores.
        self.linear = nn.Linear(cfg.m, cfg.num_classes)

    def forward(self, xb):
        """Run the network on a batch dict holding `coords` and `features`.

        Returns:
            Output of the linear layer applied to the sparse network's output.
        """
        sparse_input = [xb['coords'], xb['features']]
        return self.linear(self.sparseModel(sparse_input))

model = Model(model_config)

## Initialization

In [33]:
# the three whales of your pipeline are: the data, the model and the loss (hi, Jeremy)

# the data is represented in Kekas by DataOwner. It is a namedtuple with three fields:
# 'train_dl', 'val_dl', 'test_dl'
# For the training process we need at least two of them, and we can skip 'test_dl' for now,
# so we initialize it with `None`.
dataowner = DataOwner(train_dl, val_dl, None)

# the model is just a pytorch nn.Module; it is the `model` instance created before

# loss or criterion is also a pytorch nn.Module. For multiloss scenarios it can be a list of nn.Modules
# for our simple example let's use the standard cross entropy criterion.
# Its default `ignore_index` is -100, the same value the remapper assigns to ignored classes.
criterion = nn.CrossEntropyLoss()

In [34]:
# Also we need to specify, what model will do with each batch of data on each iteration
# We should define a `step_fn` function
# The code below repeats a `keker.default_step_fn` code to provide you with a concept of step function

def step_fn(model: torch.nn.Module,
            batch: torch.Tensor) -> torch.Tensor:
    """Run one forward pass: feed the whole batch to the model.

    Note: in this notebook the batch produced by the DataLoader is a dict
    (the model indexes it by 'coords' and 'features'), despite the annotation.

    Args:
        model: the pytorch module to pass the input to
        batch: the batch of data from the DataLoader, passed through untouched

    Returns:
        Whatever `model(batch)` returns
    """
    return model(batch)

In [35]:
import sys
import warnings
from fastai.torch_core import add_metrics
from fastai_sparse.metrics import confusion_matrix, iou_per_class_from_cm

from collections import defaultdict
from kekas.utils import get_opt_lr, get_pbar, DotDict, to_numpy


class IouMeanFiltred(Callback):
    """
    Calc IoU by classes, filter incorrect classes (-100), then mean.

    A confusion matrix is accumulated over all batches of a train or validation
    epoch. At the end of the epoch the per-class IoU is derived from it and its
    mean is published as `<mode>_<name>` (e.g. `train_iouf`, `val_iouf`) in
    `state.core.epoch_metrics`. Any other mode (e.g. `test`) is ignored.

    Args:
        target_key: key of the targets in the batch dict. Stored for interface
            compatibility; the labels that are scored come from `labels_raw`.
        preds_key: key of the predictions in `state.core.out`.
        n_classes: number of classes (side of the confusion matrix).
        name: suffix of the published metric name.
        epsilon: stored on the instance; not used by the hooks below.
        **kwargs: accepted and ignored.
    """

    _order = -19  # Needs to run before the recorder

    # kekas mode name -> key of the accumulator in `self._d`
    _MODE_KEYS = {'train': 'train', 'val': 'valid'}

    def __init__(self, target_key: str, preds_key: str, n_classes, name='iouf', epsilon=sys.float_info.epsilon, **kwargs):
        self.target_key = target_key
        self.preds_key = preds_key
        self.n_classes = n_classes
        self.epsilon = epsilon
        self.name = name

        # for kekas
        self.pbar_metrics = None

    def on_epoch_begin(self, epoch: int, epochs: int, state: DotDict) -> None:
        """Reset the accumulators; create one only for train / val modes."""
        self._d = {}

        key = self._MODE_KEYS.get(state.core.mode)
        if key is not None:
            self._d[key] = {
                # becomes True once at least one example contributed to `cm`
                'runned': False,
                'cm': np.zeros(shape=(self.n_classes, self.n_classes), dtype=np.uint64),
            }

        # for kekas
        self.pbar_metrics = defaultdict(float)

    def on_batch_end(self, i: int, state: DotDict) -> None:
        """Add the confusion matrix of the current batch to the epoch total."""
        key = self._MODE_KEYS.get(state.core.mode)
        if key is None:
            # Modes other than train / val have no accumulator (see on_epoch_begin).
            # Previously they fell through to self._d['valid'] and raised KeyError.
            return
        d = self._d[key]

        xb = state.core.batch
        predictions = state.core.out[self.preds_key]

        # Row offsets of every example inside the concatenated predictions.
        num_points_actual_cumsum = np.cumsum([0] + xb['num_points'])

        cm_batch = np.zeros(shape=(self.n_classes, self.n_classes), dtype=np.uint64)

        # For each example of the batch: extract its predictions, argmax them,
        # scatter them back to the raw points and accumulate the confusion matrix.
        for k in range(len(xb['id'])):
            labels_raw = xb['labels_raw'][k]
            filtred_mask = xb['filtred_mask'][k]
            num_points_raw = len(labels_raw)

            # extract prediction of example
            start = num_points_actual_cumsum[k]
            end = num_points_actual_cumsum[k + 1]
            example_preds_actual = predictions[start:end]

            # Raw points that are not selected by `filtred_mask` get no network
            # output; they keep the fill value `n_classes - 1`.
            example_y_pred = np.ones(shape=(num_points_raw), dtype=np.int32) * (self.n_classes - 1)
            example_y_pred[filtred_mask] = example_preds_actual.argmax(1)

            # keep only the points with a valid (non-negative) label
            indexer = labels_raw >= 0
            y_pred = example_y_pred[indexer]
            y_true = labels_raw[indexer]

            if len(y_pred) == 0:
                warnings.warn(f"Wrong example is found: all `labels_raw` < 0. Id={xb['id'][k]}")
            else:
                cm_batch += confusion_matrix(y_pred, y_true, self.n_classes)
                d['runned'] = True

        d['cm'] += cm_batch

    def on_epoch_end(self, epoch: int, state: DotDict) -> None:
        """Compute the mean IoU of the epoch and publish it to the epoch metrics."""
        key = self._MODE_KEYS.get(state.core.mode)
        if key is None:
            return
        d = self._d[key]

        if d['runned']:
            d['iou_per_class'] = iou_per_class_from_cm(d['cm'])
            d['iou'] = np.mean(d['iou_per_class'])
        else:
            # no example with valid labels was seen during the epoch
            d['cm'] = None
            d['iou_per_class'] = None
            d['iou'] = 0

        self.pbar_metrics[state.core.mode + "_" + self.name] = d['iou']

        if state.core.epoch_metrics is None:
            state.core.epoch_metrics = self.pbar_metrics.copy()
        else:
            state.core.epoch_metrics.update(self.pbar_metrics)

        
                              
                              


In [36]:
keker_callbacks = []
keker_callbacks.append(IouMeanFiltred(target_key='labels', preds_key='preds', n_classes=model_config.num_classes))

In [37]:
# the previous preparations were mostly out of scope of the Kekas library (except the DataKeks creation)
# Now let's dive into kekas a little bit

# first, we create a Keker - the core Kekas class, that provides all the keks for your pipeline
keker = Keker(model=model,
              dataowner=dataowner,
              criterion=criterion,
              step_fn=step_fn,                    # previously defined step function
              target_key="labels",                # key of the targets in the batch dict (see the 'labels' entry of the batch printed above)
              opt=torch.optim.SGD,                # optimizer class. if not specified,
                                                  # SGD is used by default
              opt_params={"weight_decay": params['wd']},  # optimizer kwargs in dict format (optional too)
              callbacks=keker_callbacks
             )

# Actually, there are a lot of params for kekers, but they are out of scope of this example;
# you can read about them in Keker's docstring (but who really reads the docs, huh?)

In [38]:
# before the start of the finetuning procedure let's freeze all the layers except the last one - the head
# the `freeze` method is mostly inspired (or stolen) from fastai
# but you should define a model's attribute to deal with
# for example, our model is actually model.net, so we need to specify the 'net' attr
# also this method does not freezes batchnorm layers by default. To change this set `freeze_bn=True`

#keker.freeze(model_attr="net")

In [39]:
len(dataowner.train_dl), len(dataowner.val_dl)

(32, 78)

# Stage 1

## Learning Rate finding

In [40]:
# let's find an 'optimal' learning rate with learning rate find procedure
# for details please see the fastai course and this articles:
# https://arxiv.org/abs/1803.09820
# https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html

# NOTE: this is an optional step and you can skip it and use your favorite learning rate

# you MUST specify the logdir to see graphics
# keker will write a tensorboard logs into this folder
# to see them start a tensorboard with `--logdir /path/to/logdir`
# OR you can use keker.plot_kek_lr method (see cell below)
keker.kek_lr(final_lr=0.1, logdir="logdir_stage01_lr")

Epoch 1/1:  47% 15/32 [02:00<01:25,  5.05s/it, loss=3.0701]


Wrong example is found: all `labels_raw` < 0. Id=scene0509_00.merged



Epoch 1/1: 100% 32/32 [03:08<00:00,  3.78s/it, loss=3.0091]
End of LRFinder



In [41]:
keker.plot_kek_lr(logdir="logdir_stage01_lr")

## Fit

In [42]:
# Ok, now let's start training!
# It's as simple as:
keker.kek_one_cycle(cycle_len=params['n_epoch'], 
                    max_lr=params['max_lr'],  
                    div_factor=1000, increase_fraction=0.1, 
                    logdir="logdir_stage01")

Epoch 1/384:  47% 15/32 [02:00<01:29,  5.26s/it, loss=2.7011]


Wrong example is found: all `labels_raw` < 0. Id=scene0509_00.merged



Epoch 1/384: 100% 32/32 [03:33<00:00,  3.84s/it, loss=1.9305, train_iouf=0.0378, val_iouf=0.0549, val_loss=1.5740]
Epoch 2/384: 100% 32/32 [03:33<00:00,  4.17s/it, loss=1.4237, train_iouf=0.0589, val_iouf=0.0696, val_loss=1.2392]
Epoch 3/384: 100% 32/32 [03:33<00:00,  4.74s/it, loss=1.1830, train_iouf=0.0679, val_iouf=0.0738, val_loss=1.0428]
Epoch 4/384: 100% 32/32 [03:32<00:00,  4.06s/it, loss=1.1050, train_iouf=0.0739, val_iouf=0.0898, val_loss=0.9756]
Epoch 5/384: 100% 32/32 [03:31<00:00,  3.98s/it, loss=1.0154, train_iouf=0.0867, val_iouf=0.0914, val_loss=0.9202]
Epoch 6/384: 100% 32/32 [03:33<00:00,  4.04s/it, loss=0.9861, train_iouf=0.0906, val_iouf=0.0931, val_loss=0.8880]
Epoch 7/384: 100% 32/32 [03:29<00:00,  3.75s/it, loss=0.9415, train_iouf=0.0928, val_iouf=0.1004, val_loss=0.8781]
Epoch 8/384: 100% 32/32 [03:32<00:00,  4.08s/it, loss=0.9564, train_iouf=0.0945, val_iouf=0.1011, val_loss=0.8281]
Epoch 9/384: 100% 32/32 [03:33<00:00,  3.85s/it, loss=0.8711, train_iouf=0.0962,

Epoch 71/384: 100% 32/32 [03:32<00:00,  4.04s/it, loss=0.6560, train_iouf=0.2187, val_iouf=0.1941, val_loss=0.6241]
Epoch 72/384: 100% 32/32 [03:30<00:00,  4.11s/it, loss=0.6779, train_iouf=0.2180, val_iouf=0.2260, val_loss=0.6335]
Epoch 73/384: 100% 32/32 [03:31<00:00,  3.74s/it, loss=0.6927, train_iouf=0.2199, val_iouf=0.2414, val_loss=0.6007]
Epoch 74/384: 100% 32/32 [03:33<00:00,  4.20s/it, loss=0.6508, train_iouf=0.2311, val_iouf=0.2512, val_loss=0.5650]
Epoch 75/384: 100% 32/32 [03:32<00:00,  3.93s/it, loss=0.6608, train_iouf=0.2273, val_iouf=0.2139, val_loss=0.7668]
Epoch 76/384: 100% 32/32 [03:33<00:00,  4.03s/it, loss=0.6701, train_iouf=0.2215, val_iouf=0.2126, val_loss=0.6831]
Epoch 77/384: 100% 32/32 [03:33<00:00,  4.02s/it, loss=0.6881, train_iouf=0.2299, val_iouf=0.2131, val_loss=0.6745]
Epoch 78/384: 100% 32/32 [03:33<00:00,  4.01s/it, loss=0.6684, train_iouf=0.2217, val_iouf=0.1994, val_loss=0.6321]
Epoch 79/384: 100% 32/32 [03:34<00:00,  3.91s/it, loss=0.6450, train_iou

Epoch 141/384: 100% 32/32 [03:34<00:00,  4.02s/it, loss=0.6164, train_iouf=0.2669, val_iouf=0.2671, val_loss=0.5930]
Epoch 142/384: 100% 32/32 [03:34<00:00,  4.01s/it, loss=0.6160, train_iouf=0.2702, val_iouf=0.2517, val_loss=0.6389]
Epoch 143/384: 100% 32/32 [03:35<00:00,  3.84s/it, loss=0.6004, train_iouf=0.2707, val_iouf=0.2888, val_loss=0.5749]
Epoch 144/384: 100% 32/32 [03:30<00:00,  4.16s/it, loss=0.6057, train_iouf=0.2707, val_iouf=0.2828, val_loss=0.5978]
Epoch 145/384: 100% 32/32 [03:33<00:00,  4.03s/it, loss=0.6124, train_iouf=0.2636, val_iouf=0.2450, val_loss=0.5935]
Epoch 146/384: 100% 32/32 [03:33<00:00,  3.83s/it, loss=0.6167, train_iouf=0.2646, val_iouf=0.2760, val_loss=0.6096]
Epoch 147/384: 100% 32/32 [03:32<00:00,  3.92s/it, loss=0.6256, train_iouf=0.2608, val_iouf=0.2301, val_loss=0.7829]
Epoch 148/384: 100% 32/32 [03:34<00:00,  3.86s/it, loss=0.6146, train_iouf=0.2744, val_iouf=0.2829, val_loss=0.5371]
Epoch 149/384: 100% 32/32 [03:33<00:00,  3.79s/it, loss=0.6100, 

Epoch 211/384: 100% 32/32 [03:33<00:00,  3.80s/it, loss=0.5628, train_iouf=0.2985, val_iouf=0.2517, val_loss=0.5532]
Epoch 212/384: 100% 32/32 [03:31<00:00,  3.72s/it, loss=0.5867, train_iouf=0.2911, val_iouf=0.2934, val_loss=0.5425]
Epoch 213/384: 100% 32/32 [03:31<00:00,  3.94s/it, loss=0.5977, train_iouf=0.2895, val_iouf=0.2565, val_loss=0.6089]
Epoch 214/384: 100% 32/32 [03:35<00:00,  3.77s/it, loss=0.5973, train_iouf=0.2793, val_iouf=0.2935, val_loss=0.5231]
Epoch 215/384: 100% 32/32 [03:32<00:00,  4.34s/it, loss=0.5828, train_iouf=0.2890, val_iouf=0.2928, val_loss=0.5780]
Epoch 216/384: 100% 32/32 [03:32<00:00,  4.14s/it, loss=0.5806, train_iouf=0.2819, val_iouf=0.2506, val_loss=0.6104]
Epoch 217/384: 100% 32/32 [03:30<00:00,  4.14s/it, loss=0.5922, train_iouf=0.2807, val_iouf=0.3079, val_loss=0.5370]
Epoch 218/384: 100% 32/32 [03:33<00:00,  3.83s/it, loss=0.5655, train_iouf=0.3029, val_iouf=0.3003, val_loss=0.5607]
Epoch 219/384: 100% 32/32 [03:32<00:00,  4.18s/it, loss=0.5887, 

Epoch 281/384: 100% 32/32 [03:34<00:00,  3.97s/it, loss=0.5465, train_iouf=0.3170, val_iouf=0.3132, val_loss=0.5004]
Epoch 282/384: 100% 32/32 [03:33<00:00,  4.12s/it, loss=0.5582, train_iouf=0.3159, val_iouf=0.3251, val_loss=0.5567]
Epoch 283/384: 100% 32/32 [03:30<00:00,  3.91s/it, loss=0.5439, train_iouf=0.3213, val_iouf=0.3098, val_loss=0.4943]
Epoch 284/384: 100% 32/32 [03:33<00:00,  4.00s/it, loss=0.5146, train_iouf=0.3229, val_iouf=0.3062, val_loss=0.5160]
Epoch 285/384: 100% 32/32 [03:34<00:00,  3.96s/it, loss=0.5530, train_iouf=0.3144, val_iouf=0.3206, val_loss=0.5515]
Epoch 286/384: 100% 32/32 [03:34<00:00,  3.92s/it, loss=0.5333, train_iouf=0.3172, val_iouf=0.3134, val_loss=0.5342]
Epoch 287/384: 100% 32/32 [03:32<00:00,  4.16s/it, loss=0.5320, train_iouf=0.3233, val_iouf=0.3119, val_loss=0.5089]
Epoch 288/384: 100% 32/32 [03:32<00:00,  4.17s/it, loss=0.5351, train_iouf=0.3302, val_iouf=0.3288, val_loss=0.5224]
Epoch 289/384: 100% 32/32 [03:34<00:00,  3.95s/it, loss=0.5184, 

Epoch 351/384: 100% 32/32 [03:33<00:00,  4.07s/it, loss=0.4712, train_iouf=0.3784, val_iouf=0.4037, val_loss=0.4933]
Epoch 352/384: 100% 32/32 [03:33<00:00,  3.81s/it, loss=0.4748, train_iouf=0.3775, val_iouf=0.3830, val_loss=0.4790]
Epoch 353/384: 100% 32/32 [03:32<00:00,  3.90s/it, loss=0.4680, train_iouf=0.3863, val_iouf=0.3842, val_loss=0.4660]
Epoch 354/384: 100% 32/32 [03:31<00:00,  4.03s/it, loss=0.4519, train_iouf=0.3791, val_iouf=0.3697, val_loss=0.4882]
Epoch 355/384: 100% 32/32 [03:31<00:00,  4.00s/it, loss=0.4469, train_iouf=0.3888, val_iouf=0.3886, val_loss=0.4567]
Epoch 356/384: 100% 32/32 [03:32<00:00,  4.23s/it, loss=0.4545, train_iouf=0.3845, val_iouf=0.3704, val_loss=0.4603]
Epoch 357/384: 100% 32/32 [03:30<00:00,  3.86s/it, loss=0.4474, train_iouf=0.3920, val_iouf=0.3814, val_loss=0.4662]
Epoch 358/384: 100% 32/32 [03:33<00:00,  3.82s/it, loss=0.4495, train_iouf=0.3852, val_iouf=0.3654, val_loss=0.4863]
Epoch 359/384: 100% 32/32 [03:30<00:00,  3.72s/it, loss=0.4512, 

In [43]:
#keker.plot_kek(logdir=logdir, metrics=["val_iouf", "train_iouf"], step="epoch")

In [44]:
keker.plot_kek(logdir="logdir_stage01", step="batch")