In [2]:
from pathlib import Path
from types import SimpleNamespace
import tensorflow as tf
import wandb
from wandb.keras import WandbCallback

from data_loader import DataLoader
from utils.callbacks import TensorBoard
from utils.util import *
from utils.args_loader import load_model_config

2022-09-07 11:03:07.601654: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-07 11:03:07.601695: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
# Location of the KITTI subset used in this notebook (absolute path, specific to this machine).
data_path = Path("/mnt/disks/KITTI/small/")
# Shown as the cell output: confirms the directory is present before anything is loaded from it.
data_path.exists()

True

In [4]:
# tf.config.run_functions_eagerly(True)

In [5]:
# Run settings gathered in one namespace. The active cells of this notebook
# read `model`, `config` and `data_path` from it.
# NOTE(review): `train_dir` is referenced only by commented-out callbacks and
# `epochs` is not passed to `model.fit` (which hard-codes `epochs=1`) — confirm intent.
arg = SimpleNamespace(model="squeezesegv2",
                      config="squeezesegv2kitti",
                      data_path=data_path,
                      train_dir="../output",
                      epochs=10)

# Resolve the model name and the config name into a config object and a model instance.
config, model = load_model_config(arg.model, arg.config)
# config["DATA_AUGMENTATION"]=False

2022-09-07 11:03:09.583889: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 11:03:09.584688: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-07 11:03:09.584768: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-09-07 11:03:09.584838: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-09-07 11:03:09.584928: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [6]:
# Both splits go through the same pipeline: write the TFRecord file (skipped
# when it already exists, see the cell output) and then read it back as a dataset.
train_dl, val_dl = [
    DataLoader(split, arg.data_path, config).write_tfrecord_dataset().read_tfrecord_dataset()
    for split in ("train", "val")
]

TFRecord exists at /mnt/disks/KITTI/small/train.tfrecord. Skipping TFRecord writing.
TFRecord exists at /mnt/disks/KITTI/small/val.tfrecord. Skipping TFRecord writing.


In [7]:
# Learning-rate schedule later passed to the Adam optimizer; the initial rate,
# decay interval and decay factor all come from the model config.
# staircase=True applies the decay in discrete jumps every `decay_steps` steps
# rather than continuously.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=config.LEARNING_RATE,
    decay_steps=config.LR_DECAY_STEPS,
    decay_rate=config.LR_DECAY_FACTOR,
    staircase=True)

## Let's log some data

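We can use Weights & Biases (W&B) to inspect the dataset: the helpers below log, for each LiDAR scan, the label, depth and intensity images together with a label-colored 3D point cloud.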

In [30]:
def resize(img, factor=1):
    """Rescale the height and width of an image by ``factor`` and return a NumPy array.

    Args:
        img: image whose first two (statically known) dimensions are height and width.
        factor: multiplier applied to both spatial dimensions; may be fractional.

    Returns:
        The result of ``tf.image.resize`` converted with ``.numpy()``.
    """
    # tf.image.resize expects integer sizes, so round after scaling; for an
    # integer factor this is exactly the original height*factor / width*factor.
    target_size = [int(round(img.shape[0] * factor)), int(round(img.shape[1] * factor))]
    return tf.image.resize(img, target_size).numpy()

In [31]:
def log_input_data(lidar_input, label, class_color_map):
    """Log one LiDAR sample and its ground-truth labels to the active W&B run.

    Four panels are logged: the color-coded label image, the depth image, the
    intensity image and a 3D point cloud colored by label.

    Args:
        lidar_input: tensor for a single scan; channels 0-2 are used as point
            coordinates, channel 3 as intensity and channel 4 as depth.
        label: 2-D tensor of integer class ids, one per pixel.
        class_color_map: array indexed by class id that yields a 3-value color
            (presumably RGB in [0, 1], given the 255 scaling below — TODO confirm).
    """
    # Convert the tensor once instead of once per extracted channel.
    lidar = lidar_input.numpy()
    height, width = label.shape[0], label.shape[1]

    # Look up a color for every pixel, then restore the image layout.
    label_image = class_color_map[label.numpy().reshape(-1)].reshape([height, width, 3])
    depth_image = lidar[:, :, [4]]
    intensity = lidar[:, :, [3]]
    points = lidar[:, :, :3]
    # One row per point: three coordinates followed by the 0-255 label color.
    points_rgb = np.concatenate([points, (255 * label_image).astype(int)], axis=-1).reshape(-1, 6)

    depth_image, label_image, intensity_image = map(resize, [depth_image, label_image, intensity])

    # Every wandb.log call advances the run's step by default, so log all four
    # panels in one call to keep the views of this sample on the same step.
    wandb.log({
        'Images/Label Image': wandb.Image(label_image),
        'Images/Depth Image': wandb.Image(depth_image),
        'Images/Intensity Image': wandb.Image(intensity_image),
        "Images/3D": wandb.Object3D({"type": "lidar/beta", "points": points_rgb}),
    })

In [18]:
# Log the samples of the first validation batch in a dedicated "log_dataset"
# W&B run; using the run as a context manager closes it when the block exits.
with wandb.init(project="small_kitti", entity="av-team", job_type="log_dataset"):
    (lidar_inputs, lidar_masks), labels, weights = val_dl.take(1).get_single_element() # first batch only; original note says 32 images per batch — TODO confirm against the loader config
    for i, (lidar_input, label) in enumerate(zip(lidar_inputs, labels)):
        print(f"logging image: {i}")
        log_input_data(lidar_input, label, model.CLS_COLOR_MAP)

logging image: 0


VBox(children=(Label(value='0.066 MB of 0.374 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.176348…

In [48]:
def log_model_predictions(lidar_input, prediction, label, class_color_map):
    """Log the predicted segmentation of one LiDAR sample to the active W&B run.

    Two panels are logged: the color-coded prediction image and a 3D point
    cloud colored by predicted class.

    Args:
        lidar_input: tensor for a single scan; channels 0-2 are used as point
            coordinates.
        prediction: integer class ids for the scan, as a NumPy array or a
            tensor; it must hold one id per pixel of ``label``.
        label: ground-truth labels, used only for their height and width.
        class_color_map: array indexed by class id that yields a 3-value color
            (presumably RGB in [0, 1], given the 255 scaling below — TODO confirm).
    """
    height, width = label.shape[0], label.shape[1]

    # np.asarray leaves a NumPy array untouched and also accepts the tensor
    # returned by the model, which has no `.reshape` method of its own.
    class_ids = np.asarray(prediction).reshape(-1)
    pred_image = class_color_map[class_ids].reshape([height, width, 3])
    points = lidar_input.numpy()[..., :3]
    # One row per point: three coordinates followed by the 0-255 predicted color.
    points_preds_rgb = np.concatenate([points, (255 * pred_image).astype(int)], axis=-1).reshape(-1, 6)

    pred_image = resize(pred_image)
    # Log both panels in one call so they share the same W&B step.
    wandb.log({
        'Images/Prediction Image': wandb.Image(pred_image),
        "Images/3D_preds": wandb.Object3D({"type": "lidar/beta", "points": points_preds_rgb}),
    })

In [49]:
# Number of samples from the batch that are run through the model and logged.
num_images = 5

(lidar_inputs, lidar_masks), labels, weights = val_dl.take(1).get_single_element() # first batch only; original note says 32 images per batch — TODO confirm against the loader config

In [54]:
# Inspect the shape of the label batch (the execution counter shows this ran after the slicing cell).
labels.shape

TensorShape([5, 64, 1024])

In [50]:
# select a fixed number of inputs
lidar_inputs = lidar_inputs[:num_images, :, :]
lidar_masks = lidar_masks[:num_images, :, :]
labels = labels[:num_images, :, :]
weights = weights[:num_images, :, :]

In [53]:
# NOTE(review): `label` is not defined in this part of the notebook — it is the
# loop variable left over from one of the logging loops, so this cell fails on a fresh kernel.
label.shape

TensorShape([64, 1024])

In [51]:
# forward pass

# The model returns a (probabilities, predictions) pair; only the predictions are logged.
probabilities, predictions = model([lidar_inputs, lidar_masks])
# log_model_predictions calls `.reshape` on each prediction, so convert to NumPy first.
predictions = predictions.numpy()
# NOTE(review): the saved output of this cell ends with
# `IndexError: tuple index out of range`; the cause is not visible here —
# re-run on a fresh kernel to confirm whether it still occurs.
with wandb.init(project="small_kitti", entity="av-team", job_type="log_preds"):
    for i, (lidar_input, prediction, label) in enumerate(zip(lidar_inputs, predictions, labels)):
        log_input_data(lidar_input, label, model.CLS_COLOR_MAP)
        log_model_predictions(lidar_input, prediction, label, model.CLS_COLOR_MAP)

VBox(children=(Label(value='0.066 MB of 0.066 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

IndexError: tuple index out of range

In [12]:
import tensorflow as tf
import numpy


class LogSamplesCallback(WandbCallback):
    """Keras callback that logs sample inputs and model predictions to W&B after each epoch.

    Extends ``WandbCallback``: the parent's epoch-end handling runs first, then
    up to ``num_images`` samples from the first batch of ``dataset`` are passed
    through the model and logged with ``log_input_data`` and
    ``log_model_predictions``.

    Args:
        dataset: batched dataset whose elements unpack as
            ``((lidar_inputs, lidar_masks), labels, weights)``.
        num_images: maximum number of samples logged per epoch (default 5).
        **kwargs: forwarded unchanged to ``WandbCallback``.
    """

    def __init__(self, dataset, num_images=5, **kwargs):
        super().__init__(**kwargs)
        self.dataset = dataset
        self.num_images = num_images

    def on_epoch_end(self, epoch, logs=None):
        """Run the parent's epoch-end logging, then log a few predictions."""
        super().on_epoch_end(epoch, logs=logs)

        # Only the first batch of the dataset is used; the sample weights are not needed here.
        (lidar_inputs, lidar_masks), labels, _ = self.dataset.take(1).get_single_element()

        # The batch may hold fewer samples than requested.
        num_images = min(self.num_images, lidar_inputs.shape[0])

        # Select a fixed number of samples. No trailing commas here: a trailing
        # comma would wrap each tensor in a 1-tuple and break both the forward
        # pass and the per-sample iteration below.
        lidar_inputs = lidar_inputs[:num_images]
        lidar_masks = lidar_masks[:num_images]
        labels = labels[:num_images]

        # Forward pass; the model returns (probabilities, predictions).
        _, predictions = self.model([lidar_inputs, lidar_masks])
        predictions = predictions.numpy()

        # Read the color map from the model attached to this callback rather
        # than from a notebook-level global.
        class_color_map = self.model.CLS_COLOR_MAP
        for lidar_input, prediction, label in zip(lidar_inputs, predictions, labels):
            log_input_data(lidar_input, label, class_color_map)
            log_model_predictions(lidar_input, prediction, label, class_color_map)

In [13]:
# Start the W&B run that the training callback logs to.
# NOTE(review): the earlier logging cells use entity "av-team" while this run
# uses "capecape" — confirm which one is intended.
wandb.init(project="small_kitti", entity="capecape")

# NOTE(review): `dataset` is the constructor argument of `LogSamplesCallback`,
# not a documented `WandbCallback` option — presumably
# `LogSamplesCallback(dataset=val_dl, ...)` was intended; confirm. As written,
# the custom callback is never instantiated.
wandb_callback = WandbCallback(dataset=val_dl, save_model=True)
# tensorboard_callback = TensorBoard(arg.train_dir, val_dl, profile_batch=(200, 202))
# checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(os.path.join(arg.train_dir, "checkpoint"))

# cbs = [wandb_callback, tensorboard_callback, checkpoint_callback]
# Callback list handed to `model.fit`.
cbs = [wandb_callback]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcapecape[0m. Use [1m`wandb login --relogin`[0m to force relogin




In [14]:
# Adam driven by the exponential-decay schedule, with gradient clipping by
# norm at the threshold taken from the model config.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, clipnorm=config.MAX_GRAD_NORM)

# No loss is passed to compile — presumably the model computes its loss
# internally; TODO confirm. `weighted_metrics` is explicitly set to an empty list.
model.compile(optimizer=optimizer, weighted_metrics=[])

In [15]:
# Train on the training split and validate on the validation split, with the
# W&B callback attached.
# NOTE(review): `epochs` is hard-coded to 1 although `arg.epochs` is set to 10
# in the settings cell — confirm which value is intended.
model.fit(train_dl,
        validation_data=val_dl,
        epochs=1,
        callbacks = cbs,
        )



     18/Unknown - 104s 5s/step - loss: 2.2799 - miou: 0.0774

KeyboardInterrupt: 

In [16]:
# Close the W&B run started with `wandb.init(...)` before training.
wandb.finish()

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…