# Open-Neural-APC as a TensorFlow 2 implementation

In [1]:
# install and update required packages
!pip install --upgrade pip -q
# if you use tensorflow in a docker container:
#   comment 'tensorflow' from the 'requirements.txt' with a '#' otherwise cudnn lstm won't be available
!pip install --upgrade -r requirements.txt -q

# just for the video output/investigation (not necessary for training/testing)
!pip install --upgrade -r optionals.txt -q
# this is also optional, since it is needed for opencv
!apt-get -qq update && apt-get -qq install -y libsm6 libxext6 libxrender1 libfontconfig1

In [9]:
# Configure the TensorFlow/CUDA environment first: these variables have to be set
# before tensorflow is imported for the first time to take effect.
# NOTE(review): 'utils' presumably imports tensorflow itself (it provides allow_growth),
#    which is why this now happens before that import -- confirm.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# switching between the gpus
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from utils import loadConfig, allow_growth
from tqdm import tqdm
# read the config file
# it includes more or less all hyperparameters used in the model and the preprocessing/training step
data_parameter, model_parameter, training_parameter = loadConfig()
# Since I saved the data as "uint8" and the sensor is usually placed at a height of 2 meters
#    the resolution should be just below 1cm
#    but since the noise frames at the end of most sequences produce larger values, this might not be the case
# The "accuracy error niveau" is the absolutely permissible difference so that a prediction
#    on the label is still counted as correct (remember: it's a regression task)
# The "jump input frames" parameter indicates which frames are used from the original sequence
# The original sequences are at about 40 FPS. The model is trained with just 10 FPS
# The "pretrain" parameter is not used so far but could be utilized with a pretraining of the input layer
# The "safe steps" parameter was used for the non-jupyter version to save the model every 5 epochs

# disable annoying tf warnings (retracing etc.)
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
# enable GPU memory growth
allow_growth()

{
  "data parameter": {
    "data directory": "./data/",
    "input scaling factor": 255,
    "labels dtype": "uint32",
    "sequence dtype": "uint8",
    "training label": "train.csv",
    "validation label": "valid.csv"
  },
  "model parameter": {
    "input dimensions": [
      20,
      25
    ],
    "lstm depth": 5,
    "lstm width": 50,
    "output dimensions": 2
  },
  "training parameter": {
    "accuracy error niveau": 0.5,
    "aux scale": 3,
    "batch size": 16,
    "calculation dtype": "float16",
    "calculation epsilon": 1e-07,
    "dropout rate": 0.2,
    "epochs": 10000,
    "jump input frames": 4,
    "learning rate": 0.001,
    "maximum concatenation": 5,
    "minimum concatenation": 5,
    "optimizer clip parameter": [
      1.0,
      2.0
    ],
    "optimizer parameter": [
      0.9,
      0.999,
      1e-07
    ],
    "pretrain": false,
    "restrict dataset size": 1500,
    "safe steps": 5
  }
}


In [10]:
# enable efficient data processing: pull the dtypes and the epsilon out of the config once
sequence_dtype = data_parameter["sequence dtype"]
labels_dtype = data_parameter["labels dtype"]
calculation_dtype = training_parameter["calculation dtype"]
calculation_epsilon = training_parameter["calculation epsilon"]

# enable single/half/double precision
import tensorflow.keras.backend as K
K.set_floatx(calculation_dtype)
K.set_epsilon(calculation_epsilon)

# enable mixed precision
if 'float16' in calculation_dtype:
    try:
        # API of the TF releases this notebook was written for; it stays the first choice
        # so that the behaviour on those releases is unchanged
        from tensorflow.keras.mixed_precision import experimental as mixed_precision
        policy = mixed_precision.Policy('mixed_float16')#, loss_scale=1024)
        mixed_precision.set_policy(policy)
    except ImportError:
        # newer TF releases no longer ship the 'experimental' namespace
        # NOTE(review): with this API loss scaling is not part of the policy any more --
        #    confirm that the optimizer set up in NeuralAPC still gets loss scaling
        from tensorflow.keras import mixed_precision
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_global_policy(policy)


import numpy as np
# scale saved input into a normed range
input_scaling_factor = np.asarray(data_parameter["input scaling factor"],dtype=calculation_dtype)

In [11]:
from napc import NeuralAPC

# First step towards using multiple GPUs at once
# Needs adaptation of batch size and learning rate (for optimal performance)
# The inference for video creation has some problems with the distributed strategy
#strategy = tf.distribute.MirroredStrategy().scope()
#strategy = tf.distribute.experimental.CentralStorageStrategy().scope()
# NOTE(review): tf.init_scope() appears to be used purely as a placeholder context manager,
#    so that the 'with' block below keeps the same shape as the distributed variants above
strategy = tf.init_scope() # Use again only 1 GPU
with strategy:
    # This is the pure model. It's almost the same as in my bachelor's thesis
    # The only difference is an additional loss (auxiliary loss) and a dropout layer after the first dense layer
    # Otherwise it's the same architecture, same optimizer, same 'main' loss
    napc = NeuralAPC(model_parameter,training_parameter)
    napc.compile()
    napc.save()

# copy the config into the model folder, so the hyperparameters are stored next to the model
import shutil
config_path = shutil.copy2('config.json', napc.model_path)
print(f'Config saved under {config_path}')

Config saved under ./models/2020-06-15_01:03:30.703083/config.json


In [12]:
import os
import os.path as path
mode = "training"
data = path.join(data_parameter['data directory'], f'{mode}.dat')
data_lengths = path.join(data_parameter['data directory'], f'{mode}_meta.dat')

# This one solves the problem for all of you who just click 'Run all' and expect it to work
# Since the training data is not uploaded 'on purpose', the code would usually break for most people
# If the files do not exist, the code immediately jumps to the point where it loads the 'predefined' model and evaluates it
# Otherwise it trains the model and evaluates the newly trained one
# Both files are handed to readData below, so both have to be present for the training path
data_exists = os.path.isfile(data) and os.path.isfile(data_lengths)

sequence_list = labels_list = None

if data_exists:
    from data_loader import readData
    # due to the expertise of github.com/xor2k, I switched from CSV to memory-mapped files.
    # This reduces the loading/mapping time by a lot.
    # The sequence_list is a list containing all sequences for training (The sequences have the shape of Tx20x25)
    # all sequences are differently long and are normalized in the range between 0. and 1., they have the dtype 'sequence_dtype'
    # The labels_list is a list with all the labels for training in the same order as the sequence_list is.
    # The shape of each label is 2 --> Therefore, the list could have the shape Nx2 as array. The labels have dtype 'labels_dtype'
    sequence_list, labels_list = readData(model_parameter,training_parameter,data,data_lengths,sequence_dtype,labels_dtype)
    # Similar to my bachelor's thesis I used just about 1500 sequences (Not necessarily the same ones/ haven't checked it)
    # Not necessary for other people or experiments
    dataset_limit = training_parameter["restrict dataset size"]
    sequence_list, labels_list = sequence_list[:dataset_limit], labels_list[:dataset_limit]

Started reading files: 01:03:32 2020-06-15
Finished reading 1499 sequences. Took 1.430000 seconds.


In [13]:
from data_processing import Preprocess
# The Preprocess instance prepares the training data every epoch:
# it creates the necessary labels/bounds and augments the data.
# Without training data there is nothing to preprocess, so the name stays bound to None.
preprocessor = (
    Preprocess(sequence_list,labels_list,input_scaling_factor,training_parameter,calculation_dtype)
    if data_exists
    else None
)

In [15]:
import os

from optional_features import customPlot
# train (only possible if the training data is available)
if data_exists:

    # switching to 'better' generators in the near future
    # TODO: use/implement a more sophisticated generator
    def generator():
        """Yield the (x, y) pairs of one epoch as returned by preprocessor.prepareEpoch()."""
        # prepareEpoch() is called again every time the dataset is iterated
        yield from zip(*preprocessor.prepareEpoch())

    # os.path.join works with and without a trailing separator in napc.model_path;
    # plain string concatenation would put the weights next to the model folder
    # instead of into it when the separator is missing
    checkpoint_pattern = os.path.join(napc.model_path, 'weights.{epoch:05d}.hdf5')
    tensorboard_dir = os.path.join(napc.model_path, 'logs')

    callbacks = []
    # Done: add custom callback for increasing the napc.epoch
    callbacks += [napc.IncreaseEpochCustom(napc)]
    # Done: callback which calls the napc.save() (or use the keras build-in callback)
    # NOTE(review): Keras interprets an integer 'save_freq' as a number of batches, not epochs --
    #    confirm that this matches the "save every 5 epochs" intent of the "safe steps" parameter
    callbacks += [tf.keras.callbacks.ModelCheckpoint(checkpoint_pattern, monitor='val_loss',
                                                     verbose=0, save_best_only=False, save_weights_only=True,
                                                     mode='auto', save_freq=training_parameter["safe steps"])]
    # Done: Add tensorboard callback
    callbacks += [tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir, histogram_freq=0, write_graph=True,
                                                 write_images=False, update_freq='epoch', profile_batch=2,
                                                 embeddings_freq=0, embeddings_metadata=None)]
    # Done: add custom callback for creating the plots (currently disabled)
    # try:
    #     callbacks += [customPlot(preprocessor,napc,plot_freq=1000)]
    # except ValueError:
    #     pass

    # TODO: either test the TF 2.2 profiler or use again a stop watch to see the slowest processes

    # np.dtype(<some dtype>) is compatible with tf.<some dtype>
    ds = tf.data.Dataset.from_generator(generator,(np.dtype(calculation_dtype), np.dtype(calculation_dtype)),)
    napc.model.fit(ds,epochs=training_parameter['epochs'],initial_epoch=napc.epoch,
                   max_queue_size=5, workers=3, use_multiprocessing=False, callbacks=callbacks)

    # the output of this cell are metrics every 100th epoch and a plot of the counting behaviour every 1000th epoch
    # (*)the training took about 10 hours on a 2080TI while occupying less than 2GB VRAM and 80% GPU-Util.
    # * in the latest TF updates the GPU got significantly slower/more inefficient (even slower than a 75 Watt NVIDIA P2000)
    #   now using even less than 50% GPU utilization for the 2080TI while being slower

AttributeError: module 'tensorflow' has no attribute 'dtype'

In [None]:
# For all of you who don't have the training data and just want to execute the notebook
if not data_exists:
    # reload the configuration from 'models/config.json' instead of the default config
    data_parameter, model_parameter, training_parameter = loadConfig('models/config.json',verbose=False)
    napc = NeuralAPC(model_parameter,training_parameter)
    # Loading the included model (it has no subdirectory)
    # NOTE(review): 10000 presumably selects the stored epoch (the config trains for 10000 epochs) -- confirm
    napc.loadModel(10000,'models/')
    # The model_path of the model is not 'models/', but the previously created subdirectory
    # You could now train it further/save it/ etc.

In [None]:
# evaluate the model on all validation sequences (and optionally produce videos for them)
import os.path as path
mode = "validation"
data = path.join(data_parameter['data directory'], f'{mode}.dat')
data_lengths = path.join(data_parameter['data directory'], f'{mode}_meta.dat')

# de-/activate video creation
create_vids = False

# copy dict from training and modify the concatenation and batch size
validation_parameter = training_parameter.copy()
validation_parameter["minimum concatenation"] = 1
validation_parameter["maximum concatenation"] = 1
# validation_parameter["batch size"]=1

# read the validation data
from data_loader import readData
sequence_list, labels_list = readData(model_parameter,validation_parameter,data,data_lengths,sequence_dtype,labels_dtype)

# process them (I need the bounds in y for the accuracy and the videos)
from data_processing import Preprocess
preprocessor = Preprocess(sequence_list,labels_list,input_scaling_factor,validation_parameter,calculation_dtype)
X,Y = preprocessor.prepareEpoch(training=False)

# create my videos
from optional_features import createVideo
accuracy = []
# Running index over all validation sequences. The original call passed an undefined name
# 'idx', which raised a NameError as soon as 'create_vids' was switched on.
# NOTE(review): createVideo presumably uses this index to tell the videos apart -- confirm
video_idx = 0
for x, y in zip(X, Y):
    predictions = napc.model.predict_on_batch(x)
    if create_vids:
        for sample_idx, prediction in enumerate(predictions):
            # True wherever channel 4 of y is >= 0
            # NOTE(review): presumably negative values mark padded frames -- confirm
            binary_mask = (np.minimum(0,y[sample_idx,:,4])+1).astype(bool)
            createVideo(napc.epoch,video_idx,x[sample_idx,binary_mask],
                                        K.eval(prediction[binary_mask]),
                                        y[sample_idx,binary_mask,0:2],
                                        y[sample_idx,binary_mask,2:4])
            video_idx += 1
    # one accuracy value per batch; they are averaged when the result is printed
    accuracy.append(K.eval(napc.accuracy(y,predictions)))

In [None]:
# Since I'm not allowed to upload more sequences and I don't want to publish a perfect model
# (therefore, I haven't tested this one), the accuracy is just an approximation of the true capabilities.
# The 'validation' data in this case is in fact a test set (the last epoch was chosen without selection).
# In practice one would use k-fold cross-validation and reason about the average performance.
# NOTE(review): 'accuracy' holds one entry per batch, so np.mean weights every batch equally,
#    regardless of how many sequences it contains -- confirm that this is intended

# So let's have a look at how well the model does...
print(f'{mode} accuracy: {100*np.mean(accuracy)} %')