In [13]:
import math
import glob
import time
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, MaxPooling3D, Dropout, Flatten, concatenate, Reshape, UpSampling3D, Lambda, Conv3D, Conv3DTranspose
from tensorflow.keras.layers import BatchNormalization

# from tensorflow_probability.python.layers import MixtureNormal
from tensorflow.keras.losses import BinaryCrossentropy

In [14]:
import math
import glob
import pandas as pd
import numpy as np
import json


GLOBAL_REL_PATH_TO_DATA = "../../data"


def get_surface_nodes(file_ids=(1,385)):
    X_nodes = []
    sample_filenames = glob.glob(GLOBAL_REL_PATH_TO_DATA + '/raw/sample_model/model/*.json')
    for sample_filename in sample_filenames[:len(file_ids)]:
        with open(sample_filename, 'r') as file:
            data = json.load(file)
            surface_node_ids = {d['node_id'] for d in data['surf_elements']}

            surface_nodes = {int(node['node_id']): tuple((math.floor(float(value))+1) for (key,value) in node.items() if key in ['x', 'y', 'z']) for node in data['nodes'] if node['node_id'] in surface_node_ids}
        X_nodes.append(surface_nodes)
    return X_nodes


def get_voxel_shapes_from_nodes_dicts(surface_nodes):
    X = []
    for i in range(len(surface_nodes)):
        surface_nodes_coords = set(surface_nodes[i].values())
        voxel_grid = [[[float((x,y,z) in surface_nodes_coords) for x in range(100)] for y in range(100)] for z in range(600)]
        X.append(voxel_grid)
    return X


def get_conditions(file_ids=range(1,385)):
    X_conditions = []
    filenames = glob.glob(GLOBAL_REL_PATH_TO_DATA + '/preprocessed/pre_*.csv')
    for filename in filenames[:len(file_ids)]:
        x_cond = dict()
        df = pd.read_csv(filename)
        x_cond['box_thickness'] = df['a_thickness'][0]
        x_cond['volume_relation'] = df['r_l'][0] * df['r_w'][0] * df['r_h'][0]
        x_cond['force_application_coord_l'] = df['r_cm_l'][0]  # dist from box-top center to the main point of force application / dist from center to ende along length
        x_cond['force_application_coord_w'] = df['r_cm_w'][0]  # ... along  width

        X_conditions.append(x_cond)
    return X_conditions


def get_surface_nodes_dispositions(file_ids=(1,385)):
    GT_disps = []  # ground truth
    filenames = glob.glob(GLOBAL_REL_PATH_TO_DATA + '/raw/sample_gt/gt/*.csv')
    for i, filename in enumerate(filenames[:len(file_ids)]):
        df = pd.read_csv(filename)
        surface_nodes_dispositions = {row['node_id']:(row['dx'], row['dy'], row['dz']) for (index, row) in df.iterrows()}
        GT_disps.append(surface_nodes_dispositions)
    return GT_disps


def get_disposed_nodes(initial_surface_nodes, surface_nodes_dispositions):
    GT_nodes = []
    for i in range(len(initial_surface_nodes)):
        surface_nodes = dict()
        for node_id in initial_surface_nodes[i].keys():
            if node_id in initial_surface_nodes[i] and node_id in surface_nodes_dispositions[i]:
                surface_nodes[node_id] = tuple(math.ceil(sum(x)) for x in zip(initial_surface_nodes[i][node_id], surface_nodes_dispositions[i][node_id]))
        GT_nodes.append(surface_nodes)
    return GT_nodes


def get_arr_shape(arr):
    shape = []
    while isinstance(arr, list):  # enh: use Iterable but fix it for str
        print(arr)
        shape.append(len(arr))
        arr = arr[0]
    return tuple(shape)


def reshape_voxel_grid_into_np(x_arr):
    return [np.reshape(x, get_arr_shape(x)) for x in x_arr]

Variational AutoEncoder is a combination of an Encoder and a Decoder learning to encode the input instances into a (less dimentional) regularized latent space and to decode them back with minimal reconstruction error. 

Having a trained VAE we can sample new instances from distribution in the latent space, decompress them and get a quite realistic data instance.

For our problem, we use encoder to get the representation of an input shape in the latent space, then concatenate it with the condition vector (e.g. [% coords of the force application point, relative size of applicator, box thickness]) so that it represents a deformed shape in the latent space - and then decode deformed shape representation into a voxel shape. 

## Build the model


In [24]:
%run model.ipynb

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 12)                156       
_________________________________________________________________
reshape_4 (Reshape)          (None, 2, 2, 3, 1)        0         
_________________________________________________________________
conv3d_transpose_8 (Conv3DTr (None, 5, 5, 7, 16)       448       
_________________________________________________________________
up_sampling3d_7 (UpSampling3 (None, 10, 10, 14, 16)    0         
_________________________________________________________________
conv3d_transpose_9 (Conv3DTr (None, 21, 21, 29, 8)     3464      
_________________________________________________________________
up_sampling3d_8 (UpSampling3 (None, 42, 42, 58, 8)     0         
_________________________________________________________________
conv3d_transpose_10 (Conv3DT (None, 85, 44, 60, 8)    

## Train our model
using https://www.tensorflow.org/tutorials/generative/dcgan

In [16]:
CONFIG = {
    'num_epochs': 50,
    'test_percent': 0.2,
    'num_samples_total': 383,
    'num_samples_using': 5,
    'batch_size': 3
}

\* cross-entropy loss (aka log loss) is measuring difference between probabilities.

In [68]:
def generator_loss(discriminator_decision):
    """
    Compute cross-entropy loss between the discriminator's prediction on generated shapes only, and an array of ones.
    """
    return BinaryCrossentropy(tf.ones_like(discriminator_decision), discriminator_decision)


def discriminator_loss(real_output, fake_output):
    cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.SUM)
    
    print(real_output, fake_output)
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    print(real_loss.shape, fake_loss.shape)
    print(tf.ones_like(real_output))
    

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    array = real_output.eval(sess)
    print (array)
    
    print(real_loss, fake_loss)
    total_loss = real_loss + fake_loss
    return total_loss


def wasserstein_loss(y_true, y_pred):
    return K.mean(y_true * y_pred)


generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=vae_model,
                                 discriminator=discriminator_model)

In [40]:
@tf.function
def train_step(x_shape, x_cond, ground_truth):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
#         print("vae_model.input_shape: ", vae_model.input_shape)
#         print("x [0] = ", len(x_shape[0]))
#         print("X: {} x {} x {} --- Cond: {}".format(len(x_shape), len(x_shape[0]), len(x_shape[0][0]), len(x_cond)))
#         print(type(x_shape), type(x_cond))
        
        x_cond = tf.cast(x_cond, tf.float32)
        
        generated_deformation = vae_model([
            tf.expand_dims(tf.expand_dims(x_shape, axis=0), axis=-1), 
            tf.expand_dims(tf.expand_dims(x_cond, axis=0), axis=-1)], training=True)

        real_output = discr_model(tf.expand_dims(tf.expand_dims(ground_truth, axis=-1), axis=0), training=True)
        fake_output = discr_model(tf.expand_dims(tf.expand_dims(x_shape, axis=-1), axis=0), training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

        print(wasserstein_loss(real_output, fake_output))

    gradients_of_generator = gen_tape.gradient(gen_loss, vae_model.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, vae_model.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [19]:
def train(X, X_cond, Y):
    for epoch in range(CONFIG['num_epochs']):
        start = time.time()

        print(type(X))
        for i in range(len(X)):
            train_step(X[i], X_cond[i], Y[i])

        if (epoch + 1) % 15 == 0:
            checkpoint.save(file_prefix = checkpoint_prefix)

        print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
    print('Yay!')

### Training

In [20]:
test_ids = np.random.default_rng().choice(range(1, CONFIG['num_samples_total']), math.floor(CONFIG['test_percent'] * CONFIG['num_samples_total']), replace=False)

test_ids = set(test_ids)
train_ids = set(range(1, CONFIG['num_samples_total'])) - test_ids

if CONFIG['num_samples_using']:
    train_ids = [train_ids.pop() for i in range(CONFIG['num_samples_using'])]
    test_ids = [test_ids.pop() for i in range(CONFIG['num_samples_using'])]
    

In [21]:
X_nodes_dicts = get_surface_nodes(train_ids)

X = get_voxel_shapes_from_nodes_dicts(X_nodes_dicts)
print("X done", len(X))

X_cond = get_conditions(train_ids)
X_cond = [list(cond.values()) for cond in X_cond]
print("cond done", len(X_cond))

Y_nodes_disp_dicts = get_surface_nodes_dispositions(train_ids)
Y_nodes_dicts = get_disposed_nodes(X_nodes_dicts, Y_nodes_disp_dicts)
Y = get_voxel_shapes_from_nodes_dicts(Y_nodes_dicts)
print("Y done", len(Y))

X done 5
cond done 5
Y done 5


In [22]:
# # Reshape to fit model input.
# X = reshape_voxel_grid_into_np(X)
# Y = reshape_voxel_grid_into_np(Y)

X = [tf.convert_to_tensor(x) for x in X]
Y = [tf.convert_to_tensor(x) for x in Y]
X_cond = [tf.convert_to_tensor(x) for x in X_cond]

In [69]:
train(X,X_cond,Y)

<class 'list'>
Tensor("sequential_8/dense_8/BiasAdd:0", shape=(1, 1), dtype=float32) Tensor("sequential_8_1/dense_8/BiasAdd:0", shape=(1, 1), dtype=float32)
() ()
Tensor("ones_like_2:0", shape=(1, 1), dtype=float32)


AttributeError: in converted code:

    <ipython-input-40-1b9cb2befea8>:19 train_step  *
        disc_loss = discriminator_loss(real_output, fake_output)
    <ipython-input-68-dde8b8de12e9>:18 discriminator_loss  *
        init = tf.global_variables_initializer()

    AttributeError: module 'tensorflow' has no attribute 'global_variables_initializer'
