# __Generate topologies using TopoGen-VAE__
### <u>Overview</u>: Generate topologies using a trained Topology-Generating Variational Autoencoder (TopoGen-VAE).
### This notebook contains two different ways to generate topologies - individually or in sets:
### 1. Individually - generate a randomly sampled latent vector, select a target volume fraction and append it to the vector, decode the vector into an array, and convert the array to binary.
### 2. In sets - randomly sample latent vectors as the topology vectors, attach each volume fraction in turn to every vector, then decode and convert to binary. This produces sets of topologies with varying volume fractions, which mirrors the structure of the source dataset.

In [1]:
# For setting the directory references for the entire package
from ML_workflow_utils_v3.PackageDirectories import PackageDirectories as PD   

# This code automatically sets the rootpath as the directory the entire package is contained in
# (three levels above the working directory the kernel started in), which is then used to
# initialize the PackageDirectories class below
import os
# check current path if desired
# currentpath = os.getcwd()
# print(currentpath)

# os.chdir() with a relative path is cumulative: re-running this cell would climb
# three more levels each time. Remember the directory the kernel started in and
# always resolve the root relative to it, so the cell is safe to re-run.
try:
    _notebook_start_dir
except NameError:
    _notebook_start_dir = os.getcwd()

os.chdir(os.path.join(_notebook_start_dir, '../../../'))
rootpath = os.getcwd()
# print(rootpath)

# Alternately, rootpath can be set manually
# rootpath = 'filepath/containing/entire/ML_package/'

directory = PD(rootpath = rootpath)

In [2]:
import pandas as pd
import numpy as np
import json
import glob
import os

from ML_workflow_utils_v3.Dataset_Preprocessor import Dataset_Preprocessor as DataP
# Convenience aliases for two of the package directories exposed by `directory`.
# NOTE(review): neither name is referenced again in the visible part of this notebook - confirm they are still needed.
source_data_path = directory.source_data_path
voxel_dir = directory.voxeltopo_path


import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F


import torch.utils.data as data
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.optim as optim

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:
nbpath = directory.nb_2_2_path

# NOTE: the instructions below are plain comments rather than bare string literals;
# a string literal that is the last expression of a notebook cell is echoed as the
# cell's output.

# Option A - the TopoGen-VAE that was shipped with this package:
# keep these three lines of code active.
cp_dir = directory.topogen_vae_path
cp_name = "TopoGen_VAE_pretrained_model_weights.pth"
cp_path = os.path.join(cp_dir, cp_name)


# Option B - a model trained in Notebook 2.1 (2_1_Train_TopoGen_VAE):
# comment out the three lines of code above (highlight them and press Ctrl+/, or
# put '#' at the beginning of each line) and uncomment the three lines below.
# cp_dir = os.path.join(directory.nb_2_1_path, 'model_CPs')
# cp_name = 'TopoGen_VAE_28AUG24' # placeholder, replace
# cp_path = os.path.join(cp_dir, cp_name)

In [4]:
# Defining variables for VAE configuration

# latent_dim is the dimension of the "encoded" (latent) vector that is sampled
# and passed to the decoder
latent_dim = 16

# 1 material property - Volume Fraction
matprops = ['volFrac']

In [5]:
# if using pre-trained model
# (the model is built from the latent dimension and the list of material properties set above)
from ML_workflow_utils_v3.TopoGen_VAE_pretrained import TGVAE_pretrained
tgvae = TGVAE_pretrained(latent_dim, matprops)

# if using model trained in notebook 2_1
# (comment out the two lines above and uncomment the two lines below)
# from ML_workflow_utils_v3.TopoGen_VAE import TGVAE
# tgvae = TGVAE(latent_dim, matprops)


# Helper used further down when loading the checkpoint into the model
from ML_workflow_utils_v3.Model_Weights_Util import convert_state_dict


In [6]:
# Place the VAE model on the compute device

# Set to True to spread the model over multiple GPUs, when available
gpu_parallel = False

# Wrap in DataParallel only when requested, then move the result to `device`
tgvae = (torch.nn.DataParallel(tgvae) if gpu_parallel else tgvae).to(device)



In [None]:
"""
This command loads the trained model's weights

The included pre-trained model weights were produced using a multi-GPU training setup, so the key of each layer contains "module.",
which is how torch.nn.DataParallel() names the entries of its state dictionary. The call to "convert_state_dict" below
converts the keys to the format selected by "convert_weight_keys_to" ('non-parallel' for a single device).
If using more than one GPU for topology production, set "convert_weight_keys_to" to 'parallel'.
"""

# Key format the checkpoint is converted to: 'non-parallel' for a plain model,
# 'parallel' for a model wrapped in torch.nn.DataParallel
convert_weight_keys_to = 'non-parallel'

# A DataParallel-wrapped model expects "module."-prefixed keys and a plain model
# expects un-prefixed keys. Catch a mismatch here, with an actionable message,
# instead of letting load_state_dict fail on every key.
_model_is_parallel = isinstance(tgvae, torch.nn.DataParallel)
if (convert_weight_keys_to == 'parallel') != _model_is_parallel:
    raise ValueError(
        f"convert_weight_keys_to = {convert_weight_keys_to!r} does not match the model: "
        f"tgvae is {'' if _model_is_parallel else 'not '}wrapped in torch.nn.DataParallel. "
        "Use 'parallel' for a DataParallel model and 'non-parallel' otherwise."
    )

model_weights = convert_state_dict(cp_path, convert_to = convert_weight_keys_to)


# Kept as the last expression so the notebook shows the key-matching report
tgvae.load_state_dict(model_weights)

In [None]:
# Set TopoGen-VAE to evaluation mode (the model is only used for generation in this notebook, not trained)
tgvae.eval()

In [None]:
# Set a variable to call the decoder module of the TopoGen-VAE and move it to the compute device.
# (Calling .eval() on the whole model already puts its sub-modules, including the decoder, in evaluation mode.)
# torch.nn.DataParallel keeps the wrapped model under `.module`, so inspect the
# wrapper itself rather than a separately maintained string flag; this way
# `decoder` is always defined and always matches how `tgvae` was built.
if isinstance(tgvae, torch.nn.DataParallel):
    decoder = tgvae.module.decoder
else:
    decoder = tgvae.decoder

decoder.to(device)

# Generating a single topology

In [10]:
# Target volume fraction, between 0.0 and 1.0 (below 0.5 is recommended for design purposes)
tgt_volfrac_val = 0.4
# Same value as a (1, 1) float32 tensor so it can be appended to the latent vector
tgt_volfrac = torch.tensor([[tgt_volfrac_val]], dtype=torch.float32)

In [11]:
# Draw one standard-normal embedding vector and give it a leading batch axis: shape (1, latent_dim)
latent_vec = torch.randn(latent_dim).reshape(1, latent_dim)

In [12]:
# Append the target volume fraction to the embedding vector along the feature axis
full_vec = torch.cat([latent_vec, tgt_volfrac], dim=1)

In [13]:
# Decode full vector into topology

# `.to(device)` and `.cpu()` are no-ops when everything already lives on the CPU,
# so one code path serves both CUDA and CPU-only machines.
# Gradients are not needed for generation, so skip building the autograd graph.
with torch.no_grad():
    decoded_tensor = decoder(full_vec.to(device))

# Drop the two leading singleton axes of the decoder output
decoded_array = np.squeeze(decoded_tensor.detach().cpu().numpy(), axis=(0,1))


In [14]:
from ML_workflow_utils_v3.Voxel_Mesh_Utils import target_binarray_threshold, Plot_Array

In [15]:
# Convert continuous valued output to binary

# Presumably returns the binarized array, the threshold that was applied, and the
# volume fraction of the binarized array - verify against Voxel_Mesh_Utils.
# Note that this rebinds the name `threshold`.
binary_array, threshold, binary_volfrac = target_binarray_threshold(decoded_array, tgt_volfrac_val)

In [16]:
# Set path for saving outputs
savedir = os.path.join(nbpath, 'synthetic_topos_individual')
# Make sure the folder exists before anything is written into it
os.makedirs(savedir, exist_ok=True)

# this name can be anything, but we recommend including volume fraction for reference
arrayname = f'test_vf_{tgt_volfrac_val}'
# Extension-less base path; each save/plot call appends its own suffix or extension
arraypath = os.path.join(savedir, arrayname)

In [17]:
# Plot continuous array if desired - see DataPrep.py for fields.
# As shipped, this call neither displays the plot (showplot = False) nor saves it as a PNG file (export = False).

# if desired to save plot, set to True
export = False
if export:
    # arraypath already starts with savedir; joining savedir in front of it again
    # would repeat the directory whenever the notebook path is relative
    plotpath = f'{arraypath}_continuous'
else:
    plotpath = None
# if desired to display the plot, set to True
showplot = False

Plot_Array(decoded_array, binary=False, show=showplot, export_png=export, marker_size = 6, symbol='square', x_range=(0,64), scale_markers=True, plotpath = plotpath)

In [None]:
# Plot binary array. As shipped, the plot is displayed (showplot = True) but not saved (export = False).

# if desired to save plot, set to True
export = False
if export:
    # arraypath already starts with savedir; joining savedir in front of it again
    # would repeat the directory whenever the notebook path is relative
    plotpath = f'{arraypath}_binary'
else:
    plotpath = None
# if desired to not display, set to False
showplot = True

Plot_Array(binary_array, binary=True, show=showplot, export_png=export, marker_size = 6, symbol='square', x_range=(0,64), plotpath = plotpath)

In [25]:
# Saving the array, if desired for later FEA analysis

# Select save format
# as a .mat file for MATLAB
matlab_format = True
# as numpy format .npy
numpy_format = True
# as numpy compressed format .npz
numpy_compressed_format = True

# The writers below do not create missing folders, so make sure the target exists
if matlab_format or numpy_format or numpy_compressed_format:
    os.makedirs(os.path.dirname(arraypath) or '.', exist_ok=True)

if matlab_format:
    from scipy.io import savemat
    # NOTE(review): 'target volume fraction' contains spaces and is therefore not a
    # valid MATLAB variable name - confirm MATLAB can load this entry.
    matfile_dict = {'arr_0': binary_array,
                    'target volume fraction': tgt_volfrac_val}

    matpath = f'{arraypath}.mat'
    savemat(matpath, matfile_dict)

if numpy_format:
    # numpy appends the '.npy' extension to the extension-less path
    np.save(arraypath, binary_array)

if numpy_compressed_format:
    # numpy appends '.npz'; the positional array is stored under the key 'arr_0'
    np.savez(arraypath, binary_array)

# __Generate a batch of topologies with varying volume fractions__

## __Note__: Each topology name consists of an adjective followed by a noun, both drawn at random from WordNet using the NLTK library

In [26]:
# NOTE(review): this flag does not appear to be read anywhere in the visible notebook, and the
# name `threshold` is rebound by every call to target_binarray_threshold - confirm it is still needed.
threshold = True

In [None]:
import nltk
import re
import random
from nltk.corpus import wordnet as wn

# Fetch the NLTK corpora used for naming topologies (WordNet is queried through `wn` below).
# NOTE(review): the 'words' corpus does not appear to be used in the visible notebook - confirm it is required.
nltk.download('words')
nltk.download('wordnet')

In [28]:
# Function to get a random synset word without hyphens or underscores

# Synset lists keyed by part of speech. Enumerating every WordNet synset is
# expensive, so it is done once per part of speech instead of on every retry
# of every call.
_SYNSETS_BY_POS = {}


def get_random_word(pos):
    """Return a random, capitalized WordNet word for the given part of speech.

    A synset is drawn at random and the name of its first lemma is used.
    Candidates are re-drawn until the word has at most 8 characters and
    contains no hyphen, underscore or slash.

    Parameters
    ----------
    pos : str
        WordNet part-of-speech tag passed to ``wn.all_synsets``
        (this notebook uses "a" for adjectives and "n" for nouns).

    Returns
    -------
    str
        The selected word, passed through ``str.capitalize``.
    """
    if pos not in _SYNSETS_BY_POS:
        _SYNSETS_BY_POS[pos] = list(wn.all_synsets(pos))
    synsets = _SYNSETS_BY_POS[pos]

    while True:
        word = random.choice(synsets).lemmas()[0].name()
        # Cheap length check first, then reject compound / multi-word lemmas
        if len(word) <= 8 and not re.search(r'[-_/]', word):
            return word.capitalize()

In [29]:
# Number of unique topologies to generate
num_topos = 25

# Generate unique topology names (adjective + noun) for each vector.
# The names are later used as dictionary keys, so a repeated name would silently
# overwrite another topology; keep drawing until num_topos distinct names exist.
topo_names = []
_used_names = set()
while len(topo_names) < num_topos:
    adj = get_random_word("a")  # Adjective
    noun = get_random_word("n")  # Noun
    UC_topo_name = adj + noun
    if UC_topo_name in _used_names:
        continue
    _used_names.add(UC_topo_name)
    topo_names.append(UC_topo_name)

In [None]:
# Show the generated topology names
print(topo_names)

#### Set the range and increments of volume fractions for each generated topology. As shipped, `np.arange(0.1, 0.61, 0.04)` generates 13 volume fractions from 0.1 to 0.58 in steps of 0.04 (the stop value 0.61 is exclusive):
##### 0.1, 0.14, 0.18, 0.22, 0.26, 0.3, 0.34, 0.38, 0.42, 0.46, 0.5, 0.54, 0.58

In [31]:
# Volume-fraction grid built with numpy.arange: start (inclusive), stop (exclusive) and step.
# The values are rounded to 2 decimal places to strip floating-point noise.
vf_start, vf_stop, vf_step = 0.1, 0.61, 0.04
volfracs = np.arange(vf_start, vf_stop, vf_step).round(2)

### Latent vector generation
Latent vectors can be drawn from a random-normal distribution or can be distributed evenly using 
[Latin Hypercube Sampling](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html) on [0,1], followed by the inverse normal CDF to map the samples onto the Gaussian distribution

In [32]:
# How the latent vectors are drawn: 'LHS' (Latin Hypercube Sampling) or 'randnorm'
sampling = 'LHS' # or 'randnorm'


if sampling == 'LHS':
    from scipy.stats.qmc import LatinHypercube as LHS
    from scipy.stats import norm

    # The sampler dimension must follow the latent dimension of the model
    # rather than being hard-coded
    LHsampler = LHS(d=latent_dim)

    # Evenly spread samples on the unit hypercube, one row per topology
    lhs_array = LHsampler.random(n=num_topos)

    # Inverse normal CDF maps the uniform samples onto a standard normal distribution
    vector_array = lhs_norm_array = norm.ppf(lhs_array)

elif sampling == 'randnorm':

    vector_array = torch.randn(num_topos, latent_dim)

else:
    # Without this, a typo would only surface later as an undefined vector_array
    raise ValueError(f"Unknown sampling method {sampling!r}; expected 'LHS' or 'randnorm'")

In [None]:
# examine latent vectors if desired (one row per topology)
print(vector_array)

In [34]:
# Split the sampled latent vectors into one (1, latent_dim) float32 tensor per topology.
# torch.as_tensor accepts both the NumPy array produced by 'LHS' sampling and the
# torch tensor produced by 'randnorm' sampling, so every entry is a torch tensor
# that can later be concatenated with the volume-fraction tensor.
_latent_matrix = torch.as_tensor(vector_array, dtype=torch.float32)
# clone() gives each vector its own storage instead of a view into the full matrix
vectors = [row.unsqueeze(0).clone() for row in _latent_matrix]

In [35]:
# Map each topology name to its latent vector (names and vectors are paired in order)
vectordic = dict(zip(topo_names, vectors))

In [36]:
# Dictionary for holding each topology; it is filled by the generation loop, keyed by topology name
gen_data_dic = {}

In [37]:
# Configure and create the output folders for this batch of topologies

def _make_subdir(parent, name):
    """Create the folder `name` inside `parent` if needed and return its path."""
    subdir = os.path.join(parent, name)
    os.makedirs(subdir, exist_ok=True)
    return subdir


# Identifiers that make up the batch folder name
batch_num = 1
date = '15SEP24'

# Set to True to write .mat files (for FEA in MATLAB) during generation
save_as_mat = True

# Set to True to write .npz files (numpy format) during generation
save_as_npz = True

batch = f'batch{batch_num}_{num_topos}topos_{date}'
batchpath = os.path.join(nbpath, f'synthetic_topos_batch/{batch}')
os.makedirs(batchpath, exist_ok=True)

# Output folders are only created for the formats that are switched on
if save_as_npz:
    npz_path = _make_subdir(batchpath, 'voxel_array_files')

if save_as_mat:
    from scipy.io import savemat
    mat_dir = _make_subdir(batchpath, 'mat_files')

# Folders for the reference plots written during generation:
# one for continuous-valued arrays, one for binary arrays
continuous_plot_path = _make_subdir(batchpath, 'continuous_voxel_plots')
binary_plot_path = _make_subdir(batchpath, 'binary_voxel_plots')

In [None]:
# While producing the topologies, select a volume fraction to plot from the range of volume fractions, as shipped = 38%
plotvf = 0.38
# Flags for plotting continuous and/or binary valued arrays
plot_continuous = True
plot_binary = True

# NOTE: save_as_npz / save_as_mat are the flags set in the folder-setup cell. That cell only
# creates npz_path / mat_dir when the matching flag is True, so the flags are deliberately
# not re-assigned here (doing so could enable a format whose output folder was never defined).

for topo, vector in vectordic.items():
    # One entry per topology: its base latent vector plus one sub-entry per volume fraction
    gen_data_dic[topo] = {'base vector': vector}

    for vf in volfracs:
        # Dictionary key for this volume fraction, e.g. 'VF38%'
        vfrnd = np.round(vf*100,1)
        vf_entry = 'VF'+f'{vfrnd:.0f}'+'%'

        save_name = f'{topo}_{vf}'

        # Append the volume fraction to the base vector
        vf_tensor = torch.tensor([[vf]], dtype=torch.float32)
        vec = torch.cat((vector, vf_tensor), dim=1)

        # Decode the vector into the topology voxel array.
        # Gradients are not needed for generation, so skip building the autograd graph.
        with torch.no_grad():
            decoded_tensor = decoder(vec.to(device))
        decoded_array = np.squeeze(decoded_tensor.detach().cpu().numpy(), axis=(0,1))

        # Convert the array to binary based on a threshold. The volume fraction the
        # helper returns is not used; it is recomputed from the binary array below.
        binary_array, threshold, _helper_volfrac = target_binarray_threshold(decoded_array, vf, 64)

        # Volume fraction of the binary array: filled voxels over all voxels
        # (uses the actual array size rather than a hard-coded 64**3)
        volfrac = np.sum(binary_array == 1) / binary_array.size

        # Format the volume fraction; formatting directly avoids float artifacts
        # such as '38.099999999999994 %'
        volfrac_pct = f'{volfrac * 100:.1f} %'
        volfrac_str = f'{volfrac:.5f}'

        voxdic = {'Target volume fraction': vf,
                  'Modified vector': vec,
                  'Sampling method': sampling,
                  'decoded array': decoded_array,
                  'binary array': binary_array,
                  'Actual volume fraction': {'percent': volfrac_pct,
                                     'decimal': volfrac,
                                     'string':  volfrac_str},
                 }

        if save_as_mat:
            mat_mesh_dict = {'arr_0': binary_array}
            mat_name = f'{save_name}.mat'
            mat_path = os.path.join(mat_dir, mat_name)
            savemat(mat_path, mat_mesh_dict)

        if save_as_npz:
            array_path = os.path.join(npz_path, save_name)

            # NOTE(review): this stores the continuous decoded array, whereas the .mat file
            # stores the binary array - confirm which one the .npz files are meant to hold.
            np.savez(array_path, decoded_array)

        gen_data_dic[topo][vf_entry] = voxdic

        # Tolerant comparison, so the reference plots do not depend on exact float equality
        if np.isclose(vf, plotvf):
            if plot_continuous:
                plotpath = os.path.join(continuous_plot_path, f'{save_name}_continuous')
                Plot_Array(decoded_array, plotpath = plotpath, binary=False, scale_markers=True, export_png=True, show=False)

            if plot_binary:
                plotpath = os.path.join(binary_plot_path, f'{save_name}_binary')
                Plot_Array(binary_array, plotpath = plotpath, binary=True, bincolor='black', lincolor='gray', marker_size=5, export_png=True, show=False)
                




# Save the dictionary of generated topologies to a python Pickle file

In [39]:
# Functions for saving dictionary to / loading from Python pickle file
from ML_workflow_utils_v3.Misc_Utils import save_dict_to_pickle

In [40]:
# The pickle file is written into the batch folder and carries the batch name
gen_data_dic_path = os.path.join(batchpath, f'generated_topos_dict_{batch}.pkl')

In [41]:
# Write the dictionary of generated topologies to the pickle file (arguments: path first, then the dictionary)
save_dict_to_pickle(gen_data_dic_path, gen_data_dic)

# __End of Notebook__