# Pre-process and write to npz for GNN training and testing

In [1]:
import re
import numpy as np
import random

re_int_sci = r'[-\d\.]+e?[-+\d]*'
re_sci = r'[+-]?\d+\.\d+e[+-]?[\d]+'


def parse_fragment_simulation(file, *, num_beam_particles=6248, type_eid_threshold=263914):
    '''
    Extract info from an LSDYNA txt export: particle coordinates, particle types,
    and effective plastic strain (eps).

    The file is scanned for three kinds of marker lines. "*NODE" opens a
    per-timestep block of particle coordinates, "$SPH_ELEMENT_RESULTS" opens a
    per-timestep block of strain values and "*END" closes a block. All coordinate
    blocks are expected to come before all strain blocks: the first N "*END"
    markers (N = number of "*NODE" markers) close coordinate blocks, the
    remaining ones close strain blocks.

    Input:
        file: path of the txt file from LSDYNA, e.g., C_80_480_Cc_20_strain.txt
        num_beam_particles: number of zero strain values prepended to every time
            step, standing in for the beam particles (the original note says
            zero strain is added "to all beam particles").
        type_eid_threshold: particles whose id (first number of a coordinate
            line) is below this value get type 1, all others type 0.
    Output: np arrays of shapes,
            trajectory (timesteps, num_particles, 3), particle_type (num_particles,),
            eps (timesteps, num_beam_particles + values found per step).
    Raises:
        ValueError: if the file contains no "*NODE" or no "*END" marker.
    '''

    with open(file, 'r') as f:
        lines = f.readlines()

    # Find all "particle position" lines and "plastic strain" lines using key words
    pos_lines_start, end_lines, strain_lines_start = [], [], []
    for idx, line in enumerate(lines):
        if line.startswith("*NODE"):
            pos_lines_start.append(idx)
        elif line.startswith("*END"):
            end_lines.append(idx)
        elif line.startswith("$SPH_ELEMENT_RESULTS"):
            strain_lines_start.append(idx)

    num_steps = len(pos_lines_start)
    if num_steps == 0 or not end_lines:
        # Without this check the failure would be an opaque IndexError below
        raise ValueError(f"No '*NODE' ... '*END' block found in {file!r}")
    pos_lines_end, strain_lines_end = end_lines[:num_steps], end_lines[num_steps:]

    # Compile once: each pattern is applied to every line of a block
    find_numbers = re.compile(re_int_sci).findall   # integers and scientific numbers
    find_sci = re.compile(re_sci).findall           # scientific numbers only

    # Extract particle types from the first coordinate block:
    # a particle line holds 4 numbers, the first of which is the particle id
    particle_types = []
    for line in lines[pos_lines_start[0]:pos_lines_end[0]]:
        fields = find_numbers(line)
        if len(fields) == 4:
            particle_types.append(1 if int(fields[0]) < type_eid_threshold else 0)
    particle_types = np.array(particle_types).astype(int)

    # Extract particle positions: lines with exactly 3 scientific numbers
    trajectory = []
    for line_start, line_end in zip(pos_lines_start, pos_lines_end):
        pos_one_step = []
        for line in lines[line_start:line_end]:   # lines of one time step
            fields = find_sci(line)
            if len(fields) == 3:
                pos_one_step.append(tuple(float(x) for x in fields))
        trajectory.append(pos_one_step)
    trajectory = np.array(trajectory).astype(float)

    # Extract effective plastic strain (eps): lines with exactly 1 scientific number
    strains = []
    for line_start, line_end in zip(strain_lines_start, strain_lines_end):
        strains_one_step = []
        for line in lines[line_start + 1:line_end]:   # skip the header line itself
            fields = find_sci(line)
            if len(fields) == 1:
                strains_one_step.append(float(fields[0]))
        # Add zero strain to all beam particles
        strains.append([0] * num_beam_particles + strains_one_step)
    strains = np.array(strains).astype(float)

    return trajectory, particle_types, strains


# Nothing runs when this cell is executed as a script; it only defines the parser above.
if __name__ == "__main__":
    pass

In [9]:
import numpy as np
import glob
import json
import random
import math
import pathlib
       

dataset = 'Fragment'
in_dir = '/home/jovyan/work/data_temp/fragment/120/'
out_dir = f'/home/jovyan/work/data_temp/fragment/{dataset}/'
pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

# Mean/std used to standardise the strain of every trajectory
strain_mean, strain_std = 0.8868453123315391, 0.6590170029193022

# Grab all simulation cases from corresponding data folder
simulations = glob.glob(in_dir + '*')
if not simulations:
    # Fail early: the statistics and the metadata below need at least one trajectory
    raise FileNotFoundError(f"No simulation files found in {in_dir}")
random.shuffle(simulations)

## Larger step size leads to shorter trajectory and hence better rollout performance
## But lower precision of the simulation
## Current simulation are of absolute time 30 ms
## Step size=1 means 53 steps, each of which 0.6 ms
STEP_SIZE = 2
## Number of leading time steps dropped from every trajectory before subsampling
START_STEP = 20

## Data splits: train(80%), valid(10%), test(10%); whatever remains goes to test.
## Integer arithmetic keeps the split points exact.
TRAIN_PERCENT, VALID_PERCENT = 80, 10
n_trajectory = len(simulations)
n_train = n_trajectory * TRAIN_PERCENT // 100
n_valid = n_trajectory * VALID_PERCENT // 100

## Initialisation placeholders for data
ds_train, ds_valid, ds_test = {}, {}, {}
vel_chunks, acc_chunks = [], []   # per-trajectory arrays, concatenated once after the loop
strain_stats = np.array([])
file_train, file_valid, file_test = [], [], []

## Main loop for data extraction
for idx, simulation in enumerate(simulations):
    print(f"{idx}/{n_trajectory} Reading {simulation}...")
    positions, particle_types, strains = parse_fragment_simulation(simulation)
    dim = positions.shape[-1]

    positions = positions[START_STEP::STEP_SIZE, :, :]

    strains = strains[START_STEP::STEP_SIZE, :]
    strains = (strains - strain_mean) / strain_std   ## standardize based on overall mean and std

    # print for debug
    print(f"Dim: {dim}")
    print(f"Position min:{positions.min(axis=(0,1))}, max:{positions.max(axis=(0,1))}")
    print(f"Strain min:{strains.min(axis=(0,1))}, max:{strains.max(axis=(0,1))}")
    print(f"Shape, pos: {positions.shape}, types: {particle_types.shape}, strain: {strains.shape}")
    print(f"Unique particle types: {np.unique(particle_types)}")

    # Each trajectory goes to exactly one split, so valid and test never share data
    key = f'trajectory_{idx}'
    if idx < n_train:
        print('to train')
        ds_train[key] = [positions, particle_types, strains]
        file_train.append(simulation)
    elif idx < n_train + n_valid:
        print('to valid')
        ds_valid[key] = [positions, particle_types, strains]
        file_valid.append(simulation)
    else:
        print('to test')
        ds_test[key] = [positions, particle_types, strains]
        file_test.append(simulation)

    # Extract Vel and Acc statistics
    # positions of shape [timestep, particles, dimensions]
    vel_trajectory = positions[1:, :, :] - positions[:-1, :, :]
    acc_trajectory = vel_trajectory[1:, :, :] - vel_trajectory[:-1, :, :]

    vel_chunks.append(vel_trajectory.reshape(-1, dim))
    acc_chunks.append(acc_trajectory.reshape(-1, dim))

# A single concatenation avoids re-copying the growing arrays on every iteration
vels = np.concatenate(vel_chunks, axis=0)
accs = np.concatenate(acc_chunks, axis=0)

# Extract vel, acc statistics for normalisation
vel_mean, vel_std = list(vels.mean(axis=0)), list(vels.std(axis=0))
acc_mean, acc_std = list(accs.mean(axis=0)), list(accs.std(axis=0))

# NOTE: saving is currently disabled, so the messages printed below report
# what would be written rather than what has been written.
# # Save datasets in numpy format
# np.savez(out_dir + 'train.npz', **ds_train)
# np.savez(out_dir + 'valid.npz', **ds_valid)
# np.savez(out_dir + 'test.npz', **ds_test)

print(f"{len(ds_train)} trajectories saved to train.npz.")
print(f"{len(ds_valid)} trajectories saved to valid.npz.")
print(f"{len(ds_test)}  trajectories saved to test.npz.")

# Save meta data, starting from the metadata of another dataset as a template
in_file = '/home/jovyan/share/gns_data/Concrete2D-C/metadata.json'
out_file = f'/home/jovyan/share/gns_data/{dataset}/metadata.json'

with open(in_file, 'r') as f:
    meta_data = json.load(f)

# In GNN, the suggested connection radius is 4.5r, or 5.625 mm (around 20 neighbors)
# If R is 5 mm before normalization, ... (note left unfinished in the original)
meta_data['dim'] = 3
meta_data['default_connectivity_radius'] = 11
# Taken from the last trajectory read; assumes all trajectories have the same length
meta_data['sequence_length'] = positions.shape[0]
meta_data['vel_mean'] = vel_mean
meta_data['vel_std'] = vel_std
meta_data['acc_mean'] = acc_mean
meta_data['acc_std'] = acc_std
meta_data['strain_mean'] = strain_mean
meta_data['strain_std'] = strain_std

meta_data['dt'] = 0.0006 * STEP_SIZE
meta_data['bounds'] = [[-500, 500], [-1000, 1000], [4, 124]]
meta_data['file_train'] = file_train
meta_data['file_valid'] = file_valid
meta_data['file_test'] = file_test
print(meta_data)

# with open(out_file, 'w') as f:
#     json.dump(meta_data, f)

0/45 Reading /home/jovyan/work/data_temp/fragment/120/120_2_0.3C50...
Dim: 3
Position min:[-498.66158   -995.           3.2785579], max:[497.87956 995.      201.36161]
Strain min:-1.3457093040134123, max:1.6891136082034723
Shape, pos: (41, 246248, 3), types: (246248,), strain: (41, 246248)
Unique particle types: [0 1]
to train
1/45 Reading /home/jovyan/work/data_temp/fragment/120/120_3_0.5C30...


KeyboardInterrupt: 

# Read MPS

In [20]:
import re
import numpy as np
import random

re_int_sci = r'[-\d\.]+e?[-+\d]*'
re_sci = r'[+-]?\d+\.\d+e[+-]?[\d]+'


def parse_fragment_mps(file, *, num_beam_particles=6248):
    '''
    Extract the per-timestep "Max Prin Strain" values from an LSDYNA txt export.

    Every line starting with "$RESULT OF  Max Prin Strain" opens a block of
    strain values that runs up to the first following "*END" line. Inside a
    block, each line holding exactly one number in scientific notation
    contributes that number. "*END" lines that do not close a strain block
    (e.g. ones closing other sections of the file) are ignored.

    Input:
        file: path of the txt file from LSDYNA
        num_beam_particles: number of zero strain values prepended to every time
            step, standing in for the beam particles (the original note says
            zero strain is added "to all beam particles").
    Output: np array of strains, of shape
            (timesteps, num_beam_particles + values found per step).
    '''

    with open(file, 'r') as f:
        lines = f.readlines()

    # Pair every strain header with the first "*END" that follows it
    sections = []
    section_start = None
    for idx, line in enumerate(lines):
        if line.startswith("$RESULT OF  Max Prin Strain"):
            section_start = idx
        elif line.startswith("*END") and section_start is not None:
            sections.append((section_start, idx))
            section_start = None

    find_sci = re.compile(re_sci).findall   # compiled once, applied to every line

    # Extract the strain values of each time step
    strains = []
    for line_start, line_end in sections:
        strains_one_step = []
        for line in lines[line_start + 1:line_end]:   # skip the header line itself
            fields = find_sci(line)
            if len(fields) == 1:
                strains_one_step.append(float(fields[0]))
        # Add zero strain to all beam particles
        strains.append([0] * num_beam_particles + strains_one_step)
    strains = np.array(strains).astype(float)

    return strains


# Nothing runs when this cell is executed as a script; it only defines the parser above.
if __name__ == "__main__":
    pass

In [14]:
# Parse one simulation file and report the shapes of the three returned arrays
file = '/home/jovyan/work/data_temp/fragment/120_4_0.3C50'
trajectory, particle_types, strains = parse_fragment_simulation(file)
print(*(arr.shape for arr in (trajectory, particle_types, strains)))

(101, 246248, 3) (246248,) (101, 246248)


# Modify metadata

In [3]:
# Read the shared metadata, override `dim` and `bounds`, show the result
# and write it next to the dataset in the working directory.
in_file = '/home/jovyan/share/gns_data/Fragment/metadata.json'
out_file = '/home/jovyan/work/data_temp/fragment/Fragment/metadata.json'

with open(in_file, 'r') as f:
    meta_data = json.load(f)

meta_data.update(
    dim=3,
    bounds=[[-500, 500], [-1000, 1000], [0, 255]],
)

print(meta_data)

with open(out_file, 'w') as f:
    json.dump(meta_data, f)

{'bounds': [[-500, 500], [-1000, 1000], [0, 255]], 'sequence_length': 41, 'default_connectivity_radius': 11, 'dim': 3, 'dt': 0.0012, 'vel_mean': [0.00025711230813995186, 7.838677173801261e-05, 0.11218872474841513], 'vel_std': [0.04329208019487968, 0.050169923732292544, 0.21707507435839715], 'acc_mean': [4.228977435470556e-06, -1.0883637271305478e-05, -0.0007385390517020932], 'acc_std': [0.016808747817365178, 0.02908448767480537, 0.01776673986455495], 'file_train': ['/home/jovyan/work/data_temp/fragment/120/120_5_0.4C30', '/home/jovyan/work/data_temp/fragment/120/120_6_0.3C30', '/home/jovyan/work/data_temp/fragment/120/120_5_0.5C80', '/home/jovyan/work/data_temp/fragment/120/120_3_0.5C50', '/home/jovyan/work/data_temp/fragment/120/120_5_0.3C50', '/home/jovyan/work/data_temp/fragment/120/120_5_0.4C50', '/home/jovyan/work/data_temp/fragment/120/120_2_0.4C30', '/home/jovyan/work/data_temp/fragment/120/120_2_0.3C80', '/home/jovyan/work/data_temp/fragment/120/120_4_0.3C80', '/home/jovyan/wor

# Test regular expression for number extraction

In [2]:
import re

# Sample input lines used to compare the two number-extraction patterns
strs = ['20742   1.4952594e+03   -1.0499660e+02   1.6313647e-02   9.9995575e+02',
        '    32365   1.4051317e+00',
        '   10826       1   15757   15758   15784   15783   11311   11312   11338   11337',
        '$Total Solid element Volume =    7.5878880e+07'
       ]

# First the pattern that only accepts scientific notation, then the one that
# also accepts plain integers. The loop variable is deliberately not called
# `str`: that would shadow the builtin for every later cell.
for pattern in (r'[+-]?\d+\.\d+e[+-]?[\d]+', r'[-\d\.]+e?[-+\d]*'):
    for sample in strs:
        print(re.findall(pattern, sample))

['1.4952594e+03', '-1.0499660e+02', '1.6313647e-02', '9.9995575e+02']
['1.4051317e+00']
[]
['7.5878880e+07']
['20742', '1.4952594e+03', '-1.0499660e+02', '1.6313647e-02', '9.9995575e+02']
['32365', '1.4051317e+00']
['10826', '1', '15757', '15758', '15784', '15783', '11311', '11312', '11338', '11337']
['7.5878880e+07']


# Plot Fragment data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation


# Animate the particles as a 3D scatter plot coloured by strain, one frame per
# time step, and save the animation as a GIF.
# NOTE(review): this cell reads the notebook globals `strains`, `position` and
# `strain`. Only `strains` (plus `positions` / `trajectory`) is defined in the
# cells visible here -- confirm where `position` and `strain` come from.

# Create a colormap
cmap = plt.get_cmap("rainbow")

# Create a 3D scatter plot with custom figure size
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection="3d")
ax.set_box_aspect([1, 2, 0.1])

# Initial (empty) scatter plot; it only serves to build the colorbar, whose
# range is fixed to the global min/max of `strains`
scatter = ax.scatter([], [], [], c=[], cmap=cmap, vmin=strains.min(), vmax=strains.max())

# Add a colorbar
cbar = fig.colorbar(scatter)
cbar.set_label("Strain")

# Set axis labels
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")


def update(timestep):
    # Redraw the whole axes for frame `timestep` and return the new artist as a 1-tuple.
    # The axes are cleared first, so aspect, labels and title are applied again.
    ax.clear()
    # NOTE(review): this aspect ([4, 8, 1]) differs from the one set at creation ([1, 2, 0.1])
    ax.set_box_aspect([4, 8, 1])
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.set_zlabel("Z")
    
    ax.set_title(f"Time step: {timestep}")

    pos = position[timestep]
    strains_t = strain[timestep]
    # NOTE(review): no vmin/vmax is passed here, unlike the initial scatter that
    # defines the colorbar -- confirm the per-frame colours match the colorbar range
    scatter = ax.scatter(pos[:, 0], pos[:, 1], pos[:, 2], c=strains_t, cmap=cmap)

    return scatter,


# One frame per time step of `strains`, 200 ms between frames
ani = FuncAnimation(fig, update, frames=range(strains.shape[0]), interval=200, blit=False)

# Save the animation as a GIF (comment this line out to skip saving)
ani.save("pred.gif", writer="pillow", fps=5, dpi=50)

In [8]:
import pickle

# Load every trajectory stored in the test archive and show the shape of the
# third entry of the first one.
path = '/home/jovyan/work/data_temp/fragment/Fragment/test.npz'
# The context manager closes the archive once all entries have been read.
# NOTE: allow_pickle=True unpickles the stored objects; only use it on files you trust.
with np.load(path, allow_pickle=True) as npz:
    data = [item for _, item in npz.items()]

print(data[0][2].shape)

(41, 246248)
