In [2]:
import os
import math
import json
from pathlib import Path

import numpy as np
import h5py

In [2]:
# Dataset-wide settings used by the conversion code in this notebook.
particles_number = 120
# Fraction of the rollouts that make_h5_dataset assigns to the "train" split.
train_valid_ratio = 0.9

# Number of coordinates per position: 3 when data_3d is set, otherwise 2.
data_3d = False
dim_number = 3 if data_3d else 2

# Input directory with the JSON rollouts and output directory for the .h5 files.
json_rollouts_directory = Path("data/test_data_sample")
h5_rollouts_directory = Path("h5_data")

In [7]:
# Module-level copy of the configured particle count.
# NOTE(review): none of the functions visible in this notebook read this global
# (preprocess_json_data_dict shadows it with a local of the same name).
max_particles_number = particles_number
# Time between two consecutive frames; make_h5_dataset divides position
# differences by it to obtain velocities.
dt = 1./30.

def combine_stat(stat_0, stat_1):
    """Merge two sets of per-dimension running statistics.

    Both arguments are array-likes of shape ``(D, 3)`` whose columns are
    ``mean``, ``std`` and ``n`` (sample count) for each of the ``D`` dimensions.

    Returns:
        An array of shape ``(D, 3)`` with the mean, standard deviation and
        count of the union of both sample sets. Dimensions for which both
        counts are zero yield ``mean == std == 0`` instead of NaN.
    """
    stat_0 = np.asarray(stat_0)
    stat_1 = np.asarray(stat_1)
    mean_0, std_0, n_0 = stat_0[:, 0], stat_0[:, 1], stat_0[:, 2]
    mean_1, std_1, n_1 = stat_1[:, 0], stat_1[:, 1], stat_1[:, 2]

    n = n_0 + n_1
    # Guard the 0/0 case (both inputs empty): every numerator below is then
    # zero as well, so dividing by 1 gives a clean zero statistic.
    safe_n = np.where(n > 0, n, 1.0)

    mean = (mean_0 * n_0 + mean_1 * n_1) / safe_n
    # Pooled variance: within-group variance plus the spread of each group
    # mean around the combined mean, both weighted by the group size.
    variance = (std_0**2 * n_0 + std_1**2 * n_1 +
                (mean_0 - mean)**2 * n_0 + (mean_1 - mean)**2 * n_1) / safe_n
    std = np.sqrt(variance)
    return np.stack([mean, std, n], axis=-1)

def filter_json_particles_data(json_data_dict, current_rollout_filename):
    """Check that every particle trajectory of a rollout has the full frame count.

    A trajectory is valid when it holds exactly ``end_frame + 1`` entries.
    If any trajectory is invalid, the rollout file name and the list of
    offending particle indices are printed. ``json_data_dict`` itself is
    never modified.

    Returns:
        True when all trajectories are complete, False otherwise.
    """
    expected_length = json_data_dict["end_frame"] + 1
    bad_ids = [
        index
        for index, trajectory in enumerate(json_data_dict["particle_positions"])
        if len(trajectory) != expected_length
    ]

    if not bad_ids:
        return True

    # Report which rollout is broken and which particles are incomplete.
    print(current_rollout_filename)
    print(bad_ids)
    return False

def preprocess_json_data_dict(json_rollouts_directory, particles_number = math.inf):
    """Delete rollout JSON files that contain incomplete particle trajectories.

    Every ``*.json`` file directly inside ``json_rollouts_directory`` is
    loaded and checked with ``filter_json_particles_data``; files that fail
    the check are removed from disk. Entries that are not regular ``.json``
    files (sub-directories such as ``.ipynb_checkpoints``, stray files) are
    skipped instead of aborting the whole pass.

    Args:
        json_rollouts_directory: Directory (``str`` or ``Path``) with the rollouts.
        particles_number: Currently unused; kept so existing callers keep
            working. A disabled earlier variant used it as an upper bound
            while truncating every rollout to the smallest particle count
            found across all rollouts.

    WARNING: this function is destructive - invalid rollout files are deleted.
    """
    json_rollouts_directory = Path(json_rollouts_directory)

    for current_rollout_filename in os.listdir(json_rollouts_directory):
        current_json_rollout_path = json_rollouts_directory / current_rollout_filename

        # Only regular .json files are rollouts; anything else would make
        # open()/json.load() fail and stop the preprocessing half-way.
        if current_json_rollout_path.suffix != ".json" or not current_json_rollout_path.is_file():
            continue

        print(current_rollout_filename)
        with open(current_json_rollout_path) as f:
            json_data_dict = json.load(f)

        if not filter_json_particles_data(json_data_dict, current_rollout_filename):
            os.remove(current_json_rollout_path)

def _pad_last_axis(array, target_dim):
    """Return ``array`` with its last axis zero-padded up to ``target_dim`` entries."""
    current_dim = array.shape[-1]
    if current_dim >= target_dim:
        # Already wide enough (e.g. the rollout is natively 3D): nothing to pad.
        return array
    padded = np.zeros(array.shape[:-1] + (target_dim,))
    padded[..., :current_dim] = array
    return padded


def make_h5_dataset(h5_rollouts_directory, json_rollouts_directory, train_valid_ratio):
    """Convert JSON rollouts into per-frame HDF5 files plus dataset statistics.

    For every ``*.json`` rollout in ``json_rollouts_directory`` the particle
    and object trajectories are stacked (particles first), velocities are
    derived by finite differences using the module-level ``dt``, and one
    ``<frame>.h5`` file with ``positions`` and ``velocities`` datasets
    (float32) is written per frame into
    ``<h5_rollouts_directory>/{train,valid}/<k>/``, where ``k`` is the number
    of entries already present in that split directory.

    After all rollouts are processed the function also writes:

    * ``stat.h5`` - ``positions`` and ``velocities`` arrays of shape
      ``(dim_number, 3)`` holding mean, std and sample count per dimension
      (accumulated with ``combine_stat``);
    * ``info.json`` - object/particle counts and border values taken from
      the first processed rollout.

    Args:
        h5_rollouts_directory: Output directory (``str`` or ``Path``); created if missing.
        json_rollouts_directory: Directory (``str`` or ``Path``) with the JSON rollouts.
        train_valid_ratio: Fraction of the rollouts (in directory-listing
            order) that goes to the ``train`` split; the rest goes to ``valid``.

    Relies on the module-level ``dim_number`` and ``dt``. Frame 0 has no
    predecessor, so its velocities are zero, and those zeros are part of the
    velocity statistics.
    """
    h5_rollouts_directory = Path(h5_rollouts_directory)
    json_rollouts_directory = Path(json_rollouts_directory)
    h5_rollouts_directory.mkdir(parents=True, exist_ok=True)

    positions_stat = np.zeros((dim_number, 3))
    velocities_stat = np.zeros((dim_number, 3))

    objects_number = None
    particles_number = None
    left_border = None
    right_border = None
    bottom_border = None
    top_border = None

    # List the directory once so the split size and the iteration agree, and
    # ignore anything that is not a regular .json file (e.g. the
    # .ipynb_checkpoints directory) instead of failing on it.
    rollout_filenames = [
        filename for filename in os.listdir(json_rollouts_directory)
        if filename.endswith(".json") and (json_rollouts_directory / filename).is_file()
    ]
    train_rollouts_number = int(len(rollout_filenames)*train_valid_ratio)

    for i, current_rollout_filename in enumerate(rollout_filenames):
        current_json_rollout_path = json_rollouts_directory / current_rollout_filename

        with open(current_json_rollout_path) as f:
            json_data_dict = json.load(f)

        particle_positions = np.array(json_data_dict["particle_positions"])
        object_positions = np.array(json_data_dict["object_positions"])

        # Dataset-level metadata comes from the first rollout only. Test
        # against None explicitly: counts and borders may legitimately be 0.
        if particles_number is None:
            objects_number = len(object_positions)
            particles_number = len(particle_positions)

            left_border = json_data_dict["left_border"]
            right_border = json_data_dict["right_border"]
            bottom_border = json_data_dict["bottom_border"]
            top_border = json_data_dict["top_border"]

        if dim_number > 2:
            # Lift lower-dimensional rollouts into dim_number-D by appending
            # zero coordinates; rollouts that already have dim_number
            # coordinates pass through unchanged.
            particle_positions = _pad_last_axis(particle_positions, dim_number)
            object_positions = _pad_last_axis(object_positions, dim_number)

        # Axis 0: particles followed by objects, axis 1: frames, axis 2: coordinates.
        positions = np.vstack((particle_positions, object_positions))

        # Backward finite differences; frame 0 keeps zero velocity. Computed
        # for every stored frame so no uninitialised values can reach the
        # statistics below.
        velocities = np.zeros(positions.shape)
        velocities[:, 1:] = np.diff(positions, axis=1) / dt

        # Sample count per dimension: every node in every frame.
        number = np.full(dim_number, positions.shape[0]*positions.shape[1], dtype=float)

        split_name = "train" if i < train_rollouts_number else "valid"
        split_directory = h5_rollouts_directory / split_name
        split_directory.mkdir(parents=True, exist_ok=True)
        # The next free index is the number of entries already in the split.
        current_h5_rollout_directory = split_directory / str(len(os.listdir(split_directory)))
        current_h5_rollout_directory.mkdir(parents=True, exist_ok=True)

        for frame in range(json_data_dict["end_frame"]+1):
            # The context manager closes the file even if a write fails.
            with h5py.File(current_h5_rollout_directory / (str(frame) + ".h5"), "w") as h5_f:
                h5_f.create_dataset("positions", data=positions[:, frame], dtype="f4")
                h5_f.create_dataset("velocities", data=velocities[:, frame], dtype="f4")

        current_positions_stat = np.stack([positions.mean(axis=(0, 1)), positions.std(axis=(0, 1)), number], axis=-1)
        current_velocities_stat = np.stack([velocities.mean(axis=(0, 1)), velocities.std(axis=(0, 1)), number], axis=-1)

        positions_stat = combine_stat(positions_stat, current_positions_stat)
        velocities_stat = combine_stat(velocities_stat, current_velocities_stat)

    with h5py.File(h5_rollouts_directory / "stat.h5", "w") as h5_f:
        h5_f.create_dataset("positions", data=positions_stat, dtype="f4")
        h5_f.create_dataset("velocities", data=velocities_stat, dtype="f4")

    dataset_info = {"objects_number": objects_number,
                    "particles_number": particles_number,
                    "left_border": left_border,
                    "right_border": right_border,
                    "bottom_border": bottom_border,
                    "top_border": top_border}
    with open(h5_rollouts_directory / "info.json", "w") as info_file:
        json.dump(dataset_info, info_file)

    print(dataset_info)

In [19]:
# Validate every rollout in the input directory. Rollout files with incomplete
# particle trajectories are deleted from disk by this call.
preprocess_json_data_dict(json_rollouts_directory)

483.json
250.json
1.json
540.json
374.json
395.json
31.json
501.json
152.json
108.json
30.json
533.json
467.json
211.json
168.json
219.json
40.json
138.json
42.json
238.json
14.json
528.json
93.json
8.json
196.json
13.json
318.json
420.json
61.json
337.json
433.json
328.json
158.json
566.json
418.json
291.json
129.json
102.json
597.json
130.json
210.json
80.json
49.json
112.json
185.json
441.json
6.json
363.json
277.json
95.json
47.json
392.json
396.json
167.json
287.json
472.json
235.json
275.json
249.json
52.json
294.json
194.json
227.json
557.json
492.json
445.json
51.json
460.json
160.json
111.json
146.json
148.json
572.json
443.json
187.json
15.json
110.json
24.json
513.json
55.json
164.json
233.json
524.json
265.json
183.json
41.json
54.json
213.json
105.json
121.json
244.json
193.json
375.json
90.json
338.json
307.json
126.json
439.json
220.json
204.json
366.json
144.json
593.json
583.json
38.json
226.json
493.json
505.json
21.json
39.json
465.json
115.json
48.json
246.json
71.j

In [8]:
# Convert the JSON rollouts into per-frame .h5 files (train/valid split) and
# write stat.h5 and info.json into the output directory.
make_h5_dataset(h5_rollouts_directory, json_rollouts_directory, train_valid_ratio)

127755
[[-7.91367773e-02  2.52118260e+00  1.27755000e+05]
 [ 1.54715917e-01  2.56570177e+00  1.27755000e+05]]
[[ 2.01199627e-02  2.83799128e-01  1.27755000e+05]
 [-2.32127138e-02  2.50359352e-01  1.27755000e+05]]
127755
[[-1.65551938e-01  2.57348607e+00  2.55510000e+05]
 [ 3.77180909e-02  2.60082554e+00  2.55510000e+05]]
[[3.15245130e-02 3.36880163e-01 2.55510000e+05]
 [9.04791320e-04 2.56358944e-01 2.55510000e+05]]


KeyboardInterrupt: 

In [32]:
# Load a single JSON rollout so its contents can be inspected in the following cells.
test_data_path = 'data/test_data_sample/0.json'
# NOTE(review): data_dict is set to None and not used again in the visible
# cells; the loaded rollout is stored under the name data_dict0.
data_dict = None
with open(test_data_path) as f:
    data_dict0 = json.load(f)

In [33]:
# Number of particle trajectories stored in the loaded rollout.
len(data_dict0["particle_positions"])

199

In [5]:
# Index of the last frame; complete trajectories hold end_frame + 1 entries.
data_dict0["end_frame"]

300

In [57]:
# Open one .h5 file from the DPI-Net DustBox data read-only for inspection.
# NOTE(review): the handle is not closed in the visible cells.
stat = h5py.File('../DPI-Net/data/data_DustBox/train/5/237.h5', 'r')

In [58]:
# Shape of the "velocities" dataset stored in the opened file.
np.array(stat["velocities"]).shape

(255, 2)

In [49]:
# Open the DustBox stat.h5 file read-only for inspection.
# NOTE(review): the handle is not closed in the visible cells.
stats = h5py.File('../DPI-Net/data/data_DustBox/stat.h5', 'r')

In [53]:
# Display the "velocities" dataset handle; its repr shows shape and dtype.
stats["velocities"]

<HDF5 dataset "velocities": shape (2, 3), type "<f8">

In [70]:
# Read positions and velocities from one .h5 file.
# NOTE: load_data is defined in a later cell of this notebook, so that cell
# has to be executed before this one.
data = load_data(["positions", "velocities"], '../DPI-Net/data/data_DustBox/train/5/237.h5')

In [73]:
# Unpack in the order the names were requested: positions, then velocities.
p, v = data

In [74]:
# Shape of the loaded velocities array.
v.shape

(255, 2)

In [5]:
# Location of the dataset metadata written alongside the converted .h5 data.
info_path = 'h5_data_fin/info.json'

# Frame count used to size the export loop and its buffers, and the dataset
# names read from every frame file.
time_step = 501
data_names = ['positions', 'velocities']

# Number of coordinates per position.
position_dim = 2

# Instance layout: "instance_idx" holds the index boundaries that the export
# code uses to split nodes into particles ([0, 200)) and objects ([200, 256)).
phases_dict = {
    "instance_idx": [0, 200, 200+56],
    "root_num": [[], []],
    "instance": ['dust', 'air_rigid'],
    "material": ['dust', 'air_rigid'],
}

def load_data(data_names, path):
    """Read the named datasets from one HDF5 file into NumPy arrays.

    Args:
        data_names: Sequence of dataset names to read.
        path: Path of the HDF5 file, opened read-only.

    Returns:
        A list with one ``np.array`` per entry of ``data_names``, in the same
        order.

    NOTE(review): datasets are looked up with ``hf.get``, so a name missing
    from the file is not reported as an error by this function - callers
    should verify the returned arrays if that matters.
    """
    # The context manager closes the file even when reading a dataset raises.
    with h5py.File(path, 'r') as hf:
        return [np.array(hf.get(name)) for name in data_names]

# Read the dataset metadata; only the four border values are used below.
with open(info_path) as f:
    info_dict = json.load(f)

# Skeleton of the JSON rollout to export: one initially empty per-frame list for
# every object and every particle, sized from the index ranges in phases_dict.
# NOTE(review): the loop below appends time_step - 1 entries to every trajectory,
# while filter_json_particles_data in this notebook expects end_frame + 1 entries.
# With end_frame = time_step - 3 the two differ by one - confirm which convention
# the consumer of test.json expects.
tmp_json_data = {'end_frame': time_step-3,
                'left_border': info_dict['left_border'],
                'right_border': info_dict['right_border'],
                'bottom_border': info_dict['bottom_border'],
                'top_border': info_dict['top_border'],
                'object_positions': [[]for _ in range(phases_dict["instance_idx"][2]-phases_dict["instance_idx"][1])], 
                'particle_positions': [[]for _ in range(phases_dict["instance_idx"][1]-phases_dict["instance_idx"][0])],
                'particle_velocities': [[]for _ in range(phases_dict["instance_idx"][1]-phases_dict["instance_idx"][0])]}

# Walk over consecutive frame pairs (step, step + 1) of validation rollout 0.
for step in range(time_step - 1):
    data_path = 'h5_data_fin/valid/0/'+str(step) + '.h5'
    data_nxt_path = 'h5_data_fin/valid/0/'+str(step + 1) + '.h5'

    # data / data_nxt are [positions, velocities] of the current / next frame.
    data = load_data(data_names, data_path)
    data_nxt = load_data(data_names, data_nxt_path)
    # velocities_nxt = data_nxt[1]

    if step == 0:
        # First iteration only: size the buffers from the first frame.
        positions, velocities = data
        # No nodes are counted as shapes here, so n_particles equals the total
        # node count of the frame (particles and objects together).
        n_shapes = 0
        scene_params = np.zeros(1)

        count_nodes = positions.shape[0]
        n_particles = count_nodes - n_shapes
        print("n_particles", n_particles)
        print("n_shapes", n_shapes)

        # Per-step buffers: positions of the current frame and velocities of the next frame.
        p_gt = np.zeros((time_step - 1, n_particles + n_shapes, position_dim))
        v_nxt_gt = np.zeros((time_step - 1, n_particles + n_shapes, position_dim))

        # Allocated but never written in this cell.
        p_pred = np.zeros((time_step - 1, n_particles + n_shapes, position_dim))

    # p_gt[step] = data[0][:, -position_dim:]
    # v_nxt_gt[step] = data_nxt[1][:, -position_dim:]

    # Positions come from the current frame file, velocities from the next one.
    p_gt[step] = data[0]
    v_nxt_gt[step] = data_nxt[1]

    # print(step, np.sum(np.abs(v_nxt_gt[step, :args.n_particles])))

    # positions = positions + data[1] * args.dt

    # for i, position in enumerate(data[0][:phases_dict["instance_idx"][1]]):
    #     tmp_json_data['particle_positions'][i].append(position.tolist())
    # for i, position in enumerate(data[0][phases_dict["instance_idx"][1]:phases_dict["instance_idx"][2]]):
    #     tmp_json_data['object_positions'][i].append(position.tolist())
    # for i, velocity in enumerate(data_nxt[1][:phases_dict["instance_idx"][1]]):
    #     tmp_json_data['particle_velocities'][i].append(velocity.tolist())

    # Append this step to each trajectory: nodes below instance_idx[1] are
    # particles, nodes in [instance_idx[1], instance_idx[2]) are objects.
    for i, position in enumerate(p_gt[step, :phases_dict["instance_idx"][1]]):
        tmp_json_data['particle_positions'][i].append(position.tolist())
    for i, position in enumerate(p_gt[step, phases_dict["instance_idx"][1]:phases_dict["instance_idx"][2]]):
        tmp_json_data['object_positions'][i].append(position.tolist())
    for i, velocity in enumerate(v_nxt_gt[step, :phases_dict["instance_idx"][1]]):
        tmp_json_data['particle_velocities'][i].append(velocity.tolist())
    
# Write the assembled rollout to test.json in the current working directory.
file = open("test.json", "w")
file.write(json.dumps(tmp_json_data))
file.close()

n_particles 256
n_shapes 0
