##  Load the data

This notebook walks through the logic of `data.py` step by step in order to understand it.

It loads samples from the `PTVflow3D` dataset.

In [34]:
import os
import sys
import glob
import numpy as np
from scipy.spatial.transform import Rotation
from torch.utils.data import Dataset

In [35]:
# Load one training sample and inspect the arrays stored in the .npz archive.
fp = 'data/training_set/PTVflow3D_norm/TRAINisotropic1024coarse_s025_t003_00001.npz'
data = np.load(fp)
keys = data.files
print('keys of data:',keys)
print('shape of pos1,pos2 and flow:',data['pos1'].shape,data['pos2'].shape,data['flow'].shape)

# Materialise the three arrays as float32 (the same cast the Dataset class applies).
# The cells below operate on `pos1`, `pos2` and `flow`, so they must be defined here
# for the notebook to run from a fresh kernel.
pos1 = data['pos1'].astype('float32')
pos2 = data['pos2'].astype('float32')
flow = data['flow'].astype('float32')

keys of data: ['pos1', 'pos2', 'flow']
shape of pos1,pos2 and flow: (2048, 3) (2048, 3) (2048, 3)


## Deal with training data

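Note that `pos1` and `pos2` are sampled with independent random indices (`sample_idx1` and `sample_idx2`), so row *i* of the sampled `pos2` does not correspond to row *i* of the sampled `pos1`.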

Note that `flow` is sampled with the same indices as `pos1` (`sample_idx1`), so each flow vector stays paired with its `pos1` point.

Then use the center (mean) of `pos1` as the origin for both `pos1` and `pos2`. `flow` holds relative values (displacements), so the center does not need to be subtracted from it.

In [43]:
# Training-style sampling: draw `npoints` distinct row indices for each cloud.
npoints = 512
n1 = len(pos1)
n2 = len(pos2)
sample_idx1 = np.random.choice(n1, npoints, replace=False)
sample_idx2 = np.random.choice(n2, npoints, replace=False)

# pos1 and flow share sample_idx1; pos2 uses its own independent indices.
pos1 = pos1[sample_idx1, :]
flow = flow[sample_idx1, :]
pos2 = pos2[sample_idx2, :]

In [44]:
# Translation: shift both clouds so that the mean of pos1 becomes the origin.
# flow is left untouched.
pos1_center = pos1.mean(axis=0)
pos1 -= pos1_center
pos2 -= pos1_center

## Deal with test data

For test data there is no random sampling: `pos1`, `pos2` and `flow` all keep their first `npoints` points.

As with the training data, the center of `pos1` is used as the origin.

In [45]:
# Test-style selection: keep the leading `npoints` rows of every array.
pos1 = pos1[:npoints]
pos2 = pos2[:npoints]
flow = flow[:npoints]

# Re-centre on the mean of the selected pos1 points; flow is left untouched.
pos1_center = pos1.mean(axis=0)
pos1 -= pos1_center
pos2 -= pos1_center

## Understand the Dataset Class

In [11]:
class FluidflowDataset3D(Dataset):
    """Paired 3D point clouds with per-point flow, read from ``.npz`` files.

    Every file is expected to contain the arrays ``pos1``, ``pos2`` and
    ``flow``. Files are discovered with ``glob`` under ``root``:
    ``TRAIN*.npz`` when ``partition == 'train'`` and ``TEST*.npz`` for any
    other partition value. Loaded samples are kept in an in-memory cache
    (at most ``cache_size`` entries).

    Args:
        npoints: Number of points returned per cloud.
        root: Directory that holds the ``.npz`` files.
        partition: ``'train'`` for random sub-sampling of the training files;
            anything else selects the test files and takes the first
            ``npoints`` points.
    """

    def __init__(self, npoints=2048, root='data_preprocessing/data_processed_maxcut_35_both_mask_20k_2k', partition='train'):
        self.npoints = npoints
        self.partition = partition
        self.root = root

        pattern = 'TRAIN*.npz' if self.partition == 'train' else 'TEST*.npz'
        self.datapath = glob.glob(os.path.join(self.root, pattern))

        self.cache = {}
        self.cache_size = 30000

        # Drop one known-bad sample that contains NaN values.
        self.datapath = [d for d in self.datapath if 'TRAIN_C_0140_left_0006-0' not in d]
        # Sort so that index -> file mapping does not depend on glob order.
        self.datapath.sort()
        print('length of ',self.partition, ': ',len(self.datapath))

    def __len__(self):
        """Return the number of sample files in this partition."""
        return len(self.datapath)

    def __getitem__(self, index):
        """Return ``(pos1, pos2, flow)`` for the sample at ``index``.

        * Train partition: ``npoints`` rows are drawn without replacement,
          independently for ``pos1`` and ``pos2``; ``flow`` uses the same
          indices as ``pos1``. ``np.random.choice`` raises ``ValueError`` if a
          cloud has fewer than ``npoints`` rows.
        * Other partitions: the first ``npoints`` rows of each array are used
          (``flow`` is then a view into the cached array).

        ``pos1`` and ``pos2`` are translated so that the mean of the selected
        ``pos1`` points is the origin; ``flow`` is not translated.
        """
        if index in self.cache:
            pos1, pos2, flow = self.cache[index]
        else:
            # e.g. data/training_set/PTVflow3D_norm/TRAINisotropic1024coarse_s025_t003_00001.npz
            fn = self.datapath[index]
            with open(fn, 'rb') as fp:
                data = np.load(fp)
                pos1 = data['pos1'].astype('float32')
                pos2 = data['pos2'].astype('float32')
                flow = data['flow'].astype('float32')

            # NOTE: the reference code carries disabled options that are not used
            # here: intensity ("color") channels, a validity mask, and per-dataset
            # rescaling keyed on 's025' / 's050' / 's100' / 'beltrami' in the file name.

            if len(self.cache) < self.cache_size:
                self.cache[index] = (pos1, pos2, flow)

        if self.partition == 'train':
            n1 = pos1.shape[0]
            sample_idx1 = np.random.choice(n1, self.npoints, replace=False)
            n2 = pos2.shape[0]
            sample_idx2 = np.random.choice(n2, self.npoints, replace=False)

            pos1 = pos1[sample_idx1, :]
            pos2 = pos2[sample_idx2, :]
            flow = flow[sample_idx1, :]
        else:
            pos1 = pos1[:self.npoints, :]
            pos2 = pos2[:self.npoints, :]
            flow = flow[:self.npoints, :]

        pos1_center = np.mean(pos1, 0)
        # Subtract out of place: in the non-train branch pos1/pos2 are views of the
        # cached arrays, and an in-place `-=` would silently overwrite the cache.
        pos1 = pos1 - pos1_center
        pos2 = pos2 - pos1_center
        return pos1, pos2, flow

In [12]:
# Number of points requested from each cloud per sample.
num_points = 512
# Sub-folder of data/training_set holding the .npz files (alternative: 'data_sample').
folder = 'PTVflow3D_norm'
dataset_path = os.path.join('data/training_set', folder)

train_dataset = FluidflowDataset3D(
    npoints=num_points,
    root=dataset_path,
    partition='train',
)
# Any partition other than 'train' makes the class pick up the TEST*.npz files.
val_dataset = FluidflowDataset3D(
    npoints=num_points,
    root=dataset_path,
    partition='test',
)
# The same object serves as validation and test set.
test_dataset = val_dataset

length of  train :  1
length of  test :  10
