In [87]:
import numpy as np
import math
import xml.etree.ElementTree as ET
import vtk
from vtk.util import numpy_support as VN

In [88]:
# Parse the .vthb index file as XML; `root` is the top-level element that the
# later cells walk to find the AMR blocks and their per-dataset files.
filename = 'amr_nyx.vthb'
tree = ET.parse(filename)
root = tree.getroot()

In [89]:
# Show the tag of the root XML element.
root.tag


'VTKFile'

In [90]:
# List the tag and attribute dictionary of every direct child of the root element.
for child in root:
    print(child.tag, child.attrib)

vtkOverlappingAMR {'origin': '0 0 0', 'grid_description': 'XYZ'}
FieldData {}


In [105]:
def create_acceptance_histogram(frac, count, orig_counts, nbins=32, verbose=True):
    """Turn a histogram into per-bin acceptance probabilities.

    A budget of ``frac * sum(count)`` samples is spread over the bins, visiting
    them from the least to the most populated.  Each bin receives either its
    whole population or an equal share of the budget that is still unassigned,
    whichever is smaller.  The per-bin acceptance is the assigned amount
    divided by the bin population; it is then rescaled so that the expected
    number of accepted items, measured against ``orig_counts``, equals
    ``frac * sum(orig_counts)``.

    Parameters
    ----------
    frac : float
        Fraction of the data that should be kept.
    count : array_like
        Histogram used to distribute the budget (one entry per bin).
    orig_counts : array_like
        Histogram used for the final rescaling (same length as ``count``).
    nbins : int, optional
        Number of bins the budget is divided over.  It should equal
        ``len(count)``.
    verbose : bool, optional
        When True (the default) the intermediate values are printed.

    Returns
    -------
    numpy.ndarray
        Acceptance value per bin.  Empty bins get 0.  Because of the final
        rescaling individual entries may exceed 1.
    """
    count = np.asarray(count)
    orig_counts = np.asarray(orig_counts)

    tot_samples = frac*np.sum(count)
    if verbose:
        print('looking for',tot_samples,'samples')

    # bin index -> population, and the bin indices ordered by population
    # (Python's sort is stable, so ties keep their original bin order)
    my_dict = dict(enumerate(count))
    sorted_count = sorted(my_dict, key=my_dict.get)
    if verbose:
        print(my_dict)
        print(sorted_count)

    ## now distribute across bins
    target_bin_vals = int(tot_samples/nbins)
    if verbose:
        print('ideal cut:',target_bin_vals)
    new_count = np.copy(count)
    remain_tot_samples = tot_samples
    for ind, i in enumerate(sorted_count, start=1):
        # a bin can never contribute more than it holds
        val = min(my_dict[i], target_bin_vals)
        new_count[i] = val
        remain_tot_samples = remain_tot_samples-val
        if ind < nbins:
            # share what is left equally between the bins not yet visited
            target_bin_vals = int(remain_tot_samples/(nbins-ind))
    if verbose:
        print(new_count)
        print(count)

    # Divide only where the bin is populated.  The previous code divided
    # everywhere and discarded the result of np.nan_to_num, so a single empty
    # bin (0/0) made every returned value NaN.
    acceptance_hist = np.divide(new_count, count,
                                out=np.zeros(count.shape, dtype=float),
                                where=count != 0)

    ## new code for AMR
    now_expected_samples = np.sum(acceptance_hist*orig_counts)
    orig_expected_samples = frac*np.sum(orig_counts)
    # skip the rescale when it would be a division by zero (nothing selected)
    if now_expected_samples > 0 and orig_expected_samples > 0:
        cur_factor = now_expected_samples/orig_expected_samples
        acceptance_hist = acceptance_hist/cur_factor
    return acceptance_hist

def sample_using_acceptance(data,acceptance_hist,bound_min, bound_max, rng=None):
    """Randomly select elements of ``data`` using per-bin acceptance values.

    Every value is mapped to a bin of a uniform histogram spanning
    ``[bound_min, bound_max]`` and is selected when a uniform random number in
    ``[0, 1)`` is smaller than the acceptance value of that bin.

    Parameters
    ----------
    data : array_like
        Values to sample from.
    acceptance_hist : array_like
        Acceptance value per bin; its length defines the number of bins.
    bound_min, bound_max : float
        Range covered by the histogram.  Values outside the range are assigned
        to the first / last bin.
    rng : numpy.random.Generator, optional
        Random source.  When omitted the global ``np.random`` state is used,
        as before.

    Returns
    -------
    int_inds : tuple of numpy.ndarray
        Indices of the selected elements, as returned by ``np.where``.
    stencil : numpy.ndarray
        Array shaped (and typed) like ``data`` holding 1 for selected
        elements and 0 otherwise.

    Raises
    ------
    ValueError
        If ``acceptance_hist`` is empty.
    """
    s = np.asarray(data)
    acceptance_hist = np.asarray(acceptance_hist, dtype=float)
    # take the bin count from the histogram itself instead of a notebook global
    nbins = acceptance_hist.size
    if nbins == 0:
        raise ValueError('acceptance_hist must contain at least one bin')

    span = bound_max - bound_min
    if span > 0:
        # same arithmetic as the former per-element loop, applied to the whole array
        bin_ids = (nbins * (s - bound_min) / span).astype(int)
    else:
        # degenerate range: everything falls in the first bin
        bin_ids = np.zeros(s.shape, dtype=int)
    # the value bound_max lands on index nbins; out-of-range values are clamped too
    bin_ids = np.clip(bin_ids, 0, nbins - 1)
    # always float, so integer input data cannot truncate the probabilities
    prob_vals = acceptance_hist[bin_ids]

    if rng is None:
        rand_vals = np.random.random_sample(s.shape)
    else:
        rand_vals = rng.random(s.shape)

    stencil = np.zeros_like(s)
    stencil[rand_vals<prob_vals]=1
    # now use this stencil array to store the locations
    int_inds = np.where(stencil>0.5)
    return int_inds,stencil

def read_from_VTI_file(infile):
    """Read a VTK image-data (.vti) file.

    Returns the first cell-data array together with the coordinates of the
    corresponding cell centres, so that ``x[i], y[i], z[i]`` is the location
    of ``vals_np[i]``.

    The previous implementation returned the coordinates of the grid *points*
    while the values are stored per *cell*.  The two have different lengths
    and different linear orderings, so indexing the coordinates with cell
    indices gave the wrong locations.

    Parameters
    ----------
    infile : str
        Path of the .vti file.

    Returns
    -------
    x, y, z : numpy.ndarray
        1-D float arrays with one cell-centre coordinate per cell, ordered
        with x varying fastest, then y, then z.
    vals_np : numpy.ndarray
        The first cell-data array converted to NumPy.

    Raises
    ------
    ValueError
        If the file holds no cell-data array.
    """
    reader = vtk.vtkXMLImageDataReader()
    reader.SetFileName(infile)
    reader.Update()
    data = reader.GetOutput()

    cell_data = data.GetCellData()
    if cell_data.GetNumberOfArrays() == 0:
        raise ValueError('no cell data array found in ' + str(infile))
    vals_np = VN.vtk_to_numpy(cell_data.GetArray(0))

    # Cell centres follow from the regular grid description.
    # NOTE(review): assumes an axis-aligned image (identity direction matrix) - confirm.
    extent = data.GetExtent()    # (x_lo, x_hi, y_lo, y_hi, z_lo, z_hi) in point indices
    origin = data.GetOrigin()
    spacing = data.GetSpacing()
    axis_centres = []
    for axis in range(3):
        lo, hi = extent[2*axis], extent[2*axis + 1]
        if hi > lo:
            cell_index = np.arange(lo, hi) + 0.5
        else:
            # flat axis (2-D / 1-D image): a single layer located on the plane itself
            cell_index = np.array([float(lo)])
        axis_centres.append(origin[axis] + cell_index*spacing[axis])

    # 'ij' indexing over (z, y, x) followed by a C-order ravel makes x the fastest axis
    zz, yy, xx = np.meshgrid(axis_centres[2], axis_centres[1], axis_centres[0],
                             indexing='ij')
    return xx.ravel(), yy.ravel(), zz.ravel(), vals_np

In [101]:
# First pass over the AMR hierarchy: record the spacing of every Block, the
# file of every DataSet, the total number of values and the global value range.
spacings = []
files = []
tot_pts = 0
# start from +/-inf so any real data value replaces the initial bounds
glob_min = np.inf
glob_max = -np.inf

for neighbor in root.iter('Block'):
    # only the first component of the 'spacing' attribute is used
    spacing = float(neighbor.attrib['spacing'].split()[0])
    spacings.append(spacing)
    for data in neighbor.iter('DataSet'):
        file = data.attrib['file']
        files.append(file)
        amr_box = data.attrib['amr_box']
        x,y,z,var_vals = read_from_VTI_file(file)
        max_val = np.max(var_vals)
        min_val = np.min(var_vals)
        glob_max = max(glob_max, max_val)
        glob_min = min(glob_min, min_val)

        tot_pts = tot_pts + np.size(var_vals)

print(glob_max, glob_min, tot_pts)

70163645943.81984 1070439457.9953088 569344


In [102]:
# Express every block spacing as an integer multiple of the smallest spacing.
np_spacings = np.array(spacings)
min_val = np_spacings.min()
int_spacing = np.rint(np_spacings / min_val).astype(int)

# Histogram resolution and the two (initially empty) accumulators:
# hist_arr for the weighted counts, orig_hist_arr for the raw counts.
nbins = 32
hist_arr = np.zeros(nbins, dtype=np.float64)
orig_hist_arr = np.zeros(nbins, dtype=np.float64)

In [103]:
# Second pass: accumulate the global histograms over all datasets.
# Both accumulators are reset here so that re-running this cell does not add
# the same counts a second time.
hist_arr = np.zeros(nbins, dtype='float64')
orig_hist_arr = np.zeros(nbins, dtype='float64')

for level, neighbor in enumerate(root.iter('Block')):
    # weight of one value of this block = its spacing ratio raised to the power 1
    # NOTE(review): the original comment hints that an exponent of 3 was also considered.
    mult_fact = (int_spacing[level])**1
    for data in neighbor.iter('DataSet'):
        # `files` was already filled by the first pass; appending again would duplicate it
        file = data.attrib['file']
        x,y,z,var_vals = read_from_VTI_file(file)
        arr, edgs = np.histogram(var_vals,bins=nbins, range = [glob_min,glob_max])
        hist_arr = hist_arr + arr*mult_fact
        orig_hist_arr = orig_hist_arr + arr
        print(mult_fact, np.sum(arr),np.sum(arr*mult_fact))

16 32768 524288
16 32768 524288
16 32768 524288
16 32768 524288
16 32768 524288
16 32768 524288
16 32768 524288
16 32768 524288
8 16384 131072
8 16384 131072
8 8192 65536
8 8192 65536
8 16384 131072
8 8192 65536
4 16384 65536
4 16384 65536
4 8192 32768
4 8192 32768
4 16384 65536
4 8192 32768
2 16384 32768
2 16384 32768
2 16384 32768
2 16384 32768
2 16384 32768
2 16384 32768
1 16384 16384
1 16384 16384
1 8192 8192
1 8192 8192
1 8192 8192
1 4096 4096


In [107]:
# Build the acceptance histogram and compare the expected number of samples
# against the plain `sampling_rate` target.
print("scaled histogram:",hist_arr, np.sum(hist_arr))
print("original histogram:",orig_hist_arr, np.sum(orig_hist_arr))
sampling_rate = 0.01
# pass the configured rate and bin count instead of repeating the literals
accept_hist = create_acceptance_histogram(sampling_rate,hist_arr,orig_hist_arr,nbins=nbins)
print(accept_hist)
orig_expected_samples = np.sum(orig_hist_arr*sampling_rate)
cur_exepected_samples = np.sum(orig_hist_arr*accept_hist)
print(orig_expected_samples,cur_exepected_samples,np.sum(orig_hist_arr),np.sum(hist_arr))



scaled histogram: [6.017600e+04 1.532524e+06 1.972372e+06 8.603980e+05 3.728490e+05
 1.890370e+05 1.022500e+05 6.558000e+04 4.697100e+04 3.596100e+04
 2.725400e+04 2.200000e+04 1.653900e+04 9.306000e+03 6.108000e+03
 4.168000e+03 3.092000e+03 2.511000e+03 1.682000e+03 1.055000e+03
 8.780000e+02 7.470000e+02 5.680000e+02 5.460000e+02 5.900000e+02
 7.800000e+02 6.360000e+02 3.300000e+02 8.200000e+01 5.600000e+01
 3.000000e+01 1.200000e+01] 5337088.0
original histogram: [3.7680e+03 9.8478e+04 1.4047e+05 8.5489e+04 5.9057e+04 3.9698e+04
 2.4952e+04 2.1026e+04 1.9045e+04 1.8086e+04 1.5017e+04 1.2564e+04
 9.9300e+03 6.1740e+03 4.2270e+03 2.9810e+03 2.0960e+03 1.7330e+03
 1.1330e+03 7.4300e+02 5.2900e+02 4.9200e+02 3.0700e+02 2.6700e+02
 2.5100e+02 3.2800e+02 2.7900e+02 1.4200e+02 3.7000e+01 2.6000e+01
 1.3000e+01 6.0000e+00] 569344.0
looking for 53370.880000000005 samples
{0: 60176.0, 1: 1532524.0, 2: 1972372.0, 3: 860398.0, 4: 372849.0, 5: 189037.0, 6: 102250.0, 7: 65580.0, 8: 46971.0, 9: 3

In [108]:
# Third pass: draw the samples from every dataset and save the selected
# coordinates and values.
# Fixed seed so that Restart & Run All reproduces the same sample set.
SAMPLING_SEED = 0
np.random.seed(SAMPLING_SEED)

samps_taken = 0
gx_parts = []
gy_parts = []
gz_parts = []
gval_parts = []
for neighbor in root.iter('Block'):
    for data in neighbor.iter('DataSet'):
        # `files` was already filled by the first pass; it is not extended again here
        file = data.attrib['file']
        x,y,z,var_vals = read_from_VTI_file(file)

        inds, stencil = sample_using_acceptance(var_vals, accept_hist,glob_min, glob_max)
        samps_taken += np.sum(stencil)

        # collect the per-dataset pieces; they are joined once after the loop
        # instead of re-copying the growing arrays with np.append
        gval_parts.append(var_vals[inds])
        gx_parts.append(x[inds])
        gy_parts.append(y[inds])
        gz_parts.append(z[inds])

# The leading empty float64 array keeps the result float64 (what repeated
# np.append onto [] produced) and makes the no-dataset case return an empty array.
gx_list = np.concatenate([np.empty(0)] + gx_parts)
gy_list = np.concatenate([np.empty(0)] + gy_parts)
gz_list = np.concatenate([np.empty(0)] + gz_parts)
gval_list = np.concatenate([np.empty(0)] + gval_parts)

print("samples actually taken:",samps_taken)
np.save('x_vals.npy',gx_list)
np.save('y_vals.npy',gy_list)
np.save('z_vals.npy',gz_list)
np.save('var_vals.npy',gval_list)
        

local histogram: [  106  4900 12302  8552  3830  1597   797   324   157    90    39    38
    14     8     6     2     2     1     0     0     0     0     1     0
     0     2     0     0     0     0     0     0]
40.0
local histogram: [  586 14350 12281  3746  1118   410   172    52    28    11     7     5
     1     1     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0]
16.0
local histogram: [  289 10428 14280  5433  1450   537   213    96    33     9     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0]
17.0
local histogram: [  374 12116 14425  4465  1018   239    84    29     8     7     3     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0]
21.0
local histogram: [  637 11901 14351  4310  1081   286   123    39    24     7     5     2
     2     0     0     0     0     0  