In [1]:
import numpy as np
import pandas as pd 
import h5py
import time 

In [5]:
#getting a dataframe first
def get_2D_data_from_h5_filtered_np(h5_path, part_name, Slice_name):
    #opening h5 and getting the data
    start_time = time.time()

    with h5py.File(h5_path,'r') as h5:

        Y_Axis = np.array(h5[part_name][Slice_name]['Y-Axis'][:]).astype(int)
        Area = np.array(h5[part_name][Slice_name]['Area'][:]).astype(int)
        Intensity = np.array(h5[part_name][Slice_name]['Intensity'][:]).astype(int)
        X_Axis = np.array(h5[part_name][Slice_name]['X-Axis'][:]).astype(int)

        X_Axis_size = X_Axis.size
        Y_Axis_size = Y_Axis.size
        Area_size = Area.size
        Intensity_size = Intensity.size

        #if dimensions aren't equal the following code block is entered
        if not X_Axis_size == Y_Axis_size == Area_size == Intensity_size:

            #determine the lowest value among the different sizes
            size_arr = np.array([X_Axis_size, Y_Axis_size, Area_size, Intensity_size])
            min_size = size_arr.min()

            if X_Axis_size != min_size:
                diff_size_x = X_Axis_size - min_size #calculating the difference between the actual value and the minimum and substracting it from the array
                X_Axis_new = np.delete(X_Axis, -diff_size_x)
                X_Axis = X_Axis_new
                X_Axis_size = X_Axis.size

            if Y_Axis_size != min_size:
                diff_size_y = Y_Axis_size - min_size
                Y_Axis_new = np.delete(Y_Axis, -diff_size_y)
                Y_Axis = Y_Axis_new
                Y_Axis_size = Y_Axis.size

            if Area_size != min_size:
                diff_size_area = Area_size - min_size
                Area_new = np.delete(Area, -diff_size_area)
                Area = Area_new
                Area_size = Area.size

            if Intensity_size != min_size:
                diff_size_intensity = Intensity_size - min_size
                Intensity_new = np.delete(Intensity, -diff_size_intensity)
                Intensity = Intensity_new
                Intensity_size = Intensity.size

            #by reducing all the dimensions to the minimum equal dimensions are guaranteed
            #there is a risk of deleting more than just one datapoint without noticing -> maybe add an alert after more than 5(?) while iterations
        print(str(X_Axis_size)+ ' datapoints found')
        combos = np.stack((X_Axis, Y_Axis, Area, Intensity), axis=-1)

        #filtering out the datapoints where area and intensity are =0
        area_zeros = np.where(combos[:,2]== 0)
        intensity_zeros = np.where(combos[:,3]==0)
        zero_area_intensity_indices = np.intersect1d(area_zeros, intensity_zeros) #array of indices where area AND intensity are = 0

        #deleting all the datapoints where area AND intensity are = 0
        combos_wo_only_zeros = np.delete(combos, zero_area_intensity_indices, axis=0)
        print(str(combos_wo_only_zeros.shape[0]) + ' datapoints where area != 0 AND intensity != 0')

        combos_wo_only_zeros_unique, unique_indices = np.unique(combos_wo_only_zeros[:,[0,1]],axis=0, return_index = True)
        combos_unique = combos_wo_only_zeros[unique_indices]
        print(str(combos_unique.shape[0]) + ' unique datapoints where area != 0 AND intensity != 0')

        Index_range = np.arange(combos_wo_only_zeros.shape[0])
        indices_of_interest = np.setdiff1d(Index_range, unique_indices) #all the indices belonging to non unique x,y-combinations

        combo_processed_array = np.empty([0,4],dtype= int)
        start_time = time.time()
        combos_wo_only_zeros_copy = np.copy(combos_wo_only_zeros)
        index_counter = 0
        shape_counter = 0
        indices_list = []

        print("vor iterieren %s seconds ---" % (time.time() - start_time))
        for index in indices_of_interest:
            xy_combo = combos_wo_only_zeros[:,[0,1]][index]
            if np.where((combo_processed_array[:,0] == xy_combo[0])*(combo_processed_array[:,1] == xy_combo[1]))[0].size == 0:
                index_counter += 1
                xy_combo = combos_wo_only_zeros[:,[0,1]][index]
                indices_relevant = np.where((combos_wo_only_zeros[:,0] == xy_combo[0])*(combos_wo_only_zeros[:,1] == xy_combo[1]))[0]
                max_area_of_combo = np.amax(combos_wo_only_zeros[:,2][indices_relevant])
                max_intensity_of_combo = np.amax(combos_wo_only_zeros[:,3][indices_relevant])

                max_combos = np.stack((xy_combo[0], xy_combo[1], max_area_of_combo, max_intensity_of_combo), axis=-1)

                combos_wo_only_zeros_copy = np.vstack((combos_wo_only_zeros_copy, max_combos))
                shape_counter += indices_relevant.shape[0]
                indices_list.append(list(indices_relevant))

                combo_processed_array =  np.vstack((combo_processed_array, max_combos))

        indices_relevant = np.hstack(indices_list)
        combos_wo_only_zeros_copy = np.delete(combos_wo_only_zeros_copy, indices_relevant, axis = 0)
        #df = pd.DataFrame(combos_wo_only_zeros_copy, columns=['x','y','area','intensity'])
        print("df creation took %s seconds ---" % (time.time() - start_time))
        return(combos_wo_only_zeros_copy)

In [15]:
data_array = get_2D_data_from_h5_filtered_np('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 'Slice00064')

143642 datapoints found
81568 datapoints where area != 0 AND intensity != 0
64081 unique datapoints where area != 0 AND intensity != 0
vor iterieren 0.0002865791320800781 seconds ---
df creation took 8.063780069351196 seconds ---


In [16]:
h5_path = '/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5'
Slice_name = 'Slice00064'
part_name = '0_00003_Canti3_cls'

In [17]:
def get_true_min_maxX (h5_path, part_name, max_slice_number):

    minX = []
    maxX = []
    for num_slice in range(max_slice_number):
        with h5py.File(h5_path,'r') as h5:
            X_Axis = h5[part_name]['Slice'+str("{:05d}".format(num_slice+1))]['X-Axis']
            x_axis_array = np.array(X_Axis)
            minX.append(x_axis_array.min())
            maxX.append(x_axis_array.max())
    minX_array = np.asarray(minX)
    maxX_array = np.asarray(maxX)
    return minX_array.min(), maxX_array.max()

In [18]:
def get_true_min_maxY (h5_path, part_name, max_slice_number):

    minY = []
    maxY = []
    for num_slice in range(max_slice_number):
        with h5py.File(h5_path,'r') as h5:
            Y_Axis = h5[part_name]['Slice'+str("{:05d}".format(num_slice+1))]['Y-Axis']
            y_axis_array = np.array(Y_Axis)
            minY.append(y_axis_array.min())
            maxY.append(y_axis_array.max())
    minY_array = np.asarray(minY)
    maxY_array = np.asarray(maxY)
    return minY_array.min(), maxY_array.max()

In [19]:
minX, maxX = get_true_min_maxX ('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 142)
minY, maxY = get_true_min_maxY ('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 142)

In [20]:
def dock_array_to_zero(array, minX, minY):
    if minX >= 0 and minY >=0:
        array[:,0] = array[:,0] - minX
        array[:,1] = array[:,1] - minY
    elif minX < 0 and minY <0:
        array[:,0] = array[:,0] + abs(minX)
        array[:,1] = array[:,1] + abs(minY)
    elif minX >= 0 and minY <0:
        array[:,0] = array[:,0] - minX
        array[:,1] = array[:,1] + abs(minY)
    elif minX < 0 and min >= 0:
        array[:,0] = array[:,0] + abs(minX)
        array[:,1] = array[:,1] - minY
    return array

In [21]:
array_docked = dock_array_to_zero(data_array, minX, minY)
array_docked

array([[4911, 3528,   59,    0],
       [4896, 3528,  118,  506],
       [4889, 3528,  166,  584],
       ...,
       [4846, 8977,  230,  537],
       [4927, 8937,  684, 1524],
       [4926, 8977,  254,  453]])

In [40]:
def create_single_voxel_array (current_n_vox_x, current_n_vox_y, voxel_size, array):
    x_min_voxel = current_n_vox_x * voxel_size
    x_max_voxel = (current_n_vox_x + 1)*voxel_size
    y_min_voxel = current_n_vox_y * voxel_size
    y_max_voxel = (current_n_vox_y + 1)*voxel_size

    x_axis_voxel =  np.repeat(np.arange(x_min_voxel,x_max_voxel,1),voxel_size)
    y_axis_voxel =  np.tile(np.arange(y_min_voxel,y_max_voxel,1),voxel_size)
    Zero_array = np.zeros(voxel_size*voxel_size, dtype=int)
    
    voxel_array = np.empty([0,4],dtype= int)
    
    #check if datapoints in array are in the region of the voxel
    indices_relevant = np.where((array_docked[:,0] >= x_min_voxel)*(array_docked[:,0] < x_max_voxel)*(array_docked[:,1] >= y_min_voxel)*(array_docked[:,1] < y_max_voxel))[0]
    
    if indices_relevant.size != 0: 
        relevant_array = array[indices_relevant]
        relevant_array[:,0] = relevant_array[:,0] - x_min_voxel
        relevant_array[:,1] = relevant_array[:,1] - y_min_voxel
        
        final_voxel_array = relevant_array  
    else:
        final_voxel_array = voxel_array
    
    return final_voxel_array

In [41]:
create_single_voxel_array(489,352,10,array_docked)

array([[  6,   8, 118, 506],
       [  8,   8, 179, 517],
       [  7,   8, 190, 528],
       [  4,   8, 176, 520],
       [  2,   8, 162, 530],
       [  0,   8, 162, 515]])

In [35]:
create_single_voxel_array(489,352,10,array_docked)

4890
3520
[[4896 3528  118  506]
 [4898 3528  179  517]
 [4897 3528  190  528]
 [4894 3528  176  520]
 [4892 3528  162  530]
 [4890 3528  162  515]]


UnboundLocalError: local variable 'final_voxel_array' referenced before assignment

In [116]:
#df[(df['x'] > x_min_voxel ) & (df['x'] < x_max_voxel ) & (df['y'] > y_min_voxel) & (df['y'] < y_max_voxel)].shape[0] != 0:

x_min_voxel = 4890
x_max_voxel = 4900
y_min_voxel = 3520
y_max_voxel = 3530
voxel_size = 10


indices = np.where((array_docked[:,0] >= x_min_voxel)*(array_docked[:,0] < x_max_voxel)*(array_docked[:,1] >= y_min_voxel)*(array_docked[:,1] < y_max_voxel))[0]#.size 
x_axis_voxel =  np.repeat(np.arange(x_min_voxel,x_max_voxel,1),voxel_size)
y_axis_voxel =  np.tile(np.arange(y_min_voxel,y_max_voxel,1),voxel_size)
Zero_array = np.zeros(voxel_size*voxel_size, dtype=int)   
dummy_array = np.stack((x_axis_voxel, y_axis_voxel, Zero_array, Zero_array), axis=-1)

In [117]:
dummy_array

array([[4890, 3520,    0,    0],
       [4890, 3521,    0,    0],
       [4890, 3522,    0,    0],
       [4890, 3523,    0,    0],
       [4890, 3524,    0,    0],
       [4890, 3525,    0,    0],
       [4890, 3526,    0,    0],
       [4890, 3527,    0,    0],
       [4890, 3528,    0,    0],
       [4890, 3529,    0,    0],
       [4891, 3520,    0,    0],
       [4891, 3521,    0,    0],
       [4891, 3522,    0,    0],
       [4891, 3523,    0,    0],
       [4891, 3524,    0,    0],
       [4891, 3525,    0,    0],
       [4891, 3526,    0,    0],
       [4891, 3527,    0,    0],
       [4891, 3528,    0,    0],
       [4891, 3529,    0,    0],
       [4892, 3520,    0,    0],
       [4892, 3521,    0,    0],
       [4892, 3522,    0,    0],
       [4892, 3523,    0,    0],
       [4892, 3524,    0,    0],
       [4892, 3525,    0,    0],
       [4892, 3526,    0,    0],
       [4892, 3527,    0,    0],
       [4892, 3528,    0,    0],
       [4892, 3529,    0,    0],
       [48

In [120]:
ex_array #auffüllen mit 0en

array([[4896, 3528,  118,  506],
       [4898, 3528,  179,  517],
       [4897, 3528,  190,  528],
       [4894, 3528,  176,  520],
       [4892, 3528,  162,  530],
       [4890, 3528,  162,  515]])

In [121]:
stacked_array = np.vstack((ex_array, dummy_array))

In [122]:
stacked_array

array([[4896, 3528,  118,  506],
       [4898, 3528,  179,  517],
       [4897, 3528,  190,  528],
       [4894, 3528,  176,  520],
       [4892, 3528,  162,  530],
       [4890, 3528,  162,  515],
       [4890, 3520,    0,    0],
       [4890, 3521,    0,    0],
       [4890, 3522,    0,    0],
       [4890, 3523,    0,    0],
       [4890, 3524,    0,    0],
       [4890, 3525,    0,    0],
       [4890, 3526,    0,    0],
       [4890, 3527,    0,    0],
       [4890, 3528,    0,    0],
       [4890, 3529,    0,    0],
       [4891, 3520,    0,    0],
       [4891, 3521,    0,    0],
       [4891, 3522,    0,    0],
       [4891, 3523,    0,    0],
       [4891, 3524,    0,    0],
       [4891, 3525,    0,    0],
       [4891, 3526,    0,    0],
       [4891, 3527,    0,    0],
       [4891, 3528,    0,    0],
       [4891, 3529,    0,    0],
       [4892, 3520,    0,    0],
       [4892, 3521,    0,    0],
       [4892, 3522,    0,    0],
       [4892, 3523,    0,    0],
       [48

In [126]:
ex_array = array_docked[indices]

In [123]:
stacked_unique_xy, unique_indices = np.unique(stacked_array[:,[0,1]],axis=0, return_index = True)

In [128]:
stacked_array[unique_indices]

array([[4890, 3520,    0,    0],
       [4890, 3521,    0,    0],
       [4890, 3522,    0,    0],
       [4890, 3523,    0,    0],
       [4890, 3524,    0,    0],
       [4890, 3525,    0,    0],
       [4890, 3526,    0,    0],
       [4890, 3527,    0,    0],
       [4890, 3528,  162,  515],
       [4890, 3529,    0,    0],
       [4891, 3520,    0,    0],
       [4891, 3521,    0,    0],
       [4891, 3522,    0,    0],
       [4891, 3523,    0,    0],
       [4891, 3524,    0,    0],
       [4891, 3525,    0,    0],
       [4891, 3526,    0,    0],
       [4891, 3527,    0,    0],
       [4891, 3528,    0,    0],
       [4891, 3529,    0,    0],
       [4892, 3520,    0,    0],
       [4892, 3521,    0,    0],
       [4892, 3522,    0,    0],
       [4892, 3523,    0,    0],
       [4892, 3524,    0,    0],
       [4892, 3525,    0,    0],
       [4892, 3526,    0,    0],
       [4892, 3527,    0,    0],
       [4892, 3528,  162,  530],
       [4892, 3529,    0,    0],
       [48

In [31]:
def create_single_voxel_df (current_n_vox_x, current_n_vox_y, voxel_size, df):
    start_time = time.time()
    x_min_voxel = current_n_vox_x * voxel_size
    x_max_voxel = (current_n_vox_x + 1)*voxel_size
    y_min_voxel = current_n_vox_y * voxel_size
    y_max_voxel = (current_n_vox_y + 1)*voxel_size

    x_axis_voxel_df =  np.repeat(np.arange(x_min_voxel,x_max_voxel,1),voxel_size)
    y_axis_voxel_df =  np.tile(np.arange(y_min_voxel,y_max_voxel,1),voxel_size)
    Zero_array = np.zeros(voxel_size*voxel_size, dtype=int)

    help_arr = np.column_stack((x_axis_voxel_df, y_axis_voxel_df, Zero_array, Zero_array))
    df_voxel = pd.DataFrame(help_arr, columns=['x','y','area','intensity'])


    if df[(df['x'] > x_min_voxel ) & (df['x'] < x_max_voxel ) & (df['y'] > y_min_voxel) & (df['y'] < y_max_voxel)].shape[0] != 0:
        df_voxel_added = df_voxel.append(df[(df['x'] > x_min_voxel ) & (df['x'] < x_max_voxel ) & (df['y'] > y_min_voxel) & (df['y'] < y_max_voxel)])
        df_voxel_wo_dupl = df_voxel_added.drop_duplicates(['x','y'], keep = 'last')
        df_voxel_final = df_voxel_wo_dupl.sort_values(by=['x','y'])

    else:
        df_voxel_final = df_voxel
    print(time.time()-start_time)
    return df_voxel_final


In [37]:
create_single_voxel_df(5,5,100,df_docked)

0.006753683090209961


Unnamed: 0,x,y,area,intensity
0,500,500,0,0
1,500,501,0,0
2,500,502,0,0
3,500,503,0,0
4,500,504,0,0
...,...,...,...,...
9995,599,595,0,0
9996,599,596,0,0
9997,599,597,0,0
9998,599,598,0,0
