In [2]:
import pandas as pd
import numpy as np
import h5py
import time 
import matplotlib.pyplot as plt

In [3]:
def get_2D_data_from_h5_filtered(h5_path, part_name, Slice_name, mode):
    #Step 1: getting the data from the h5
    start_time = time.time()
    with h5py.File(h5_path,'r') as h5:
        X_Axis = h5[part_name][Slice_name]['X-Axis']
        Y_Axis = h5[part_name][Slice_name]['Y-Axis']
        Area = h5[part_name][Slice_name]['Area']
        Intensity = h5[part_name][Slice_name]['Intensity']

        X_Axis_size = X_Axis.size
        Y_Axis_size = Y_Axis.size
        Area_size = Area.size
        Intensity_size = Intensity.size

        #if dimensions aren't equal the following code block is entered
        if not X_Axis_size == Y_Axis_size == Area_size == Intensity_size:

            #determine the lowest value among the different sizes
            size_arr = np.array([X_Axis_size, Y_Axis_size, Area_size, Intensity_size])
            min_size = size_arr.min()

            if X_Axis_size != min_size:
                diff_size_x = X_Axis_size - min_size #calculating the difference between the actual value and the minimum and substracting it from the array
                X_Axis_new = np.delete(X_Axis, -diff_size_x)
                X_Axis = X_Axis_new
                X_Axis_size = X_Axis.size

            if Y_Axis_size != min_size:
                diff_size_y = Y_Axis_size - min_size
                Y_Axis_new = np.delete(Y_Axis, -diff_size_y)
                Y_Axis = Y_Axis_new
                Y_Axis_size = Y_Axis.size

            if Area_size != min_size:
                diff_size_area = Area_size - min_size
                Area_new = np.delete(Area, -diff_size_area)
                Area = Area_new
                Area_size = Area.size

            if Intensity_size != min_size:
                diff_size_intensity = Intensity_size - min_size
                Intensity_new = np.delete(Intensity, -diff_size_intensity)
                Intensity = Intensity_new
                Intensity_size = Intensity.size


        #by reducing all the dimensions to the minimum equal dimensions are guaranteed
        #there is a risk of deleting more than just one datapoint without noticing -> maybe add an alert after more than 5(?) while iterations
        help_arr = np.column_stack((X_Axis, Y_Axis, Area, Intensity))
        df_raw = pd.DataFrame(help_arr, columns=['x','y','area','intensity'])

    #Step 2: change floats to ints and remove duplicates
    df_int = df_raw.astype(int).drop_duplicates()

    #remove all rows with 0 for area and intensity
    df_int = df_int.loc[(df_int['area'] != 0) & (df_int['intensity'] != 0)]


    #Step 3: Get a df with all the rows where a certain x,y combination occurs multiple times
    df_multi_xy = df_int[df_int.duplicated(['x','y'], keep = False)]

    #Step 4: get a new df out of df_multi_xy with x,y and mean/max of area and intensity for all x,y occurences
    df_compact = pd.DataFrame(columns=['x','y','area','intensity']) #initialize df_compact

    print("vor iterieren %s seconds ---" % (time.time() - start_time))
    for ind in range (df_multi_xy.shape[0]):
        if mode == 'mean':
            area_mean = df_multi_xy.loc[(df_multi_xy['x']== df_multi_xy.iloc[ind]['x']) & (df_multi_xy['y'] == df_multi_xy.iloc[ind]['y'])]['area'].mean().astype(int)
            intensity_mean = df_multi_xy.loc[(df_multi_xy['x']== df_multi_xy.iloc[ind]['x']) & (df_multi_xy['y'] == df_multi_xy.iloc[ind]['y'])]['intensity'].mean().astype(int)
            df_compact = df_compact.append({'x': df_multi_xy.iloc[ind]['x'], 'y':df_multi_xy.iloc[ind]['y'], 'area':area_mean , 'intensity':intensity_mean}, ignore_index=True)
        if mode == 'max':
            area_max = df_multi_xy.loc[(df_multi_xy['x']== df_multi_xy.iloc[ind]['x']) & (df_multi_xy['y'] == df_multi_xy.iloc[ind]['y'])]['area'].max().astype(int)
            intensity_max = df_multi_xy.loc[(df_multi_xy['x']== df_multi_xy.iloc[ind]['x']) & (df_multi_xy['y'] == df_multi_xy.iloc[ind]['y'])]['intensity'].max().astype(int)
            df_compact = df_compact.append({'x': df_multi_xy.iloc[ind]['x'], 'y':df_multi_xy.iloc[ind]['y'], 'area':area_max , 'intensity':intensity_max}, ignore_index=True)
    df_compact = df_compact.drop_duplicates()

    #Step 5: remove df_multi_xy from df_int and append df_compact
    df_multi_xy_removed = df_int.drop(df_int[df_int.duplicated(['x','y'], keep = False)].index)

    df_final = df_multi_xy_removed.append(df_compact)
    print("df creation took %s seconds ---" % (time.time() - start_time))
    return (df_final)

In [4]:
df_undocked = get_2D_data_from_h5_filtered('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 'Slice00003', 'mean')

vor iterieren 0.06362318992614746 seconds ---
df creation took 84.35257935523987 seconds ---


In [5]:
def get_true_min_maxX (h5_path, part_name, max_slice_number):

    minX = []
    maxX = []
    for num_slice in range(max_slice_number):
        with h5py.File(h5_path,'r') as h5:
            X_Axis = h5[part_name]['Slice'+str("{:05d}".format(num_slice+1))]['X-Axis']
            x_axis_array = np.array(X_Axis)
            minX.append(x_axis_array.min())
            maxX.append(x_axis_array.max())
    minX_array = np.asarray(minX)
    maxX_array = np.asarray(maxX)
    return minX_array.min(), maxX_array.max()

In [6]:
def get_true_min_maxY (h5_path, part_name, max_slice_number):

    minY = []
    maxY = []
    for num_slice in range(max_slice_number):
        with h5py.File(h5_path,'r') as h5:
            Y_Axis = h5[part_name]['Slice'+str("{:05d}".format(num_slice+1))]['Y-Axis']
            y_axis_array = np.array(Y_Axis)
            minY.append(y_axis_array.min())
            maxY.append(y_axis_array.max())
    minY_array = np.asarray(minY)
    maxY_array = np.asarray(maxY)
    return minY_array.min(), maxY_array.max()

In [7]:
minX = get_true_min_maxX('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 142)[0]
maxX = get_true_min_maxX('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 142)[1]
minY = get_true_min_maxY('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 142)[0]
maxY = get_true_min_maxY('/home/jan/Documents/CodeTDMStoHDF/Ausgangsdaten/examplerRun.h5', '0_00003_Canti3_cls', 142)[1]

In [8]:
length_x = int(maxX-minX)

In [9]:
length_y = int(maxY-minY)

In [10]:
def dock_df_to_zero(df, minX, minY):
    if minX >= 0 and minY >=0:
        df['x'] = df['x'] - minX
        df['y'] = df['y'] - minY
    elif minX < 0 and minY <0:
        df['x'] = df['x'] + abs(minX)
        df['y'] = df['y'] + abs(minY)
    elif minX >= 0 and minY <0:
        df['x'] = df['x'] - minX
        df['y'] = df['y'] + abs(minY)
    elif minX < 0 and min >= 0:
        df['x'] = df['x'] + abs(minX)
        df['y'] = df['y'] - minY
    return df

In [11]:
df = dock_df_to_zero(df_undocked, minX, minY)

In [13]:
#creating an empty dataframe with all possible combinations of x and y values 

In [17]:
x_Axis = np.repeat(np.arange(0,length_x+1,1),length_y+1)
y_Axis = np.tile(np.arange(0,length_y+1,1),length_x+1)

In [18]:
x_Axis

array([   0,    0,    0, ..., 4946, 4946, 4946])

In [19]:
y_Axis.size

44508159

In [29]:
#creating a dataframe to be filled with x_Axis, y_Axis data and zeros for intensity and area

In [20]:
array_0s  = np.zeros((x_Axis.size,), dtype=int)

In [21]:
help_arr = np.column_stack((x_Axis, y_Axis, array_0s, array_0s))
df_0s= pd.DataFrame(help_arr, columns=['x','y','area','intensity'])

In [27]:
df_0s = 0

In [22]:
df

Unnamed: 0,x,y,area,intensity
971,3519,3578,203,545
973,3535,3578,248,520
974,3541,3578,221,521
975,3551,3578,194,1017
976,3557,3578,265,932
...,...,...,...,...
17075,3528,8817,298,530
17082,3649,8977,347,577
17088,3528,8897,240,504
17095,3569,8977,183,474


In [23]:
df_with_dupl = df_0s.append(df)

In [28]:
df_with_dupl = 0

In [24]:
df_wo_dupl = df_with_dupl.drop_duplicates(['x','y'], keep = 'last')

In [25]:
df_wo_dupl.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 44508159 entries, 0 to 17102
Data columns (total 4 columns):
x            object
y            object
area         object
intensity    object
dtypes: object(4)
memory usage: 1.7+ GB


In [29]:
df_final = df_wo_dupl.sort_values(by=['x', 'y']).reset_index(drop = True)

In [30]:
df_final.tail()

Unnamed: 0,x,y,area,intensity
44508154,4946,8992,0,0
44508155,4946,8993,236,531
44508156,4946,8994,0,0
44508157,4946,8995,212,537
44508158,4946,8996,232,555


In [23]:
df_ex_1 = pd.DataFrame({"x": [5,6,7,8],
                        "y": [1,1,1,1],
                        "area": [10, 13, 21, 10],
                        "intensity": [12,43,34,12]})

In [24]:
df_ex_1

Unnamed: 0,x,y,area,intensity
0,5,1,10,12
1,6,1,13,43
2,7,1,21,34
3,8,1,10,12


In [25]:
df_ex_2 = pd.DataFrame({"x": [1,2,3,4,5,6,7,8],
                        "y": [1,1,1,1,1,1,1,1],
                        "area": [0,0,0,0,0,0,0,0],
                        "intensity": [0,0,0,0,0,0,0,0]})

In [26]:
df_ex_2

Unnamed: 0,x,y,area,intensity
0,1,1,0,0
1,2,1,0,0
2,3,1,0,0
3,4,1,0,0
4,5,1,0,0
5,6,1,0,0
6,7,1,0,0
7,8,1,0,0


In [27]:
df_ex_3 = df_ex_2.append(df_ex_1)

In [28]:
df_ex_4 = df_ex_3.drop_duplicates(['x','y'], keep = 'last')

In [30]:
df_ex_5 = df_ex_4.sort_values(by=['area', 'intensity']).reset_index(drop = True)

In [31]:
df_ex_5

Unnamed: 0,x,y,area,intensity
0,1,1,0,0
1,2,1,0,0
2,3,1,0,0
3,4,1,0,0
4,5,1,10,12
5,8,1,10,12
6,6,1,13,43
7,7,1,21,34


In [32]:
#practicing the creation of a new df out of the old one based on conditions

In [45]:
result = pd.concat([df_ex_5.iloc[:2],  df_ex_5.iloc[[2,4]]])

In [46]:
result

Unnamed: 0,x,y,area,intensity
0,1,1,0,0
1,2,1,0,0
2,3,1,0,0
4,5,1,10,12


In [49]:
voxel_size = 20

In [73]:
start_time = time.time()
voxel_df = df_final[(df_final['x'] > 50 ) & (df_final['x'] < 300 ) & (df_final['y'] < 300) & (df_final['y'] > 50)]
print("voxel filling took %s seconds ---" % (time.time() - start_time))                                              
#dfObj[(dfObj['x'] < 20) & (dfObj['y'] < 20) ]

voxel filling took 6.743647575378418 seconds ---


In [75]:
voxel_df['y'].values.astype(int)

array([ 51,  52,  53, ..., 297, 298, 299])

In [70]:
voxel_hdf = h5py.File('/home/jan/Documents/Voxel_Erstellung/HDFs/voxel_new_filling_method.hdf5', "w")
voxel_hdf.close()

In [61]:
voxel_size = 100
np.repeat(np.arange(0,voxel_size,1),voxel_size)

array([ 0,  0,  0, ..., 99, 99, 99])

In [71]:
num_voxels_x = 20
num_voxels_y = 20
voxel_size = 100
num_z = 1

for n_vox_y_init in range(num_voxels_y): #iterating over number of voxels in y-direction
    for n_vox_x_init in range(num_voxels_x):#iterating over number of voxels in x-direction
        #print('n_vox_x_init: '+ str(n_vox_x_init))
            start_time = time.time()
            voxel_df = df_final[(df_final['x'] >= voxel_size*num_voxels_x ) & (df_final['x'] < voxel_size*(num_voxels_x+1)) & (df_final['y'] >= voxel_size*num_voxels_y) & (df_final['y'] < voxel_size*(num_voxels_y+1))]
            
            with h5py.File('/home/jan/Documents/Voxel_Erstellung/HDFs/voxel_new_filling_method.hdf5', "a") as voxel_hdf:
                #creating a voxel with the numbers of voxels in both direction in its name and filling it with data
                #if group is already existing don't create a new group
                if 'voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init, num_z) not in voxel_hdf:
                    voxel_hdf.create_group('voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init,num_z))
                #voxel_hdf['voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init,num_z)].create_group('slice_1') #-num_z*num_slices_vox wegen
                voxel_hdf['voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init,num_z)].create_dataset('X-Axis',data = np.repeat(np.arange(0,voxel_size,1),voxel_size))
                voxel_hdf['voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init,num_z)].create_dataset('Y-Axis',data = np.tile(np.arange(0,voxel_size,1),voxel_size))
                voxel_hdf['voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init,num_z)].create_dataset('Area', data = voxel_df['area'].values.astype(int))
                voxel_hdf['voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init,num_z)].create_dataset('Intensity', data = voxel_df['intensity'].values.astype(int))
           
            print("voxel filling took %s seconds ---" % (time.time() - start_time))  
            print('filling voxel_{}_{}_{}'.format(n_vox_x_init,n_vox_y_init, num_z))
    

voxel filling took 6.828061580657959 seconds ---
filling voxel_0_0_1
voxel filling took 6.7900002002716064 seconds ---
filling voxel_1_0_1
voxel filling took 6.729922771453857 seconds ---
filling voxel_2_0_1
voxel filling took 6.728012561798096 seconds ---
filling voxel_3_0_1
voxel filling took 6.7176947593688965 seconds ---
filling voxel_4_0_1
voxel filling took 6.805440902709961 seconds ---
filling voxel_5_0_1
voxel filling took 6.812559604644775 seconds ---
filling voxel_6_0_1
voxel filling took 6.849747657775879 seconds ---
filling voxel_7_0_1
voxel filling took 6.8462207317352295 seconds ---
filling voxel_8_0_1
voxel filling took 6.822354793548584 seconds ---
filling voxel_9_0_1
voxel filling took 6.875306844711304 seconds ---
filling voxel_10_0_1
voxel filling took 7.002715110778809 seconds ---
filling voxel_11_0_1
voxel filling took 7.098147392272949 seconds ---
filling voxel_12_0_1
voxel filling took 7.107427597045898 seconds ---
filling voxel_13_0_1
voxel filling took 8.243619

voxel filling took 6.53455662727356 seconds ---
filling voxel_18_5_1
voxel filling took 6.608061790466309 seconds ---
filling voxel_19_5_1
voxel filling took 6.477171421051025 seconds ---
filling voxel_0_6_1
voxel filling took 6.450547456741333 seconds ---
filling voxel_1_6_1
voxel filling took 6.6956586837768555 seconds ---
filling voxel_2_6_1
voxel filling took 6.576360464096069 seconds ---
filling voxel_3_6_1
voxel filling took 6.476171255111694 seconds ---
filling voxel_4_6_1
voxel filling took 6.329339981079102 seconds ---
filling voxel_5_6_1
voxel filling took 6.250988245010376 seconds ---
filling voxel_6_6_1
voxel filling took 6.3370749950408936 seconds ---
filling voxel_7_6_1
voxel filling took 6.428545951843262 seconds ---
filling voxel_8_6_1
voxel filling took 6.413644552230835 seconds ---
filling voxel_9_6_1
voxel filling took 6.444775104522705 seconds ---
filling voxel_10_6_1
voxel filling took 6.462272882461548 seconds ---
filling voxel_11_6_1
voxel filling took 6.42671942

KeyboardInterrupt: 