# accessing turbulence isotropic cube files on filedb cluster from sciserver
* created volume container (turb) showing all /turb folders on filedb system
* access to turbinfo for metadata about these files
 * requires copying DataPath table
* Morton curve code (`morton-py`) is pip-installed in the SciServer container
* some special code accounts for the 8x8x8 blobs, which are stored with z-y-x ordering

In [3]:
%pip install morton-py

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import sys
import h5py
import math
import time
import morton
#import psutil
#import tracemalloc
#import struct
import numpy as np
import SciServer.CasJobs as cj

class IsoCube:
    def __init__(self, cube_num, cube_dimensions = 3, cube_title = ''):
        """
        set up the Morton curve for the dataset and load the file metadata cache.

        cube_num : cube size; its base-2 logarithm is used as the number of Morton bits
                   (presumably a power of 2 such as 8192 - confirm for new datasets).
        cube_dimensions : number of dimensions handed to the Morton curve.
        cube_title : turbulence dataset name, e.g. "isotropic8192" or "isotropic1024fine".
        """
        # cube size.
        self.N = cube_num
        
        # turbulence dataset name, e.g. "isotropic8192" or "isotropic1024fine".
        self.cube_title = cube_title
        
        # setting up Morton curve. math.log2 is used rather than math.log(N, 2): the two-argument form divides two
        # rounded logarithms and is not guaranteed to be exact for powers of 2, which matters because int() truncates.
        bits = int(math.log2(self.N))
        self.mortoncurve = morton.Morton(dimensions = cube_dimensions, bits = bits)
        
        # runs the metadata query and stores the result on self.cache.
        self.initcache()
        
    def initcache(self):
        """
        query the "turbinfo" context for the metadata of every data file of this dataset and keep the
        resulting table, extended with the unpacked corner coordinates of each file, in self.cache.
        """
        # NOTE(review): the table suffix and dataset name are interpolated straight into the SQL text.
        metadata_query = f"""
        select dbm.ProductionMachineName
        , dbm.ProductionDatabaseName
        , dbm.minLim, dbm.maxLim
        , dbm.minTime, dbm.maxTime
        , dp.path
        from databasemap dbm
           join datapath{str(self.N)} dp
             on dp.datasetid=dbm.datasetid
           and dp.productionmachinename=dbm.productionmachinename
           and dp.ProductionDatabaseName=dbm.ProductionDatabaseName
        where dbm.datasetname = '{self.cube_title}'
        order by minlim
        """
        metadata = cj.executeQuery(metadata_query, "turbinfo")
        
        # unpack the Morton limits of each file into x/y/z columns: first the minimum corner, then the maximum corner.
        for suffix, limit_column in (('min', 'minLim'), ('max', 'maxLim')):
            x_values, y_values, z_values = self.mortoncurve.unpack(metadata[limit_column].values)
            metadata[f'x_{suffix}'] = x_values
            metadata[f'y_{suffix}'] = y_values
            metadata[f'z_{suffix}'] = z_values
        
        self.cache = metadata
    
    # defines some helper functions, all hardcoded (double-check this when other datasets are available)
    def parseCornerPoints(self, x_min, x_max, y_min, y_max, z_min, z_max):
        """
        return the four box corners used to detect which axes cross a database file boundary.

        the result is the tuple (c1, c2, c4, c5) of (X, Y, Z) tuples:
          c1 - the origin corner (x_min, y_min, z_min).
          c2 - same as c1 except at the maximum x-position.
          c4 - same as c1 except at the maximum y-position.
          c5 - same as c1 except at the maximum z-position.
        the remaining corners (3, 6, 7, and 8) are not needed and are not returned.
        """
        origin_corner = (x_min, y_min, z_min)
        far_x_corner = (x_max, y_min, z_min)
        far_y_corner = (x_min, y_max, z_min)
        far_z_corner = (x_min, y_min, z_max)
        
        return (origin_corner, far_x_corner, far_y_corner, far_z_corner)
        
    def getFilesForCornerPoints(self, x_range, y_range, z_range, var, timepoint):
        # define the corner points.
        x_min = x_range[0]; x_max = x_range[1];
        y_min = y_range[0]; y_max = y_range[1];
        z_min = z_range[0]; z_max = z_range[1];
        
        # retrieve the corner points.
        c_points = self.parseCornerPoints(x_min, x_max, y_min, y_max, z_min, z_max)
        
        database_files = []
        
        # only points 1, 2, 4, and 5 are required for finding the correct sub-boxes.
        c1_info = self.getFileForPoint(c_points[0][0], c_points[0][1], c_points[0][2], var, timepoint)
        c1_file = c1_info[0]
        database_files.append(c1_file)
        
        c2_info = self.getFileForPoint(c_points[1][0], c_points[1][1], c_points[1][2], var, timepoint)
        c2_file = c2_info[0]
        database_files.append(c2_file)
        
        c4_info = self.getFileForPoint(c_points[2][0], c_points[2][1], c_points[2][2], var, timepoint)
        c4_file = c4_info[0]
        database_files.append(c4_file)
        
        c5_info = self.getFileForPoint(c_points[3][0], c_points[3][1], c_points[3][2], var, timepoint)
        c5_file = c5_info[0]
        database_files.append(c5_file)
        
        return database_files
    
    def findSubBoxEndPoint(self, axis_range, datapoint, axis_position, db_file_comparison, var, timepoint):
        # placeholder end point value. 
        end_point = -1
        # if the difference between the axis range end points is <= to this value, then the end_point
        # has been found.
        axis_range_difference = 2
        
        end_point_found = False
        while not end_point_found:
            mid_point = math.floor((axis_range[0] + axis_range[1]) / 2)
            
            # stops recursively shrinking the box once the difference between the two end points is <= axis_range_difference.
            if (axis_range[1] - axis_range[0]) <= axis_range_difference:
                end_point_found = True
            
            # updates the datapoint to the new mid point.
            datapoint[axis_position] = mid_point
            
            # gets the db file for the new datapoint.
            datapoint_info = self.getFileForPoint(datapoint[0], datapoint[1], datapoint[2], var, timepoint)
            datapoint_file = datapoint_info[0]
            
            # compares the db file for datapoint to the origin point.
            if datapoint_file == db_file_comparison:
                end_point = mid_point
                axis_range[0] = mid_point
            else:
                end_point = mid_point - 1
                axis_range[1] = mid_point
            
            # used for checking that there were no redundant calculations
            #print(f'midpoint = {mid_point}')
            #print(f'endpoint = {end_point}')
            #print('-')
                
        return end_point
    
    def recursiveSingleDatabaseFileSubBoxes(self, box, var, timepoint, single_file_boxes):
        db_files = self.getFilesForCornerPoints(box[0], box[1], box[2], var, timepoint)
        num_db_files = len(set(db_files))

        if num_db_files == 1:
            unique_db_file = list(set(db_files))[0]
            if unique_db_file in single_file_boxes:
                raise Exception(f'{unique_db_file} is already in single_file_boxes')
            
            # stores the minLim of the box for use later when reading in the data.
            box_info = self.getFileForPoint(box[0][0], box[1][0], box[2][0], var, timepoint)
            box_minLim = box_info[3]
            
            single_file_boxes[unique_db_file] = (box, box_minLim)
            
            return
        elif db_files[0] != db_files[1]:
            # this means that the x_range was sufficiently large such that all of the points were
            # not contained in a singular database file.  i.e. the database files were different for
            # corners 1 and 2.  the data x_range will now be recursively split in half to find the first databse file endpoint
            # along this axis.

            # this value is specified as 0 because the x-axis index is 0.  this is used for determing which 
            # point (X, Y, or Z) the midpoint is going to be tested for.  in this case, this section of code
            # is adjusting only the x-axis.
            axis_position = 0
            # stores the c1 corner point (X, Y, Z) of the box to be used for finding the first box end point
            # when shrinking the x-axis into sub-boxes.
            datapoint = [box[0][0], box[1][0], box[2][0]]
            # which axis is sub-divided, in this case it is the x-axis.
            axis_range = list(box[0])
            # determine where the end x-axis point is for the first sub-box.
            first_box_end_point = self.findSubBoxEndPoint(axis_range, datapoint, axis_position, db_files[0], \
                                                                       var, timepoint)

            first_sub_box = [[box[0][0], first_box_end_point], box[1], box[2]]
            second_sub_box = [[first_box_end_point + 1, box[0][1]], box[1], box[2]]
            
            sub_boxes = []
            sub_boxes.append(first_sub_box)
            sub_boxes.append(second_sub_box)
            
            for sub_box in sub_boxes:
                self.recursiveSingleDatabaseFileSubBoxes(sub_box, var, timepoint, single_file_boxes)
        elif db_files[0] != db_files[2]:
            # this means that the y_range was sufficiently large such that all of the points were
            # not contained in a singular database file.  i.e. the database files were different for
            # corners 1 and 4.  the data y_range will now be recursively split in half to find the first databse file endpoint
            # along this axis.

            # this value is specified as 1 because the y-axis index is 1.  this is used for determing which 
            # point (X, Y, or Z) the midpoint is going to be tested for.  in this case, this section of code
            # is adjusting only the y-axis.
            axis_position = 1
            # stores the c1 corner point (X, Y, Z) of the box to be used for finding the first box end point 
            # when shrinking the y-axis into sub-boxes.
            datapoint = [box[0][0], box[1][0], box[2][0]]
            # which axis is sub-divided, in this case it is the y-axis.
            axis_range = list(box[1])
            # determine where the end y-axis point is for the first sub-box.
            first_box_end_point = self.findSubBoxEndPoint(axis_range, datapoint, axis_position, db_files[0], \
                                                                       var, timepoint)

            first_sub_box = [box[0], [box[1][0], first_box_end_point], box[2]]
            second_sub_box = [box[0], [first_box_end_point + 1, box[1][1]], box[2]]

            sub_boxes = []
            sub_boxes.append(first_sub_box)
            sub_boxes.append(second_sub_box)
            
            for sub_box in sub_boxes:
                self.recursiveSingleDatabaseFileSubBoxes(sub_box, var, timepoint, single_file_boxes)
        elif db_files[0] != db_files[3]:
            # this means that the z_range was sufficiently large such that all of the points were
            # not contained in a singular database file.  i.e. the database files were different for
            # corners 1 and 5.  the data z_range will now be recursively split in half to find the first databse file endpoint
            # along this axis.

            # this value is specified as 2 because the z-axis index is 2.  this is used for determing which 
            # point (X, Y, or Z) the midpoint is going to be tested for.  in this case, this section of code
            # is adjusting only the z-axis.
            axis_position = 2
            # stores the c1 corner point (X, Y, Z) of the box to be used for finding the first box end point 
            # when shrinking the z-axis into sub-boxes.
            datapoint = [box[0][0], box[1][0], box[2][0]]
            # which axis is sub-divided, in this case it is the z-axis.
            axis_range = list(box[2])
            # determine where the end z-axis point is for the first sub-box.
            first_box_end_point = self.findSubBoxEndPoint(axis_range, datapoint, axis_position, db_files[0], \
                                                          var, timepoint)

            first_sub_box = [box[0], box[1], [box[2][0], first_box_end_point]]
            second_sub_box = [box[0], box[1], [first_box_end_point + 1, box[2][1]]]
            
            sub_boxes = []
            sub_boxes.append(first_sub_box)
            sub_boxes.append(second_sub_box)
            
            for sub_box in sub_boxes:
                self.recursiveSingleDatabaseFileSubBoxes(sub_box, var, timepoint, single_file_boxes)
    
    def identifySingleDatabaseFileSubBoxes(self, x_range, y_range, z_range, var, timepoint):
        # initially assumes the user specified box contains points in different files. the boxes will be split up until all the points
        # in each box are from a single database file.
        box = [x_range, y_range, z_range]
        single_file_boxes = {}
        self.recursiveSingleDatabaseFileSubBoxes(box, var, timepoint, single_file_boxes)
            
        return single_file_boxes
    
    def boxesContained(self, sub_box, user_box):
        """
        return True when sub_box lies entirely inside user_box along the x-, y-, and z-axes, False otherwise.

        both boxes are [x_range, y_range, z_range] with inclusive [min, max] ranges.
        """
        for axis in range(3):
            starts_inside = sub_box[axis][0] >= user_box[axis][0]
            if not (starts_inside and sub_box[axis][1] <= user_box[axis][1]):
                # sticking out along a single axis is enough to not be contained.
                return False
        
        return True
    
    def boxesOverlap(self, sub_box, user_box):
        """
        return True when sub_box and user_box share at least one point, i.e. their inclusive [min, max]
        ranges intersect along all three axes, False otherwise.
        """
        return all(
            sub_box[axis][0] <= user_box[axis][1] and user_box[axis][0] <= sub_box[axis][1]
            for axis in range(3)
        )
    
    def determineMinOverlapPoint(self, voxel, user_box, axis):
        """
        return the lower end of the overlap between voxel and user_box along the given axis.

        the voxel minimum is returned when the user-specified box starts at or before it; otherwise the
        user-specified box minimum is returned.
        """
        voxel_low = voxel[axis][0]
        user_low = user_box[axis][0]
        
        return voxel_low if user_low <= voxel_low else user_low
    
    def determineMaxOverlapPoint(self, voxel, user_box, axis):
        """
        return the upper end of the overlap between voxel and user_box along the given axis.

        the voxel maximum is returned when the user-specified box ends at or after it; otherwise the
        user-specified box maximum is returned.
        """
        voxel_high = voxel[axis][1]
        user_high = user_box[axis][1]
        
        return voxel_high if user_high >= voxel_high else user_high
    
    def voxelRangesInUserBox(self, voxel, user_box):
        # determine the minimum and maximum values of the overlap, along each axis, between voxel and the user-specified box for a partially overlapped voxel.
        # axis 0 corresponds to the x-axis.
        # axis 1 corresponds to the y-axis.
        # axis 2 corresponds to the z-axis.
        voxel_x_min = self.determineMinOverlapPoint(voxel, user_box, axis = 0)
        voxel_x_max = self.determineMaxOverlapPoint(voxel, user_box, axis = 0)
        
        voxel_y_min = self.determineMinOverlapPoint(voxel, user_box, axis = 1)
        voxel_y_max = self.determineMaxOverlapPoint(voxel, user_box, axis = 1)
        
        voxel_z_min = self.determineMinOverlapPoint(voxel, user_box, axis = 2)
        voxel_z_max = self.determineMaxOverlapPoint(voxel, user_box, axis = 2)
        
        voxel_data = [[voxel_x_min, voxel_x_max], [voxel_y_min, voxel_y_max], [voxel_z_min, voxel_z_max]]
        
        return voxel_data
        
    def recursiveSubBoxesInFile(self, box, user_db_box, morton_voxels_to_read, voxel_side_length = 8):
        # recursively sub-divides the database file cube until the entire user-specified box is mapped by morton cubes.
        box_x_range = box[0]
        box_y_range = box[1]
        box_z_range = box[2]
        
        # only need to check one axes since each sub-box is a cube. this value will be compared to voxel_side_length to limit the recursive
        # shrinking algorithm.
        sub_box_axes_length = box_x_range[1] - box_x_range[0] + 1
        
        # checks if the sub-box corner points are all inside the portion of the user-specified box in the database file.
        box_fully_contained = self.boxesContained(box, user_db_box)
        box_partially_contained = self.boxesOverlap(box, user_db_box)
        # recursively shrinks to voxel-sized boxes (8 x 8 x 8), and stores all of the necessary information regarding these boxes
        # that will be used when reading in data from the database file.
        if sub_box_axes_length == voxel_side_length:
            if box_fully_contained or box_partially_contained:
                # converts the box (X, Y, Z) minimum and maximum points to morton indices for the database file.
                morton_index_min = self.mortoncurve.pack(box[0][0], box[1][0], box[2][0])
                morton_index_max = self.mortoncurve.pack(box[0][1], box[1][1], box[2][1])
                
                # stores the x-, y-, an z- ranges of the user-specified box that the voxel is contained in.
                voxel_ranges = []
                if box_fully_contained:
                    # sub-box is fully contained within the user-specified box.
                    voxel_ranges = [box_x_range, box_y_range, box_z_range]
                else:
                    # sub-box is partially contained within the user-specified box.
                    voxel_ranges = list(self.voxelRangesInUserBox(box, user_db_box))
                
                # stores the voxel information so that the data values can be mapped back to (X, Y, Z) points efficiently. specifically,
                # the cornercode of the box, the minimum and maximum morton indices of the box, and the box x-axis, y-axis, and z-axis ranges are stored.
                # the cornercode and offset correspond to the sub-box minimum axes point (X_min, Y_min, Z_min) - this should mean the offset is always 0.
                voxel_data = self.getOffset(box[0][0], box[1][0], box[2][0])
                voxel_cornercode = voxel_data[0]
                voxel_offset = voxel_data[1]
                voxel_info = [voxel_cornercode, voxel_offset, voxel_ranges]
                
                # stores the morton indices for reading from the database file efficiently and voxel info for parsing out the voxel information.
                if morton_voxels_to_read == list():
                    morton_voxel_info = [[morton_index_min, morton_index_max], voxel_info]
                    morton_voxels_to_read.append(morton_voxel_info)
                else:
                    # check if the most recent sub-box maximum is 1 index less than the new sub-box minimum.  if so, then 
                    # extend the range of the previous sub-box morton maximum to stitch these two boxes together. also, append
                    # the new voxel info so that the voxel information can be parsed out since the morton index range now spans
                    # more than one voxel.
                    if morton_voxels_to_read[-1][0][1] == (morton_index_min - 1):
                        morton_voxels_to_read[-1][0][1] = morton_index_max
                        morton_voxels_to_read[-1].append(voxel_info)
                    else:
                        # start a new morton sequence that will be read in separately.
                        morton_voxel_info = [[morton_index_min, morton_index_max], voxel_info]
                        morton_voxels_to_read.append(morton_voxel_info)

            return
        else:
            if box_partially_contained:
                # sub-divide the box into 8 sub-cubes (divide the x-, y-, and z- axes in half) and recursively check each box if 
                # it is inside the user-specified box, if necessary.
                box_x_range_midpoint = math.floor((box_x_range[0] + box_x_range[1]) / 2)
                box_y_range_midpoint = math.floor((box_y_range[0] + box_y_range[1]) / 2)
                box_z_range_midpoint = math.floor((box_z_range[0] + box_z_range[1]) / 2)
                
                # ordering sub-boxes 1-8 below in this order maintains the morton-curve index structure, such that 
                # the minimum (X, Y, Z) morton index for a new box only needs to be compared to the last 
                # sub-boxes' maximum (X, Y, Z) morton index to see if they can be stitched together.
                
                # new_sub_box_1 is the sub-box bounded by [x_min, x_midpoint], [y_min, y_midpoint], and [z_min, z_midpoint]
                # new_sub_box_2 is the sub-box bounded by [x_midpoint + 1, x_max], [y_min, y_midpoint], and [z_min, z_midpoint]
                # new_sub_box_3 is the sub-box bounded by [x_min, x_midpoint], [y_midpoint + 1, y_max], and [z_min, z_midpoint]
                # new_sub_box_4 is the sub-box bounded by [x_midpoint + 1, x_max], [y_midpoint + 1, y_max], and [z_min, z_midpoint]
                new_sub_box_1 = [[box_x_range[0], box_x_range_midpoint], [box_y_range[0], box_y_range_midpoint], [box_z_range[0], box_z_range_midpoint]]
                new_sub_box_2 = [[box_x_range_midpoint + 1, box_x_range[1]], [box_y_range[0], box_y_range_midpoint], [box_z_range[0], box_z_range_midpoint]]
                new_sub_box_3 = [[box_x_range[0], box_x_range_midpoint], [box_y_range_midpoint + 1, box_y_range[1]], [box_z_range[0], box_z_range_midpoint]]
                new_sub_box_4 = [[box_x_range_midpoint + 1, box_x_range[1]], [box_y_range_midpoint + 1, box_y_range[1]], [box_z_range[0], box_z_range_midpoint]]
                
                # new_sub_box_5 is the sub-box bounded by [x_min, x_midpoint], [y_min, y_midpoint], and [z_midpoint + 1, z_max]
                # new_sub_box_6 is the sub-box bounded by [x_midpoint + 1, x_max], [y_min, y_midpoint], and [z_midpoint + 1, z_max]
                # new_sub_box_7 is the sub-box bounded by [x_min, x_midpoint], [y_midpoint + 1, y_max], and [z_midpoint + 1, z_max]
                # new_sub_box_8 is the sub-box bounded by [x_midpoint + 1, x_max], [y_midpoint + 1, y_max], and [z_midpoint + 1, z_max]
                new_sub_box_5 = [[box_x_range[0], box_x_range_midpoint], [box_y_range[0], box_y_range_midpoint], [box_z_range_midpoint + 1, box_z_range[1]]]
                new_sub_box_6 = [[box_x_range_midpoint + 1, box_x_range[1]], [box_y_range[0], box_y_range_midpoint], [box_z_range_midpoint + 1, box_z_range[1]]]
                new_sub_box_7 = [[box_x_range[0], box_x_range_midpoint], [box_y_range_midpoint + 1, box_y_range[1]], [box_z_range_midpoint + 1, box_z_range[1]]]
                new_sub_box_8 = [[box_x_range_midpoint + 1, box_x_range[1]], [box_y_range_midpoint + 1, box_y_range[1]], [box_z_range_midpoint + 1, box_z_range[1]]]

                new_sub_boxes = []
                new_sub_boxes.append(new_sub_box_1)
                new_sub_boxes.append(new_sub_box_2)
                new_sub_boxes.append(new_sub_box_3)
                new_sub_boxes.append(new_sub_box_4)
                new_sub_boxes.append(new_sub_box_5)
                new_sub_boxes.append(new_sub_box_6)
                new_sub_boxes.append(new_sub_box_7)
                new_sub_boxes.append(new_sub_box_8)
                    
                for new_sub_box in new_sub_boxes:
                    # checks if a sub-box is at least partially contained inside the user-specified box. if so, then the sub-box will 
                    # be recursively searched until an entire sub-box is inside the user-specified box.
                    new_sub_box_partially_contained = self.boxesOverlap(new_sub_box, user_db_box)

                    if new_sub_box_partially_contained:
                        self.recursiveSubBoxesInFile(new_sub_box, user_db_box, morton_voxels_to_read, voxel_side_length)
        return
        
    def identifySubBoxesInFile(self, user_db_box, var, timepoint, voxel_side_length = 8):
        # initially assumes the user-specified box in the file is not the entire box representing the file. the database file box will 
        # be sub-divided into morton cubes until the user-specified box is completely mapped by all of these sub-cubes.
        user_db_box_x_range = user_db_box[0]
        user_db_box_y_range = user_db_box[1]
        user_db_box_z_range = user_db_box[2]
        
        user_db_box_x_min = user_db_box_x_range[0]
        user_db_box_y_min = user_db_box_y_range[0]
        user_db_box_z_min = user_db_box_z_range[0]
        
        # retrieve the morton index limits (minLim, maxLim) of the cube representing the whole database file
        f, cornercode, offset, minLim, maxLim = self.getFileForPoint(user_db_box_x_min, user_db_box_y_min, user_db_box_z_min, var, timepoint)
        minLim_xyz = self.mortoncurve.unpack(minLim)
        maxLim_xyz = self.mortoncurve.unpack(maxLim)
        
        # get the box for the entire database file so that it can be recursively broken down into cubes
        db_box = [[minLim_xyz[0], maxLim_xyz[0]], [minLim_xyz[1], maxLim_xyz[1]], [minLim_xyz[2], maxLim_xyz[2]]]
        
        # these are the constituent file sub-cubes that make up the part of the user-specified box in the database file
        morton_voxels_to_read = []
        self.recursiveSubBoxesInFile(db_box, user_db_box, morton_voxels_to_read, voxel_side_length)

        return morton_voxels_to_read
        
    def getVelocitiesForAllPoints(self, x_range, y_range, z_range, min_step = 1):
        # manually retrieves the velocities for all points inside the box. this is computationally expensive and not efficient, and 
        # this function is deprecated.
        x_min = x_range[0]; x_max = x_range[1];
        y_min = y_range[0]; y_max = y_range[1];
        z_min = z_range[0]; z_max = z_range[1];
        
        current_x_max = x_max
        current_y_max = y_max
        current_z_max = z_max
        
        velocity_map = {}
        velocity_data = np.array([-1, -1, -1])
        for x_point in np.arange(x_min, x_max + 1, min_step):
            for y_point in np.arange(y_min, y_max + 1, min_step):
                for z_point in np.arange(z_min, z_max + 1, min_step):
                    velocity_data = self.getISO_Point(x_point, y_point, z_point, var = 'vel', timepoint = 0, verbose = False)
                    
                    velocity_map[(x_point, y_point, z_point)] = velocity_data
                    #print(x_point)
                    #print(cornercode, offset)
        
        return velocity_map
        
    def getOffset(self, X, Y, Z):
        """
        TODO is this code correct for velocity as well?  YES
        """
        # morton curve index corresponding to the user specified X, Y, and Z values
        code = self.mortoncurve.pack(X, Y, Z)
        # always looking at an 8 x 8 x 8 box around the grid point, so the shift is always 9 bits to determine 
        # the bottom left corner of the box. the cornercode (bottom left corner of the 8 x 8 x 8 box) is always 
        # in the same file as the user-specified grid point.
        # equivalent to 512 * (math.floor(code / 512))
        cornercode = (code >> 9) << 9
        corner = np.array(self.mortoncurve.unpack(cornercode))
        # calculates the offset between the grid point and corner of the box and converts it to a 4-byte float.
        offset = np.sum((np.array([X, Y, Z]) - corner) * np.array([1, 8, 64]))
        
        return cornercode, offset
    
    def getFileForPoint(self, X, Y, Z, var = 'pr', timepoint = 0):
        """
        querying the cached SQL metadata for the file for the user specified grid point
        """
        cornercode, offset = self.getOffset(X, Y, Z)
        t = self.cache[(self.cache['minLim'] <= cornercode) & (self.cache['maxLim'] >= cornercode)]
        t = t.iloc[0]
        dataN = t.path.split("/")
        f = f'/home/idies/workspace/turb/data{t.ProductionMachineName[-2:]}_{dataN[2][-2:]}/{dataN[-1]}/{t.ProductionDatabaseName}_{var}_{timepoint}.bin'
        return f, cornercode, offset, t.minLim, t.maxLim
        
    def getISO_Points(self, db_file, morton_voxels_to_read, output_data, \
                      db_minLim, x_min, y_min, z_min, \
                      num_values_per_datapoint = 1, bytes_per_datapoint = 4, voxel_side_length = 8, verbose = False):
        """
        read the voxels listed in morton_voxels_to_read from db_file and copy the part of each voxel that
        is inside the user-specified box into output_data.

        db_file : path of the binary database file.
        morton_voxels_to_read : list of [[morton_index_min, morton_index_max], voxel_info, ...] entries, where
            each voxel_info is [cornercode, offset, [x_range, y_range, z_range]] (the format built by
            recursiveSubBoxesInFile).
        output_data : array indexed as [x - x_min, y - y_min, z - z_min, value]; it is filled in place.
        db_minLim : morton index of the first datapoint stored in db_file.
        x_min, y_min, z_min : minimum corner of the box that output_data represents.
        num_values_per_datapoint : number of values stored per grid point.
        bytes_per_datapoint : size of one stored value in bytes; only used to compute the seek position. the
            values themselves are always read with dtype 'f'.
        voxel_side_length : side length of a voxel.
        verbose : currently unused.

        nothing is returned.
        """
        # number of grid points in one voxel.
        voxel_cube_size = voxel_side_length**3
        
        # iterates over the groups of morton adjacent voxels to minimize the number I/O operations when reading the data.
        for morton_data in morton_voxels_to_read:
            # the continuous range of morton indices compiled from adjacent voxels that can be read in one go.
            morton_index_min, morton_index_max = morton_data[0]
            # the voxels in that range. they are parsed out separately because the data is sequentially ordered
            # within a voxel as opposed to morton ordered outside a voxel.
            voxel_data = morton_data[1:]
            
            num_datapoints = (morton_index_max - morton_index_min) + 1
            
            # byte position in the file at which this morton index range starts.
            seek_distance = num_values_per_datapoint * bytes_per_datapoint * (morton_index_min - db_minLim)
            # np.fromfile takes 'count' in items (not bytes) and 'offset' in bytes, so the number of values is passed
            # here. passing the byte length would request bytes_per_datapoint times more values than are needed.
            num_values = num_values_per_datapoint * num_datapoints
            
            l = np.fromfile(db_file, dtype = 'f', count = num_values, offset = seek_distance)
            # group the flat values into one row per datapoint. an incomplete trailing datapoint is dropped.
            num_rows = l.size // num_values_per_datapoint
            l = l[: num_rows * num_values_per_datapoint].reshape(num_rows, num_values_per_datapoint)
            
            for voxel_count, voxel_info in enumerate(voxel_data):
                # the x-, y-, and z-ranges of the voxel. these ranges are already adjusted if the voxel was only
                # partially contained inside the user-specified box.
                voxel_x_range, voxel_y_range, voxel_z_range = voxel_info[2]
                
                # pull out the rows that correspond to this voxel and shape them into a voxel matrix.
                sub_l_array = l[voxel_count * voxel_cube_size : (voxel_count + 1) * voxel_cube_size]
                sub_l_array = sub_l_array.reshape(voxel_side_length, voxel_side_length, voxel_side_length, num_values_per_datapoint)
                # the first axis after the reshape is swapped with the third so that the matrix is indexed [x, y, z].
                sub_l_array = np.swapaxes(sub_l_array, 0, 2)
                # remove parts of the voxel that are outside of the user-specified box.
                sub_l_array = sub_l_array[voxel_x_range[0] % voxel_side_length : (voxel_x_range[1] % voxel_side_length) + 1, \
                                          voxel_y_range[0] % voxel_side_length : (voxel_y_range[1] % voxel_side_length) + 1, \
                                          voxel_z_range[0] % voxel_side_length : (voxel_z_range[1] % voxel_side_length) + 1]
                
                # insert the voxel data at its position relative to the minimum corner of the output box.
                output_data[voxel_x_range[0] - x_min : voxel_x_range[1] - x_min + 1, \
                            voxel_y_range[0] - y_min : voxel_y_range[1] - y_min + 1, \
                            voxel_z_range[0] - z_min : voxel_z_range[1] - z_min + 1] = sub_l_array
                
                # drop the reference so the memory can be freed.
                sub_l_array = None
                
            # drop the reference so the memory can be freed before the next range is read.
            l = None
    def getISO_Point_original(self, X, Y, Z, var = 'pr', timepoint = 0, verbose = False):
        """
        find the value for the specified var(iable) at the specified point XYZ and specified time.
        Position is assumed to be a point of the grid, i.e. should be integers, and time should be an integer between 0 and 5.
        """
        f, cornercode, offset, minLim, maxLim = self.getFileForPoint(X, Y, Z, var, timepoint)
        if verbose:
            print(f'filename : {f}')
            print(f'cornercode : {cornercode}')
            print(f'corner : {np.array(self.mortoncurve.unpack(cornercode))}')
            print(f'offset : {offset}')
            print(f'minLim : {minLim}')
            print(f'maxLim : {maxLim}')
            #print(f, cornercode, offset, minLim, maxLim)
        
        N = 1
        if var == 'vel':
            N = 3
        
        with open(f, 'rb') as b:
            b.seek(N * 4 * (cornercode + offset - minLim))
            xraw = b.read(4 * N)
        
        l = struct.unpack('f' * N, xraw)
        
        return l
    
    def writeOutputMatrixToHDF5(self, output_data, output_path, output_filename, dataset_name):
        """
        write output_data to the hdf5 file output_path + output_filename + '.h5' as a single dataset named
        dataset_name. the file is opened in 'w' mode.
        """
        hdf5_file_path = output_path + output_filename + '.h5'
        
        with h5py.File(hdf5_file_path, 'w') as hdf5_file:
            hdf5_file.create_dataset(dataset_name, data = output_data)
    
"""
driver functions for processing the data and retrieving the data values for all points inside of a user-specified box.
"""
def retrieveDataForPoint(X, Y, Z, output_data, x_range, y_range, z_range):
    """
    look up the data stored in output_data for the (X, Y, Z) datapoint.

    x_range, y_range, z_range : [minimum, maximum] of the user-specified box along each axis, both ends inclusive.
    output_data : indexed as [x][y][z], with index 0 along each axis corresponding to the minimum of that axis.

    returns the entry stored at the datapoint.
    raises IndexError if X, Y, or Z is outside of the user-specified box.
    """
    # lower and upper limits of the user-specified box along each axis.
    x_min, y_min, z_min = x_range[0], y_range[0], z_range[0]
    x_max, y_max, z_max = x_range[1], y_range[1], z_range[1]

    # the datapoint has to be inside of the box that data was retrieved for. axes are checked in X, Y, Z order.
    axis_checks = (('X', X, x_min, x_max), ('Y', Y, y_min, y_max), ('Z', Z, z_min, z_max))
    for axis_label, coordinate, lower, upper in axis_checks:
        if lower <= coordinate <= upper:
            continue

        raise IndexError(f'{axis_label} datapoint, {coordinate}, must be in the range of [{lower}, {upper}]')

    # shifting by the box minimum converts the datapoint to its indices in the output_data array.
    return output_data[X - x_min][Y - y_min][Z - z_min]
    
def processData(cube_num, cube_dimensions, cube_title, output_path, x_range, y_range, z_range, var, timepoint):
    """
    driver function: reads the values of var(iable) for every point inside of the user-specified box, writes them
    to a hdf5 file, and returns them.

    cube_num, cube_dimensions, cube_title : passed to the IsoCube constructor.
    output_path : folder that the hdf5 file is written to. the filename is appended directly, so it has to end
                  with a path separator.
    x_range, y_range, z_range : [minimum, maximum] of the box along each axis, both ends inclusive.
    var : 'vel' reads 3 values per datapoint; any other value reads 1 value per datapoint.
    timepoint : passed to the IsoCube methods. 1 is added to it when naming the hdf5 dataset.

    returns a float32 numpy array of shape (x length, y length, z length, values per datapoint), where index 0
    along each axis corresponds to the minimum of that axis.

    raises ValueError if the box is empty or reversed along an axis, if it holds more than max_data_size GBs of
    data, or if no database files were found for it.
    raises Exception if any value of the output array was not filled by the read step.
    """
    # calculate how much time it takes to run the code.
    start_time = time.perf_counter()

    # data constants
    # bytes per value associated with a datapoint.
    bytes_per_datapoint = 4
    # maximum data size allowed to be retrieved, in gigabytes (GB).
    max_data_size = 3.0
    # smallest sub-box size to recursively shrink to. if this size box is only partially contained in the user-specified box, then
    # the (X, Y, Z) points outside of the user-specified box will be trimmed.  the value is the length of one side of the cube.
    voxel_side_length = 8

    # the number of values to read per datapoint. for pressure data this value is 1.  for velocity
    # data this value is 3, because there is a velocity measurement along each axis.
    num_values_per_datapoint = 3 if var == 'vel' else 1

    # used for determining the indices in the output array for each X, Y, Z datapoint.
    x_min = x_range[0]
    y_min = y_range[0]
    z_min = z_range[0]

    # used for creating the 3-D output array using numpy, and also checking that the user did not request too much data.
    x_axis_length = x_range[1] - x_range[0] + 1
    y_axis_length = y_range[1] - y_range[0] + 1
    z_axis_length = z_range[1] - z_range[0] + 1

    # the request is validated before IsoCube is constructed, so that an invalid box fails before any metadata is queried.
    # a reversed range would otherwise produce a non-positive axis length (two of them multiply to a positive datapoint count).
    if min(x_axis_length, y_axis_length, z_axis_length) < 1:
        raise ValueError('Please specify each range as [minimum, maximum] with minimum <= maximum. Received ' + \
                         f'x_range = {x_range}, y_range = {y_range}, z_range = {z_range}.')

    # total number of datapoints, used for checking if the user requested too much data.
    num_datapoints = x_axis_length * y_axis_length * z_axis_length
    # total size of data, in GBs, requested by the user's box.
    requested_data_size = (num_datapoints * bytes_per_datapoint * num_values_per_datapoint) / float(1024**3)
    # maximum number of datapoints that can be read in. currently set to 3 GBs worth of datapoints.
    max_datapoints = int((max_data_size * (1024**3)) / (bytes_per_datapoint * float(num_values_per_datapoint)))
    # approximate max size of a cube representing the maximum data points. this number is rounded down.
    approx_max_cube = int(max_datapoints**(1/3))

    if requested_data_size > max_data_size:
        raise ValueError(f'Please specify a box with fewer than {max_datapoints} data points. This represents an approximate cube size ' + \
                         f'of ({approx_max_cube} x {approx_max_cube} x {approx_max_cube}).')

    # object used to locate and read the data for all points inside the user-specified box.
    iso_data = IsoCube(cube_num = cube_num, cube_dimensions = cube_dimensions, cube_title = cube_title)

    # begin processing of data.
    # -----
    print('Note: For smaller boxes, up to 256-cubed, processing will take approximately 20 seconds or less.  For larger boxes, e.g. 512-cubed, processing ' + \
          'will take approximately 2 minutes or more...\n' + '-' * 5)

    # -----
    # get a map of the database files where all the data points are in.
    print('\nStep 1: Determining which database files the user-specified box is found in...\n' + '-' * 25)

    user_single_db_boxes = iso_data.identifySingleDatabaseFileSubBoxes(x_range, y_range, z_range, var, timepoint)

    print(f'number of database files that the user-specified box is found in:\n{len(user_single_db_boxes)}\n')

    # without any files there is nothing to read, and the statistics in step 2 cannot be computed.
    if not user_single_db_boxes:
        raise ValueError('No database files were found for the user-specified box.')

    print('Successfully completed.\n' + '-' * 5)

    # -----
    # recursively break down each single file box into sub-boxes, each of which is exactly one of the sub-divided cubes of the database file.
    print('\nStep 2: Recursively breaking down the portion of the user-specified box in each database file into voxels...\n' + '-' * 25)

    # the files are processed in order of their base filename. index 0 of each map entry is the portion of the
    # user-specified box that is passed to identifySubBoxesInFile for that file.
    sub_db_boxes = {}
    for db_file in sorted(user_single_db_boxes, key = lambda x: os.path.basename(x)):
        user_db_box = user_single_db_boxes[db_file][0]

        sub_db_boxes[db_file] = iso_data.identifySubBoxesInFile(user_db_box, var, timepoint, voxel_side_length)

    sub_box_counts = [len(morton_voxels_to_read) for morton_voxels_to_read in sub_db_boxes.values()]

    print('sub-box statistics for the database file(s):\n-')
    print(f'minimum number of sub-boxes to read in a database file:\n{min(sub_box_counts)}')
    print(f'maximum number of sub-boxes to read in a database file:\n{max(sub_box_counts)}\n')

    print('Successfully completed.\n' + '-' * 5)

    # -----
    # read the data.
    print('\nStep 3: Reading the data from all of the database files and storing the values into a matrix...\n' + '-' * 25)

    # pre-fill the output data 3-d array with NaN. NaN marks the values that have not been read in yet, which is what makes
    # the completeness check below possible (an array from np.empty holds arbitrary values and can never contain None).
    output_data = np.full((x_axis_length, y_axis_length, z_axis_length, num_values_per_datapoint), np.nan, dtype = 'f')

    # iterate over the database files and morton sub-boxes to read the data from.
    for db_file, morton_voxels_to_read in sub_db_boxes.items():
        # index 1 of each map entry is passed to getISO_Points as db_minLim.
        db_minLim = user_single_db_boxes[db_file][1]

        iso_data.getISO_Points(db_file, morton_voxels_to_read, output_data, \
                               db_minLim, x_min, y_min, z_min, \
                               num_values_per_datapoint, bytes_per_datapoint, voxel_side_length, verbose = False)

    # checks to make sure that data was read in for all points.
    # NOTE(review): this assumes that the stored data never contains NaN values - confirm for new datasets.
    if np.isnan(output_data).any():
        raise Exception(f'output_data was not filled correctly')

    print('\nSuccessfully completed.\n' + '-' * 5)

    # -----
    # write the output file.
    print('\nStep 4: Writing the output matrix to a hdf5 file...\n' + '-' * 25)

    # the output filename specifies the title of the cube, and the x-, y-, and z-ranges so that the file is unique. 1 is added to all of the
    # ranges because python uses 0-based indices, and the output is desired to be 1-based indices.
    output_filename = f'{cube_title}_x{x_range[0] + 1}-{x_range[1] + 1}_y{y_range[0] + 1}-{y_range[1] + 1}_z{z_range[0] + 1}-{z_range[1] + 1}'

    # formats the dataset name for the hdf5 output file. "Untitled" is a placeholder for any other variable.
    dataset_name = {'vel': 'Velocity', 'pr': 'Pressure'}.get(var, 'Untitled')

    # adds the timepoint information, zero-padded to 4 digits, to dataset_name. 1 is added to timepoint because python uses
    # 0-based indices, and the output is desired to be 1-based indices.
    dataset_name += '_' + str(timepoint + 1).zfill(4)

    # writes the output file.
    iso_data.writeOutputMatrixToHDF5(output_data, output_path, output_filename, dataset_name)

    print('\nSuccessfully completed.\n' + '-' * 5)

    end_time = time.perf_counter()

    # see how long the program took to run
    print(f'\ntotal time elapsed = {round(end_time - start_time, 3)} seconds ({round((end_time - start_time) / 60, 3)} minutes)')

    print('\nData processing pipeline has completed successfully.\n' + '-' * 5)

    return output_data

In [21]:
# user-defined parameters for processing data.
# size of the model cube that data will be retrieved for.
cube_num = 8192
# number of dimensions that model data exists in.  default is 3 (i.e. X, Y, and Z dimensions).
cube_dimensions = 3
# turbulence dataset name, e.g. "isotropic8192" or "isotropic1024fine".
cube_title = 'isotropic8192'
# folder name to write the hdf5 output files to.
output_folder_name = 'turbulence_hdf5_output'

# user-specified box for which data values will be retrieved for each point inside the box.
# each range is [minimum, maximum], with both ends included.
x_range = [0, 511]
y_range = [0, 511]
z_range = [0, 511]

# variable of interest, currently set to velocity.
var = 'vel'
# time point.
timepoint = 0

# process the data.
# -----
# create the output folder directory, inside of the current working directory, if it does not already exist.
# the working directory is used directly because __file__ is not defined inside of a notebook. the trailing '/'
# is required because processData appends the output filename directly to output_path.
dir_path = os.path.realpath(os.getcwd()) + '/'
output_path = dir_path + output_folder_name + '/'
# exist_ok avoids the race between checking for the folder and creating it.
os.makedirs(output_path, exist_ok = True)

# parse the database files, generate the output_data matrix, and write the matrix to an hdf5 file.
output_data = processData(cube_num, cube_dimensions, cube_title, output_path, x_range, y_range, z_range, var, timepoint)

Note: For smaller boxes, up to 256-cubed, processing will take approximately 20 seconds or less.  For larger boxes, e.g. 512-cubed, processing will take approximately 2 minutes or more...
-----

Step 1: Determining which database files the user-specified box is found in...
-------------------------
number of database files that the user-specified box is found in:
1

Successfully completed.
-----

Step 2: Recursively breaking down the portion of the user-specified box in each database file into voxels...
-------------------------
sub-box statistics for the database file(s):
-
minimum number of sub-boxes to read in a database file:
312
maximum number of sub-boxes to read in a database file:
312

Successfully completed.
-----

Step 3: Reading the data from all of the database files and storing the values into a matrix...
-------------------------

Successfully completed.
-----

Step 4: Writing the output matrix to a hdf5 file...
-------------------------

Successfully completed.
-----

to

In [20]:
# look up the stored data value for a single datapoint (X, Y, Z) of the box that was processed above.
X, Y, Z = 3, 4, 7

data_value = retrieveDataForPoint(X, Y, Z, output_data, x_range, y_range, z_range)

# the header line, the value, and the separator are each printed on their own line.
print(f'data value ("{var}") for datapoint ({X}, {Y}, {Z}):', data_value, '-' * 5, sep = '\n')

data value ("vel") for datapoint (3, 4, 7):
[0.3287481 0.8840852 4.188107 ]
-----


In [47]:
# for trials on  http://turbulence.pha.jhu.edu/webquery/query.aspx
# maps the X, Y, and Z grid points onto the domain of [0, 2*pi], using the spacing between
# neighboring grid points of the 8192-cubed grid.
dxyz = (2 * math.pi) / 8192
x, y, z = X * dxyz, Y * dxyz, Z * dxyz
# enter these values in UI
x, y, z

(0.0023009711818284618, 0.0030679615757712823, 0.005368932757599744)

## compare direct access to cutout
To get raw data in HDF5 format one can run a job at http://turbulence.idies.jhu.edu/cutout/jobs.
The result will be put on scratch. Here is an example of reading the result of such a job, using parameters.txt to find the location.

In [8]:
# compares the velocity values of a cutout job (hdf5 file on scratch) against the values held in output_data,
# point by point, and raises as soon as a datapoint differs.
# this cell relies on output_data, x_range, y_range, and z_range from the processing cell, and on
# retrieveDataForPoint being defined.
import h5py
import json
from tqdm import tqdm

# ran job x in [1000,1010], y and z in [1,10]
# NOTE(review): the comment above may describe an earlier job than the folder that is used below - confirm.
#folder='/home/idies/workspace/Temporary/gerard/scratch/jobs/__turbcutout__/20211012/20211012094603-148997/'
folder = '/home/idies/workspace/Temporary/mschnau1/scratch/jobs/__turbcutout__/20211112/20211112132339-159781/'
# parameters.txt is parsed as JSON. the keys 'ts', 'xs', 'ys', and 'zs' are read from it below.
p=f'{folder}parameters.txt' 
with open(p,'r') as f:
    pars=json.load(f)
# bare expression: has no effect unless it is the last statement of the cell.
pars

# f is reused here for the path of the hdf5 file of the cutout.
# NOTE: the hdf5 file is opened read-only and is not closed by this cell.
f=f'{folder}isotropic8192.h5' 
h5=h5py.File(f,'r')

h5['Velocity_0001'].shape

# x, y, and z are assigned here but are not used by the rest of this cell.
x=h5['xcoor']
y=h5['ycoor']
z=h5['zcoor']
vel=h5['Velocity_0001']

# NOTE: var and timepoint overwrite the notebook-level values that were set in the processing cell.
var='vel'
timepoint=pars['ts']-1   # in cutout time starts at 1
    
# choose an offset in the retrieved cutout
#dx = 8; dy = 8; dz = 8;

# 1-based index.
# NOTE(review): dx, dy, and dz are used directly as indices into vel below, while 1 is subtracted from them when
# computing X, Y, and Z - confirm which index convention is intended.
# range() excludes its stop value, so the second entry of each list is not visited.
x_range_dx = [3, 101]
y_range_dy = [7, 31]
z_range_dz = [8, 21]

for dx in tqdm(range(x_range_dx[0], x_range_dx[1])):
    for dy in range(y_range_dy[0], y_range_dy[1]):
        for dz in range(z_range_dz[0], z_range_dz[1]):
            # the cutout dataset is indexed in (z, y, x) order; the last axis is read in full.
            v1=vel[dz,dy,dx,:]

            # calculate position in the file
            X=pars['xs']+dx-1
            Y=pars['ys']+dy-1  # cutout starts at 1
            Z=pars['zs']+dz-1

            # gets velocity for all points inside the user specified box.
            #iso_data = IsoCube(cube_num = 8192, cube_dimensions = 3, cube_title = 'isotropic8192')

            #v2 = np.asarray(iso_data.getISO_Point_original(X, Y, Z, var, timepoint, verbose = False))

            # data from above code.
            # raises IndexError if (X, Y, Z) is outside of x_range, y_range, or z_range.
            v3 = retrieveDataForPoint(X, Y, Z, output_data, x_range, y_range, z_range)
            #v3 = output_data[5][5][6]

            # hope this would be zeros
            #v1-v2

            # exact comparison: every component of the difference has to be exactly 0.
            #all_zeros_v1v2 = np.all((v1-v2) == 0)
            all_zeros_v1v3 = np.all((v1-v3) == 0)
            #all_zeros_v2v3 = np.all((v2-v3) == 0)

            #if not (all_zeros_v1v2 and all_zeros_v1v3 and all_zeros_v2v3):
            if not all_zeros_v1v3:
                # print both values before stopping at the first datapoint that differs.
                print(v1)
                print(v3)
                raise Exception(f'({X}, {Y}, {Z}) datapoint did not produce the same results between the different methods')
                
print('complete')

100%|██████████| 98/98 [00:17<00:00,  5.73it/s]

complete





In [32]:
# compares with my cutout as well. 
# note: my cutout is ordered (X, Y, Z), as opposed to the website cutout (Z, Y, X). the hdf5 format may require (Z, Y, X), so I may have to change my code.
# three sources are compared point by point: the website cutout (vel), the hdf5 file written by this notebook (vel_my),
# and the in-memory output_data array. this cell relies on output_data, x_range, y_range, and z_range from the
# processing cell, and on retrieveDataForPoint being defined.
import h5py
import json
from tqdm import tqdm

# ran job x in [1000,1010], y and z in [1,10]
# NOTE(review): the comment above may describe an earlier job than the folder that is used below - confirm.
#folder='/home/idies/workspace/Temporary/gerard/scratch/jobs/__turbcutout__/20211012/20211012094603-148997/'
folder = '/home/idies/workspace/Temporary/mschnau1/scratch/jobs/__turbcutout__/20211112/20211112150007-159783/'
# parameters.txt is parsed as JSON. the keys 'ts', 'xs', 'ys', and 'zs' are read from it below.
p=f'{folder}parameters.txt' 
with open(p,'r') as f:
    pars=json.load(f)
# bare expression: has no effect unless it is the last statement of the cell.
pars

# f is reused here for the path of the hdf5 file of the cutout.
# NOTE: the hdf5 files are opened read-only and are not closed by this cell.
f=f'{folder}isotropic8192.h5' 
h5=h5py.File(f,'r')

h5['Velocity_0001'].shape

# x, y, and z are assigned here but are not used by the rest of this cell.
x=h5['xcoor']
y=h5['ycoor']
z=h5['zcoor']
vel=h5['Velocity_0001']

# data from the h5 file stored by my algorithm.
folder_my = '/home/idies/workspace/Storage/mschnau1/persistent/turbulence_hdf5_output/'

# NOTE(review): the filename encodes 1-based ranges (x413-512, y376-512, z387-512). presumably x_range, y_range, and
# z_range held the matching 0-based box when this cell was run - confirm before re-running.
f_my=f'{folder_my}isotropic8192_x413-512_y376-512_z387-512.h5' 
h5_my=h5py.File(f_my,'r')

h5_my['Velocity_0001'].shape

vel_my=h5_my['Velocity_0001']

# NOTE: var and timepoint overwrite the notebook-level values that were set in the processing cell.
var='vel'
timepoint=pars['ts']-1   # in cutout time starts at 1
    
# choose an offset in the retrieved cutout
#dx = 8; dy = 8; dz = 8;

# 1-based index.
# NOTE(review): dx, dy, and dz are used directly as indices into vel below, while 1 is subtracted from them when
# computing X, Y, and Z - confirm which index convention is intended.
# range() excludes its stop value, so the second entry of each list is not visited.
x_range_dx = [500, 512]
y_range_dy = [450, 512]
z_range_dz = [435, 512]

for dx in tqdm(range(x_range_dx[0], x_range_dx[1])):
    for dy in range(y_range_dy[0], y_range_dy[1]):
        for dz in range(z_range_dz[0], z_range_dz[1]):
            # the website cutout is indexed in (z, y, x) order.
            v1=vel[dz,dy,dx,:]
            # the file written by this notebook is indexed in (x, y, z) order, relative to the minimum of each range.
            v1_my = vel_my[dx - x_range[0],dy - y_range[0],dz - z_range[0],:]

            # calculate position in the file
            X=pars['xs']+dx-1
            Y=pars['ys']+dy-1  # cutout starts at 1
            Z=pars['zs']+dz-1

            # gets velocity for all points inside the user specified box.
            #iso_data = IsoCube(cube_num = 8192, cube_dimensions = 3, cube_title = 'isotropic8192')

            #v2 = np.asarray(iso_data.getISO_Point_original(X, Y, Z, var, timepoint, verbose = False))

            # data from above code.
            # raises IndexError if (X, Y, Z) is outside of x_range, y_range, or z_range.
            v3 = retrieveDataForPoint(X, Y, Z, output_data, x_range, y_range, z_range)
            #v3 = output_data[5][5][6]

            # hope this would be zeros
            #v1-v2

            # exact comparison: every component of each difference has to be exactly 0.
            #all_zeros_v1v2 = np.all((v1-v2) == 0)
            all_zeros_v1v3 = np.all((v1-v3) == 0)
            all_zeros_v1v1my = np.all((v1-v1_my) == 0)
            #all_zeros_v2v3 = np.all((v2-v3) == 0)

            #if not (all_zeros_v1v2 and all_zeros_v1v3 and all_zeros_v2v3):
            if not (all_zeros_v1v3 and all_zeros_v1v1my):
                # print all three values before stopping at the first datapoint that differs.
                print(v1)
                print(v1_my)
                print(v3)
                raise Exception(f'({X}, {Y}, {Z}) datapoint did not produce the same results between the different methods')
                
print('complete')

100%|██████████| 12/12 [00:56<00:00,  4.72s/it]

complete





In [6]:
# runs a dbo.fcover query through CasJobs and displays the rows that it returns.
# NOTE: SciServer.CasJobs is already imported as cj by the first code cell; importing it again lets this cell run on its own.
import SciServer.CasJobs as cj
# the SQL batch declares a dbo.Box created with (0,0,0,8192,8192,8192) and a dbo.Shape parsed from a BOX[...] string,
# then selects from dbo.fcover using both.
# NOTE(review): presumably dbo.fcover returns the morton key ranges that cover the query shape - confirm against the
# database documentation.
# NOTE(review): the query shape uses 513 for one bound and 512 for the other two - confirm that this is intentional.
sql = """
declare @box dbo.Box=dbo.Box::New(0,0,0,8192,8192,8192)
declare @query dbo.Shape=dbo.Shape::Parse('BOX[0,0,0,513,512,512]')
select * from dbo.fcover('M',13,@box,1,@query)
"""

# executes the query in the "simulationDB" context. NOTE: df is a notebook-level name that is overwritten here.
df = cj.executeQuery(sql, "simulationDB")

# bare expression so that the notebook displays the result.
df

Unnamed: 0,FullOnly,KeyMin,KeyMax,ShiftX,ShiftY,ShiftZ
0,False,0,134217728,0,0,0
1,False,134217730,134217730,0,0,0
2,False,134217732,134217732,0,0,0
3,False,134217734,134217734,0,0,0
4,False,134217744,134217744,0,0,0
...,...,...,...,...,...,...
262139,False,249261478,249261478,0,0,0
262140,False,249261488,249261488,0,0,0
262141,False,249261490,249261490,0,0,0
262142,False,249261492,249261492,0,0,0
