# Preamble 

In [34]:
import sys, os,glob,random,datetime

# change path as needed or comment
sys.path.append(r"C:\projects\OpenVisus\build\RelWithDebInfo")
from OpenVisus import *

# Dataset selection: keep exactly one idx_filename/bin_dir pair uncommented.

# EXAMPLE ssd
# idx_filename='c:/visus-datasets/raw/visus.idx'
# bin_dir='c:/visus-datasets/raw'

# EXAMPLE hdd
idx_filename='D:/GoogleSci/visus_dataset/2kbit1/huge/raw/visus.idx'
bin_dir='D:/GoogleSci/visus_dataset/2kbit1/huge/raw'

# EXAMPLE gdrive
# idx_filename='E:/My Drive/visus-datasets/llnl/battery.idx'
# bin_dir='E:/My Drive/visus-datasets/llnl/battery'

# EXAMPLE wasabi
# idx_filename = 'G:/visus-datasets/llnl/battery.idx'
# bin_dir='G:/visus-datasets/llnl/battery'

# Echo the selected locations and fail fast when either one is missing.
print("idx_filename",idx_filename)
Assert(os.path.isfile(idx_filename))
print("bin_dir",bin_dir)
Assert(os.path.isdir(bin_dir))

# Time budget, in seconds, for each benchmark loop (checked by stop()).
max_seconds=20

# Byte-size units used when reporting throughput.
KB=1024
MB=KB*1024
GB=MB*1024

# No explicit seed: every run picks different files/blocks.
random.seed()

# Start time of the test currently running; assigned by beginStats().
T1=None

def stop():
    """Return True once the running test has used up its time budget.

    Compares the time elapsed since the module-level `T1` against the
    module-level `max_seconds`. Both are only read here.
    """
    elapsed=T1.elapsedSec()
    return elapsed>=max_seconds

def beginStats(name):
    """Start a test: record the start time in `T1` and reset the file counters.

    `name` labels the test at the call site; it is not used by this function.
    """
    global T1
    T1=Time.now()
    stats=File.global_stats()
    stats.resetStats()

def endStats():
    """Print a one-line summary of the test started by the last beginStats().

    Reports the seconds elapsed since `T1`, the read-bytes counter of
    `File.global_stats()` (as whole MB and as MB/sec), and its open counter.
    """
    elapsed=T1.elapsedSec()
    stats=File.global_stats()
    read_mb=stats.getReadBytes()/MB
    num_open=stats.getNumOpen()
    summary="{}MB {:.2f}MB/sec".format(int(read_mb),read_mb/elapsed)
    print(elapsed,"seconds",summary,"nopen",num_open)

idx_filename D:/GoogleSci/visus_dataset/2kbit1/huge/raw/visus.idx
bin_dir D:/GoogleSci/visus_dataset/2kbit1/huge/raw


# Open IDX

In [35]:
# Open the dataset and derive the per-block quantities used by the tests.
db=LoadDataset(idx_filename)
field=db.getField()
samples_per_block=1<<db.idxfile.bitsperblock
blocksize=field.dtype.getByteSize(samples_per_block)
DIMS=db.getLogicSize()
nblocks=db.getTotalNumberOfBlocks()

# Report the dataset layout.
for label,value in (
    ("DIMS",DIMS),
    ("dtype",field.dtype.toString()),
    ("blocksize",blocksize),
    ("nblocks",nblocks),
):
    print(label,value)

# Objects shared by the benchmark cells: a block access, a buffer of
# `blocksize` uint8 elements to read into, and a reusable File handle.
access=db.createAccessForBlockQuery()
buffer=Array(blocksize,DType.fromString("uint8"))
file=File()

DIMS [8192 8192 4096]
dtype uint8
blocksize 65536
nblocks 4194304


# Find all binary files

In [36]:
# Find the binary files used by the raw (non-IDX) speed tests.
# Each candidate is stat'ed exactly once: a second os.path.getsize per file
# is wasted work, and noticeably slow on network/cloud-backed drives.
candidates = glob.glob(bin_dir + '/**/*.bin',recursive=True)
sized_candidates=[(it,os.path.getsize(it)) for it in candidates]

# Keep only files that can satisfy at least one full-block read.
filenames=[it for it,size in sized_candidates if size >= blocksize]
filesizes={it:size for it,size in sized_candidates if size >= blocksize}
nfiles=len(filenames)

print("found",nfiles,"*.bin files")
if nfiles==0:
    # Without this guard the failure is an opaque "min() arg is an empty
    # sequence" here, or random.randint(0,-1) in the read tests.
    raise ValueError("no *.bin file of at least {} bytes found under {}".format(blocksize,bin_dir))
print("minsize",min(filesizes.values()))
print("maxsize",max(filesizes.values()))

found 8192 *.bin files
minsize 33574952
maxsize 33574952


# read-seq

In [37]:
# Sequential raw read: walk consecutive files, reading each one block by block.
beginStats("read-seq")
# do not start from file 0 to avoid caching issues
I=random.randint(0,nfiles-1)
while not stop():
    filename=filenames[I % nfiles]
    filesize=filesizes[filename]
    Assert(file.open(filename, "r"))
    try:
        # Visit only offsets where a whole block fits; a trailing partial
        # block is skipped. The loop variable is advanced by range() itself,
        # so no manual increment is needed.
        for offset in range(0,filesize-blocksize+1,blocksize):
            Assert(file.read(offset,buffer.c_size(),buffer.c_ptr()))
    finally:
        # Release the shared handle even if a read assertion raises.
        file.close()
    I+=1
endStats()

20.242 seconds 2208MB 109.08MB/sec nopen 69


# read-rand

In [39]:
# Random raw read: one block at a random offset of a random file per iteration.
beginStats("read-rand")
while not stop():
    # pick a random file
    filename=random.choice(filenames)
    filesize=filesizes[filename]
    Assert(file.open(filename,"r"))
    try:
        # pick a random offset that still leaves room for a whole block
        offset=random.randint(0,filesize-blocksize)
        Assert(file.read(offset,buffer.c_size(),buffer.c_ptr()))
    finally:
        # Release the shared handle even if the read assertion raises.
        file.close()
endStats()

20.009 seconds 29MB 1.49MB/sec nopen 477


# read-idx-seq

In [40]:
# Sequential IDX read: execute block queries for consecutive block ids.
beginStats("read-idx-seq")
access.beginRead()
try:
    # start from any good block to avoid caching issues
    blockid=random.randint(0,nblocks-1)
    while not stop():
        query = db.createBlockQuery(blockid)
        query.buffer=buffer
        db.executeBlockQuery(access, query)
        if not query.ok():
            print("Query failed",query.errormsg)
        # Wrap around so the id stays within [0, nblocks-1] when the random
        # start is close to the end (same idea as `I % nfiles` in read-seq).
        blockid=(blockid+1) % nblocks
finally:
    # Always leave read mode, even if a query raises.
    access.endRead()
endStats()

20.002 seconds 2353MB 117.66MB/sec nopen 75


# read-idx-rand

In [41]:
# Random IDX read: execute a block query for a random block id per iteration.
beginStats("read-idx-rand")
access.beginRead()
try:
    while not stop():
        blockid=random.randint(0,nblocks-1)
        query = db.createBlockQuery(blockid)
        query.buffer=buffer
        db.executeBlockQuery(access, query)
        Assert(query.ok())
finally:
    # Always leave read mode; otherwise a failed assertion would leave the
    # shared `access` in read mode for the next cell.
    access.endRead()
endStats()

20.03 seconds 34MB 1.71MB/sec nopen 417


# box-query

In [42]:
# Box query: read randomly placed boxes of `dims` samples through db.read.
dims=(512,512,512)
beginStats("box-query")
access.beginRead()
try:
    while not stop():
        # Lower corner chosen so the whole box fits inside the dataset.
        x,y,z=[random.randint(0,DIMS[axis]-dims[axis]) for axis in range(3)]
        # Upper corner of the box (coordinates, not sizes).
        x2,y2,z2=x+dims[0],y+dims[1],z+dims[2]
        # Bind the result to its own name: rebinding `buffer` would replace
        # the block-sized Array the other read cells pass to
        # file.read/query.buffer, breaking them if re-run after this cell.
        box_data=db.read(logic_box=[(x,y,z),(x2,y2,z2)],access=access)
finally:
    # Always leave read mode, even if a read raises.
    access.endRead()
endStats()

23.737 seconds 952MB 40.12MB/sec nopen 246
