In [1]:
# Add worker processes (default count) and print the ids of the available workers.
addprocs()
@show workers()
using BenchmarkTools, Compat
using BigArrays, GSDicts, S3Dicts, BigArrays.BinDicts
using OffsetArrays
using Plots


# Open the source image volume stored on S3 and take a 512 x 512 x 512 cutout
# that serves as the payload for every benchmark below.
ba = BigArray(S3Dict("s3://neuroglancer/s1_v1/image/6_6_30/"))
# Larger alternative cutout:
#const img = ba[4097:4096+1024, 4097:4096+1024, 1025:1024+512]
# `parent` unwraps the cutout result (presumably an OffsetArray -- confirm).
const img = parent(ba[4097:4608, 4097:4608, 1025:1536])
# Optional: dump the cutout to an HDF5 file.
#using HDF5
#rm("/tmp/img.h5")
#h5write("/tmp/img.h5", "image", img);

nworkers() = 12


[1m[36mINFO: [39m[22m[36mRecompiling stale cache file /usr/people/jingpeng/.julia/lib/v0.6/GSDicts.ji for module GSDicts.
[39m

	From worker 7:	From worker 7: processing block in global range: 4096-4608_4096-4608_1024-1088
	From worker 2:	From worker 2: processing block in global range: 4096-4608_4096-4608_1088-1152
	From worker 3:	From worker 3: processing block in global range: 4096-4608_4096-4608_1152-1216
	From worker 5:	From worker 5: processing block in global range: 4096-4608_4096-4608_1216-1280
	From worker 4:	From worker 4: processing block in global range: 4096-4608_4096-4608_1280-1344
	From worker 6:	From worker 6: processing block in global range: 4096-4608_4096-4608_1344-1408
	From worker 9:	From worker 9: processing block in global range: 4096-4608_4096-4608_1408-1472
	From worker 8:	From worker 8: processing block in global range: 4096-4608_4096-4608_1472-1536
cutout speed: 11.023721257590358 MB/s


In [10]:
using DataFrames

# Prepare a scratch directory laid out as <tempDir>/info and <tempDir>/<KEY>/.
tempDir = tempname()
const KEY = "6_6_30"
const infoPath = joinpath(tempDir, "info")
# Derive the dataset directory from KEY so it cannot drift from the "key"
# written into the info file.
const datasetDir = joinpath(tempDir, KEY)
# mkpath creates the parent tempDir as well as the dataset directory.
mkpath(datasetDir)
const CHUNK_SIZE = [256,256,32]
# NOTE(review): TASK_NUM is not referenced by any active code visible in this notebook.
const TASK_NUM = 20
# Number of save/cutout repetitions per benchmark.
const TEST_NUM = 3

"""
    save(ba::BigArray, img)

Write `img` into `ba` at the region `1:size(img, d)` along the first three
dimensions and return the elapsed wall-clock time of the write in seconds.
"""
function save(ba::BigArray, img)
    dims = size(img)
    xs, ys, zs = 1:dims[1], 1:dims[2], 1:dims[3]
    started = time()
    ba[xs, ys, zs] = img
    return time() - started
end

"""
    cutout(ba::BigArray, sz)

Read the region `1:sz[1], 1:sz[2], 1:sz[3]` from `ba`, discard the data, and
return the elapsed wall-clock time of the read in seconds.
"""
function cutout(ba::BigArray, sz)
    xs, ys, zs = 1:sz[1], 1:sz[2], 1:sz[3]
    started = time()
    ba[xs, ys, zs]
    return time() - started
end

"""
    test(ba::BigArray, img; testTime = 5)

Benchmark writing `img` to `ba` and reading it back, repeating the
save/cutout pair `TEST_NUM` times.

Returns a tuple `(saveSpeed, cutoutSpeed)` in MB/s (1 MB = 10^6 bytes), each
computed from the median elapsed time over the repetitions.

# Keywords
- `testTime`: accepted for backward compatibility; currently unused.
"""
function test(ba::BigArray, img; testTime = 5)
    # Typed accumulators: `save` and `cutout` return elapsed seconds as Float64.
    saveTimes = Float64[]
    cutoutTimes = Float64[]
    for trial in 1:TEST_NUM
        push!(saveTimes, save(ba, img))
        push!(cutoutTimes, cutout(ba, size(img)))
    end
    # Payload size in MB.
    totalSize = length(img) * sizeof(eltype(img)) / 1000 / 1000
    return totalSize / median(saveTimes), totalSize / median(cutoutTimes)
end

"""
    run_trials(chunkSize; testTime=5)

Write an `info` file describing a dataset with the given `chunkSize` to
`infoPath`, open the local `BinDict` store at `datasetDir` as a `BigArray`,
and benchmark saving and cutting out the global `img` with it.

Returns `(saveSpeed, cutoutSpeed)` in MB/s, as produced by `test`.

# Keywords
- `testTime`: forwarded to `test`.
"""
function run_trials(chunkSize; testTime=5)
    # `chunkSize` is interpolated in its printed form inside the surrounding
    # brackets of "chunk_sizes".
    infoString = """
{"num_channels": 1, "type": "image", "data_type": "uint8", "scales": [
{"encoding": "raw", "chunk_sizes": [$(chunkSize)], "key": "$(KEY)", "resolution": [6, 6, 30], "voxel_offset": [0, 0, 0], "size": [12286, 11262, 2046]}
]} 
"""
    open(infoPath, "w") do f
        write(f, infoString)
    end

    # Only the local BinDict backend is benchmarked. The remote runs against
    # gs://seunglab/jpwu/benchmark/image and s3://seunglab/jpwu/benchmark/image
    # are disabled; re-enabling them requires uploading the info file to the
    # bucket first and opening the matching GSDict / S3Dict store.
    ba_local = BigArray(BinDict(datasetDir))
    return test(ba_local, img; testTime=testTime)
end



run_trials (generic function with 1 method)

In [11]:
# taskNumList = [1, 5, 10, 15, 20, 25, 30]
# #taskNumList = [1, 10, 20]
# ss_list = Vector()
# sc_list = Vector()
# for taskNum in taskNumList
#     @show taskNum
#     ss, sc = run_trials(CHUNK_SIZE, taskNum; testTime=7)
#     push!(ss_list, ss)
#     push!(sc_list, sc)
# end 

# using Plots
# plot(taskNumList, hcat(ss_list, sc_list))
# #plot(taskNumList, ss_list)

In [12]:
# #plotly()
# gr()
# #pyplot()
# #plot(taskNumList, ss_list)
# plot(taskNumList, hcat(ss_list, sc_list), m=(8,:auto), legend=true,
#         lab=["saving", "cutout"],
#         xlabel="task number", 
#         ylabel="speed (MB/s)")

In [None]:
# Chunk shapes to benchmark: fixed 256 x 256 in-plane, varying depth.
chunkSizeList = [[256, 256, depth] for depth in (1, 8, 16, 32, 64, 128, 256)]

# Save / cutout speeds (MB/s), one entry per chunk size. Typed so the matrix
# built from them for plotting has a concrete element type.
ss_list2 = Float64[]
sc_list2 = Float64[]
for chunkSize in chunkSizeList
    @show chunkSize
    ss, sc = run_trials(chunkSize; testTime=13)
    push!(ss_list2, ss)
    push!(sc_list2, sc)
end

chunkSize = [256, 256, 1]
saving speed: 29.49253566048951 MB/s
	From worker 5:	From worker 5: processing block in global range: 0-256_0-256_0-1
	From worker 9:	From worker 9: processing block in global range: 256-512_0-256_0-1
	From worker 4:	From worker 4: processing block in global range: 0-256_256-512_0-1
	From worker 5:	From worker 5: processing block in global range: 256-512_256-512_2-3
	From worker 12:	From worker 12: processing block in global range: 256-512_256-512_0-1
	From worker 9:	From worker 9: processing block in global range: 0-256_0-256_3-4
	From worker 10:	From worker 10: processing block in global range: 0-256_0-256_2-3
	From worker 3:	From worker 3: processing block in global range: 256-512_0-256_1-2
	From worker 4:	From worker 4: processing block in global range: 0-256_256-512_3-4
	From worker 4:	From worker 4: processing block in global range: 256-512_256-512_5-6
	From worker 5:	From worker 5: processing block in global range: 0-256_0-256_8-9
	From worker 11:	From 

In [None]:
# Plot save and cutout speed against chunk size; each column of the hcat'ed
# matrix is one series. Per-series labels must be a 1 x 2 row matrix
# (space-separated): Plots.jl maps attribute columns to series, and a column
# vector would be applied as a whole to every series.
plot(map(string, chunkSizeList), hcat(ss_list2, sc_list2), m=(8,:auto), legend=true,
        lab=["saving" "cutout"],
        xlabel="chunk size", 
        ylabel="speed (MB/s)")