In [1]:
using PyCall
using JLD
using FileIO

In [2]:
@pyimport swiftclient.service as service
@pyimport swiftclient.client as client
# Swift service handle shared by every cell below.
# Each connection setting can be overridden through the matching `OS_*`
# environment variable; when the variable is unset the previous literal value
# is used, so existing sessions behave as before.
# NOTE(review): the fallback username/password are committed in plain text --
# rotate the credential and drop the fallbacks once callers export OS_PASSWORD.
serv = service.SwiftService(Dict(
    "os_auth_url"     => get(ENV, "OS_AUTH_URL", "https://keystone.rc.nectar.org.au:5000/v2.0/"),
    "os_username"     => get(ENV, "OS_USERNAME", "20361362@student.uwa.edu.au"),
    "os_password"     => get(ENV, "OS_PASSWORD", "MzhkYzg0M2IwMTU2NGU1"),
    "os_tenant_name"  => get(ENV, "OS_TENANT_NAME", "UWA_SEMANTIC_VECTORS"),
    "auth_version"    => "2",
    "segment_size"    => 1024^3, #1GB
    "segment_threads" => 10,
    "object_threads"  => 24
))

PyObject <swiftclient.service.SwiftService object at 0x7fdbaa727890>

In [3]:
"""
    show_responses(responses)

Print every key/value pair of each response in `responses` to standard output.
The value stored under `"response_dict"` is expanded one level, and its nested
`"response_dicts"` entry is skipped. A blank line follows each response.
"""
function show_responses(responses)
    println("Swift  Responses:")
    println("-------------------------")
    for response in responses
        for (key, payload) in response
            # Ordinary entries are printed as-is.
            if key != "response_dict"
                println("** $key **:")
                println(payload)
                continue
            end
            # The response dict is unpacked so each of its fields gets a heading.
            for (kkey, item) in payload
                kkey == "response_dicts" && continue
                println("** response_dict -- $kkey **:")
                println(item)
            end
        end
        println("")
    end
end

show_responses (generic function with 1 method)

In [4]:

"""
    put_file(serv, container, name, fname; verbose=false)

Upload the local file `fname` to Swift under the object name `name`.

`container` can be either just a container name or a pseudofolder path.
When `verbose` is true, all responses are printed with `show_responses`.
Returns the last response, and raises an error built from that response's
`"error"` entry when its `"success"` entry is false.
"""
function put_file(serv, container::String, name::String, fname::String; verbose::Bool=false)
    upload_object = service.SwiftUploadObject(fname, object_name=name)
    # The upload call yields its responses lazily; gather them all first.
    responses = collect(serv[:upload](container, [upload_object]))
    verbose && show_responses(responses)
    final = responses[end]
    if !final["success"]
        error(final["error"])
    end
    final
end

"""
    put_file(serv, container, name, fp::IO; verbose=false, tmpdir="/dev/shm")

Upload the remaining contents of the stream `fp` to Swift as the object `name`.

The stream is spooled to a temporary file created under `tmpdir`, which is then
uploaded with the path-based `put_file` method; that method's result is returned.
Point `tmpdir` at a larger filesystem when the data does not fit in `/dev/shm`.

Note: this reads out the `fp` IO to the end.
"""
function put_file(serv, container::String, name::String, fp::IO;
                  verbose::Bool=false, tmpdir::String="/dev/shm")
    mktempdir(tmpdir) do tdir
        fname = joinpath(tdir, name)
        # `name` may contain "/" separators; make sure the parent directory exists.
        mkpath(dirname(fname))
        open(fname, "w") do fp_inner
            write(fp_inner, read(fp))
        end
        put_file(serv, container, name, fname; verbose=verbose)
    end
end

"""
    put_jld(serv, container, name; data...)

Save the keyword arguments `data` as a JLD file and upload it to Swift as `name`.

Each keyword becomes a field of the JLD file, stored under the keyword's name.
The file is written to a temporary directory under `/dev/shm` before upload.
"""
function put_jld(serv, container::String, name::String; data...)
    mktempdir("/dev/shm") do tdir
        fname = joinpath(tdir, name)
        # `save` takes alternating field-name / value arguments.
        fields = Any[]
        for (key, val) in data
            push!(fields, string(key))
            push!(fields, val)
        end
        save(File(format"JLD", fname), fields...)
        put_file(serv, container, name, fname)
    end
end

###############

"""
    get_file!(serv, container, name, fname; verbose=false)

Download the object `name` from Swift, writing the result to the local file `fname`.

`container` can be either just a container name or a pseudofolder path. For a
pseudofolder path the first component is used as the container and the rest is
prepended to `name`. When `verbose` is true, all responses are printed with
`show_responses`. Returns the last response, and raises an error built from
that response's `"error"` entry when its `"success"` entry is false.
"""
function get_file!(serv, container::String, name::String, fname::String; verbose::Bool=false)
    path_parts = split(strip(container, '/'), "/")
    bucket = path_parts[1]
    # Anything after the container name is a pseudofolder prefix of the object.
    object_name = length(path_parts) > 1 ? join(path_parts[2:end], "/") * "/" * name : name

    responses = collect(serv[:download](bucket, [object_name], Dict("out_file" => fname)))
    verbose && show_responses(responses)
    final = responses[end]
    if !final["success"]
        error(final["error"])
    end
    final
end

"""
    get_file(func, serv, container, name; verbose=false, tmpdir="/dev/shm")

Download the object `name` from Swift into a temporary file, open that file for
reading, call `func` on the resulting IO stream, and return what `func` returns.

The temporary file lives in a directory created under `tmpdir` and is removed
afterwards. Point `tmpdir` at a larger filesystem when the object does not fit
in `/dev/shm`. `container` and `verbose` are passed on to `get_file!`.
"""
function get_file(func::Function, serv, container::String, name::String;
                  verbose::Bool=false, tmpdir::String="/dev/shm")
    mktempdir(tmpdir) do tdir
        fname = joinpath(tdir, name)
        # `name` may contain "/" separators; make sure the parent directory exists.
        mkpath(dirname(fname))
        get_file!(serv, container, name, fname; verbose=verbose)
        open(func, fname, "r")
    end
end

"""
    get_jld(conn, container, name, verbose=false, data...)

Download a JLD file from Swift and load it.

`conn` is the Swift service object used for the download. `data` is a list of
fieldnames to read; it is passed straight on to `JLD.load`. The file is
downloaded to a temporary directory under `/dev/shm`, which is removed afterwards.
"""
function get_jld(conn, container::String, name::String, verbose::Bool = false, data...)
    mktempdir("/dev/shm") do tdir
        fname = joinpath(tdir, name)
        # Download through the `conn` argument, not through a global service object.
        get_file!(conn, container, name, fname; verbose=verbose)
        JLD.load(File(format"JLD", fname), data...)
    end
end

get_jld

In [None]:
# Load every field of the large AdaGram model from Swift, printing the responses;
# the trailing `;` suppresses the notebook display of the result.
rr = get_jld(serv, "sensemodels/adagram", "more_senses.adagram_model.jld", true);

In [222]:
# Fetch the params file and print the first 256 positions of its contents.
get_file(serv, "sensemodels/adagram", "paper_v1.params.jld"; verbose=true) do fp
    contents = convert(String, read(fp))
    println(contents[1:256])
end

Swift  Responses:
-------------------------
** read_length **:
19010
** action **:
download_object
** path **:
adagram/paper_v1.params.jld
** start_time **:
1.473236120466132e9
** pseudodir **:
false
** finish_time **:
1.473236121138233e9
** headers_receipt **:
1.473236121135896e9
** container **:
sensemodels
** auth_end_time **:
1.473236120468653e9
** attempts **:
1
** success **:
true
** object **:
adagram/paper_v1.params.jld
** response_dict -- headers **:
Dict{Any,Any}(Pair{Any,Any}("x-trans-id","tx9d6546131eaa4af983576-0057cfcc98"),Pair{Any,Any}("x-object-meta-mtime","1470737522.596769"),Pair{Any,Any}("etag","26c60f2b36a66c20a50cfd2a3065f506"),Pair{Any,Any}("x-timestamp","1473164148.32492"),Pair{Any,Any}("content-type","application/octet-stream"),Pair{Any,Any}("date","Wed, 07 Sep 2016 08:15:21 GMT"),Pair{Any,Any}("last-modified","Tue, 06 Sep 2016 12:15:49 GMT"),Pair{Any,Any}("accept-ranges","bytes"),Pair{Any,Any}("content-length","19010"))
** response_dict -- status **:
200
** res

In [236]:
"""
    list(serv)
    list(serv, container)
    list(serv, container, object)

Call the `stat` method of `serv`, forwarding `container` and `object` when they
are given, and return its result.
"""
function list(serv)
    serv[:stat]()
end
function list(serv, container::String)
    serv[:stat](container)
end
function list(serv, container::String, object::String)
    serv[:stat](container, object)
end
# Alias kept for existing callers.
# NOTE(review): this name hides `Base.stat` in the session -- confirm it is intended.
stat = list


list (generic function with 3 methods)

In [232]:
"""
    validate_equal!(a_stream, b_stream; buffer_len=4*1024)

Return `true` when both streams yield exactly the same bytes, `false` otherwise.

The streams are compared in chunks of `buffer_len` bytes. Only the bytes that
were actually read in each chunk are compared, and streams of different length
compare as unequal. Throws an `ArgumentError` when `buffer_len` is not positive.

Note this reads out the streams to their ends (or to the first difference).
"""
function validate_equal!(a_stream::IO, b_stream::IO; buffer_len=4*1024)
    buffer_len > 0 || throw(ArgumentError("buffer_len must be positive, got $buffer_len"))
    a_buf = zeros(UInt8, buffer_len)
    b_buf = zeros(UInt8, buffer_len)

    while true
        a_len_read = readbytes!(a_stream, a_buf)
        b_len_read = readbytes!(b_stream, b_buf)
        # A chunk of different size means one stream ended before the other.
        a_len_read == b_len_read || return false
        # Compare only what was read; the buffer tail holds bytes from earlier chunks.
        for i in 1:a_len_read
            a_buf[i] == b_buf[i] || return false
        end
        if a_len_read < buffer_len
            return true #we have reached the end
        end
    end
end

"""
    validate_file(serv, container, objectname, fname; verbose=true)

Download a file from Swift and check that it is the same as the local file given by `fname`.

Returns `true` when the contents are identical and raises an error otherwise.
`verbose` is passed on to `get_file`.

Note: this is a comprehensive byte-by-byte check. Normally you would prefer to
just compare hashes. This method is for testing that the hash-checking method works.
"""
function validate_file(serv, container::String, objectname::String, fname::String;
                       verbose::Bool=true)
    get_file(serv, container, objectname; verbose=verbose) do rfp
        open(fname, "r") do lfp
            # Short-circuit `||`: the error is raised only when the comparison fails.
            validate_equal!(rfp, lfp) || error("$fname is not identical to $container : $objectname")
        end
    end
end

validate_file

In [212]:
# Compare the uploaded AdaGram model against the local copy it was uploaded from.
validate_file(
    serv,
    "sensemodels/adagram",
    "more_senses.adagram_model.jld",
    "../SenseSplittingWord2Vec/eval/models/adagram/more_senses.adagram_model.jld"
)

Swift  Responses:
-------------------------
** traceback **:
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/swiftclient/service.py", line 1217, in _download_object_job
    fp.write(chunk)
IOError: [Errno 28] No space left on device

** action **:
download_object
** path **:
adagram/more_senses.adagram_model.jld
** pseudodir **:
false
** container **:
sensemodels
** attempts **:
1
** error **:
PyObject IOError(28, 'No space left on device')
** success **:
false
** object **:
adagram/more_senses.adagram_model.jld
** error_timestamp **:
1.47323460499253e9
** response_dict -- headers **:
Dict{Any,Any}(Pair{Any,Any}("x-trans-id","txafa8a2b3070642a884e0b-0057cfc44b"),Pair{Any,Any}("etag","\"a79589971730ba605387b1355dd0f1f6\""),Pair{Any,Any}("x-timestamp","1473164531.28551"),Pair{Any,Any}("x-object-manifest","sensemodels_segments/adagram/more_senses.adagram_model.jld/1472614784.203176/12919105415/1073741824/"),Pair{Any,Any}("x-object-meta-mtime","1472614784.

LoadError: LoadError: PyObject IOError(28, 'No space left on device')
while loading In[212], in expression starting on line 1

1-element Array{Any,1}:
 Dict{Any,Any}(Pair{Any,Any}("headers",Dict{Any,Any}(Pair{Any,Any}("x-trans-id","tx1766a0f02e524954aa026-0057ce9f8c"),Pair{Any,Any}("x-object-meta-mtime","1461739948.487330"),Pair{Any,Any}("etag","4c8c9669806e65a5df8d2f74430ebe66"),Pair{Any,Any}("x-timestamp","1473159050.95456"),Pair{Any,Any}("content-type","application/octet-stream"),Pair{Any,Any}("date","Tue, 06 Sep 2016 10:50:52 GMT"),Pair{Any,Any}("last-modified","Tue, 06 Sep 2016 10:50:51 GMT"),Pair{Any,Any}("accept-ranges","bytes"),Pair{Any,Any}("content-length","3817"))),Pair{Any,Any}("status",200),Pair{Any,Any}("reason","OK"))

In [226]:
using Glob

In [None]:
# Upload the AdaGram models into the `adagram` pseudofolder.
for fn in vcat(glob(glob"../SenseSplittingWord2Vec/eval/models/adagram/*.jld"),
               glob(glob"../SenseSplittingWord2Vec/eval/models/adagram/*.adagram_model"))
    @show fn
    name = basename(fn)
    put_file(serv, "sensemodels/adagram/", name, fn)
end


# Upload the `ss` models and logs into the `greedy` pseudofolder.
for fn in vcat(glob(glob"../SenseSplittingWord2Vec/eval/models/ss/*.jld"),
               glob(glob"../SenseSplittingWord2Vec/eval/models/ss/*.log"))
    @show fn
    name = basename(fn)
    put_file(serv, "sensemodels/greedy/", name, fn)
end



# Upload the plain models and logs into the `plain` pseudofolder.
for fn in vcat(glob(glob"../SenseSplittingWord2Vec/eval/models/plain/*.jld"),
               glob(glob"../SenseSplittingWord2Vec/eval/models/plain/*.log"))
    @show fn
    name = basename(fn)
    put_file(serv, "sensemodels/plain/", name, fn)
end



fn = "../SenseSplittingWord2Vec/eval/models/adagram/more_senses.adagram_model.jld"
fn = 

In [233]:
# Check each local AdaGram model against its copy in the `adagram` pseudofolder.
for fn in vcat(glob(glob"../SenseSplittingWord2Vec/eval/models/adagram/*.jld"),
               glob(glob"../SenseSplittingWord2Vec/eval/models/adagram/*.adagram_model"))
    @show fn
    name = basename(fn)
    validate_file(serv, "sensemodels/adagram/", name, fn)
end


# Check each local `ss` model and log against its copy in the `greedy` pseudofolder.
for fn in vcat(glob(glob"../SenseSplittingWord2Vec/eval/models/ss/*.jld"),
               glob(glob"../SenseSplittingWord2Vec/eval/models/ss/*.log"))
    @show fn
    name = basename(fn)
    validate_file(serv, "sensemodels/greedy/", name, fn)
end



# Check each local plain model and log against its copy in the `plain` pseudofolder.
for fn in vcat(glob(glob"../SenseSplittingWord2Vec/eval/models/plain/*.jld"),
               glob(glob"../SenseSplittingWord2Vec/eval/models/plain/*.log"))
    @show fn
    name = basename(fn)
    validate_file(serv, "sensemodels/plain/", name, fn)
end



fn = "../SenseSplittingWord2Vec/eval/models/adagram/more_senses.adagram_model.jld"
Swift  Responses:
-------------------------
** read_length **:
12919105415
** action **:
download_object
** path **:
adagram/more_senses.adagram_model.jld
** start_time **:
1.473236308007375e9
** pseudodir **:
false
** finish_time **:
1.47323672863035e9
** headers_receipt **:
1.473236308567895e9
** container **:
sensemodels
** auth_end_time **:
1.473236308007974e9
** attempts **:
1
** success **:
true
** object **:
adagram/more_senses.adagram_model.jld
** response_dict -- headers **:
Dict{Any,Any}(Pair{Any,Any}("x-trans-id","tx45188f8df92745c89db6f-0057cfcd54"),Pair{Any,Any}("etag","\"a79589971730ba605387b1355dd0f1f6\""),Pair{Any,Any}("x-timestamp","1473164531.28551"),Pair{Any,Any}("x-object-manifest","sensemodels_segments/adagram/more_senses.adagram_model.jld/1472614784.203176/12919105415/1073741824/"),Pair{Any,Any}("x-object-meta-mtime","1472614784.203176"),Pair{Any,Any}("content-type","application/oct

LoadError: LoadError: ../SenseSplittingWord2Vec/eval/models/adagram/more_senses.adagram_model.jld is not identical to sensemodels/adagram/ : more_senses.adagram_model.jld
while loading In[233], in expression starting on line 1

In [235]:
; md5sum ../SenseSplittingWord2Vec/eval/models/adagram/more_senses.adagram_model.jld

b6b5918f39332660966094deb6d58a06  ../SenseSplittingWord2Vec/eval/models/adagram/more_senses.adagram_model.jld


LoadError: LoadError: PyObject IOError(2, 'No such file or directory')
while loading In[240], in expression starting on line 1