Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 77 additions & 21 deletions src/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -514,37 +514,93 @@ function Base.mv(src::AbstractPath, dst::AbstractPath; force=false)
end

"""
sync(src::AbstractPath, dst::AbstractPath; delete=false)
sync([f::Function,] src::AbstractPath, dst::AbstractPath; delete=false, overwrite=true)

Recursively copy new and updated files from the source path to the
destination. If delete is true then files at the destination that don't
exist at the source will be removed.
Recursively copy new and updated files from the source path to the destination.
If `delete` is true, then files at the destination that don't exist at the source will be removed.
If `overwrite` is false, files that already exist at the destination are never replaced.
By default, source files are sent to the destination if they have different sizes or the source has a newer
last-modified date.

Optionally, you can specify a function `f` which will take a `src` and `dst` path and return
true if the `src` should be sent. This may be useful if you'd like to use a checksum for
comparison.
"""
function sync(src::AbstractPath, dst::AbstractPath; delete=false)
# Create an index of all of the source files
index = Dict(Tuple(setdiff(p.segments, src.segments)) => p for p in walkpath(src))
sync(src::AbstractPath, dst::AbstractPath; kwargs...) =
    sync(should_sync, src, dst; kwargs...)  # default comparison is delegated to `should_sync`

if exists(dst)
for p in walkpath(dst)
k = Tuple(setdiff(p.segments, dst.segments))
# Synchronise `src` into `dst`, calling `f(src_path, dst_path)` to decide whether an entry
# that already exists at the destination should be replaced by its source counterpart.
#
# Keywords:
# - `delete`: when true, destination entries with no counterpart under `src` are removed.
# - `overwrite`: when false, `f` is never consulted and existing destination entries are
#   left untouched; entries missing at the destination are still copied.
#
# Throws `ArgumentError` when `src` does not exist, when `src` is neither a file nor a
# directory, or when an existing `dst` is not of the same kind (file vs. directory) as `src`.
function sync(f::Function, src::AbstractPath, dst::AbstractPath; delete=false, overwrite=true)
    # Throw an error if the source path doesn't exist at all
    exists(src) || throw(ArgumentError("$src does not exist"))

    # If the top level source is just a file then try to just sync that
    # without calling walkpath
    if isfile(src)
        # If the destination exists then we should make sure it is a file and check
        # if we should copy the source over.
        if exists(dst)
            isfile(dst) || throw(ArgumentError("$dst is not a file"))
            if overwrite && f(src, dst)
                cp(src, dst; force=true)
            end
        else
            cp(src, dst)
        end
    else
        isdir(src) || throw(ArgumentError("$src is neither a file or directory."))
        if exists(dst) && !isdir(dst)
            throw(ArgumentError("$dst is not a directory while $src is"))
        end

        # Key of a walked path relative to the root it was walked from. The root's segments
        # are stripped as a *prefix*; a set difference would also drop descendant segments
        # that merely share a name with a root segment and would collapse repeated names.
        relkey(p, root) = Tuple(p.segments[(length(root.segments) + 1):end])

        # Create an index of all of the source files, remembering each entry's position in
        # the walkpath order.
        src_paths = collect(walkpath(src))
        index = Dict(relkey(p, src) => i for (i, p) in enumerate(src_paths))

        if exists(dst)
            for p in walkpath(dst)
                k = relkey(p, dst)

                if haskey(index, k)
                    src_path = src_paths[index[k]]
                    if overwrite && f(src_path, p)
                        cp(src_path, p; force=true)
                    end

                    # Handled: whatever remains in the index afterwards is missing at dst.
                    delete!(index, k)
                elseif delete
                    rm(p; recursive=true)
                end
            end

            # Finally, copy over files that don't exist at the destination
            # But we need to iterate through it in a way that respects the original
            # walkpath order otherwise we may end up trying to copy a file before its parents.
            for (seg, i) in sort!(collect(index); by=last)
                cp(src_paths[i], Path(dst, tuple(dst.segments..., seg...)); force=true)
            end
        else
            cp(src, dst)
        end
    end
end

# Finally, copy over files that don't exist at the destination
for (seg, p) in index
cp(p, Path(dst, tuple(dst.segments..., seg...)); force=true)
end
# Default comparison used to decide whether `src` should be sent to `dst`.
#
# Returns `true` when the two paths differ in size or when `src` has a later modification
# time than `dst` (both read from `stat`), and `false` otherwise. This is a pure predicate:
# it must not copy anything itself, the caller performs the copy based on the result.
function should_sync(src::AbstractPath, dst::AbstractPath)
    src_stat = stat(src)
    dst_stat = stat(dst)

    needs_sync = src_stat.size != dst_stat.size || src_stat.mtime > dst_stat.mtime
    if needs_sync
        @debug(
            "syncing: $src -> $dst, " *
            "size: $(src_stat.size) -> $(dst_stat.size), " *
            "modified_time: $(src_stat.mtime) -> $(dst_stat.mtime)"
        )
    end

    return needs_sync
end

Expand Down
113 changes: 86 additions & 27 deletions src/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -612,34 +612,93 @@ module TestPaths

# Exercise `sync` against the paths in `ps`.
#
# NOTE: the nested testsets are order dependent. Each one builds on the state left at
# `ps.qux / "foo"` (and `ps.foo`) by the previous ones, up to the "delete" testset which
# removes the synced destination again.
function test_sync(ps::PathSet)
    @testset "sync" begin
        @testset "empty destination" begin
            sync(ps.foo, ps.qux / "foo")
            @test exists(ps.qux / "foo" / "baz.txt")

            # Test that the copied baz file has a newer modified time
            baz_t = modified(ps.qux / "foo" / "baz.txt")
            @test modified(ps.baz) < baz_t
        end

        @testset "empty source" begin
            @test_throws ArgumentError sync(ps.root / "quux", ps.foo)
        end

        @testset "new source" begin
            # Don't cp unchanged files when a new file is added
            # NOTE: sleep before we make a new file, so it's clear that the
            # modified time has changed.
            baz_t = modified(ps.qux / "foo" / "baz.txt")
            sleep(1)
            write(ps.foo / "test.txt", "New src")
            sync(ps.foo, ps.qux / "foo")
            @test exists(ps.qux / "foo" / "test.txt")
            @test read(ps.qux / "foo" / "test.txt", String) == "New src"
            @test modified(ps.qux / "foo" / "baz.txt") == baz_t
            @test modified(ps.qux / "foo" / "test.txt") > baz_t
        end

        @testset "new destination" begin
            # Newer file of the same size is likely the result of an upload which
            # will always have a newer last modified time.
            test_t = modified(ps.foo / "test.txt")
            sleep(1)
            write(ps.qux / "foo" / "test.txt", "New dst")
            @test modified(ps.qux / "foo" / "test.txt") > test_t
            sync(ps.foo, ps.qux / "foo")
            # The destination content must survive the sync untouched.
            @test read(ps.qux / "foo" / "test.txt", String) == "New dst"
            @test modified(ps.qux / "foo" / "test.txt") > test_t
        end

        @testset "no delete" begin
            # Test not deleting a file on sync
            rm(ps.foo / "test.txt")
            sync(ps.foo, ps.qux / "foo")
            @test exists(ps.qux / "foo" / "test.txt")
        end

        @testset "delete" begin
            # Test passing delete flag
            sync(ps.foo, ps.qux / "foo"; delete=true)
            @test !exists(ps.qux / "foo" / "test.txt")
            # Clean up the synced destination created by the earlier testsets.
            rm(ps.qux / "foo"; recursive=true)
        end

        @testset "mixed types" begin
            @testset "directory -> file" begin
                @test_throws ArgumentError sync(ps.foo, ps.quux)
            end

            @testset "file -> directory" begin
                @test_throws ArgumentError sync(ps.quux, ps.foo)
            end
        end

        @testset "walkpath order" begin
            # Test a condition where the index could reorder the walkpath order.
            tmp_src = ps.root / "tmp-src"
            mkdir(tmp_src)
            src_file = tmp_src / "file1"
            write(src_file, "Hello World!")

            src_folder = tmp_src / "folder1"
            mkdir(src_folder)
            src_folder_file = src_folder / "file2"
            write(src_folder_file, "")  # empty file

            src_folder2 = src_folder / "folder2"  # nested folders
            mkdir(src_folder2)
            src_folder2_file = src_folder2 / "file3"
            write(src_folder2_file, "Test")

            tmp_dst = ps.root / "tmp_dst"
            mkdir(tmp_dst)
            sync(tmp_src, tmp_dst)
            # Every level must have been created, parents before their children.
            @test exists(tmp_dst / "file1")
            @test exists(tmp_dst / "folder1" / "file2")
            @test exists(tmp_dst / "folder1" / "folder2" / "file3")
            rm(tmp_src; recursive=true)
            rm(tmp_dst; recursive=true)
        end
    end
end

Expand Down