diff --git a/README.md b/README.md index 9a23f27..745e64f 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ The short version is: 1. Stick your data anywhere with a open HTTP link. (Skip this if it is already online.) 2. Write a DataDep registration block. 3. Refer to the data using `datadep"Dataname/file.csv` etc as if it were a file path, and DataDeps.jl will sort out getting in onto your system. -4. For CI purposes set the `DATADEPS_ALWAY_ACCEPT` environment variable. +4. For CI purposes set the `DATADEPS_ALWAYS_ACCEPT` environment variable. #### Where can I store my data online? Where ever you want, so long as it gives an Open HTTP(/s) link to download it. ** @@ -381,7 +381,7 @@ You can set these in the `.juliarc` file using the `ENV` dictionary if you don't However, most people shouldn't need to. DataDeps.jl tries to have very sensible defaults. - - `DATADEPS_ALWAY_ACCEPT` -- bypasses the confirmation before downloading data. Set to `true` (or similar string) + - `DATADEPS_ALWAYS_ACCEPT` -- bypasses the confirmation before downloading data. Set to `true` (or similar string) - This is provided for scripting (in particular CI) use - Note that it remains your responsibility to understand and read any terms of the data use (this is remains true even if you don't turn on this bypass) - default `false` @@ -391,12 +391,14 @@ DataDeps.jl tries to have very sensible defaults. - `DATADEPS_DISABLE_DOWNLOAD` -- causes any action that would result in the download being triggered to throw an exception. - useful e.g. if you are in an environment with metered data, where your datasets should have already been downloaded earlier, and if there were not you want to respond to the situation rather than let DataDeps download them for you. 
- default `false` + - `DATADEPS_DISABLE_ERROR_CLEANUP` -- By default DataDeps.jl will clean up the directory the datadep was being downloaded to if there is an error during the resolution (in any of the `fetch`, `checksum`, or `post_fetch`). For debugging purposes you may wish to disable this cleanup step so you can interrogate the files by hand. + ## Extending DataDeps.jl for Contributors Feel free (encouraged even) to open issues and make PRs. ### Internal Docstrings -As well as the usual all the publicly facing methods having docistrings, +As well as the usual all the publicly facing methods having docstrings, most of the internal methods do also. You can view them in the source; or via the julia REPL etc. Hopefully the internal docstrings make it clear how each method is used. diff --git a/appveyor.yml b/appveyor.yml index d399451..4f64139 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -37,6 +37,8 @@ install: # Run installer silently, output to C:\projects\julia - C:\projects\julia-binary.exe /S /D=C:\projects\julia +build: off # Disable MSBuild + build_script: # Need to convert from shallow to complete for Pkg.clone to work - IF EXIST .git\shallow (git fetch --unshallow) diff --git a/src/resolution_automatic.jl b/src/resolution_automatic.jl index 9f511f1..19a9a55 100644 --- a/src/resolution_automatic.jl +++ b/src/resolution_automatic.jl @@ -45,15 +45,21 @@ function Base.download( accept_terms(datadep, localdir, remotepath, i_accept_the_terms_of_use) - local fetched_path - while true - fetched_path = run_fetch(datadep.fetch_method, remotepath, localdir) - if skip_checksum || checksum_pass(datadep.hash, fetched_path) - break + mkpath(localdir) + try + local fetched_path + while true # this is a Do-While loop + fetched_path = run_fetch(datadep.fetch_method, remotepath, localdir) + if skip_checksum || checksum_pass(datadep.hash, fetched_path) + break + end end - end - run_post_fetch(datadep.post_fetch_method, fetched_path) + 
run_post_fetch(datadep.post_fetch_method, fetched_path) + catch err + env_bool("DATADEPS_DISABLE_ERROR_CLEANUP") || rm(localdir, force=true, recursive=true) + rethrow(err) + end end """ @@ -64,7 +70,6 @@ into the local directory and local paths. Performs in (async) parallel if multiple paths are given """ function run_fetch(fetch_method, remotepath, localdir) - mkpath(localdir) localpath = fetch_method(remotepath, localdir) localpath end @@ -129,7 +134,11 @@ end Ensurses the user accepts the terms of use; otherwise errors out. """ function accept_terms(datadep::DataDep, localpath, remotepath, ::Void) - if !env_bool("DATADEPS_ALWAY_ACCEPT") + if haskey(ENV, "DATADEPS_ALWAY_ACCEPT") + warn("Environment variable \$DATADEPS_ALWAY_ACCEPT is deprecated. " * + "Please use \$DATADEPS_ALWAYS_ACCEPT instead.") + end + if !(env_bool("DATADEPS_ALWAYS_ACCEPT") || env_bool("DATADEPS_ALWAY_ACCEPT")) response = check_if_accept_terms(datadep, localpath, remotepath) accept_terms(datadep, localpath, remotepath, response) else diff --git a/test/examples.jl b/test/examples.jl index 3a125b4..2e4bcb3 100644 --- a/test/examples.jl +++ b/test/examples.jl @@ -1,7 +1,7 @@ using Base.Test using DataDeps -ENV["DATADEPS_ALWAY_ACCEPT"]=true +ENV["DATADEPS_ALWAYS_ACCEPT"]=true @testset "Pi" begin register(DataDep( @@ -203,38 +203,9 @@ end @test length(collect(eachline(datadep"UCI Adult/adult.names"))) == 110 - -end - - -@testset "Data.Gov Babynames" begin - register(DataDep( - "Baby Names", - """ - Dataset: Baby Names from Social Security Card Applications-National Level Data - Website: https://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data - License: CC0 - - The data (name, year of birth, sex and number) are from a 100 percent sample of Social Security card applications after 1879. 
- """, - ["https://www.ssa.gov/oact/babynames/names.zip", - "https://catalog.data.gov/harvest/object/f8ab4d49-b6b4-47d8-b1bb-b18187094f35" - # Interestingly this metadata file fails on windows to resolve to filename to save to - # See warnings, The `mv` in post_fetch_method is the work-around. - ], - Any, # Test that there is no warning about checksum. This data is updated annually - #TODO : Automate this test with new 0.7 test_warn stuff - ; - post_fetch_method = [unpack, f->mv(f, "metadata551randstuff.json")] - )) - - @test !any(endswith.(readdir(datadep"Baby Names"), "zip")) - @test first(eachline(joinpath(datadep"Baby Names", "yob2016.txt")))=="Emma,F,19471" - @test filemode(joinpath(datadep"Baby Names", "metadata551randstuff.json")) > 0 end - @testset "FastText gzipped" begin register(DataDep("FastText fr", """ diff --git a/test/main.jl b/test/main.jl index a36cb46..fcd88d5 100644 --- a/test/main.jl +++ b/test/main.jl @@ -6,7 +6,7 @@ using ExpectationStubs # HACK: todo, work out how ExpectationStubs should be changed to make this make sense Base.open(stub::Stub, t::Any, ::AbstractString) = stub(t) -withenv("DATADEPS_ALWAY_ACCEPT"=>"true") do +withenv("DATADEPS_ALWAYS_ACCEPT"=>"true") do @testset "automatic download" begin @stub dummyhash @expect(dummyhash(::Any) = [0x12, 0x34]) @@ -35,5 +35,24 @@ withenv("DATADEPS_ALWAY_ACCEPT"=>"true") do macroexpand(:(@datadep_str var)) # this line would throw an error if the varibles were being handle wrong @test true end - + + + @testset "Ensure when errors occur the datadep will still retrydownloading" begin + @testset "error in fetch" begin + use_count = 0 + function error_down(rp,lp) + use_count += 1 + error("no download for you") + end + + register(DataDep("TestErrorFetch", "dummy message", "http://example.void", Any, + fetch_method = error_down + )) + @test_throws Exception datadep"TestErrorFetch" + @test use_count == 1 + + @test_throws Exception datadep"TestErrorFetch" + @test use_count == 2 # it should have tried to 
download again + end + end end