# Cleanup Reprocessing Directory
This is the Julia 1.0 script that cleans up a reprocessing directory based on a DQR. It can be run from anywhere; the only input is the DQR number. That variable must be set manually in cell two (the first code cell below) for the notebook to work.

### DQR tracking tool
* [Reprocessing Dashboard](https://task.arm.gov/report/repo/#s/_::D150701.31&_r::_)
* Only clean up tasks that are "Close completed" or "Close canceled" unless specifically asked to do otherwise.

### ServiceNow
* Once the task is cleaned, the user should "close complete" the Delete Original Data task on [ServiceNow](https://armcrf.service-now.com/).
* [REPO-04.2] Delete Original Data

### Todo: 
* allow for a list of dqrs to be cleaned
* query ServiceNow for completion status and loop over all DQR folders in the reproc home directory using subprocess ls call. 
* test the recursive folder move, it may not work as intended


In [22]:
# Required input: every later cell reads `dqr`, so set it before running them.
# The value is expected to look like one of:
#   D123456 | D123456.1 | D123456.12
dqr = "D180827.3" # "Set DQR# Here."

"D180827.3"

In [23]:
# Name fragments that select what gets archived (inclusive lists).
# Matching is by substring anywhere in the file or directory name, so these
# are not strictly file extensions.
archive_file_params = ["ncr_", "conf", "json", ".py", ".ipynb", "log", ".sh", ".csh", ".bash", ".Ingest"]
archive_folder_params = ["ncr_", "script"]

# Get environment variables. ENV values are already strings; reading them
# directly avoids round-tripping through a Cmd, which adds quoting to paths
# that contain spaces or other special characters.
reproc_home = ENV["REPROC_HOME"]
post_proc = ENV["POST_PROC"]

# Directory to clean and directory to archive into
clean_dir = joinpath(reproc_home, dqr)
archive_dir = joinpath(post_proc, dqr, "auto_archive")

# Stop early with a clear message when there is nothing to clean.
isdir(clean_dir) || error("Directory to clean does not exist: $clean_dir")

# Create archive directory if it doesn't exist, pass if it does exist
mkpath(archive_dir)

# Walk the directory and get all directories, subdirectories, and files
for (root, dirs, files) in walkdir(clean_dir)
    println("Found directory: $root")

    # Mirror the position of `root` below clean_dir inside the archive so that
    # equally named entries from different subdirectories cannot collide.
    # Entries found directly in clean_dir still land directly in archive_dir.
    rel = relpath(root, clean_dir)
    dest_root = rel == "." ? archive_dir : joinpath(archive_dir, rel)

    for d in dirs
        # Copy each matching directory once, even if several patterns match it.
        if any(param -> occursin(param, d), archive_folder_params)
            println("Archiving directory: $d")
            mkpath(dest_root)
            # force=true: a parent directory copied earlier in the walk may
            # already contain this directory; replacing it with the same
            # source is harmless and keeps re-runs from aborting.
            cp(joinpath(root, d), joinpath(dest_root, d); force=true)
        end
    end

    for f in files
        # Copy each matching file once, even if several patterns match it
        # (e.g. "config.json" matches both "conf" and "json").
        if any(param -> occursin(param, f), archive_file_params)
            println("Archiving file: $f")
            mkpath(dest_root)
            cp(joinpath(root, f), joinpath(dest_root, f); force=true)
        end
    end
end

# Only reached when every copy above succeeded; any error aborts before removal.
println("REMOVING: $clean_dir")
rm(clean_dir, force=true, recursive=true)
println("Finished cleaning: $clean_dir")

Found directory: /data/project/0021718_1509993009/D180827.3
Archiving file: D180827.3.conf
Archiving file: env.bash
Archiving file: env.csh
Found directory: /data/project/0021718_1509993009/D180827.3/archive
Found directory: /data/project/0021718_1509993009/D180827.3/collection
Found directory: /data/project/0021718_1509993009/D180827.3/collection/sgp
Found directory: /data/project/0021718_1509993009/D180827.3/collection/sgp/sgpaosnanosmpsE13.00
Found directory: /data/project/0021718_1509993009/D180827.3/conf
Found directory: /data/project/0021718_1509993009/D180827.3/datastream
Found directory: /data/project/0021718_1509993009/D180827.3/db
Found directory: /data/project/0021718_1509993009/D180827.3/file_comparison
Found directory: /data/project/0021718_1509993009/D180827.3/file_comparison/raw
Found directory: /data/project/0021718_1509993009/D180827.3/file_comparison/raw/sgp
Found directory: /data/project/0021718_1509993009/D180827.3/file_comparison/raw/sgp/sgpaosnanosmpsE13.00
Found 

## Julia Script Version

This is a version of the code written as a standalone Julia script: it is run from the command line and takes a single DQR number as its first argument. Save it with the `.jl` extension and run it with Julia 1.0.

In [None]:
# Command-line entry: the first argument is the DQR number to clean up.
# Invalid input stops the script here; continuing would leave `dqr` undefined
# and fail later with an unrelated error.
if length(ARGS) < 1
    println(stderr, "Please enter a dqr number as the first argument.")
    exit(1)
elseif !startswith(ARGS[1], "D")
    # startswith is also safe for an empty argument, unlike indexing ARGS[1][1].
    println(stderr, "not a valid dqr number")
    exit(1)
end
dqr = ARGS[1]

# Name fragments that select what gets archived (inclusive lists).
# Matching is by substring anywhere in the file or directory name, so these
# are not strictly file extensions.
archive_file_params = ["ncr_", "conf", "json", ".py", ".ipynb", "log", ".sh", ".csh", ".bash", ".Ingest"]
archive_folder_params = ["ncr_", "script"]

# Get environment variables. ENV values are already strings; reading them
# directly avoids round-tripping through a Cmd, which adds quoting to paths
# that contain spaces or other special characters.
reproc_home = ENV["REPROC_HOME"]
post_proc = ENV["POST_PROC"]

# Directory to clean and directory to archive into
clean_dir = joinpath(reproc_home, dqr)
archive_dir = joinpath(post_proc, dqr, "auto_archive")

# Stop early with a clear message when there is nothing to clean.
isdir(clean_dir) || error("Directory to clean does not exist: $clean_dir")

# Create archive directory if it doesn't exist, pass if it does exist
mkpath(archive_dir)

# Walk the directory and get all directories, subdirectories, and files
for (root, dirs, files) in walkdir(clean_dir)
    println("Found directory: $root")

    # Mirror the position of `root` below clean_dir inside the archive so that
    # equally named entries from different subdirectories cannot collide.
    # Entries found directly in clean_dir still land directly in archive_dir.
    rel = relpath(root, clean_dir)
    dest_root = rel == "." ? archive_dir : joinpath(archive_dir, rel)

    for d in dirs
        # Copy each matching directory once, even if several patterns match it.
        if any(param -> occursin(param, d), archive_folder_params)
            println("Archiving directory: $d")
            mkpath(dest_root)
            # force=true: a parent directory copied earlier in the walk may
            # already contain this directory; replacing it with the same
            # source is harmless and keeps re-runs from aborting.
            cp(joinpath(root, d), joinpath(dest_root, d); force=true)
        end
    end

    for f in files
        # Copy each matching file once, even if several patterns match it
        # (e.g. "config.json" matches both "conf" and "json").
        if any(param -> occursin(param, f), archive_file_params)
            println("Archiving file: $f")
            mkpath(dest_root)
            cp(joinpath(root, f), joinpath(dest_root, f); force=true)
        end
    end
end

# Only reached when every copy above succeeded; any error aborts before removal.
println("REMOVING: $clean_dir")
rm(clean_dir, force=true, recursive=true)
println("Finished cleaning: $clean_dir")