Merge pull request #4410 from Enrico204/restic-stdin-command

add --stdin-from-command flag to backup command
restic · Oct 27, 2023 · 42ab3ea · 42ab3ea
2 parents 1041078 + be28a02
commit 42ab3ea
Show file tree

Hide file tree

Showing 7 changed files with 305 additions and 24 deletions.
diff --git a/changelog/unreleased/issue-4251 b/changelog/unreleased/issue-4251
@@ -0,0 +1,14 @@
+Enhancement: Support reading backup from a program's standard output
+
+When reading data from stdin, the `backup` command could not verify whether the
+corresponding command completed successfully.
+
+The `backup` command now supports starting an arbitrary command and sourcing
+the backup content from its standard output. This enables restic to verify that
+the command completes with exit code zero. A non-zero exit code causes the
+backup to fail.
+
+Example: `restic backup --stdin-from-command mysqldump [...]`
+
+https://github.com/restic/restic/issues/4251
+https://github.com/restic/restic/pull/4410
diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go
@@ -97,6 +97,7 @@ type BackupOptions struct {
 	ExcludeLargerThan string
 	Stdin             bool
 	StdinFilename     string
+	StdinCommand      bool
 	Tags              restic.TagLists
 	Host              string
 	FilesFrom         []string
@@ -134,6 +135,7 @@ func init() {
 	f.StringVar(&backupOptions.ExcludeLargerThan, "exclude-larger-than", "", "max `size` of the files to be backed up (allowed suffixes: k/K, m/M, g/G, t/T)")
 	f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
 	f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
+	f.BoolVar(&backupOptions.StdinCommand, "stdin-from-command", false, "execute command and store its stdout")
 	f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
 	f.UintVar(&backupOptions.ReadConcurrency, "read-concurrency", 0, "read `n` files concurrently (default: $RESTIC_READ_CONCURRENCY or 2)")
 	f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
@@ -287,7 +289,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
 		}
 	}
 
-	if opts.Stdin {
+	if opts.Stdin || opts.StdinCommand {
 		if len(opts.FilesFrom) > 0 {
 			return errors.Fatal("--stdin and --files-from cannot be used together")
 		}
@@ -298,7 +300,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
 			return errors.Fatal("--stdin and --files-from-raw cannot be used together")
 		}
 
-		if len(args) > 0 {
+		if len(args) > 0 && !opts.StdinCommand {
 			return errors.Fatal("--stdin was specified and files/dirs were listed as arguments")
 		}
 	}
@@ -366,7 +368,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string) (fs []RejectFunc,
 
 // collectTargets returns a list of target files/dirs from several sources.
 func collectTargets(opts BackupOptions, args []string) (targets []string, err error) {
-	if opts.Stdin {
+	if opts.Stdin || opts.StdinCommand {
 		return nil, nil
 	}
 
@@ -592,16 +594,24 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
 		defer localVss.DeleteSnapshots()
 		targetFS = localVss
 	}
-	if opts.Stdin {
+
+	if opts.Stdin || opts.StdinCommand {
 		if !gopts.JSON {
 			progressPrinter.V("read data from stdin")
 		}
 		filename := path.Join("/", opts.StdinFilename)
+		var source io.ReadCloser = os.Stdin
+		if opts.StdinCommand {
+			source, err = fs.NewCommandReader(ctx, args, globalOptions.stderr)
+			if err != nil {
+				return err
+			}
+		}
 		targetFS = &fs.Reader{
 			ModTime:    timeStamp,
 			Name:       filename,
 			Mode:       0644,
-			ReadCloser: os.Stdin,
+			ReadCloser: source,
 		}
 		targets = []string{filename}
 	}
@@ -630,7 +640,13 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
 	success := true
 	arch.Error = func(item string, err error) error {
 		success = false
-		return progressReporter.Error(item, err)
+		reterr := progressReporter.Error(item, err)
+		// If we receive a fatal error during the execution of the snapshot,
+		// we abort the snapshot.
+		if reterr == nil && errors.IsFatal(err) {
+			reterr = err
+		}
+		return reterr
 	}
 	arch.CompleteItem = progressReporter.CompleteItem
 	arch.StartFile = progressReporter.StartFile

diff --git a/cmd/restic/cmd_backup_integration_test.go b/cmd/restic/cmd_backup_integration_test.go
@@ -568,3 +568,72 @@ func linkEqual(source, dest []string) bool {
 
 	return true
 }
+
+func TestStdinFromCommand(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+
+	testSetupBackupData(t, env)
+	opts := BackupOptions{
+		StdinCommand:  true,
+		StdinFilename: "stdin",
+	}
+
+	testRunBackup(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; print('something'); sys.exit(0)"}, opts, env.gopts)
+	testListSnapshots(t, env.gopts, 1)
+
+	testRunCheck(t, env.gopts)
+}
+
+func TestStdinFromCommandNoOutput(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+
+	testSetupBackupData(t, env)
+	opts := BackupOptions{
+		StdinCommand:  true,
+		StdinFilename: "stdin",
+	}
+
+	err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; sys.exit(0)"}, opts, env.gopts)
+	rtest.Assert(t, err != nil && err.Error() == "at least one source file could not be read", "No data error expected")
+	testListSnapshots(t, env.gopts, 1)
+
+	testRunCheck(t, env.gopts)
+}
+
+func TestStdinFromCommandFailExitCode(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+
+	testSetupBackupData(t, env)
+	opts := BackupOptions{
+		StdinCommand:  true,
+		StdinFilename: "stdin",
+	}
+
+	err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; print('test'); sys.exit(1)"}, opts, env.gopts)
+	rtest.Assert(t, err != nil, "Expected error while backing up")
+
+	testListSnapshots(t, env.gopts, 0)
+
+	testRunCheck(t, env.gopts)
+}
+
+func TestStdinFromCommandFailNoOutputAndExitCode(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+
+	testSetupBackupData(t, env)
+	opts := BackupOptions{
+		StdinCommand:  true,
+		StdinFilename: "stdin",
+	}
+
+	err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; sys.exit(1)"}, opts, env.gopts)
+	rtest.Assert(t, err != nil, "Expected error while backing up")
+
+	testListSnapshots(t, env.gopts, 0)
+
+	testRunCheck(t, env.gopts)
+}
diff --git a/doc/040_backup.rst b/doc/040_backup.rst
@@ -489,35 +489,71 @@ particular note are::
   - file ownership and ACLs on Windows
   - the "hidden" flag on Windows
 
+
+Reading data from a command
+***************************
+
+Sometimes, it can be useful to directly save the output of a program, for example,
+``mysqldump`` so that the SQL can later be restored. Restic supports this mode
+of operation; just supply the option ``--stdin-from-command`` when using the
+``backup`` action, and write the command in place of the files/directories:
+
+.. code-block:: console
+
+    $ restic -r /srv/restic-repo backup --stdin-from-command mysqldump [...]
+
+This command creates a new snapshot based on the standard output of ``mysqldump``.
+By default, the command's standard output is saved in a file named ``stdin``.
+A different name can be specified with ``--stdin-filename``:
+
+.. code-block:: console
+
+    $ restic -r /srv/restic-repo backup --stdin-filename production.sql --stdin-from-command mysqldump [...]
+
+Restic uses the command exit code to determine whether the command succeeded. A
+non-zero exit code from the command causes restic to cancel the backup. This causes
+restic to fail with exit code 1. No snapshot will be created in this case.
+
+
 Reading data from stdin
 ***********************
 
-Sometimes it can be nice to directly save the output of a program, e.g.
-``mysqldump`` so that the SQL can later be restored. Restic supports
-this mode of operation, just supply the option ``--stdin`` to the
-``backup`` command like this:
+.. warning::
+
+    Restic cannot detect if data read from stdin is complete or not. As explained
+    below, this can cause incomplete backup unless additional checks (outside of
+    restic) are configured. If possible, use ``--stdin-from-command`` instead.
+
+Alternatively, restic supports reading arbitrary data directly from the standard
+input. Use the option ``--stdin`` of the ``backup`` command as  follows:
 
 .. code-block:: console
 
-    $ set -o pipefail
-    $ mysqldump [...] | restic -r /srv/restic-repo backup --stdin
+    # Will not notice failures, see the warning below
+    $ gzip bigfile.dat | restic -r /srv/restic-repo backup --stdin
 
-This creates a new snapshot of the output of ``mysqldump``. You can then
-use e.g. the fuse mounting option (see below) to mount the repository
-and read the file.
+This creates a new snapshot of the content of ``bigfile.dat``.
+As for ``--stdin-from-command``, the default file name is ``stdin``; a
+different name can be specified with ``--stdin-filename``.
 
-By default, the file name ``stdin`` is used, a different name can be
-specified with ``--stdin-filename``, e.g. like this:
+**Important**: while it is possible to pipe a command output to restic using
+``--stdin``, doing so is discouraged as it will mask errors from the
+command, leading to corrupted backups. For example, in the following code
+block, if ``mysqldump`` fails to connect to the MySQL database, the restic
+backup will nevertheless succeed in creating an _empty_ backup:
 
 .. code-block:: console
 
-    $ mysqldump [...] | restic -r /srv/restic-repo backup --stdin --stdin-filename production.sql
+    # Will not notice failures, read the warning above
+    $ mysqldump [...] | restic -r /srv/restic-repo backup --stdin
 
-The option ``pipefail`` is highly recommended so that a non-zero exit code from
-one of the programs in the pipe (e.g. ``mysqldump`` here) makes the whole chain
-return a non-zero exit code. Refer to the `Use the Unofficial Bash Strict Mode
-<http://redsymbol.net/articles/unofficial-bash-strict-mode/>`__ for more
-details on this.
+A simple solution is to use ``--stdin-from-command`` (see above). If you
+still need to use the ``--stdin`` flag, you must use the shell option ``set -o pipefail``
+(so that a non-zero exit code from one of the programs in the pipe makes the
+whole chain return a non-zero exit code) and you must check the exit code of
+the pipe and act accordingly (e.g., remove the last backup). Refer to the
+`Use the Unofficial Bash Strict Mode <http://redsymbol.net/articles/unofficial-bash-strict-mode/>`__
+for more details on this.
 
 
 Tags for backup

diff --git a/internal/archiver/file_saver.go b/internal/archiver/file_saver.go
@@ -2,6 +2,7 @@ package archiver
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"os"
 	"sync"
@@ -146,7 +147,7 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat
 				panic("completed twice")
 			}
 			isCompleted = true
-			fnr.err = err
+			fnr.err = fmt.Errorf("failed to save %v: %w", target, err)
 			fnr.node = nil
 			fnr.stats = ItemStats{}
 			finish(fnr)

diff --git a/internal/fs/fs_reader_command.go b/internal/fs/fs_reader_command.go
@@ -0,0 +1,97 @@
+package fs
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+	"os/exec"
+
+	"github.com/restic/restic/internal/errors"
+)
+
+// CommandReader wrap a command such that its standard output can be read using
+// a io.ReadCloser. Close() waits for the command to terminate, reporting
+// any error back to the caller.
+type CommandReader struct {
+	cmd    *exec.Cmd
+	stdout io.ReadCloser
+
+	// cmd.Wait() must only be called once. Prevent duplicate executions in
+	// Read() and Close().
+	waitHandled bool
+
+	// alreadyClosedReadErr is the error that we should return if we try to
+	// read the pipe again after closing. This works around a Read() call that
+	// is issued after a previous Read() with `io.EOF` (but some bytes were
+	// read in the past).
+	alreadyClosedReadErr error
+}
+
+func NewCommandReader(ctx context.Context, args []string, logOutput io.Writer) (*CommandReader, error) {
+	// Prepare command and stdout
+	command := exec.CommandContext(ctx, args[0], args[1:]...)
+	stdout, err := command.StdoutPipe()
+	if err != nil {
+		return nil, fmt.Errorf("failed to setup stdout pipe: %w", err)
+	}
+
+	// Use a Go routine to handle the stderr to avoid deadlocks
+	stderr, err := command.StderrPipe()
+	if err != nil {
+		return nil, fmt.Errorf("failed to setup stderr pipe: %w", err)
+	}
+	go func() {
+		sc := bufio.NewScanner(stderr)
+		for sc.Scan() {
+			_, _ = fmt.Fprintf(logOutput, "subprocess %v: %v\n", command.Args[0], sc.Text())
+		}
+	}()
+
+	if err := command.Start(); err != nil {
+		return nil, fmt.Errorf("failed to start command: %w", err)
+	}
+
+	return &CommandReader{
+		cmd:    command,
+		stdout: stdout,
+	}, nil
+}
+
+// Read populate the array with data from the process stdout.
+func (fp *CommandReader) Read(p []byte) (int, error) {
+	if fp.alreadyClosedReadErr != nil {
+		return 0, fp.alreadyClosedReadErr
+	}
+	b, err := fp.stdout.Read(p)
+
+	// If the error is io.EOF, the program terminated. We need to check the
+	// exit code here because, if the program terminated with no output, the
+	// error in `Close()` is ignored.
+	if errors.Is(err, io.EOF) {
+		fp.waitHandled = true
+		// check if the command terminated successfully, If not return the error.
+		if errw := fp.wait(); errw != nil {
+			err = errw
+		}
+	}
+	fp.alreadyClosedReadErr = err
+	return b, err
+}
+
+func (fp *CommandReader) wait() error {
+	err := fp.cmd.Wait()
+	if err != nil {
+		// Use a fatal error to abort the snapshot.
+		return errors.Fatal(fmt.Errorf("command failed: %w", err).Error())
+	}
+	return nil
+}
+
+func (fp *CommandReader) Close() error {
+	if fp.waitHandled {
+		return nil
+	}
+
+	return fp.wait()
+}