Skip to content

Commit

Permalink
Merge pull request #4410 from Enrico204/restic-stdin-command
Browse files Browse the repository at this point in the history
add --stdin-from-command flag to backup command
  • Loading branch information
MichaelEischer committed Oct 27, 2023
2 parents 1041078 + be28a02 commit 42ab3ea
Show file tree
Hide file tree
Showing 7 changed files with 305 additions and 24 deletions.
14 changes: 14 additions & 0 deletions changelog/unreleased/issue-4251
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Enhancement: Support reading backup from a program's standard output

When reading data from stdin, the `backup` command could not verify whether the
corresponding command completed successfully.

The `backup` command now supports starting an arbitrary command and sourcing
the backup content from its standard output. This enables restic to verify that
the command completes with exit code zero. A non-zero exit code causes the
backup to fail.

Example: `restic backup --stdin-from-command mysqldump [...]`

https://github.com/restic/restic/issues/4251
https://github.com/restic/restic/pull/4410
28 changes: 22 additions & 6 deletions cmd/restic/cmd_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ type BackupOptions struct {
ExcludeLargerThan string
Stdin bool
StdinFilename string
StdinCommand bool
Tags restic.TagLists
Host string
FilesFrom []string
Expand Down Expand Up @@ -134,6 +135,7 @@ func init() {
f.StringVar(&backupOptions.ExcludeLargerThan, "exclude-larger-than", "", "max `size` of the files to be backed up (allowed suffixes: k/K, m/M, g/G, t/T)")
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
f.BoolVar(&backupOptions.StdinCommand, "stdin-from-command", false, "execute command and store its stdout")
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
f.UintVar(&backupOptions.ReadConcurrency, "read-concurrency", 0, "read `n` files concurrently (default: $RESTIC_READ_CONCURRENCY or 2)")
f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
Expand Down Expand Up @@ -287,7 +289,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
}
}

if opts.Stdin {
if opts.Stdin || opts.StdinCommand {
if len(opts.FilesFrom) > 0 {
return errors.Fatal("--stdin and --files-from cannot be used together")
}
Expand All @@ -298,7 +300,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
return errors.Fatal("--stdin and --files-from-raw cannot be used together")
}

if len(args) > 0 {
if len(args) > 0 && !opts.StdinCommand {
return errors.Fatal("--stdin was specified and files/dirs were listed as arguments")
}
}
Expand Down Expand Up @@ -366,7 +368,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string) (fs []RejectFunc,

// collectTargets returns a list of target files/dirs from several sources.
func collectTargets(opts BackupOptions, args []string) (targets []string, err error) {
if opts.Stdin {
if opts.Stdin || opts.StdinCommand {
return nil, nil
}

Expand Down Expand Up @@ -592,16 +594,24 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
defer localVss.DeleteSnapshots()
targetFS = localVss
}
if opts.Stdin {

if opts.Stdin || opts.StdinCommand {
if !gopts.JSON {
progressPrinter.V("read data from stdin")
}
filename := path.Join("/", opts.StdinFilename)
var source io.ReadCloser = os.Stdin
if opts.StdinCommand {
source, err = fs.NewCommandReader(ctx, args, globalOptions.stderr)
if err != nil {
return err
}
}
targetFS = &fs.Reader{
ModTime: timeStamp,
Name: filename,
Mode: 0644,
ReadCloser: os.Stdin,
ReadCloser: source,
}
targets = []string{filename}
}
Expand Down Expand Up @@ -630,7 +640,13 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
success := true
arch.Error = func(item string, err error) error {
success = false
return progressReporter.Error(item, err)
reterr := progressReporter.Error(item, err)
// If we receive a fatal error during the execution of the snapshot,
// we abort the snapshot.
if reterr == nil && errors.IsFatal(err) {
reterr = err
}
return reterr
}
arch.CompleteItem = progressReporter.CompleteItem
arch.StartFile = progressReporter.StartFile
Expand Down
69 changes: 69 additions & 0 deletions cmd/restic/cmd_backup_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,3 +568,72 @@ func linkEqual(source, dest []string) bool {

return true
}

// TestStdinFromCommand runs a backup with StdinCommand set, using a command
// that prints a line to stdout and exits with code zero. It then lists the
// snapshots with an expected count of 1 and runs a repository check.
func TestStdinFromCommand(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	// The command produces output and terminates successfully.
	command := []string{"python", "-c", "import sys; print('something'); sys.exit(0)"}
	backupOpts := BackupOptions{StdinCommand: true, StdinFilename: "stdin"}
	dir := filepath.Dir(env.testdata)

	testRunBackup(t, dir, command, backupOpts, env.gopts)
	testListSnapshots(t, env.gopts, 1)
	testRunCheck(t, env.gopts)
}

// TestStdinFromCommandNoOutput runs a backup with StdinCommand set, using a
// command that exits with code zero without writing anything to stdout. The
// backup is expected to fail with the "at least one source file could not be
// read" error; afterwards the snapshots are listed with an expected count of
// 1 and a repository check is run.
func TestStdinFromCommandNoOutput(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	// The command terminates successfully but produces no output at all.
	command := []string{"python", "-c", "import sys; sys.exit(0)"}
	backupOpts := BackupOptions{StdinCommand: true, StdinFilename: "stdin"}
	dir := filepath.Dir(env.testdata)

	err := testRunBackupAssumeFailure(t, dir, command, backupOpts, env.gopts)
	gotNoDataErr := err != nil && err.Error() == "at least one source file could not be read"
	rtest.Assert(t, gotNoDataErr, "No data error expected")

	testListSnapshots(t, env.gopts, 1)
	testRunCheck(t, env.gopts)
}

// TestStdinFromCommandFailExitCode runs a backup with StdinCommand set, using
// a command that prints a line to stdout and then exits with code 1. The
// backup must return an error; afterwards the snapshots are listed with an
// expected count of 0 and a repository check is run.
func TestStdinFromCommandFailExitCode(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	// The command produces output but reports failure through its exit code.
	command := []string{"python", "-c", "import sys; print('test'); sys.exit(1)"}
	backupOpts := BackupOptions{StdinCommand: true, StdinFilename: "stdin"}
	dir := filepath.Dir(env.testdata)

	err := testRunBackupAssumeFailure(t, dir, command, backupOpts, env.gopts)
	failed := err != nil
	rtest.Assert(t, failed, "Expected error while backing up")

	testListSnapshots(t, env.gopts, 0)
	testRunCheck(t, env.gopts)
}

// TestStdinFromCommandFailNoOutputAndExitCode runs a backup with StdinCommand
// set, using a command that writes nothing to stdout and exits with code 1.
// The backup must return an error; afterwards the snapshots are listed with
// an expected count of 0 and a repository check is run.
func TestStdinFromCommandFailNoOutputAndExitCode(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	// The command produces no output and reports failure through its exit code.
	command := []string{"python", "-c", "import sys; sys.exit(1)"}
	backupOpts := BackupOptions{StdinCommand: true, StdinFilename: "stdin"}
	dir := filepath.Dir(env.testdata)

	err := testRunBackupAssumeFailure(t, dir, command, backupOpts, env.gopts)
	failed := err != nil
	rtest.Assert(t, failed, "Expected error while backing up")

	testListSnapshots(t, env.gopts, 0)
	testRunCheck(t, env.gopts)
}
70 changes: 53 additions & 17 deletions doc/040_backup.rst
Original file line number Diff line number Diff line change
Expand Up @@ -489,35 +489,71 @@ particular note are::
- file ownership and ACLs on Windows
- the "hidden" flag on Windows


Reading data from a command
***************************

Sometimes, it can be useful to directly save the output of a program, for example,
``mysqldump`` so that the SQL can later be restored. Restic supports this mode
of operation; just supply the option ``--stdin-from-command`` when using the
``backup`` action, and write the command in place of the files/directories:

.. code-block:: console
$ restic -r /srv/restic-repo backup --stdin-from-command mysqldump [...]
This command creates a new snapshot based on the standard output of ``mysqldump``.
By default, the command's standard output is saved in a file named ``stdin``.
A different name can be specified with ``--stdin-filename``:

.. code-block:: console
$ restic -r /srv/restic-repo backup --stdin-filename production.sql --stdin-from-command mysqldump [...]
Restic uses the command exit code to determine whether the command succeeded. A
non-zero exit code from the command causes restic to cancel the backup. This causes
restic to fail with exit code 1. No snapshot will be created in this case.


Reading data from stdin
***********************

Sometimes it can be nice to directly save the output of a program, e.g.
``mysqldump`` so that the SQL can later be restored. Restic supports
this mode of operation, just supply the option ``--stdin`` to the
``backup`` command like this:
.. warning::

Restic cannot detect if data read from stdin is complete or not. As explained
below, this can cause incomplete backups unless additional checks (outside of
restic) are configured. If possible, use ``--stdin-from-command`` instead.

Alternatively, restic supports reading arbitrary data directly from the standard
input. Use the option ``--stdin`` of the ``backup`` command as follows:

.. code-block:: console
$ set -o pipefail
$ mysqldump [...] | restic -r /srv/restic-repo backup --stdin
# Will not notice failures, see the warning below
$ gzip bigfile.dat | restic -r /srv/restic-repo backup --stdin
This creates a new snapshot of the output of ``mysqldump``. You can then
use e.g. the fuse mounting option (see below) to mount the repository
and read the file.
This creates a new snapshot of the content of ``bigfile.dat``.
As for ``--stdin-from-command``, the default file name is ``stdin``; a
different name can be specified with ``--stdin-filename``.

By default, the file name ``stdin`` is used, a different name can be
specified with ``--stdin-filename``, e.g. like this:
**Important**: while it is possible to pipe a command output to restic using
``--stdin``, doing so is discouraged as it will mask errors from the
command, leading to corrupted backups. For example, in the following code
block, if ``mysqldump`` fails to connect to the MySQL database, the restic
backup will nevertheless succeed in creating an *empty* backup:

.. code-block:: console
$ mysqldump [...] | restic -r /srv/restic-repo backup --stdin --stdin-filename production.sql
# Will not notice failures, read the warning above
$ mysqldump [...] | restic -r /srv/restic-repo backup --stdin
The option ``pipefail`` is highly recommended so that a non-zero exit code from
one of the programs in the pipe (e.g. ``mysqldump`` here) makes the whole chain
return a non-zero exit code. Refer to the `Use the Unofficial Bash Strict Mode
<http://redsymbol.net/articles/unofficial-bash-strict-mode/>`__ for more
details on this.
A simple solution is to use ``--stdin-from-command`` (see above). If you
still need to use the ``--stdin`` flag, you must use the shell option ``set -o pipefail``
(so that a non-zero exit code from one of the programs in the pipe makes the
whole chain return a non-zero exit code) and you must check the exit code of
the pipe and act accordingly (e.g., remove the last backup). Refer to the article
`Use the Unofficial Bash Strict Mode <http://redsymbol.net/articles/unofficial-bash-strict-mode/>`__
for more details on this.


Tags for backup
Expand Down
3 changes: 2 additions & 1 deletion internal/archiver/file_saver.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package archiver

import (
"context"
"fmt"
"io"
"os"
"sync"
Expand Down Expand Up @@ -146,7 +147,7 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat
panic("completed twice")
}
isCompleted = true
fnr.err = err
fnr.err = fmt.Errorf("failed to save %v: %w", target, err)
fnr.node = nil
fnr.stats = ItemStats{}
finish(fnr)
Expand Down
97 changes: 97 additions & 0 deletions internal/fs/fs_reader_command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package fs

import (
"bufio"
"context"
"fmt"
"io"
"os/exec"

"github.com/restic/restic/internal/errors"
)

// CommandReader wraps a command such that its standard output can be read
// using an io.ReadCloser. Close() waits for the command to terminate,
// reporting any error back to the caller.
type CommandReader struct {
	// cmd is the started command whose output is being read.
	cmd *exec.Cmd
	// stdout is the read end of the pipe connected to the command's stdout.
	stdout io.ReadCloser

	// cmd.Wait() must only be called once. Prevent duplicate executions in
	// Read() and Close().
	waitHandled bool

	// alreadyClosedReadErr is the error that we should return if we try to
	// read the pipe again after closing. This works around a Read() call that
	// is issued after a previous Read() with `io.EOF` (but some bytes were
	// read in the past).
	alreadyClosedReadErr error
}

// NewCommandReader starts the command described by args and returns a
// CommandReader that yields the command's standard output.
//
// args[0] is the program to run and args[1:] are its arguments. The command
// is created with exec.CommandContext using ctx. Every line the command
// writes to its standard error is forwarded to logOutput, prefixed with
// "subprocess <program>: ".
//
// An error is returned if args is empty, if one of the pipes cannot be set
// up, or if the command cannot be started.
func NewCommandReader(ctx context.Context, args []string, logOutput io.Writer) (*CommandReader, error) {
	// Without this check, args[0] below would panic with an index out of
	// range error when no command was given.
	if len(args) == 0 {
		return nil, fmt.Errorf("no command was specified as argument")
	}

	// Prepare command and stdout
	command := exec.CommandContext(ctx, args[0], args[1:]...)
	stdout, err := command.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("failed to setup stdout pipe: %w", err)
	}

	// Use a Go routine to handle the stderr to avoid deadlocks
	stderr, err := command.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("failed to setup stderr pipe: %w", err)
	}
	go func() {
		sc := bufio.NewScanner(stderr)
		for sc.Scan() {
			_, _ = fmt.Fprintf(logOutput, "subprocess %v: %v\n", command.Args[0], sc.Text())
		}
		// The scanner stops early when a line exceeds its maximum token
		// size. Keep draining stderr in that case, otherwise the command
		// could block forever while writing to a full pipe. Once the pipe is
		// closed this returns immediately; the error carries no useful
		// information at this point and is deliberately ignored.
		_, _ = io.Copy(io.Discard, stderr)
	}()

	if err := command.Start(); err != nil {
		return nil, fmt.Errorf("failed to start command: %w", err)
	}

	return &CommandReader{
		cmd:    command,
		stdout: stdout,
	}, nil
}

// Read fills p with data from the command's standard output and returns the
// number of bytes read.
//
// Once a read has returned an error, every later call returns that same error
// without touching the pipe again. When the pipe reports io.EOF, Read waits
// for the command to finish; if waiting fails, that error is returned instead
// of io.EOF.
func (fp *CommandReader) Read(p []byte) (int, error) {
	if prev := fp.alreadyClosedReadErr; prev != nil {
		return 0, prev
	}

	n, err := fp.stdout.Read(p)

	// io.EOF means the program terminated. The exit code has to be checked
	// here because, if the program terminated with no output, the error in
	// `Close()` is ignored.
	if errors.Is(err, io.EOF) {
		// Mark the wait as done so that Close() does not wait a second time.
		fp.waitHandled = true
		if waitErr := fp.wait(); waitErr != nil {
			err = waitErr
		}
	}

	// Remember the outcome; a nil error leaves the reader usable.
	fp.alreadyClosedReadErr = err
	return n, err
}

// wait blocks until the command has finished. A failure reported by
// cmd.Wait() is converted into a fatal error whose message starts with
// "command failed: ".
func (fp *CommandReader) wait() error {
	if err := fp.cmd.Wait(); err != nil {
		// Use a fatal error to abort the snapshot.
		return errors.Fatal(fmt.Sprintf("command failed: %v", err))
	}
	return nil
}

// Close waits for the command to terminate and returns the resulting error,
// if any. If the wait was already performed, either by Read() on reaching
// io.EOF or by a previous Close(), it returns nil without waiting again.
func (fp *CommandReader) Close() error {
	if fp.waitHandled {
		return nil
	}

	// cmd.Wait() must only be called once: record that the wait is handled
	// so that a repeated Close() does not wait a second time and report a
	// spurious failure.
	fp.waitHandled = true
	return fp.wait()
}

0 comments on commit 42ab3ea

Please sign in to comment.