Skip to content

Commit

Permalink
s2: Allow commandline input to be http/https (#348)
Browse files Browse the repository at this point in the history
Download input.
  • Loading branch information
klauspost committed Mar 30, 2021
1 parent 460ec9e commit b2eb836
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 56 deletions.
44 changes: 25 additions & 19 deletions s2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,29 +157,32 @@ Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
File names beginning with 'http://' and 'https://' will be downloaded and compressed.
Only http response code 200 is accepted.
Options:
-bench int
Run benchmark n times. No output will be written
Run benchmark n times. No output will be written
-blocksize string
Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
-c Write all output to stdout. Multiple input files will be concatenated
Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
-c Write all output to stdout. Multiple input files will be concatenated
-cpu int
Compress using this amount of threads (default 32)
Compress using this amount of threads (default 32)
-faster
Compress faster, but with a minor compression loss
Compress faster, but with a minor compression loss
-help
Display help
Display help
-pad string
Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
-q Don't write any output to terminal, except errors
Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
-q Don't write any output to terminal, except errors
-rm
Delete source file(s) after successful compression
Delete source file(s) after successful compression
-safe
Do not overwrite output files
Do not overwrite output files
-slower
Compress more, but a lot slower
Compress more, but a lot slower
-verify
Verify written files
Verify written files
```

Expand All @@ -195,19 +198,22 @@ Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.txt will decompress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
Extensions on downloaded files are ignored. Only http response code 200 is accepted.
Options:
-bench int
Run benchmark n times. No output will be written
-c Write all output to stdout. Multiple input files will be concatenated
Run benchmark n times. No output will be written
-c Write all output to stdout. Multiple input files will be concatenated
-help
Display help
-q Don't write any output to terminal, except errors
Display help
-q Don't write any output to terminal, except errors
-rm
Delete source file(s) after successful decompression
Delete source file(s) after successful decompression
-safe
Do not overwrite output files
Do not overwrite output files
-verify
Verify files, but do not write output
Verify files, but do not write output
```

## s2sx: self-extracting archives
Expand Down
65 changes: 50 additions & 15 deletions s2/cmd/s2c/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"io"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"path/filepath"
Expand Down Expand Up @@ -73,6 +74,9 @@ Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
File names beginning with 'http://' and 'https://' will be downloaded and compressed.
Only http response code 200 is accepted.
Options:`)
flag.PrintDefaults()
os.Exit(0)
Expand Down Expand Up @@ -100,6 +104,10 @@ Options:`)
var files []string

for _, pattern := range args {
if isHTTP(pattern) {
files = append(files, pattern)
continue
}
found, err := filepath.Glob(pattern)
exitErr(err)
if len(found) == 0 {
Expand Down Expand Up @@ -146,11 +154,8 @@ Options:`)
fmt.Print("Reading ", filename, "...")
}
// Input file.
file, err := os.Open(filename)
exitErr(err)
finfo, err := file.Stat()
exitErr(err)
b := make([]byte, finfo.Size())
file, size, _ := openFile(filename)
b := make([]byte, size)
_, err = io.ReadFull(file, b)
exitErr(err)
file.Close()
Expand Down Expand Up @@ -215,30 +220,22 @@ Options:`)
for _, filename := range files {
func() {
var closeOnce sync.Once
dstFilename := fmt.Sprintf("%s%s", filename, ".s2")
if *bench > 0 {
dstFilename = "(discarded)"
}
dstFilename := cleanFileName(fmt.Sprintf("%s%s", filename, ".s2"))
if !*quiet {
fmt.Print("Compressing ", filename, " -> ", dstFilename)
}
// Input file.
file, err := os.Open(filename)
file, _, mode := openFile(filename)
exitErr(err)
defer closeOnce.Do(func() { file.Close() })
src, err := readahead.NewReaderSize(file, *cpu+1, 1<<20)
exitErr(err)
defer src.Close()
finfo, err := file.Stat()
exitErr(err)
var out io.Writer
switch {
case *bench > 0:
out = ioutil.Discard
case *stdout:
out = os.Stdout
default:
mode := finfo.Mode() // use the same mode for the output file
if *safe {
_, err := os.Stat(dstFilename)
if !os.IsNotExist(err) {
Expand Down Expand Up @@ -282,6 +279,44 @@ Options:`)
}
}

// isHTTP reports whether name begins with the "http://" or "https://" scheme
// prefix. The comparison is case-sensitive.
func isHTTP(name string) bool {
	for _, scheme := range [...]string{"http://", "https://"} {
		if strings.HasPrefix(name, scheme) {
			return true
		}
	}
	return false
}

// openFile opens name for reading and reports what is known about it.
//
// When name starts with "http://" or "https://" it is fetched with http.Get
// and the response body is returned. Any status other than 200 OK is passed
// to exitErr as an error. In that case size is the response's ContentLength
// (net/http reports -1 when the length is unknown) and mode is os.ModePerm.
//
// Otherwise name is opened as a local file, and size and mode are taken from
// its Stat result.
//
// No error is returned; every failure is handed to exitErr. The returned rc
// is not closed here, so the caller must close it.
func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
	if !isHTTP(name) {
		// Local path: the file's own size and permissions are used.
		f, err := os.Open(name)
		exitErr(err)
		info, err := f.Stat()
		exitErr(err)
		return f, info.Size(), info.Mode()
	}

	// Remote input: only a plain 200 response is accepted.
	response, err := http.Get(name)
	exitErr(err)
	if response.StatusCode != http.StatusOK {
		exitErr(fmt.Errorf("unexpected response status code %v, want OK", response.Status))
	}
	return response.Body, response.ContentLength, os.ModePerm
}

// cleanFileName turns an "http://" or "https://" URL into a string that can
// be used as a local file name. Names that are not such URLs are returned
// unchanged.
//
// For URLs the scheme prefix is stripped, and every path separator, wildcard,
// other character reserved in file names (including the double quote) and
// every ASCII control character is replaced by '_'.
func cleanFileName(s string) string {
	if !isHTTP(s) {
		return s
	}
	s = strings.TrimPrefix(s, "http://")
	s = strings.TrimPrefix(s, "https://")
	return strings.Map(func(r rune) rune {
		switch r {
		case '\\', '/', '*', '?', ':', '|', '<', '>', '~', '"':
			return '_'
		}
		// Control characters are everything below space (0x20). Comparing
		// against decimal 20 would let 0x14-0x1F through.
		if r < 0x20 {
			return '_'
		}
		return r
	}, s)
}

func verifyTo(w io.Writer) (io.Writer, func() error) {
if !*verify {
return w, func() error {
Expand Down
85 changes: 63 additions & 22 deletions s2/cmd/s2d/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"runtime/debug"
Expand Down Expand Up @@ -51,6 +52,9 @@ Use - as the only file name to read from stdin and write to stdout.
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
Extensions on downloaded files are ignored. Only http response code 200 is accepted.
Options:`)
flag.PrintDefaults()
os.Exit(0)
Expand All @@ -69,6 +73,11 @@ Options:`)
var files []string

for _, pattern := range args {
if isHTTP(pattern) {
files = append(files, pattern)
continue
}

found, err := filepath.Glob(pattern)
exitErr(err)
if len(found) == 0 {
Expand All @@ -86,21 +95,20 @@ Options:`)
case strings.HasSuffix(filename, ".s2"):
case strings.HasSuffix(filename, ".snappy"):
default:
fmt.Println("Skipping", filename)
continue
if !isHTTP(filename) {
fmt.Println("Skipping", filename)
continue
}
}

func() {
if !*quiet {
fmt.Print("Reading ", filename, "...")
}
// Input file.
file, err := os.Open(filename)
exitErr(err)
finfo, err := file.Stat()
exitErr(err)
b := make([]byte, finfo.Size())
_, err = io.ReadFull(file, b)
file, size, _ := openFile(filename)
b := make([]byte, size)
_, err := io.ReadFull(file, b)
exitErr(err)
file.Close()

Expand All @@ -127,18 +135,17 @@ Options:`)
}

for _, filename := range files {
dstFilename := filename
dstFilename := cleanFileName(filename)
switch {
case strings.HasSuffix(filename, ".s2"):
dstFilename = strings.TrimSuffix(filename, ".s2")
dstFilename = strings.TrimSuffix(dstFilename, ".s2")
case strings.HasSuffix(filename, ".snappy"):
dstFilename = strings.TrimSuffix(filename, ".snappy")
dstFilename = strings.TrimSuffix(dstFilename, ".snappy")
default:
fmt.Println("Skipping", filename)
continue
}
if *bench > 0 {
dstFilename = "(discarded)"
if !isHTTP(filename) {
fmt.Println("Skipping", filename)
continue
}
}
if *verify {
dstFilename = "(verify)"
Expand All @@ -150,16 +157,12 @@ Options:`)
fmt.Print("Decompressing ", filename, " -> ", dstFilename)
}
// Input file.
file, err := os.Open(filename)
exitErr(err)
file, _, mode := openFile(filename)
defer closeOnce.Do(func() { file.Close() })
rc := rCounter{in: file}
src, err := readahead.NewReaderSize(&rc, 2, 4<<20)
exitErr(err)
defer src.Close()
finfo, err := file.Stat()
exitErr(err)
mode := finfo.Mode() // use the same mode for the output file
if *safe {
_, err := os.Stat(dstFilename)
if !os.IsNotExist(err) {
Expand All @@ -168,7 +171,7 @@ Options:`)
}
var out io.Writer
switch {
case *bench > 0 || *verify:
case *verify:
out = ioutil.Discard
case *stdout:
out = os.Stdout
Expand Down Expand Up @@ -204,6 +207,44 @@ Options:`)
}
}

// openFile opens name for reading and reports what is known about it.
//
// When name starts with "http://" or "https://" it is fetched with http.Get
// and the response body is returned. Any status other than 200 OK is passed
// to exitErr as an error. In that case size is the response's ContentLength
// (net/http reports -1 when the length is unknown) and mode is os.ModePerm.
//
// Otherwise name is opened as a local file, and size and mode are taken from
// its Stat result.
//
// No error is returned; every failure is handed to exitErr. The returned rc
// is not closed here, so the caller must close it.
func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
	if !isHTTP(name) {
		// Local path: the file's own size and permissions are used.
		f, err := os.Open(name)
		exitErr(err)
		info, err := f.Stat()
		exitErr(err)
		return f, info.Size(), info.Mode()
	}

	// Remote input: only a plain 200 response is accepted.
	response, err := http.Get(name)
	exitErr(err)
	if response.StatusCode != http.StatusOK {
		exitErr(fmt.Errorf("unexpected response status code %v, want 200 OK", response.Status))
	}
	return response.Body, response.ContentLength, os.ModePerm
}

// cleanFileName turns an "http://" or "https://" URL into a string that can
// be used as a local file name. Names that are not such URLs are returned
// unchanged.
//
// For URLs the scheme prefix is stripped, and every path separator, wildcard,
// other character reserved in file names (including the double quote) and
// every ASCII control character is replaced by '_'.
func cleanFileName(s string) string {
	if !isHTTP(s) {
		return s
	}
	s = strings.TrimPrefix(s, "http://")
	s = strings.TrimPrefix(s, "https://")
	return strings.Map(func(r rune) rune {
		switch r {
		case '\\', '/', '*', '?', ':', '|', '<', '>', '~', '"':
			return '_'
		}
		// Control characters are everything below space (0x20). Comparing
		// against decimal 20 would let 0x14-0x1F through.
		if r < 0x20 {
			return '_'
		}
		return r
	}, s)
}

// isHTTP reports whether name begins with the "http://" or "https://" scheme
// prefix. The comparison is case-sensitive.
func isHTTP(name string) bool {
	switch {
	case strings.HasPrefix(name, "http://"), strings.HasPrefix(name, "https://"):
		return true
	}
	return false
}

func exitErr(err error) {
if err != nil {
fmt.Fprintln(os.Stderr, "\nERROR:", err.Error())
Expand Down

0 comments on commit b2eb836

Please sign in to comment.