Skip to content
This repository has been archived by the owner on Feb 20, 2020. It is now read-only.

Commit

Permalink
Merge pull request #49 from taskcluster/gzip-more-artifacts
Browse files Browse the repository at this point in the history
Bug 1347956 - gzip more artifacts
  • Loading branch information
petemoore committed Apr 19, 2017
2 parents 77dd7a7 + 4a3bb95 commit f438183
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 90 deletions.
18 changes: 6 additions & 12 deletions .taskcluster.yml
Expand Up @@ -73,8 +73,7 @@ tasks:
source ~/env_private.sh
go get -d "github.com/taskcluster/generic-worker"
cd "${GOPATH}/src/github.com/taskcluster/generic-worker"
git remote add '{{ event.head.user.login }}' '{{ event.head.repo.url }}'
git fetch --tags '{{ event.head.user.login }}'
git fetch '{{ event.head.repo.url }}' "+refs/*:refs/tasks/${TASK_ID}/*"
git checkout '{{ event.head.sha }}'
go get github.com/taskcluster/livelog github.com/gordonklaus/ineffassign
cd gw-codegen
Expand Down Expand Up @@ -119,8 +118,7 @@ tasks:
- git config --global core.autocrlf false
- 'go get -d "github.com/taskcluster/generic-worker"'
- 'cd "gopath\src\github.com\taskcluster\generic-worker"'
- 'git remote add "{{ event.head.user.login }}" "{{ event.head.repo.url }}"'
- 'git fetch --tags "{{ event.head.user.login }}"'
- 'git fetch "{{ event.head.repo.url }}" "+refs/*:refs/tasks/%TASK_ID%/*"'
- git checkout {{ event.head.sha }}
- go get github.com/taskcluster/livelog github.com/gordonklaus/ineffassign
- cd gw-codegen
Expand Down Expand Up @@ -186,8 +184,7 @@ tasks:
- git config --global core.autocrlf false
- go get -d "github.com/taskcluster/generic-worker"
- 'cd "gopath\src\github.com\taskcluster\generic-worker"'
- 'git remote add "{{ event.head.user.login }}" "{{ event.head.repo.url }}"'
- 'git fetch --tags "{{ event.head.user.login }}"'
- 'git fetch "{{ event.head.repo.url }}" "+refs/*:refs/tasks/%TASK_ID%/*"'
- git checkout {{ event.head.sha }}
- go get github.com/taskcluster/livelog github.com/gordonklaus/ineffassign
- cd gw-codegen
Expand Down Expand Up @@ -253,8 +250,7 @@ tasks:
- git config --global core.autocrlf false
- go get -d "github.com/taskcluster/generic-worker"
- 'cd "gopath\src\github.com\taskcluster\generic-worker"'
- 'git remote add "{{ event.head.user.login }}" "{{ event.head.repo.url }}"'
- 'git fetch --tags "{{ event.head.user.login }}"'
- 'git fetch "{{ event.head.repo.url }}" "+refs/*:refs/tasks/%TASK_ID%/*"'
- git checkout {{ event.head.sha }}
- go get github.com/taskcluster/livelog github.com/gordonklaus/ineffassign
- cd gw-codegen
Expand Down Expand Up @@ -328,8 +324,7 @@ tasks:
cd "${GOPATH}/src/github.com/taskcluster"
if [ ! -d generic-worker/.git ]; then rm -rf generic-worker; git clone '{{ event.head.repo.url }}' 'generic-worker'; fi
cd 'generic-worker'
if [ ! -d '.git/refs/remotes/{{ event.head.user.login }}' ]; then git remote add '{{ event.head.user.login }}' '{{ event.head.repo.url }}'; fi
git fetch --tags '{{ event.head.user.login }}'
git fetch '{{ event.head.repo.url }}' "+refs/*:refs/tasks/${TASK_ID}/*"
git checkout -f '{{ event.head.sha }}'
git clean -fdx
go get -u github.com/taskcluster/livelog github.com/gordonklaus/ineffassign
Expand Down Expand Up @@ -393,8 +388,7 @@ tasks:
# cd "${GOPATH}/src/github.com/taskcluster"
# if [ ! -d generic-worker/.git ]; then rm -rf generic-worker; git clone '{{ event.head.repo.url }}' 'generic-worker'; fi
# cd 'generic-worker'
# if [ ! -d '.git/refs/remotes/{{ event.head.user.login }}' ]; then git remote add '{{ event.head.user.login }}' '{{ event.head.repo.url }}'; fi
# git fetch --tags '{{ event.head.user.login }}'
# git fetch '{{ event.head.repo.url }}' "+refs/*:refs/tasks/${TASK_ID}/*"
# git checkout -f '{{ event.head.sha }}'
# git clean -fdx
# go get -u github.com/taskcluster/livelog github.com/gordonklaus/ineffassign
Expand Down
4 changes: 3 additions & 1 deletion README.md
@@ -1,5 +1,7 @@
# generic-worker
<img src="https://tools.taskcluster.net/lib/assets/taskcluster-120.png" />

[![logo](https://tools.taskcluster.net/lib/assets/taskcluster-120.png)](https://tools.taskcluster.net/lib/assets/taskcluster-120.png)

[![Linux Build Status](https://img.shields.io/travis/taskcluster/generic-worker.svg?style=flat-square&label=linux+build)](https://travis-ci.org/taskcluster/generic-worker)
[![Windows Build Status](https://img.shields.io/appveyor/ci/petemoore/generic-worker.svg?style=flat-square&label=windows+build)](https://ci.appveyor.com/project/petemoore/generic-worker)
[![GoDoc](https://godoc.org/github.com/taskcluster/generic-worker?status.svg)](https://godoc.org/github.com/taskcluster/generic-worker)
Expand Down
122 changes: 82 additions & 40 deletions artifacts.go
Expand Up @@ -35,7 +35,7 @@ type (
ProcessResponse(response interface{}) error
RequestObject() interface{}
ResponseObject() interface{}
Base() BaseArtifact
Base() *BaseArtifact
}

BaseArtifact struct {
Expand All @@ -45,39 +45,39 @@ type (
}

S3Artifact struct {
BaseArtifact
*BaseArtifact
MimeType string
ContentEncoding string
}

AzureArtifact struct {
BaseArtifact
*BaseArtifact
MimeType string
}

RedirectArtifact struct {
BaseArtifact
*BaseArtifact
MimeType string
URL string
}

ErrorArtifact struct {
BaseArtifact
*BaseArtifact
Message string
Reason string
}
)

func (base BaseArtifact) Base() BaseArtifact {
func (base *BaseArtifact) Base() *BaseArtifact {
return base
}

func (artifact RedirectArtifact) ProcessResponse(response interface{}) error {
func (artifact *RedirectArtifact) ProcessResponse(response interface{}) error {
// nothing to do
return nil
}

func (redirectArtifact RedirectArtifact) RequestObject() interface{} {
func (redirectArtifact *RedirectArtifact) RequestObject() interface{} {
return &queue.RedirectArtifactRequest{
ContentType: redirectArtifact.MimeType,
Expires: redirectArtifact.Expires,
Expand All @@ -86,16 +86,16 @@ func (redirectArtifact RedirectArtifact) RequestObject() interface{} {
}
}

func (redirectArtifact RedirectArtifact) ResponseObject() interface{} {
func (redirectArtifact *RedirectArtifact) ResponseObject() interface{} {
return new(queue.RedirectArtifactResponse)
}

func (artifact ErrorArtifact) ProcessResponse(response interface{}) error {
func (artifact *ErrorArtifact) ProcessResponse(response interface{}) error {
// TODO: process error response
return nil
}

func (errArtifact ErrorArtifact) RequestObject() interface{} {
func (errArtifact *ErrorArtifact) RequestObject() interface{} {
return &queue.ErrorArtifactRequest{
Expires: errArtifact.Expires,
Message: errArtifact.Message,
Expand All @@ -104,42 +104,80 @@ func (errArtifact ErrorArtifact) RequestObject() interface{} {
}
}

func (errArtifact ErrorArtifact) ResponseObject() interface{} {
func (errArtifact *ErrorArtifact) ResponseObject() interface{} {
return new(queue.ErrorArtifactResponse)
}

// gzipCompressFile gzip-compresses the file at path rawContentFile and writes
func (errArtifact *ErrorArtifact) String() string {
return fmt.Sprintf("%q", *errArtifact)
}

// createTempFileForPUTBody gzip-compresses the file at path rawContentFile and writes
// it to a temporary file. The file path of the generated temporary file is returned.
// It is the responsibility of the caller to delete the temporary file.
func gzipCompressFile(rawContentFile string) string {
func (artifact *S3Artifact) CreateTempFileForPUTBody() string {
rawContentFile := filepath.Join(taskContext.TaskDir, artifact.CanonicalPath)
baseName := filepath.Base(rawContentFile)
tmpFile, err := ioutil.TempFile("", baseName)
if err != nil {
log.Fatal(err)
}
defer tmpFile.Close()
gzipLogWriter := gzip.NewWriter(tmpFile)
gzipLogWriter.Name = baseName
rawContent, err := os.Open(rawContentFile)
var target io.Writer = tmpFile
if artifact.ContentEncoding == "gzip" {
gzipLogWriter := gzip.NewWriter(tmpFile)
defer gzipLogWriter.Close()
gzipLogWriter.Name = baseName
target = gzipLogWriter
}
source, err := os.Open(rawContentFile)
if err != nil {
panic(err)
}
defer rawContent.Close()
io.Copy(gzipLogWriter, rawContent)
gzipLogWriter.Close()
defer source.Close()
io.Copy(target, source)
return tmpFile.Name()
}

func (artifact S3Artifact) ProcessResponse(resp interface{}) (err error) {
func (artifact *S3Artifact) ChooseContentEncoding() {
// respect value, if already set
if artifact.ContentEncoding != "" {
return
}
// based on https://github.com/evansd/whitenoise/blob/03f6ea846394e01cbfe0c730141b81eb8dd6e88a/whitenoise/compress.py#L21-L29
SKIP_COMPRESS_EXTENSIONS := map[string]bool{
// Images
".jpg": true,
".jpeg": true,
".png": true,
".gif": true,
".webp": true,
// Compressed files
".zip": true,
".gz": true,
".tgz": true,
".bz2": true,
".tbz": true,
// Flash
".swf": true,
".flv": true,
// Fonts
".woff": true,
".woff2": true,
}
if SKIP_COMPRESS_EXTENSIONS[filepath.Ext(artifact.CanonicalPath)] {
return
}

artifact.ContentEncoding = "gzip"
}

func (artifact *S3Artifact) ProcessResponse(resp interface{}) (err error) {
response := resp.(*queue.S3ArtifactResponse)
rawContentFile := filepath.Join(taskContext.TaskDir, artifact.CanonicalPath)

// if Content-Encoding is gzip then we will need to gzip content...
transferContentFile := rawContentFile
if artifact.ContentEncoding == "gzip" {
transferContentFile = gzipCompressFile(rawContentFile)
defer os.Remove(transferContentFile)
}
artifact.ChooseContentEncoding()
transferContentFile := artifact.CreateTempFileForPUTBody()
defer os.Remove(transferContentFile)

// perform http PUT to upload to S3...
httpClient := &http.Client{}
Expand Down Expand Up @@ -189,24 +227,28 @@ func (artifact S3Artifact) ProcessResponse(resp interface{}) (err error) {
return err
}

func (s3Artifact S3Artifact) RequestObject() interface{} {
func (s3Artifact *S3Artifact) RequestObject() interface{} {
return &queue.S3ArtifactRequest{
ContentType: s3Artifact.MimeType,
Expires: s3Artifact.Expires,
StorageType: "s3",
}
}

func (s3Artifact S3Artifact) ResponseObject() interface{} {
func (s3Artifact *S3Artifact) ResponseObject() interface{} {
return new(queue.S3ArtifactResponse)
}

func (s3Artifact *S3Artifact) String() string {
return fmt.Sprintf("%q", *s3Artifact)
}

// Returns the artifacts as listed in the payload of the task (note this does
// not include log files)
func (task *TaskRun) PayloadArtifacts() []Artifact {
artifacts := make([]Artifact, 0)
for _, artifact := range task.Payload.Artifacts {
base := BaseArtifact{
base := &BaseArtifact{
CanonicalPath: canonicalPath(artifact.Path),
Name: artifact.Name,
Expires: artifact.Expires,
Expand Down Expand Up @@ -238,7 +280,7 @@ func (task *TaskRun) PayloadArtifacts() []Artifact {
panic(err)
}
subName := filepath.Join(base.Name, relativePath)
b := BaseArtifact{
b := &BaseArtifact{
CanonicalPath: canonicalPath(subPath),
Name: canonicalPath(subName),
Expires: artifact.Expires,
Expand Down Expand Up @@ -268,12 +310,12 @@ func (task *TaskRun) PayloadArtifacts() []Artifact {
// ErrorArtifact, otherwise if it exists as a file, as
// "invalid-resource-on-worker" ErrorArtifact
// TODO: need to also handle "too-large-file-on-worker"
func resolve(base BaseArtifact, artifactType string) Artifact {
func resolve(base *BaseArtifact, artifactType string) Artifact {
fullPath := filepath.Join(taskContext.TaskDir, base.CanonicalPath)
fileReader, err := os.Open(fullPath)
if err != nil {
// cannot read file/dir, create an error artifact
return ErrorArtifact{
return &ErrorArtifact{
BaseArtifact: base,
Message: fmt.Sprintf("Could not read %s '%s'", artifactType, fullPath),
Reason: "file-missing-on-worker",
Expand All @@ -283,21 +325,21 @@ func resolve(base BaseArtifact, artifactType string) Artifact {
// ok it exists, but is it right type?
fileinfo, err := fileReader.Stat()
if err != nil {
return ErrorArtifact{
return &ErrorArtifact{
BaseArtifact: base,
Message: fmt.Sprintf("Could not stat %s '%s'", artifactType, fullPath),
Reason: "invalid-resource-on-worker",
}
}
if artifactType == "file" && fileinfo.IsDir() {
return ErrorArtifact{
return &ErrorArtifact{
BaseArtifact: base,
Message: fmt.Sprintf("File artifact '%s' exists as a directory, not a file, on the worker", fullPath),
Reason: "invalid-resource-on-worker",
}
}
if artifactType == "directory" && !fileinfo.IsDir() {
return ErrorArtifact{
return &ErrorArtifact{
BaseArtifact: base,
Message: fmt.Sprintf("Directory artifact '%s' exists as a file, not a directory, on the worker", fullPath),
Reason: "invalid-resource-on-worker",
Expand All @@ -318,7 +360,7 @@ func resolve(base BaseArtifact, artifactType string) Artifact {
// application/octet-stream is the mime type for "unknown"
mimeType = "application/octet-stream"
}
return S3Artifact{
return &S3Artifact{
BaseArtifact: base,
MimeType: mimeType,
}
Expand All @@ -335,8 +377,8 @@ func canonicalPath(path string) string {

func (task *TaskRun) uploadLog(logFile string) *CommandExecutionError {
return task.uploadArtifact(
S3Artifact{
BaseArtifact: BaseArtifact{
&S3Artifact{
BaseArtifact: &BaseArtifact{
CanonicalPath: logFile,
Name: logFile,
// logs expire when task expires
Expand Down

0 comments on commit f438183

Please sign in to comment.