Skip to content

Commit

Permalink
[export] Add new export module for exporting a dataset and downloadin…
Browse files Browse the repository at this point in the history
…g associated assets
  • Loading branch information
rexxars authored and bjoerge committed Feb 16, 2018
1 parent 7c98296 commit ee9aa4c
Show file tree
Hide file tree
Showing 18 changed files with 734 additions and 0 deletions.
10 changes: 10 additions & 0 deletions packages/@sanity/export/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; editorconfig.org
; Top-most EditorConfig file: editors stop searching parent directories here
root = true

; Settings applied to every file in the package
[*]
; `charset` only takes effect inside a section, and the valid value is `utf-8`
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
18 changes: 18 additions & 0 deletions packages/@sanity/export/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Logs
logs
*.log

# Coverage directory used by tools like istanbul
coverage

# Grunt intermediate storage
.grunt

# Dependency directories
node_modules

# Compiled code
lib

# Dev-fixtures
moviedb.ndjson
5 changes: 5 additions & 0 deletions packages/@sanity/export/.npmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.babelrc
.editorconfig
.eslintignore
.eslintrc
test
48 changes: 48 additions & 0 deletions packages/@sanity/export/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# @sanity/export

Exports documents and assets from a Sanity dataset

## Installing

```
npm install --save @sanity/export
```

## Usage

```js
const exportDataset = require('@sanity/export')

exportDataset({
// Instance of @sanity/client configured with the correct project ID and dataset
client: someInstantiatedSanityClientInstance,

// Name of dataset to export
dataset: 'myDataset',

// Path to write zip-file to, or `-` for stdout
outputPath: '/home/your-user/myDataset.zip',

// Whether or not to export assets. Note that this operation is currently slightly lossy;
// metadata stored on the asset document itself (original filename, for instance) might be lost
// Default: `true`
assets: false,

// Exports documents only, without downloading assets or rewriting asset references. Note that the
// `assets` option is still respected, determining whether or not to include asset documents
raw: true
})
```

## Future improvements

* Restore original filenames, keep track of duplicates, increase counter (`filename (<num>).ext`)
* Skip archiving on raw/no-asset mode?

## CLI-tool

This functionality is built into the `@sanity/cli` package as `sanity dataset export`.

## License

MIT-licensed. See LICENSE.
48 changes: 48 additions & 0 deletions packages/@sanity/export/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"name": "@sanity/export",
"version": "0.125.6",
"description": "Export Sanity documents and assets",
"main": "lib/export.js",
"engines": {
"node": ">=6.0.0"
},
"author": "Sanity.io <hello@sanity.io>",
"license": "MIT",
"scripts": {
"clean": "rimraf lib",
"test": "jest"
},
"keywords": [
"sanity",
"cms",
"headless",
"realtime",
"content",
"export",
"ndjson"
],
"dependencies": {
"archiver": "^2.1.1",
"debug": "^3.1.0",
"lodash": "^4.17.4",
"mississippi": "^2.0.0",
"p-queue": "^2.3.0",
"simple-get": "^2.7.0",
"split2": "^2.1.1"
},
"devDependencies": {
"@sanity/client": "^0.125.4",
"decompress": "^4.2.0",
"jest": "^21.2.1",
"rimraf": "^2.6.2",
"string-to-stream": "^1.1.0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/sanity-io/sanity.git"
},
"bugs": {
"url": "https://github.com/sanity-io/sanity/issues"
},
"homepage": "https://www.sanity.io/"
}
157 changes: 157 additions & 0 deletions packages/@sanity/export/src/AssetHandler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
const path = require('path')
const miss = require('mississippi')
const PQueue = require('p-queue')
const pkg = require('../package.json')
const requestStream = require('./requestStream')
const debug = require('./debug')

// Actions accepted by `AssetHandler#findAndModify` for objects that hold an asset reference:
// `remove` drops the object, `rewrite` replaces it with a `_sanityAsset` placeholder
const ACTION_REMOVE = 'remove'
const ACTION_REWRITE = 'rewrite'
// Extensions for which downloaded assets are appended to the archive with `store: true`
// (presumably because these formats are already compressed - confirm against archiver docs)
const precompressedExts = ['.zip', '.gz', '.rar', '.png', '.jpeg', '.jpg', '.gif']

/**
 * Handles assets during a dataset export: provides object-mode transform streams that
 * rewrite, strip or skip asset documents/references, and queues asset downloads whose
 * response streams are appended to the export archive.
 */
class AssetHandler {
  /**
   * @param {Object} options
   * @param {Object} options.client - Stored on the instance as `client`
   * @param {Object} options.archive - Archive receiving assets; must provide `append()` and `abort()`
   * @param {string} options.prefix - Prefix prepended to the directory of every appended asset
   * @param {Object} [options.queue] - Download queue; defaults to a `PQueue` with concurrency 3
   */
  constructor(options) {
    this.client = options.client
    this.archive = options.archive
    this.archivePrefix = options.prefix

    this.queueSize = 0
    this.queue = options.queue || new PQueue({concurrency: 3})

    // Replaced by the real rejection callback once `finish()` has been called
    this.reject = () => {
      throw new Error('Asset handler errored before `finish()` was called')
    }
  }

  /**
   * Drops all pending download tasks and resets the queued-task counter.
   */
  clear() {
    this.queue.clear()
    this.queueSize = 0
  }

  /**
   * @returns {Promise} Resolves when the download queue is idle, rejects if a download fails
   */
  finish() {
    return new Promise((resolve, reject) => {
      // Expose `reject` so that download failures can settle this promise
      this.reject = reject
      this.queue.onIdle().then(resolve)
    })
  }

  // Transform used when assets should be exported: asset documents are removed from the
  // document stream and queued for download, while asset references in all other documents
  // are replaced with placeholders (_sanityAsset: 'image@file://./images/<filename>')
  rewriteAssets = miss.through.obj((doc, enc, callback) => {
    if (isAssetDocument(doc)) {
      const type = doc._type === 'sanity.imageAsset' ? 'image' : 'file'
      const filePath = `${type}s/${generateFilename(doc._id)}`
      this.queueAssetDownload(doc, filePath)
      callback()
      return
    }

    callback(null, this.findAndModify(doc, ACTION_REWRITE))
  })

  // Transform used when we don't _want_ assets: removes all asset documents as well as
  // references to assets (*.asset._ref ^= (image|file)-)
  stripAssets = miss.through.obj((doc, enc, callback) => {
    if (isAssetDocument(doc)) {
      callback()
      return
    }

    callback(null, this.findAndModify(doc, ACTION_REMOVE))
  })

  // Transform used in raw export mode along with `assets: false`: skips asset documents
  // but passes every other document through untouched, retaining asset references
  skipAssets = miss.through.obj((doc, enc, callback) => {
    if (isAssetDocument(doc)) {
      callback()
      return
    }

    callback(null, doc)
  })

  // Pass-through transform: emits every document unchanged
  noop = miss.through.obj((doc, enc, callback) => callback(null, doc))

  /**
   * Adds a task to the download queue that fetches the asset and appends it to the archive.
   *
   * @param {Object} assetDoc - Asset document; its `url` is what gets downloaded
   * @param {string} dstPath - Destination path of the asset, relative to the archive prefix
   */
  queueAssetDownload(assetDoc, dstPath) {
    debug('Adding download task for %s (destination: %s)', assetDoc._id, dstPath)
    this.queueSize++
    this.queue.add(() => this.downloadAsset(assetDoc.url, dstPath))
  }

  /**
   * Aborts the archive, drops pending downloads and rejects the promise from `finish()`.
   *
   * @param {Error} err - Reason the export cannot continue
   */
  abortWithError(err) {
    this.archive.abort()
    this.queue.clear()
    this.reject(err)
  }

  /**
   * Requests the given URL and appends the response stream to the archive. A failed
   * request or a non-200 response aborts the whole export instead of being ignored.
   *
   * @param {string} url - URL of the asset to download
   * @param {string} dstPath - Destination path of the asset, relative to the archive prefix
   * @returns {Promise<void>}
   */
  async downloadAsset(url, dstPath) {
    const headers = {'User-Agent': `${pkg.name}@${pkg.version}`}

    let stream
    try {
      stream = await requestStream({url, headers})
    } catch (err) {
      // Without this, a failed request surfaced only as an unhandled rejection while
      // `finish()` still resolved, yielding an archive that silently lacks the asset
      this.abortWithError(err)
      return
    }

    if (stream.statusCode !== 200) {
      this.abortWithError(
        new Error(`Referenced asset URL "${url}" returned HTTP ${stream.statusCode}`)
      )
      return
    }

    const store = precompressedExts.includes(path.extname(dstPath))

    debug('Asset stream ready, appending to archive at %s', dstPath)
    this.archive.append(stream, {
      name: path.basename(dstPath),
      prefix: [this.archivePrefix, path.dirname(dstPath)].join('/'),
      store
    })
  }

  /**
   * Recursively copies `item`, applying `action` to every object holding an asset reference.
   *
   * @param {*} item - Value to process (document, nested object, array or primitive)
   * @param {string} action - `ACTION_REMOVE` or `ACTION_REWRITE`
   * @returns {*} The processed copy; `undefined` when `item` itself was removed
   */
  findAndModify = (item, action) => {
    if (Array.isArray(item)) {
      // Only drop items that were removed (`undefined`). Filtering on truthiness would
      // also discard legitimate values such as `0`, `''`, `false` and `null`.
      return item
        .map(child => this.findAndModify(child, action))
        .filter(child => typeof child !== 'undefined')
    }

    if (!item || typeof item !== 'object') {
      return item
    }

    const assetType = getAssetType(item)
    if (assetType) {
      if (action === ACTION_REMOVE) {
        return undefined
      }

      if (action === ACTION_REWRITE) {
        const filePath = `${assetType}s/${generateFilename(item.asset._ref)}`
        return {
          _sanityAsset: `${assetType}@file://./${filePath}`
        }
      }
    }

    return Object.keys(item).reduce((acc, key) => {
      const value = this.findAndModify(item[key], action)

      // Properties whose value was removed are left out of the copy
      if (typeof value !== 'undefined') {
        acc[key] = value
      }

      return acc
    }, {})
  }
}

// Tells whether the document is an image or file asset document, based on its `_type`
function isAssetDocument(doc) {
  return ['sanity.imageAsset', 'sanity.fileAsset'].includes(doc._type)
}

/**
 * Determines which kind of asset an object refers to.
 *
 * @param {Object} item - Object that may carry an `asset._ref` reference
 * @returns {?string} `'image'` or `'file'` when `asset._ref` is a string starting with
 *   `image-` / `file-`, otherwise `null`
 */
function getAssetType(item) {
  const ref = item.asset ? item.asset._ref : undefined
  if (typeof ref !== 'string') {
    return null
  }

  const matched = /^(image|file)-/.exec(ref)
  return matched ? matched[1] : null
}

/**
 * Derives the filename used for an asset from its ID (`image-<name>-<ext>` / `file-<name>-<ext>`).
 * The `image-`/`file-` prefix is stripped and a trailing `-<lowercase letters>` segment becomes
 * the extension; without such a segment the extension is `bin`.
 *
 * @param {string} assetId - Asset document ID or asset reference
 * @returns {string} Filename in the form `<name>.<extension>`; IDs that do not follow the
 *   expected pattern yield `<assetId>.bin` instead of throwing
 */
function generateFilename(assetId) {
  // `match()` returns null for IDs without an `image-`/`file-` prefix; destructuring that
  // directly would throw an opaque TypeError in the middle of the export
  const [, , asset, ext] = assetId.match(/^(image|file)-(.*?)(-[a-z]+)?$/) || []
  if (!asset) {
    return `${assetId}.bin`
  }

  const extension = (ext || 'bin').replace(/^-/, '')
  return `${asset}.${extension}`
}

/**
 * Asks the client for the `url` field of the document with the given ID.
 *
 * @param {Object} client - Object exposing `fetch(query, params)`
 * @param {string} assetId - ID bound to the `$id` query parameter
 * @returns {*} Whatever `client.fetch()` returns for the query
 */
function lookupAssetUrl(client, assetId) {
  const query = '*[_id == $id][0].url'
  const params = {id: assetId}
  return client.fetch(query, params)
}

// The AssetHandler class is the only thing this module exports
module.exports = AssetHandler
1 change: 1 addition & 0 deletions packages/@sanity/export/src/debug.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Logger created by the `debug` package under the `sanity:export` namespace
module.exports = require('debug')('sanity:export')

0 comments on commit ee9aa4c

Please sign in to comment.