/
index.ts
117 lines (102 loc) · 3.95 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
* [[include:src/codecs/elife/README.md]]
*
* @module codecs/elife
*/
import stencila from '@stencila/schema'
import { JatsCodec } from '../jats'
import * as xml from '../../util/xml'
import { http } from '@stencila/jesta'
import * as vfile from '../../util/vfile'
import { Codec } from '../types'
import { getLogger } from '@stencila/logga'
import fs from 'fs-extra'
import path from 'path'
import tempy from 'tempy'
// Logger for this module, created with the `encoda:elife` tag
const log = getLogger('encoda:elife')
// Shared JATS codec instance; `ElifeCodec.decode` uses it to load the
// article XML after figure links have been rewritten
const jats = new JatsCodec()
/**
 * Codec for fetching and decoding eLife articles.
 *
 * The content to decode is an eLife article identifier (e.g. `elife: 12345`
 * or `elife 12345v2`) or an `elifesciences.org/articles/` URL. The article's
 * XML is fetched from the `elifesciences/elife-article-xml` GitHub repository,
 * its `elife...` graphics are downloaded as JPEGs into a temporary directory,
 * and the rewritten XML is loaded using the JATS codec.
 */
export class ElifeCodec extends Codec implements Codec {
  /**
   * Pattern for content that identifies an eLife article.
   *
   * Capture group 4 is the five digit article number and capture group 6
   * is the optional, single digit, version number.
   */
  private static regex =
    /^\s*((elife\s*:?\s*)|(https?:\/\/elifesciences\.org\/articles\/))(\d{5})(v(\d))?\s*$/i

  /**
   * Pick the highest version from a list of article XML file names.
   *
   * Versions are compared numerically, so that `v10` ranks above `v2`
   * (a plain string sort of the file names would rank it below).
   *
   * @param article The article number used in the file names
   * @param names Candidate file names, expected to look like `elife-<article>-v<version>.xml`
   * @returns The version string of the highest version found, or
   *          `undefined` if none of the names match the expected pattern
   */
  private static latestVersion(
    article: string,
    names: string[]
  ): string | undefined {
    const pattern = new RegExp(`^elife-${article}-v(\\d+)\\.xml$`)
    let latestText: string | undefined
    let latestNumber = -1
    for (const name of names) {
      const found = pattern.exec(name)
      if (found === null) continue
      const candidate = parseInt(found[1], 10)
      if (candidate > latestNumber) {
        latestNumber = candidate
        // Keep the text as it appears in the file name so that the
        // download URL built from it matches the file exactly
        latestText = found[1]
      }
    }
    return latestText
  }

  /**
   * Sniff content to see if it is an eLife article identifier or URL.
   *
   * @param content The content to test
   * @returns A promise resolving to `true` if the content matches the identifier pattern
   */
  public readonly sniff = (content: string): Promise<boolean> => {
    return Promise.resolve(ElifeCodec.regex.test(content))
  }

  /**
   * Decode an eLife article identifier, or URL, into a Stencila node.
   *
   * If no version is specified in the content, the GitHub code search API
   * is queried for the latest version of the article that has a `<body>`
   * element, falling back to version 1 if that can not be determined.
   *
   * @param file The file containing the article identifier or URL
   * @returns The decoded node; an empty `Article` if the article XML
   *          could not be fetched
   * @throws If the content does not match the identifier pattern
   */
  public readonly decode = async (
    file: vfile.VFile
  ): Promise<stencila.Node> => {
    const content = await vfile.dump(file)
    const match = ElifeCodec.regex.exec(content)
    if (match === null) throw new Error('Unable to parse content')

    const article = match[4]
    let version = match[6]
    if (version === undefined) {
      // Version unspecified, so fallback to version 1, but...
      version = '1'

      // Try to determine the latest version using the Github API.
      // This search is intended to return the latest version of the
      // article with a `<body>` element
      const searchUrl = `https://api.github.com/search/code?q=<body>+repo:elifesciences/elife-article-xml+language:xml+filename:elife-${article}-`
      const search = await http.get(searchUrl)
      if (search.statusCode === 200) {
        // This lookup is best-effort: a malformed response should not
        // prevent decoding using the fallback version
        let items: unknown
        let parsed = true
        try {
          items = JSON.parse(search.body)?.items
        } catch {
          parsed = false
          log.error(
            `Unable to parse GitHub search response for article "${article}"`
          )
        }

        if (parsed) {
          const names = (Array.isArray(items) ? items : [])
            .map((item: { name?: unknown } | null) => item?.name)
            .filter((name): name is string => typeof name === 'string')

          if (names.length > 0) {
            const latest = ElifeCodec.latestVersion(article, names)
            if (latest !== undefined) {
              version = latest
            } else {
              const last = names.sort()[names.length - 1]
              log.error(
                `Unable to determine latest version number from filename "${last}"`
              )
            }
          } else {
            log.error(
              `Unable to find a version of article "${article}" with body content. Is the article number correct?`
            )
          }
        }
      }
    }

    const articleUrl = `https://raw.githubusercontent.com/elifesciences/elife-article-xml/master/articles/elife-${article}-v${version}.xml`
    const { statusCode, body } = await http.get(articleUrl)
    if (statusCode !== 200) {
      log.error(`Unable to find eLife article with id: ${article}`)
      return stencila.article()
    }

    const doc = xml.load(body)

    // Check that there is a <body> element, some don't have one.
    // This is only reported; decoding continues regardless.
    if (xml.all(doc, 'body').length === 0) {
      log.error(
        `Article ${article}v${version} has no body. You may wish to try another version.`
      )
    }

    const dir = tempy.directory()
    await fs.ensureDir(dir)

    // Get the figures and rewrite hrefs
    const graphics = xml.all(doc, 'graphic')
    for (const graphic of graphics) {
      let href = xml.attr(graphic, 'xlink:href')
      // Only graphics whose href starts with `elife` are fetched
      if (href?.startsWith('elife')) {
        if (!href.endsWith('.tif')) href += '.tif'
        // Request a JPEG rendering of the image from the IIIF server
        const imageUrl = `https://iiif.elifesciences.org/lax:${article}%2F${href}/full/full/0/default.jpg`
        // Shorten the local file name by dropping the article prefix
        // and the version suffix
        const filename = href
          .replace(`elife-${article}-`, '')
          .replace(`-v${version}.tif`, '.jpg')
        const filepath = path.join(dir, filename)
        await http.download(imageUrl, filepath)

        if (graphic.attributes !== undefined) {
          graphic.attributes['xlink:href'] = filepath
          graphic.attributes['mime-subtype'] = 'jpeg'
        }
      }
    }

    // Dump the new JATS with `xlink:href`s to local images
    const jatsNew = xml.dump(doc)
    return jats.load(jatsNew)
  }

  /**
   * Encoding to an eLife article is not supported.
   *
   * @throws Always, because encoding is not yet implemented
   */
  public readonly encode = (): Promise<vfile.VFile> => {
    throw new Error(`Encoding to an eLife article is not yet implemented`)
  }
}