Skip to content

Commit

Permalink
[input:bibjson] Update BibJSON support
Browse files Browse the repository at this point in the history
Update BibJSON input support.

Add docs.

Add tests.

Update README description.

Close #32
  • Loading branch information
larsgw committed Jul 14, 2018
1 parent 070cf12 commit d543f85
Show file tree
Hide file tree
Showing 7 changed files with 291 additions and 58 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
<p align="center"><img alt="Citation.js" src="https://citation.js.org/static/img/square_logo_medium.png" /></p>

Citation.js converts formats like BibTeX, Wikidata JSON and ContentMine JSON to CSL-JSON to convert to other formats like APA, Vancouver and back to BibTeX.
Citation.js converts formats like BibTeX, Wikidata JSON and BibJSON to CSL-JSON to convert to other formats like APA, Vancouver, RIS and back to BibTeX.

---

Expand Down
4 changes: 3 additions & 1 deletion docs/input_formats.md
Expand Up @@ -26,7 +26,9 @@ Below is a list of built-in input types.

## BibJSON

* `@bibjson/object`: [BibJSON](http://okfnlabs.org/bibjson/) object. Parses the data
* `@bibjson/record+object`: [BibJSON](http://okfnlabs.org/bibjson/) object. Parses the data
* `@bibjson/quickscrape+record+object`: BibJSON object with some [quickscrape](https://github.com/ContentMine/quickscrape) customisation. Parses the data
* `@bibjson/collection+object`: BibJSON object, with a list of records and some collection metadata. Parses the data

## CSL-JSON

Expand Down
45 changes: 39 additions & 6 deletions src/parse/modules/bibjson/index.js
Expand Up @@ -4,18 +4,51 @@

import * as json from './json'

// Link types that are looked for in a record's `link` list to recognise
// quickscrape output
let scraperLinks = [
  'fulltext_html',
  'fulltext_xml',
  'fulltext_pdf'
]

// Common prefix of the format names defined in this module
export const ref = '@bibjson'
// Exposes the './json' parser module under the `json` key
export const parsers = {json}
export const formats = {
'@bibjson/object': {
parse: json.parse,
'@bibjson/quickscrape+record+object': {
parse: json.quickscrapeRecord,
parseType: {
dataType: 'SimpleObject',
propertyConstraint: {
props: ['fulltext_html', 'fulltext_xml', 'fulltext_pdf'],
props: 'link',
value (links) {
return scraperLinks.some(link => links.find(({type}) => type === link))
}
},
extends: '@bibjson/record+object'
}
},
'@bibjson/record+object': {
parse: json.record,
parseType: {
dataType: 'SimpleObject',
propertyConstraint: [{
props: 'title'
}, {
props: ['author', 'editor'],
match: 'some',
value: val => val && Array.isArray(val.value)
}
value (authors) {
return Array.isArray(authors) && authors[0] && 'name' in authors[0]
}
}]
}
},
'@bibjson/collection+object': {
parse (collection) {
return collection.records
},
parseType: {
dataType: 'SimpleObject',
propertyConstraint: [{
props: 'metadata',
value (metadata) { return 'collection' in metadata }
}, {
props: 'records',
value (records) { return Array.isArray(records) }
}]
}
}
}
169 changes: 150 additions & 19 deletions src/parse/modules/bibjson/json.js
Expand Up @@ -5,34 +5,165 @@
import parseDate from '../../date'
import parseName from '../../name'

/**
 * Convert a BibJSON person to a name object.
 *
 * `firstName`/`lastName` take precedence over their lower-case spellings
 * `firstname`/`lastname`; the lower-case variants are only used when the
 * camel-case property is `undefined`. If no complete given/family pair is
 * available, the free-form `name` is handed to `parseName` instead.
 *
 * @access private
 * @param {Object} person - BibJSON person
 * @return {Object|undefined} name object, or `undefined` if there is neither
 *   a given/family pair nor a `name`
 */
function nameProps (person) {
  let lowerCaseGiven = person.firstname
  let lowerCaseFamily = person.lastname
  let given = person.firstName === undefined ? lowerCaseGiven : person.firstName
  let family = person.lastName === undefined ? lowerCaseFamily : person.lastName

  if (!given || !family) {
    // incomplete pair: rely on the unstructured name, if there is one
    return person.name ? parseName(person.name) : undefined
  }

  return {given, family}
}

// Record-level identifiers that are copied to the output.
// 'URL' is left out on purpose: it appears to be the URL of the record
// collection rather than of the record itself. Otherwise, it should be
// included.
let identifiers = ['PMID', 'PMCID', 'DOI', 'ISBN']

// Identifiers that are read from the `journal` object of a record
let journalIdentifiers = ['ISSN']

/**
 * Collect the recognised identifiers of a BibJSON object.
 *
 * Identifiers are read from two places:
 *   1. direct properties whose upper-cased name is listed in `identifiers`
 *      (e.g. `doi`, `issn`);
 *   2. entries of the `identifier` list (`{id, type}`) whose upper-cased
 *      `type` is listed in `identifiers`.
 * When both provide the same identifier, the `identifier` list wins, as it
 * is processed last.
 *
 * @access private
 * @param {Object} input - BibJSON record or journal object
 * @param {Array<String>} identifiers - upper-case identifier names to keep
 * @return {Object} map of upper-case identifier name to value
 */
function idProps (input, identifiers) {
  let output = {}

  for (let prop in input) {
    let upperCaseProp = prop.toUpperCase()

    if (identifiers.includes(upperCaseProp)) {
      output[upperCaseProp] = input[prop]
    }
  }

  // A malformed `identifier` value (not a list, empty entries, entries
  // without a string `type`) is skipped instead of aborting the whole record
  if (Array.isArray(input.identifier)) {
    for (let entry of input.identifier) {
      if (!entry || typeof entry.type !== 'string') { continue }

      let type = entry.type.toUpperCase()
      if (identifiers.includes(type)) {
        output[type] = entry.id
      }
    }
  }

  return output
}

// Maps BibJSON record types to the output `type` value.
// copied from BibTeX, as BibJSON is based on BibTeX
let typeMap = {
article: 'article',
book: 'book',
booklet: 'book',
proceedings: 'book',
mastersthesis: 'thesis',
inbook: 'chapter',
incollection: 'chapter',
conference: 'paper-conference',
inproceedings: 'paper-conference',
online: 'website',
patent: 'patent',
phdthesis: 'thesis',
techreport: 'report',
unpublished: 'manuscript',

// no mapping, fallback: these look up as `undefined`, so generalProps()
// applies its default type ('book') for them
manual: undefined,
misc: undefined
}

/**
 * Properties that apply to every quickscrape record: they are always typed
 * as journal articles.
 *
 * @access private
 * @return {Object} a fresh object holding the fixed `type`
 */
function quickscrapeSpecificProps () {
  let props = {}
  props.type = 'article-journal'
  return props
}

/**
 * Coerce a numeric-looking value to a number and leave any other value as is,
 * so values such as "12A" or "Suppl. 1" are kept instead of becoming NaN.
 *
 * @access private
 * @param {*} value - value to coerce
 * @return {*} the numeric value, or `value` itself if it is not numeric
 */
function numberOrOriginal (value) {
  let number = +value
  return Number.isNaN(number) ? value : number
}

/**
 * Map the fields of a BibJSON record to the output record.
 *
 * Handles type, title, people (author, editor, reviewer), keywords,
 * publisher, dates, journal info (title, volume, issue, pages, identifiers),
 * the first link, record identifiers and the id.
 *
 * @access private
 * @param {Object} input - BibJSON record
 * @return {Object} the converted record
 */
function generalProps (input) {
  let output = {
    // unknown and unmapped types fall back to 'book'
    type: typeMap[input.type] || 'book'
  }

  if (input.title) { output.title = input.title }
  if (input.author) { output.author = input.author.map(nameProps).filter(Boolean) }
  if (input.editor) { output.editor = input.editor.map(nameProps).filter(Boolean) }
  if (input.reviewer) {
    // for reviews, the reviewers become the authors and the original
    // authors (if any) are moved to 'reviewed-author'
    if (input.author) { output['reviewed-author'] = output.author }
    output.author = input.reviewer.map(nameProps).filter(Boolean)
  }

  if (Array.isArray(input.keywords)) {
    output.keyword = input.keywords.join()
  } else if (input.keywords) {
    output.keyword = input.keywords
  }

  // publisher is either an object with a name, or the name itself
  if (input.publisher) { output.publisher = input.publisher.name || input.publisher }

  if (input.date && Object.keys(input.date).length > 0) {
    let dates = input.date
    if (dates.submitted) { output.submitted = parseDate(dates.submitted) }
    if (dates.published) { output.issued = parseDate(dates.published) }
  } else if (input.year) {
    let year = +input.year
    // a non-numeric year (e.g. "in press") is kept as a raw date string
    // rather than producing NaN date parts
    output.issued = Number.isNaN(year)
      ? {raw: String(input.year)}
      : {'date-parts': [[year]]}
  }

  if (input.journal) {
    let journal = input.journal
    if (journal.name) { output['container-title'] = journal.name }
    // numeric values are stored as numbers, anything else is kept verbatim
    if (journal.volume) { output.volume = numberOrOriginal(journal.volume) }
    if (journal.issue) { output.issue = numberOrOriginal(journal.issue) }

    Object.assign(output, idProps(journal, journalIdentifiers))

    if (journal.firstpage) { output['page-first'] = journal.firstpage }
    if (journal.pages) {
      // BibTeX-style double dash to single dash; String() guards against
      // numeric page values
      output.page = String(journal.pages).replace('--', '-')
    } else if (journal.firstpage && journal.lastpage) {
      output.page = journal.firstpage + '-' + journal.lastpage
    }
  }

  // only the first link is used; `typeof null` is 'object' too, hence the
  // explicit truthiness check
  if (input.link && input.link[0] && typeof input.link[0] === 'object') {
    output.URL = input.link[0].url
  }

  Object.assign(output, idProps(input, identifiers))

  // prefer the explicit cid, fall back to the DOI
  if (input.cid) {
    output.id = input.cid
  } else if (output.DOI) {
    output.id = output.DOI
  }

  return output
}

/**
* Forat ContentMine data
* Parse ContentMine quickscrape data
*
* @access protected
* @method parseContentMine
*
* @param {Object} data - The input data
*
* @return {Array<CSL>} The formatted input data
*/
const parseContentMine = function (data) {
const res = {
type: 'article-journal'
}

Object.keys(data).forEach((prop) => { res[prop] = data[prop].value[0] })

if (res.hasOwnProperty('authors')) { res.author = data.authors.value.map(parseName) }
if (res.hasOwnProperty('firstpage')) {
res.page = res['page-first'] = res.firstpage
}
if (res.hasOwnProperty('date')) { res.issued = parseDate(res.date) }
if (res.hasOwnProperty('journal')) { res['container-title'] = res.journal }
if (res.hasOwnProperty('doi')) { res.id = res.DOI = res.doi }
return Object.assign(generalProps(data), quickscrapeSpecificProps(data))
}

return res
/**
 * Convert a single BibJSON record
 *
 * @access protected
 * @param {Object} data - The BibJSON record
 * @return {Object} The converted record, as returned by generalProps
 */
const parseBibJson = function (data) {
  const record = generalProps(data)
  return record
}

export {
parseContentMine as parse
parseContentMine as quickscrapeRecord,
parseBibJson as record
}
3 changes: 2 additions & 1 deletion test/input.spec.js
Expand Up @@ -53,7 +53,8 @@ const configs = {
},

// @bibjson
'@bibjson/object': [input.bibjson.simple, output.bibjson.simple],
'@bibjson/quickscrape+record+object': [input.bibjson.quickscrape, output.bibjson.quickscrape],
'@bibjson/record+object': [input.bibjson.simple, output.bibjson.simple],

// @else
'@else/json': {
Expand Down
85 changes: 72 additions & 13 deletions test/input/parse.json
Expand Up @@ -49,19 +49,78 @@
},
"bibjson": {
"simple": {
"publisher": {"value": ["BioMed Central"]},
"journal": {"value": ["Journal of Ethnobiology and Ethnomedicine"]},
"title": {"value": ["Gitksan medicinal plants-cultural choice and efficacy"]},
"authors": {"value": ["Leslie Main Johnson"]},
"date": {"value": ["2006-06-21"]},
"doi": {"value": ["10.1186/1746-4269-2-29"]},
"volume": {"value": ["2"]},
"issue": {"value": ["1"]},
"firstpage": {"value": ["1"]},
"fulltext_html": {"value": ["http://ethnobiomed.biomedcentral.com/articles/10.1186/1746-4269-2-29"]},
"fulltext_pdf": {"value": ["http://ethnobiomed.biomedcentral.com/track/pdf/10.1186/1746-4269-2-29?site=http://ethnobiomed.biomedcentral.com"]},
"license": {"value": ["This article is published under license to BioMed Central Ltd. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited."]},
"copyright": {"value": ["2006 Johnson; licensee BioMed Central Ltd."]}
"type": "article",
"title": "On a family of symmetric Bernoulli convolutions",
"author": [
{
"name": "Erdös, Paul"
}
],
"journal": {
"name": "American Journal of Mathematics",
"identifier": [
{
"id": "0002-9327",
"type": "issn"
}
],
"volume": "61",
"pages": "974--976"
},
"year": "1939",
"owner": "me",
"id": "ID_1",
"collection": "my_collection",
"url": "http://example.com/me/my_collection/ID_1",
"link":[
{
"url": "http://okfn.org",
"anchor": "Open Knowledge Foundation"
}
]
},
"quickscrape": {
"title": "Gitksan medicinal plants-cultural choice and efficacy",
"link": [
{
"type": "fulltext_html",
"url": "https://ethnobiomed.biomedcentral.com/articles/10.1186/1746-4269-2-29"
},
{
"type": "fulltext_pdf",
"url": "https://ethnobiomed.biomedcentral.com/track/pdf/10.1186/1746-4269-2-29"
}
],
"author": [
{
"name": "Leslie Main Johnson",
"institution": "Centre for Work and Community Studies and Centre for Integrated Studies, Athabasca University, Athabasca, Canada"
},
{
"institution": "Anthropology Department, University of Alberta, Edmonton, Canada"
}
],
"publisher": { "name": "BioMed Central" },
"journal": {
"volume": "2",
"issue": "1",
"firstpage": "29",
"name": "Journal of Ethnobiology and Ethnomedicine",
"issn": "1746-4269"
},
"sections": {
"abstract": {
"text": "The use of plants for healing by any cultural group is integrally related to local concepts of the nature of disease, the nature of plants, and the world view of the culture. The physical and chemical properties of the plants themselves also bear on their selection by people for medicines, as does the array of plants available for people to choose from. I examine use of medicinal plants from a "
},
"description": {
"text": "The use of plants for healing by any cultural group is integrally related to local concepts of the nature of disease, the nature of plants, and the world view of the culture. The physical and chemical properties of the plants themselves also bear on their selection by people for medicines, as does the array of plants available for people to choose from. I examine use of medicinal plants from a "
}
},
"date": { "published": "2006-06-21T00:00:00+02:00" },
"identifier": [ { "type": "doi", "id": "10.1186/1746-4269-2-29" } ],
"license": [ { "raw": "This article is published under license to BioMed Central Ltd. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited." } ],
"copyright": [ "2006 Johnson; licensee BioMed Central Ltd." ],
"log": [ { "date": "2018-07-12T18:35:20+02:00", "event": "scraped by quickscrape v0.4.7" } ]
}
},
"csl": {
Expand Down

0 comments on commit d543f85

Please sign in to comment.