Permalink
Browse files

[input:bibtex] Refactor BibTeX parser

  • Loading branch information...
larsgw committed Aug 1, 2017
1 parent c5c8764 commit 3fba7435dc1e1796d03f1a59eaafda209afc5d88
Showing with 524 additions and 793 deletions.
  1. +18 −381 dev/bibtex.html
  2. +145 −207 lib/parse/bibtex/text.js
  3. +1 −1 lib/parse/input/type.js
  4. +1 −1 lib/parse/regex.js
  5. +7 −2 lib/util/index.js
  6. +135 −0 lib/util/stack.js
  7. +120 −194 src/parse/bibtex/text.js
  8. +1 −1 src/parse/input/type.js
  9. +1 −5 src/parse/regex.js
  10. +2 −1 src/util/index.js
  11. +93 −0 src/util/stack.js
View

Large diffs are not rendered by default.

Oops, something went wrong.
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -53,7 +53,7 @@ var parseInputType = function parseInputType(input) {
} else if (_regex2.default.wikidata[3].test(input)) {
return 'url/wikidata';
// BibTeX
} else if (_regex2.default.bibtex[0].test(input)) {
} else if (_regex2.default.bibtex.test(input)) {
return 'string/bibtex';
// Bib.TXT
} else if (_regex2.default.bibtxt.test(input)) {
View
@@ -12,8 +12,8 @@ Object.defineProperty(exports, "__esModule", {
*/
var regex = {
url: /^(https?:\/\/)?((([a-z\d]([a-z\d-]*[a-z\d])*)\.)+[a-z]{2,}|((\d{1,3}\.){3}\d{1,3})|localhost)(:\d+)?(\/[-a-z\d%_.~+:]*)*(\?[;&a-z\d%_.~+=-]*)?(#[-a-z\d_]*)?$/i,
bibtex: [/^(?:\s*@\s*[^@]+?\s*\{\s*[^@]+?\s*,\s*[^@]+\})+\s*$/, /^\s$/, /^[@{}"=,\\]$/],
bibtxt: /^\s*(\[.*?\]\s*(\n\s*[^[]((?!:)\S)+\s*:\s*.+?\s*)*\s*)+$/,
bibtex: /^(?:\s*@\s*[^@]+?\s*\{\s*[^@]+?\s*,\s*[^@]+\})+\s*$/,
wikidata: [/^\s*(Q\d+)\s*$/, /^\s*((?:Q\d+(?:\s+|,|))*Q\d+)\s*$/, /^(https?:\/\/(?:www\.)wikidata.org\/w\/api\.php(?:\?.*)?)$/, /\/(Q\d+)(?:[#?/]|\s*$)/],
json: [[/((?:\[|:|,)\s*)'((?:\\'|[^'])*?[^\\])?'(?=\s*(?:\]|}|,))/g, '$1"$2"'], [/((?:(?:"|]|}|\/[gmi]|\.|(?:\d|\.|-)*\d)\s*,|{)\s*)(?:"([^":\n]+?)"|'([^":\n]+?)'|([^":\n]+?))(\s*):/g, '$1"$2$3$4"$5:']],
doi: [/^\s*(https?:\/\/(?:dx\.)?doi\.org\/(10.\d{4,9}\/[-._;()/:A-Z0-9]+))\s*$/i, /^\s*(10.\d{4,9}\/[-._;()/:A-Z0-9]+)\s*$/i, /^\s*(?:(?:10.\d{4,9}\/[-._;()/:A-Z0-9]+)\s*)+$/i],
View
@@ -3,7 +3,7 @@
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.fetchId = exports.fetchFileAsync = exports.fetchFile = exports.deepCopy = exports.attr = undefined;
exports.TokenStack = exports.fetchId = exports.fetchFileAsync = exports.fetchFile = exports.deepCopy = exports.attr = undefined;
var _attr = require('./attr');
@@ -25,6 +25,10 @@ var _fetchId = require('./fetchId');
var _fetchId2 = _interopRequireDefault(_fetchId);
var _stack = require('./stack');
var _stack2 = _interopRequireDefault(_stack);
/**
 * Babel interop helper: normalises a required module so that its default
 * export can always be read from `.default`.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is flagged with a truthy `__esModule`,
 *   otherwise a fresh wrapper object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
/**
 * Babel interop helper for `import * as ns` style requires.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is flagged with a truthy `__esModule`;
 *   otherwise a new object holding a copy of every own enumerable property
 *   of `obj`, plus a `default` property that points at `obj`.
 */
function _interopRequireWildcard(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  var namespace = {};

  if (obj != null) {
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var name in obj) {
      // Skip anything inherited through the prototype chain.
      if (!hasOwn.call(obj, name)) continue;
      namespace[name] = obj[name];
    }
  }

  // Assigned last, so it wins over any own `default` key copied above.
  namespace.default = obj;
  return namespace;
}
@@ -33,4 +37,5 @@ exports.attr = attr;
exports.deepCopy = _deepCopy2.default;
exports.fetchFile = _fetchFile2.default;
exports.fetchFileAsync = _fetchFileAsync2.default;
exports.fetchId = _fetchId2.default;
exports.fetchId = _fetchId2.default;
exports.TokenStack = _stack2.default;
View
@@ -0,0 +1,135 @@
'use strict';
Object.defineProperty(exports, "__esModule", {
value: true
});
/**
 * Babel helper: returns a function that installs method descriptors on a
 * constructor's prototype (`protoProps`) and on the constructor itself
 * (`staticProps`). Descriptors are made configurable, writable when they
 * carry a `value`, and non-enumerable unless they say otherwise.
 */
var _createClass = (function () {
  function defineProperties(target, props) {
    for (var i = 0; i < props.length; i++) {
      var descriptor = props[i];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ('value' in descriptor) descriptor.writable = true;
      Object.defineProperty(target, descriptor.key, descriptor);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) defineProperties(Constructor.prototype, protoProps);
    if (staticProps) defineProperties(Constructor, staticProps);
    return Constructor;
  };
})();

/**
 * Babel helper: throws when a class constructor is invoked without `new`.
 */
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError('Cannot call a class as a function');
  }
}

// Empty placeholder; nothing in this block references it.
var memoize = function memoize() {};

/**
 * Cursor over an array of tokens with pattern-based consumption helpers.
 *
 * A "pattern" may be:
 *   - a RegExp   (tested against the token),
 *   - a function (called as `fn(token, index, stack)`, truthy result = match),
 *   - an array of patterns (matches when any member matches),
 *   - any other value (compared to the token with `===`).
 *
 * Instance state:
 *   - `stack`   the token array passed to the constructor (not copied),
 *   - `index`   position of the current token,
 *   - `current` the token at `index` (`undefined` once the end is reached).
 */
var TokenStack = (function () {
  // Default pattern used by `consumeToken` and `consume`.
  var ANY_TOKEN = /^[\s\S]$/;

  /**
   * @param {Array} array - Tokens to iterate over.
   */
  function TokenStack(array) {
    _classCallCheck(this, TokenStack);

    this.stack = array;
    this.index = 0;
    this.current = this.stack[this.index];
  }

  _createClass(TokenStack, [{
    key: 'tokensLeft',

    /**
     * @returns {number} Number of tokens from the current one to the end.
     */
    value: function tokensLeft() {
      return this.stack.length - this.index;
    }
  }, {
    key: 'matches',

    /**
     * Tests the current token against `pattern` without consuming it.
     * Past the end of the stack the current token is `undefined`.
     *
     * @param {*} pattern - See the class description for pattern kinds.
     * @returns {*} Result of the match callback.
     */
    value: function matches(pattern) {
      return TokenStack.getMatchCallback(pattern)(this.current, this.index, this.stack);
    }
  }, {
    key: 'consumeToken',

    /**
     * Consumes exactly one token.
     *
     * @param {*} [pattern=/^[\s\S]$/] - Pattern the token has to satisfy.
     * @param {Object} [options]
     * @param {boolean} [options.inverse=false] - When true the token must
     *   NOT satisfy `pattern`.
     * @returns {*} The consumed token.
     * @throws {SyntaxError} When no token is left, or the token does not
     *   satisfy the (possibly inverted) pattern.
     */
    value: function consumeToken(pattern, options) {
      var expected = pattern === undefined ? ANY_TOKEN : pattern;
      var settings = options === undefined ? {} : options;
      var inverse = Boolean(settings.inverse);
      var expectation = (inverse ? 'anything but ' : '') + TokenStack.getPatternText(expected);

      // Without this guard a pattern that happens to accept the string
      // "undefined" would "consume" a token that does not exist.
      if (this.tokensLeft() <= 0) {
        throw new SyntaxError('Unexpected end of input at index ' + this.index + ': Expected ' + expectation);
      }

      var token = this.current;
      var matched = Boolean(TokenStack.getMatchCallback(expected)(token, this.index, this.stack));

      if (matched === inverse) {
        throw new SyntaxError('Unexpected token at index ' + this.index + ': Expected ' + expectation + ', got "' + token + '"');
      }

      this.current = this.stack[++this.index];
      return token;
    }
  }, {
    key: 'consumeN',

    /**
     * Consumes a fixed number of tokens regardless of their content.
     *
     * @param {number} length - How many tokens to consume.
     * @returns {string} The consumed tokens joined into one string.
     * @throws {SyntaxError} When fewer than `length` tokens are left.
     * @throws {RangeError} When `length` is negative or not an integer
     *   (such values previously caused an endless loop).
     */
    value: function consumeN(length) {
      // Missing / NaN counts behave like 0, as they did with the old loop.
      var count = Number(length) || 0;

      if (this.tokensLeft() < count) {
        throw new SyntaxError('Not enough tokens left');
      }
      if (count < 0 || count % 1 !== 0) {
        throw new RangeError('Token count must be a non-negative integer, got ' + length);
      }

      var start = this.index;
      this.index += count;
      this.current = this.stack[this.index];

      return this.stack.slice(start, this.index).join('');
    }
  }, {
    key: 'consume',

    /**
     * Consumes a run of consecutive tokens that satisfy `pattern` (or, with
     * `inverse`, that do not satisfy it). The run always stops at the end of
     * the stack.
     *
     * @param {*} [pattern=/^[\s\S]$/] - Pattern each token is tested against.
     * @param {Object} [options]
     * @param {number} [options.min=0] - Minimum run length.
     * @param {number} [options.max=Infinity] - Maximum run length.
     * @param {boolean} [options.inverse=false] - Consume while NOT matching.
     * @param {Function} [options.tokenMap] - Mapper applied to the consumed
     *   tokens (before `tokenFilter`).
     * @param {Function} [options.tokenFilter] - Filter applied to the
     *   (mapped) tokens before joining.
     * @returns {string} The resulting tokens joined into one string.
     * @throws {SyntaxError} When the run is shorter than `min` or longer
     *   than `max`. The cursor has already advanced past the run when the
     *   error is thrown.
     */
    value: function consume(pattern, options) {
      var expected = pattern === undefined ? ANY_TOKEN : pattern;
      var settings = options === undefined ? {} : options;
      var min = settings.min === undefined ? 0 : settings.min;
      var max = settings.max === undefined ? Infinity : settings.max;
      var inverse = Boolean(settings.inverse);
      var tokenMap = settings.tokenMap;
      var tokenFilter = settings.tokenFilter;

      var start = this.index;
      var match = TokenStack.getMatchCallback(expected);

      // The bounds check is essential: an inverse scan whose terminator never
      // shows up, or a regex accepting "undefined", would otherwise never end.
      // The Boolean() coercion keeps falsy non-boolean callback results
      // (0, undefined, '') from being treated as matches.
      while (this.index < this.stack.length && Boolean(match(this.current, this.index, this.stack)) !== inverse) {
        this.current = this.stack[++this.index];
      }

      var consumed = this.stack.slice(start, this.index);

      if (consumed.length < min) {
        throw new SyntaxError('Not enough ' + TokenStack.getPatternText(expected));
      } else if (consumed.length > max) {
        throw new SyntaxError('Too many ' + TokenStack.getPatternText(expected));
      }

      if (tokenMap) {
        consumed = consumed.map(tokenMap);
      }
      if (tokenFilter) {
        consumed = consumed.filter(tokenFilter);
      }

      return consumed.join('');
    }
  }], [{
    key: 'getPatternText',

    /**
     * @param {*} pattern
     * @returns {string} Quoted text for error messages: the regex source for
     *   RegExp patterns, the string coercion of the pattern otherwise.
     */
    value: function getPatternText(pattern) {
      return '"' + (pattern instanceof RegExp ? pattern.source : pattern) + '"';
    }
  }, {
    key: 'getMatchCallback',

    /**
     * Turns a pattern into a predicate `(token, index, stack) => result`.
     *
     * @param {*} pattern - See the class description for pattern kinds.
     * @returns {Function} The predicate; function patterns are returned as-is.
     */
    value: function getMatchCallback(pattern) {
      if (Array.isArray(pattern)) {
        var alternatives = pattern.map(function (alternative) {
          return TokenStack.getMatchCallback(alternative);
        });

        // Forward index and stack too, so function patterns nested in an
        // array receive the same arguments as top-level function patterns.
        return function (token, index, stack) {
          return alternatives.some(function (matchCallback) {
            return matchCallback(token, index, stack);
          });
        };
      } else if (pattern instanceof Function) {
        return pattern;
      } else if (pattern instanceof RegExp) {
        return function (token) {
          // `test` on a global/sticky regex resumes from `lastIndex`; reset it
          // so every token is matched from its start.
          if (pattern.global || pattern.sticky) {
            pattern.lastIndex = 0;
          }
          return pattern.test(token);
        };
      } else {
        return function (token) {
          return pattern === token;
        };
      }
    }
  }]);

  return TokenStack;
})();
exports.default = TokenStack;
Oops, something went wrong.

0 comments on commit 3fba743

Please sign in to comment.