Skip to content

Commit

Permalink
prep chunker for dual use
Browse files Browse the repository at this point in the history
  • Loading branch information
retorquere committed Mar 2, 2024
1 parent c805edb commit ef13f2f
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 62 deletions.
118 changes: 65 additions & 53 deletions chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ const letter = new RegExp('[' + [
/\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05F0-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u08A0-\u08B4\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u10D0-\u10FA\u10FD-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-\u1877\u1880-\u18A8\u18AA\u18B0-\u18F5\u190
0-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5-\u1CF6\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312D\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FD5\uA000-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC/.source,

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on+guess, strict, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (off, off, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on, strict, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on, strict, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on+guess, as-needed, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (off, strict, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (off, as-needed, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on, as-needed, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (off, strict, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (off, as-needed, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on+guess, off, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on, off, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on+guess, strict, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on, off, false)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on+guess, off, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on+guess, as-needed, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (on, as-needed, true)

This line has a length of 3376. Maximum allowed is 240

Check warning on line 33 in chunker.ts

View workflow job for this annotation

GitHub Actions / test (off, off, true)

This line has a length of 3376. Maximum allowed is 240
].join('') + ']')

type Entry = {
type: string
key?: string
fields: Record<string, string>
}

export interface Chunk {
/**
* The text content of the chunk
Expand Down Expand Up @@ -77,45 +83,71 @@ export interface ParserOptions {
max_entries?: number
}

type Entry = {
type: string
key?: string
fields: Record<string, string>
}

class BibtexParser {
class BibTeXParser {
public parsing: string
public chunks: Chunk[]

public strings: Record<string, string>
public entries: Entry[]

private pos: number
// private linebreaks: { pos: number, line: number}[]
private input: string

private strings: Record<string, string>
private max_entries: number

reset(input) {
constructor(input: string, options: ParserOptions = {}) {
this.max_entries = options.max_entries || 0
this.input = input
this.pos = 0
this.entries = []
this.strings = {}
this.strings = {
jan: '01',
feb: '02',
mar: '03',
apr: '04',
may: '05',
jun: '06',
jul: '07',
aug: '08',
sep: '09',
oct: '10',
nov: '11',
dec: '12',
acmcs: 'ACM Computing Surveys',
acta: 'Acta Informatica',
cacm: 'Communications of the ACM',
ibmjrd: 'IBM Journal of Research and Development',
ibmsj: 'IBM Systems Journal',
ieeese: 'IEEE Transactions on Software Engineering',
ieeetc: 'IEEE Transactions on Computers',
ieeetcad: 'IEEE Transactions on Computer-Aided Design of Integrated Circuits',
ipl: 'Information Processing Letters',
jacm: 'Journal of the ACM',
jcss: 'Journal of Computer and System Sciences',
scp: 'Science of Computer Programming',
sicomp: 'SIAM Journal on Computing',
tocs: 'ACM Transactions on Computer Systems',
tods: 'ACM Transactions on Database Systems',
tog: 'ACM Transactions on Graphics',
toms: 'ACM Transactions on Mathematical Software',
toois: 'ACM Transactions on Office Information Systems',
toplas: 'ACM Transactions on Programming Languages and Systems',
tcs: 'Theoretical Computer Science',
}
this.parsing = null
this.chunks = []
// this.linebreaks = undefined
}

public parse(input, options: ParserOptions = {}): Chunk[] {
this.reset(input)
this.max_entries = options.max_entries || 0
public parse() {
this.bibtex()
return this.chunks
this.entries.reverse()
}
public async parseAsync(input, options: ParserOptions = {}): Promise<Chunk[]> {
this.reset(input)
this.max_entries = options.max_entries || 0

public async parseAsync() {
await this.bibtexAsync()
return this.chunks
this.entries.reverse()
}

private isWhitespace(s, horizontalOnly = false) {
Expand Down Expand Up @@ -242,17 +274,14 @@ class BibtexParser {
return values.join('')
}

private key(allowUnicode = false) {
private key() {
const start = this.pos
while (true) { // eslint-disable-line no-constant-condition
if (this.pos === this.input.length) {
throw new ParseError('Runaway key', this)
}

if (this.input[this.pos].match(/['a-zA-Z0-9&;_:\\./-]/)) {
this.pos++
}
else if (allowUnicode && this.input[this.pos].match(letter)) {
if (this.input[this.pos].match(/['a-zA-Z0-9&;_:\\./-]/) || this.input[this.pos].match(letter)) {
this.pos++
}
else {
Expand All @@ -279,8 +308,8 @@ class BibtexParser {
}
}

private entry(_d) {
this.entries[0].key = this.key(true)
private entry(d) {
this.entries.unshift({ type: d, key: this.key(), fields: {} })
this.match(',')
this.key_equals_value()
while (this.tryMatch(',')) {
Expand Down Expand Up @@ -310,23 +339,14 @@ class BibtexParser {
while (this.isWhitespace(this.input[this.pos], true)) this.pos++

if (this.input[this.pos] === '{') {
this.value_braces()
return
return this.value_braces()
}

const start = this.pos
while (this.input[this.pos] !== '\n' && this.pos < this.input.length) this.pos++
return this.input.substring(start, this.pos)
}

/*
private progress() {
const progress = Math.round((this.pos / this.input.length * 100) / 5) * 5 // eslint-disable-line no-magic-numbers
if (this._progress !== progress) {
this._progress = progress
process.stdout.write(` (${this._progress}%) `)
}
}
*/

private hasMore() {
if (this.max_entries && this.entries.length >= this.max_entries) return false
return (this.pos < this.input.length)
Expand Down Expand Up @@ -355,8 +375,6 @@ class BibtexParser {
}

private parseNext() {
// this.progress()

const chunk: Chunk = {
offset: {
pos: this.pos,
Expand Down Expand Up @@ -394,7 +412,6 @@ class BibtexParser {

default:
guard = this.matchGuard()
this.entries.unshift({ type: d, fields: {} })
this.entry(d)
this.match(guard)
chunk.entry = true
Expand Down Expand Up @@ -444,21 +461,16 @@ class BibtexParser {
*
* @returns the parser instance; its `chunks` property holds the array of chunks, with markers for type and errors (if any) found.
*/
export function parse(input: string, options: ParserOptions = {}): Chunk[] {
return (new BibtexParser).parse(input, options)
}

export function entries(input: string, options: ParserOptions = {}): { entries: Entry[], errors: Chunk[] } {
const parser = new BibtexParser
parser.parse(input, options)
return {
entries: parser.entries.reverse(),
errors: parser.chunks.filter(chunk => chunk.error),
}
export function parse(input: string, options: ParserOptions = {}): BibTeXParser {
const parser = new BibTeXParser(input, options)
parser.parse()
return parser
}

export const promises = {
async parse(input: string, options: ParserOptions = {}): Promise<Chunk[]> { // eslint-disable-line prefer-arrow/prefer-arrow-functions
return await (new BibtexParser).parseAsync(input, options)
async parse(input: string, options: ParserOptions = {}): Promise<BibTeXParser> { // eslint-disable-line prefer-arrow/prefer-arrow-functions
const parser = new BibTeXParser(input, options)
await parser.parseAsync()
return parser
},
}
1 change: 0 additions & 1 deletion docs/bibtex-parser.chunker.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

| Function | Description |
| --- | --- |
| [entries(input, options)](./bibtex-parser.chunker.entries.md) | |
| [parse(input, options)](./bibtex-parser.chunker.parse.md) | Reads the bibtex input and splits it into separate chunks of <code>string</code>s, <code>@comment</code>s, and bibtex entries. Useful for detecting if a file is a bibtex file and for filtering out basic errors that would make the more sophisticated \[\[bibtex.parse\]\] reject the whole file |

## Interfaces
Expand Down
4 changes: 2 additions & 2 deletions docs/bibtex-parser.chunker.parse.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Reads the bibtex input and splits it into separate chunks of `string`<!-- -->s,
**Signature:**

```typescript
export declare function parse(input: string, options?: ParserOptions): Chunk[];
export declare function parse(input: string, options?: ParserOptions): BibTeXParser;
```

## Parameters
Expand All @@ -21,7 +21,7 @@ export declare function parse(input: string, options?: ParserOptions): Chunk[];

**Returns:**

[Chunk](./bibtex-parser.chunker.chunk.md)<!-- -->\[\]
BibTeXParser

the parser instance; its `chunks` property holds the array of chunks, with markers for type and errors (if any) found.

2 changes: 1 addition & 1 deletion docs/bibtex-parser.chunker.promises.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@

```typescript
promises: {
parse(input: string, options?: ParserOptions): Promise<Chunk[]>;
parse(input: string, options?: ParserOptions): Promise<BibTeXParser>;
}
```
6 changes: 3 additions & 3 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ class Parser {

public ast(input, clean = true): Node[] {
let parsed: Node[] = []
for (const chunk of chunker.parse(input)) {
for (const chunk of chunker.parse(input).chunks) {
const { children } = bibtex.parse(chunk.text, {...this.options, combining: combining.macros})
if (clean) this.clean(children)
parsed = parsed.concat(children)
Expand All @@ -631,15 +631,15 @@ class Parser {
}

public parse(input): Bibliography {
for (const chunk of chunker.parse(input)) {
for (const chunk of chunker.parse(input).chunks) {
this.parseChunk(chunk)
}
return this.parsed()
}

public async parseAsync(input): Promise<Bibliography> {
// eslint-disable-next-line @typescript-eslint/await-thenable
for (const chunk of await chunker.promises.parse(input)) {
for (const chunk of (await chunker.promises.parse(input)).chunks) {
this.parseChunk(chunk)
}
return this.parsed()
Expand Down
2 changes: 1 addition & 1 deletion snap.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/bash
TAP_SNAPSHOT=0 npm_config_test=apa-test npm_config_all=true npm test
TAP_SNAPSHOT=0 npm_config_test=664 npm_config_all=false npm test
2 changes: 1 addition & 1 deletion test/all.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ for (let [option, value] of Object.entries(process.env)) {
config[option] = value
}

if (config.snapshot || config.all) { // reset to all for snapshots
if (config.snapshot || (config.all === 'true')) { // reset to all for snapshots
for (const [key, value] of Object.entries(valid)) {
if (!process.env[prefix + key]) config[key] = value
}
Expand Down

0 comments on commit ef13f2f

Please sign in to comment.