diff --git a/content/auto-export.ts b/content/auto-export.ts index 5846fa67b4..84b03d6bae 100644 --- a/content/auto-export.ts +++ b/content/auto-export.ts @@ -165,7 +165,6 @@ if (Preference.autoExportIdleWait < 1) Preference.autoExportIdleWait = 1 const queue = new class TaskQueue { private scheduler = new Scheduler('autoExportDelay', 1000) private autoexports: any - private started = false private idleService = Components.classes['@mozilla.org/widget/idleservice;1'].getService(Components.interfaces.nsIIdleService) constructor() { @@ -173,11 +172,10 @@ const queue = new class TaskQueue { } public start() { - if (this.started) return - this.started = true if (Preference.autoExport === 'immediate') this.resume('startup') - this.idleService.addIdleObserver(this, Preference.autoExportIdleWait * 1000) + // really dumb but the idle service deals with msecs wverywhere -- except add, which is in seconds + this.idleService.addIdleObserver(this, Preference.autoExportIdleWait) Zotero.Notifier.registerObserver(this, ['sync'], 'BetterBibTeX', 1) } @@ -186,15 +184,15 @@ const queue = new class TaskQueue { this.autoexports = autoexports } - public pause(reason: 'startup' | 'end-of-idle' | 'start-of-sync' | 'trigger-change') { - log.debug('idle?: queue paused:', reason) + public pause(reason: 'startup' | 'end-of-idle' | 'start-of-sync' | 'preference-change') { + log.debug('on-idle: queue.pause:', reason) this.scheduler.paused = true } - public resume(reason: 'startup' | 'end-of-sync' | 'start-of-idle' | 'trigger-change') { - log.debug('idle?: queue resume request:', reason) + public resume(reason: 'startup' | 'end-of-sync' | 'start-of-idle' | 'preference-change') { + log.debug('on-idle: queue.resume:', reason) if (Zotero.Sync.Runner.syncInProgress) { - log.debug('idle?: queue not resumed: sync in progress, end-of-sync will trigger resume') + log.debug('on-idle: queue not resumed: sync in progress, end-of-sync will trigger resume') this.scheduler.paused = true return } @@ -202,14 
+200,14 @@ const queue = new class TaskQueue { const is_idle = this.idleService.idleTime >= Preference.autoExportIdleWait * 1000 switch (Preference.autoExport) { case 'off': - log.debug('idle?: queue not resumed: auto-export is off') + log.debug('on-idle: queue not resumed: auto-export is off') this.scheduler.paused = true return case 'idle': // don't re-schedule idle for end-of-sync / should never happen? if (!is_idle) { - log.debug('idle?: queue not resumed:', reason, "but we're not actually idle") + log.debug('on-idle: queue not resumed:', reason, "but we're not actually idle") this.scheduler.paused = true return } @@ -219,7 +217,7 @@ const queue = new class TaskQueue { break } - log.debug('idle?: queue resumed:', reason) + log.debug('on-idle: queue resumed:', reason) this.scheduler.paused = false } @@ -308,6 +306,7 @@ const queue = new class TaskQueue { } } + log.debug('on-idle: starting auto-export') await Promise.all(jobs.map(job => Translators.exportItems(ae.translatorID, displayOptions, job.scope, job.path))) await repo.push(l10n.localize('Preferences.auto-export.git.message', { type: Translators.byId[ae.translatorID].label.replace('Better ', '') })) @@ -330,17 +329,23 @@ const queue = new class TaskQueue { } // idle observer - protected observe(subject, topic, data) { - log.debug('idle?: observer:', { subject, topic, data }) - if (!this.started || Preference.autoExport === 'off') return + protected observe(_subject, topic, data) { + log.debug('on-idle: idle.observe:', { topic, data }) + if (Preference.autoExport === 'off') { + log.debug('on-idle: idle.observe: auto-export is off') + this.pause('preference-change') + return + } switch (topic) { case 'back': case 'active': + log.debug('on-idle: idle.observe: => pause', topic) this.pause('end-of-idle') break case 'idle': + log.debug('on-idle: idle.observe: => resume', topic) this.resume('start-of-idle') break @@ -354,16 +359,20 @@ const queue = new class TaskQueue { // It is theoretically possible that 
auto-export is paused because Zotero is idle and then restarted when the sync finishes, but // I can't see how a system can be considered idle when Zotero is syncing. protected notify(action, type) { - if (!this.started || Preference.autoExport === 'off') return + if (Preference.autoExport === 'off') { + log.debug('on-idle: sync.notify: auto-export is off') + this.pause('preference-change') + return + } switch(`${type}.${action}`) { case 'sync.start': - log.debug('idle?: sync started => pausing queue') + log.debug('on-idle: sync.notify: started => pausing queue') this.pause('start-of-sync') break case 'sync.finish': - log.debug('idle?: sync finished => resuming queue') + log.debug('on-idle: sync.notify: finished => resuming queue') this.resume('end-of-sync') break @@ -512,9 +521,9 @@ Events.on('preference-changed', pref => { switch (Preference.autoExport) { case 'immediate': - queue.resume('trigger-change') + queue.resume('preference-change') break default: // off / idle - queue.pause('trigger-change') + queue.pause('preference-change') } }) diff --git a/pandoc/Makefile b/pandoc/Makefile index 250ba45598..b62a536f01 100644 --- a/pandoc/Makefile +++ b/pandoc/Makefile @@ -11,7 +11,7 @@ count: bundle: ifeq ($(shell grep '^local pl =' *.lua), ) echo "print('zotero-live-citations $(shell git rev-parse --short HEAD)')" > $(DEPLOYED) - ~/.luarocks/bin/amalg.lua -o $(BUNDLED) -s $(MAIN) lunajson lunajson.decoder lunajson.encoder lunajson.sax locator utils zotero + /usr/local/bin/amalg.lua -o $(BUNDLED) -s $(MAIN) lunajson lunajson.decoder lunajson.encoder lunajson.sax locator utils zotero cat $(BUNDLED) >> $(DEPLOYED) rm $(BUNDLED) else @@ -19,10 +19,13 @@ else @exit 1 endif +test-deployed: + @rm -f *.docx *.odt *.json + pandoc -s --lua-filter=$(DEPLOYED) -o paper$(TIMESTAMP).odt main.md test: @rm -f *.docx *.odt *.json #@pandoc -s --lua-filter=$(MAIN) -o paper$(TIMESTAMP).docx main.md - @pandoc -s --lua-filter=$(MAIN) -o paper$(TIMESTAMP).odt main.md + pandoc -s 
--lua-filter=$(MAIN) -o paper$(TIMESTAMP).odt main.md pandoc -s --metadata=zotero_scannable_cite:true --lua-filter=$(MAIN) -o paper$(TIMESTAMP)-scannable-cite.odt main.md jurism: diff --git a/pandoc/zotero.lua b/pandoc/zotero.lua index deeedbd1ca..89ff39c53e 100644 --- a/pandoc/zotero.lua +++ b/pandoc/zotero.lua @@ -10,23 +10,36 @@ local state = { module.citekeys = {} -function module.authors(csl) - if csl.author == nil then - return nil - end - +function module.authors(csl_or_item) local authors = {} local author - for _, author in ipairs(csl.author) do - if author.literal ~= nil then - table.insert(authors, author.literal) - elseif author.family ~= nil then - table.insert(authors, author.family) + + if csl_or_item.author ~= nil then + for _, author in ipairs(csl_or_item.author) do + if author.literal ~= nil then + table.insert(authors, author.literal) + elseif author.family ~= nil then + table.insert(authors, author.family) + end end + + elseif csl_or_item.creators ~= nil then + for _, author in ipairs(csl_or_item.creators) do + if author.name ~= nil then + table.insert(authors, author.name) + elseif author.lastName ~= nil then + table.insert(authors, author.lastName) + end + end + + elseif csl_or_item.reporter ~= nil then + table.insert(authors, csl_or_item.reporter) end + if utils.tablelength(authors) == 0 then return nil end + local last = table.remove(authors) if utils.tablelength(authors) == 0 then return last diff --git a/site/content/citing/_index.md.orig b/site/content/citing/_index.md.orig new file mode 100644 index 0000000000..50a080ac6a --- /dev/null +++ b/site/content/citing/_index.md.orig @@ -0,0 +1,112 @@ +--- +title: Citation Keys +weight: 3 +aliases: + - /Citation-Keys + - /citation-keys +tags: + - citation keys +--- + +## Generating citekeys for your items + +The BibTeX citations keys generated by the standard Zotero exporters are always generated at time of export, using an algorithm that usually generates unique keys. 
For serious LaTeX users, "usually" presents the following problems: + +* If a non-unique key is generated, which one gets postfixed with a distinguishing character is essentially + non-deterministic. +* The keys are *always* auto-generated, so if you correct a typo in the author name or title, the key will change +* You can't see the citation keys until you export them + +For a LaTeX author, the citation keys have their own meaning, fully separate from the other entry data, even if +people usually pick a naming scheme related to them. As the citation key is *the* piece of data that connects your +bibliography, this is a piece of data you want to have control over. BBT offers you this control: + +* Stable citation keys, without key clashes. BBT generates citation keys that take into account other existing keys in your library in a deterministic way, regardless of what part of your library you export, or the order in which you do it. +* BBT is conservative about citation key changes, and allows you to fix keys to any value of your choosing. +* Generate citation keys from JabRef(-ish) patterns. + +You can also + +* Drag and drop LaTeX citations using these keys to your favorite LaTeX editor +* Show your citation keys in the item list view. + +## Set your own, fixed citation keys + +By default, BBT generates the citation key from the item information, and this key may change when you edit the item. Such keys are called `dynamic` keys. Pinned keys (see below) are marked with a pushpin in the item list view and in the item details to distinguish them from dynamic keys. + +You can fix the citation key (called `pinning` in BBT) for an item by adding the text `Citation Key: ` anywhere in the +`extra` field of the item on a line of its own. You can generate a pinned citation key by selecting one or more items, right-clicking, and selecting `Generate BibTeX key`, which will add the current citation key to the `extra` field, thereby pinning it. 
+ +## Drag and drop/hotkey citations + +You can drag and drop citations into your LaTeX/Markdown/Orgmode editor, and it will add a proper `\cite{citekey}`/`[@citekey]`/`[[zotero://select...][@citekey]]`. The `cite` command is +configurable for LaTeX by setting the config option in the [preferences]({{< ref "installation/preferences" >}}). Do not include the leading backslash. + +This feature requires a one-time setup: choose the Quick Copy format under the `Citation keys` preferences for BBT, and go to Zotero preferences, tab Export, under Default Output Format, select "Better BibTeX Quick Copy: [format you just selected]". + +## Find duplicate keys through integration with [Report Customizer](https://github.com/retorquere/zotero-report-customizer) + +The plugin will generate BibTeX comments to show whether a key conflicts and with which entry. BBT integrates with +[Zotero: Report Customizer](https://github.com/retorquere/zotero-report-customizer) to display the BibTeX key plus any +conflicts between them in the zotero report. + +## Configurable citekey generator + +BBT also implements a citekey generator for those entries that don't have a citekey set explicitly; the formatter pattern language used to follow +the [JabRef key formatting syntax](https://help.jabref.org/en/BibtexKeyPatterns), but now uses a javascript-ish format. You can set your generator pattern in the Better BibTeX +preferences (you can get there via the Zotero preferences, or by clicking the Better BibTeX "Preferences" button in the addons pane). + +The default key pattern is `auth.lower + shorttitle(3,3) + year`; if you have papers that use keys which were generated by the key generator of the standard Bib(La)TeX exporters of Zotero you may want to use `zotero.clean` instead in order to ease migration from existing exports for people who previously used the standard Zotero Bib(La)TeX exports. You will be offered this choice on first run of BBT. 
+ +A common pattern is `auth.lower + year`, which means + +1. last name of first author without spaces, in lowercase +2. year of publication if any, +3. a letter postfix (a, b, c, etc) in case of a clash (this part is always added, you can't disable it, although you can change it to Zotero-style numeric) + +Changing a pattern will only affect items created/changed after you changed the pattern; existing keys are not automatically regenerated when you change the pattern. If you want your keys to update after a pattern change you will have to select your items, right-click, and select `Refresh`. This will not affect keys you have pinned. + +If you want to get fancy, you can set multiple patterns separated by a vertical bar, of which the first will be applied +that yields a non-empty string. If all return an empty string, a random key will be generated. + +An example application for this behavior is to use the `tex.shortauthor` from the [extra field]({{< ref "../exporting/extra-fields" >}}) when defined to generate short citation keys for entries with long group author names, but to default to `auth.lower` otherwise: + +```text +extra('tex.shortauthor').transliterate.clean.lower.len + year | auth.lower + year +``` + +You can add a verbatim text by just including it in single or double quotes: + +```text +extra('tex.shortauthor').transliterate.clean.lower.len + year | 'default' + auth.lower + year +``` + +### Generating citekeys + +To generate your citekeys, you use a formula composed of functions and filters. Broadly, functions grab text from your item, and filters transform that text. **Note that the formula syntax has changed from a bracketed format to a javascript-ish format**. The old syntax was getting harder to maintain and its inflexibility prevented new extensions to the functions being implemented cleanly. **The old syntax still works** and will be translated to the new format automatically and displayed below the old format if you use it. 
At some point not too far away, BBT will automatically upgrade old patterns and use those directly. But for now you can choose which format you use. + +Below you will find a full list of functions and filters you can use, in the new format only, sorry. You can still use these in the old syntax, but they support only positional parameters, where I would recommend generally to use the new syntax with named parameters. + +#### Functions + +{{< citekey-formatters/functions >}} + +**Note**: All `auth...` functions will fall back to editors if no authors are present on the item. + +**Note**: the functions above all have the `clean` filter (see below) applied to them by default. You can turn that off by passing `clean=false`. + +### Direct access to unprocessed fields + +The above functions all retrieve information stored in the item's fields and process it in some way. If you don't want this, you can instead call field contents without any processing. To access Zotero fields, refer to them as given in the table below: + +{{< citekey-formatters/fields >}} + +(fields marked JM are only available in Juris-M). + +#### Filters + +{{< citekey-formatters/filters >}} + +*Usage note*: the functions `condense`, `skipwords`, `capitalize` and `select` rely on whitespaces for word handling. Most functions strip +whitespace and thereby make these filter functions sort of useless. You will in general want to use the fields from the +table above, which give you the values from Zotero without any changes. The fields with `**` are only available in Juris-M. diff --git a/site/content/exporting/zotero.lua b/site/content/exporting/zotero.lua index 3dc2dcb6a4..36c65c9675 100644 --- a/site/content/exporting/zotero.lua +++ b/site/content/exporting/zotero.lua @@ -1,4 +1,4 @@ -print('zotero-live-citations 993af1f79') +print('zotero-live-citations f34bfc770') do local _ENV = _ENV package.preload[ "locator" ] = function( ... 
) local arg = _G.arg; @@ -1680,23 +1680,36 @@ local state = { module.citekeys = {} -function module.authors(csl) - if csl.author == nil then - return nil - end - +function module.authors(csl_or_item) local authors = {} local author - for _, author in ipairs(csl.author) do - if author.literal ~= nil then - table.insert(authors, author.literal) - elseif author.family ~= nil then - table.insert(authors, author.family) + + if csl_or_item.author ~= nil then + for _, author in ipairs(csl_or_item.author) do + if author.literal ~= nil then + table.insert(authors, author.literal) + elseif author.family ~= nil then + table.insert(authors, author.family) + end end + + elseif csl_or_item.creators ~= nil then + for _, author in ipairs(csl_or_item.creators) do + if author.name ~= nil then + table.insert(authors, author.name) + elseif author.lastName ~= nil then + table.insert(authors, author.lastName) + end + end + + elseif csl_or_item.reporter ~= nil then + table.insert(authors, csl_or_item.reporter) end + if utils.tablelength(authors) == 0 then return nil end + local last = table.remove(authors) if utils.tablelength(authors) == 0 then return last diff --git a/site/content/exporting/zotero.lua.orig b/site/content/exporting/zotero.lua.orig new file mode 100644 index 0000000000..3dc2dcb6a4 --- /dev/null +++ b/site/content/exporting/zotero.lua.orig @@ -0,0 +1,2158 @@ +print('zotero-live-citations 993af1f79') +do +local _ENV = _ENV +package.preload[ "locator" ] = function( ... 
) local arg = _G.arg; +local module = {} + +local labels = { + book = 'book', + ['bk.'] = 'book', + ['bks.'] = 'book', + chapter = 'chapter', + ['chap.'] = 'chapter', + ['chaps.'] = 'chapter', + column = 'column', + ['col.'] = 'column', + ['cols.'] = 'column', + figure = 'figure', + ['fig.'] = 'figure', + ['figs.'] = 'figure', + folio = 'folio', + ['fol.'] = 'folio', + ['fols.'] = 'folio', + number = 'number', + ['no.'] = 'number', + ['nos.'] = 'number', + line = 'line', + ['l.'] = 'line', + ['ll.'] = 'line', + note = 'note', + ['n.'] = 'note', + ['nn.'] = 'note', + opus = 'opus', + ['op.'] = 'opus', + ['opp.'] = 'opus', + page = 'page', + ['p.'] = 'page', + ['pp.'] = 'page', + paragraph = 'paragraph', + ['para.'] = 'paragraph', + ['paras.'] = 'paragraph', + part = 'part', + ['pt.'] = 'part', + ['pts.'] = 'part', + section = 'section', + ['sec.'] = 'section', + ['secs.'] = 'section', + ['sub verbo'] = 'sub verbo', + ['s.v.'] = 'sub verbo', + ['s.vv.'] = 'sub verbo', + verse = 'verse', + ['v.'] = 'verse', + ['vv.'] = 'verse', + volume = 'volume', + ['vol.'] = 'volume', + ['vols.'] = 'volume' +} + +function module.short_labels() + local sl = {} + for k, v in pairs(labels) do + if not sl[v] or string.len(k) < string.len(sl[v]) then + sl[v] = k + end + end + + for k, v in pairs(labels) do + labels[k] = sl[v] + end +end + +local function get_label(locator) + local s, e, label, remaining = string.find(locator, '^(%l+.?) 
*(.*)') + if label and labels[label:lower()] then + return labels[label:lower()], remaining + else + return labels['page'], locator + end +end + +local function parse(suffix) + if not suffix then + return nil, nil, suffix + end + + local s, e, locator, label, remaining + local _suffix = suffix + + s, e, locator = string.find(_suffix, '^{([^{}]+)}$') + if locator then + label, locator = get_label(locator) + return label, locator, nil + end + + local s, e, locator, remaining = string.find(_suffix, '^{([^{}]+)}, *(.*)') + if locator then + label, locator = get_label(locator) + return label, locator, remaining + end + + s, e, locator = string.find(_suffix, '^, *{([^{}]+)}$') + if locator then + label, locator = get_label(locator) + return label, locator, nil + end + + s, e, locator, remaining = string.find(_suffix, '^, *{([^{}]+)} *(.*)') + if locator then + label, locator = get_label(locator) + return label, locator, remaining + end + + if not string.find(_suffix, '^, .') then + return nil, nil, suffix + end + + s, e, label, remaining = string.find(_suffix, '^, *(%l+%.?) *(.*)') + if label and labels[label:lower()] then + label = labels[label:lower()] + _suffix = ', ' .. remaining + else + label = labels['page'] + end + + local _locator = '' + local loc + while true do + s, e, loc, remaining = string.find(_suffix, '^(, *[^, ]+)(.*)') + if loc then + _locator = _locator .. loc + _suffix = remaining + else + break + end + end + + if _locator ~= '' then + if _suffix == '' then + _suffix = nil + end + + _locator = _locator:gsub('^, *', '') + + return label, _locator, _suffix + end + + return nil, nil, suffix +end + +function module.parse(suffix) + label, locator, suffix = parse(suffix) + if label == labels['page'] then + label = nil + end + return label, locator, suffix +end + +return module +end +end + +do +local _ENV = _ENV +package.preload[ "lunajson" ] = function( ... 
) local arg = _G.arg; +local newdecoder = require 'lunajson.decoder' +local newencoder = require 'lunajson.encoder' +local sax = require 'lunajson.sax' +-- If you need multiple contexts of decoder and/or encoder, +-- you can require lunajson.decoder and/or lunajson.encoder directly. +return { + decode = newdecoder(), + encode = newencoder(), + newparser = sax.newparser, + newfileparser = sax.newfileparser, +} +end +end + +do +local _ENV = _ENV +package.preload[ "lunajson.decoder" ] = function( ... ) local arg = _G.arg; +local setmetatable, tonumber, tostring = + setmetatable, tonumber, tostring +local floor, inf = + math.floor, math.huge +local mininteger, tointeger = + math.mininteger or nil, math.tointeger or nil +local byte, char, find, gsub, match, sub = + string.byte, string.char, string.find, string.gsub, string.match, string.sub + +local function _decode_error(pos, errmsg) + error("parse error at " .. pos .. ": " .. errmsg, 2) +end + +local f_str_ctrl_pat +if _VERSION == "Lua 5.1" then + -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly + f_str_ctrl_pat = '[^\32-\255]' +else + f_str_ctrl_pat = '[\0-\31]' +end + +local _ENV = nil + + +local function newdecoder() + local json, pos, nullv, arraylen, rec_depth + + -- `f` is the temporary for dispatcher[c] and + -- the dummy for the first return value of `find` + local dispatcher, f + + --[[ + Helper + --]] + local function decode_error(errmsg) + return _decode_error(pos, errmsg) + end + + --[[ + Invalid + --]] + local function f_err() + decode_error('invalid value') + end + + --[[ + Constants + --]] + -- null + local function f_nul() + if sub(json, pos, pos+2) == 'ull' then + pos = pos+3 + return nullv + end + decode_error('invalid value') + end + + -- false + local function f_fls() + if sub(json, pos, pos+3) == 'alse' then + pos = pos+4 + return false + end + decode_error('invalid value') + end + + -- true + local function f_tru() + if sub(json, pos, pos+2) == 'rue' then + 
pos = pos+3 + return true + end + decode_error('invalid value') + end + + --[[ + Numbers + Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp) + is captured as a number and its conformance to the JSON spec is checked. + --]] + -- deal with non-standard locales + local radixmark = match(tostring(0.5), '[^0-9]') + local fixedtonumber = tonumber + if radixmark ~= '.' then + if find(radixmark, '%W') then + radixmark = '%' .. radixmark + end + fixedtonumber = function(s) + return tonumber(gsub(s, '.', radixmark)) + end + end + + local function number_error() + return decode_error('invalid number') + end + + -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_zro(mns) + local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0 + + if num == '' then + if c == '' then + if mns then + return -0.0 + end + return 0 + end + + if c == 'e' or c == 'E' then + num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) + if c == '' then + pos = pos + #num + if mns then + return -0.0 + end + return 0.0 + end + end + number_error() + end + + if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then + number_error() + end + + if c ~= '' then + if c == 'e' or c == 'E' then + num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) + end + if c ~= '' then + number_error() + end + end + + pos = pos + #num + c = fixedtonumber(num) + + if mns then + c = -c + end + return c + end + + -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` + local function f_num(mns) + pos = pos-1 + local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos) + if byte(num, -1) == 0x2E then -- error if ended with period + number_error() + end + + if c ~= '' then + if c ~= 'e' and c ~= 'E' then + number_error() + end + num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) + if not num or c ~= '' then + number_error() + end + end + + pos = pos + #num + c = fixedtonumber(num) + + if mns then + c = -c + if c == mininteger and not find(num, 
'[^0-9]') then + c = mininteger + end + end + return c + end + + -- skip minus sign + local function f_mns() + local c = byte(json, pos) + if c then + pos = pos+1 + if c > 0x30 then + if c < 0x3A then + return f_num(true) + end + else + if c > 0x2F then + return f_zro(true) + end + end + end + decode_error('invalid number') + end + + --[[ + Strings + --]] + local f_str_hextbl = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, inf, inf, inf, inf, inf, inf, + inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, + inf, inf, inf, inf, inf, inf, inf, inf, + inf, inf, inf, inf, inf, inf, inf, inf, + inf, inf, inf, inf, inf, inf, inf, inf, + inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, + __index = function() + return inf + end + } + setmetatable(f_str_hextbl, f_str_hextbl) + + local f_str_escapetbl = { + ['"'] = '"', + ['\\'] = '\\', + ['/'] = '/', + ['b'] = '\b', + ['f'] = '\f', + ['n'] = '\n', + ['r'] = '\r', + ['t'] = '\t', + __index = function() + decode_error("invalid escape sequence") + end + } + setmetatable(f_str_escapetbl, f_str_escapetbl) + + local function surrogate_first_error() + return decode_error("1st surrogate pair byte not continued by 2nd") + end + + local f_str_surrogate_prev = 0 + local function f_str_subst(ch, ucode) + if ch == 'u' then + local c1, c2, c3, c4, rest = byte(ucode, 1, 5) + ucode = f_str_hextbl[c1-47] * 0x1000 + + f_str_hextbl[c2-47] * 0x100 + + f_str_hextbl[c3-47] * 0x10 + + f_str_hextbl[c4-47] + if ucode ~= inf then + if ucode < 0x80 then -- 1byte + if rest then + return char(ucode, rest) + end + return char(ucode) + elseif ucode < 0x800 then -- 2bytes + c1 = floor(ucode / 0x40) + c2 = ucode - c1 * 0x40 + c1 = c1 + 0xC0 + c2 = c2 + 0x80 + if rest then + return char(c1, c2, rest) + end + return char(c1, c2) + elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes + c1 = floor(ucode / 0x1000) + ucode = ucode - c1 * 0x1000 + c2 = floor(ucode / 0x40) + c3 = ucode - c2 * 0x40 + c1 = c1 + 0xE0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + if rest then + return 
char(c1, c2, c3, rest) + end + return char(c1, c2, c3) + elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st + if f_str_surrogate_prev == 0 then + f_str_surrogate_prev = ucode + if not rest then + return '' + end + surrogate_first_error() + end + f_str_surrogate_prev = 0 + surrogate_first_error() + else -- surrogate pair 2nd + if f_str_surrogate_prev ~= 0 then + ucode = 0x10000 + + (f_str_surrogate_prev - 0xD800) * 0x400 + + (ucode - 0xDC00) + f_str_surrogate_prev = 0 + c1 = floor(ucode / 0x40000) + ucode = ucode - c1 * 0x40000 + c2 = floor(ucode / 0x1000) + ucode = ucode - c2 * 0x1000 + c3 = floor(ucode / 0x40) + c4 = ucode - c3 * 0x40 + c1 = c1 + 0xF0 + c2 = c2 + 0x80 + c3 = c3 + 0x80 + c4 = c4 + 0x80 + if rest then + return char(c1, c2, c3, c4, rest) + end + return char(c1, c2, c3, c4) + end + decode_error("2nd surrogate pair byte appeared without 1st") + end + end + decode_error("invalid unicode codepoint literal") + end + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + surrogate_first_error() + end + return f_str_escapetbl[ch] .. 
ucode + end + + -- caching interpreted keys for speed + local f_str_keycache = setmetatable({}, {__mode="v"}) + + local function f_str(iskey) + local newpos = pos + local tmppos, c1, c2 + repeat + newpos = find(json, '"', newpos, true) -- search '"' + if not newpos then + decode_error("unterminated string") + end + tmppos = newpos-1 + newpos = newpos+1 + c1, c2 = byte(json, tmppos-1, tmppos) + if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s + repeat + tmppos = tmppos-2 + c1, c2 = byte(json, tmppos-1, tmppos) + until c2 ~= 0x5C or c1 ~= 0x5C + tmppos = newpos-2 + end + until c2 ~= 0x5C -- leave if '"' is not preceded by '\' + + local str = sub(json, pos, tmppos) + pos = newpos + + if iskey then -- check key cache + tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val + if tmppos then + return tmppos + end + tmppos = str + end + + if find(str, f_str_ctrl_pat) then + decode_error("unescaped control string") + end + if find(str, '\\', 1, true) then -- check whether a backslash exists + -- We need to grab 4 characters after the escape char, + -- for encoding unicode codepoint to UTF-8. + -- As we need to ensure that every first surrogate pair byte is + -- immediately followed by second one, we grab upto 5 characters and + -- check the last for this purpose. 
+ str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst) + if f_str_surrogate_prev ~= 0 then + f_str_surrogate_prev = 0 + decode_error("1st surrogate pair byte not continued by 2nd") + end + end + if iskey then -- commit key cache + f_str_keycache[tmppos] = str + end + return str + end + + --[[ + Arrays, Objects + --]] + -- array + local function f_ary() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + decode_error('too deeply nested json (> 1000)') + end + local ary = {} + + pos = match(json, '^[ \n\r\t]*()', pos) + + local i = 0 + if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty + pos = pos+1 + else + local newpos = pos + repeat + i = i+1 + f = dispatcher[byte(json,newpos)] -- parse value + pos = newpos+1 + ary[i] = f() + newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma + until not newpos + + newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket + if not newpos then + decode_error("no closing bracket of an array") + end + pos = newpos + end + + if arraylen then -- commit the length of the array if `arraylen` is set + ary[0] = i + end + rec_depth = rec_depth - 1 + return ary + end + + -- objects + local function f_obj() + rec_depth = rec_depth + 1 + if rec_depth > 1000 then + decode_error('too deeply nested json (> 1000)') + end + local obj = {} + + pos = match(json, '^[ \n\r\t]*()', pos) + if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty + pos = pos+1 + else + local newpos = pos + + repeat + if byte(json, newpos) ~= 0x22 then -- check '"' + decode_error("not key") + end + pos = newpos+1 + local key = f_str(true) -- parse key + + -- optimized for compact json + -- c1, c2 == ':', or + -- c1, c2, c3 == ':', ' ', + f = f_err + local c1, c2, c3 = byte(json, pos, pos+3) + if c1 == 0x3A then + if c2 ~= 0x20 then + f = dispatcher[c2] + newpos = pos+2 + else + f = dispatcher[c3] + newpos = pos+3 + end + end + if f == f_err then -- 
-- Decoder entry point.
--   json_     : the JSON text
--   pos_      : optional start position; when given, decoding stops after one
--               value and the position following it is returned as well
--   nullv_    : value the decoder state uses for JSON null
--   arraylen_ : when truthy, arrays store their length at index 0
-- Returns the decoded value, plus the next position when `pos_` was supplied.
-- Without `pos_`, anything other than whitespace after the value is an error.
local function decode(json_, pos_, nullv_, arraylen_)
  -- publish the arguments to the parser closures
  json = json_
  pos = pos_
  nullv = nullv_
  arraylen = arraylen_
  rec_depth = 0

  -- skip leading whitespace, then dispatch on the first significant byte
  pos = match(json, '^[ \n\r\t]*()', pos)
  f = dispatcher[byte(json, pos)]
  pos = pos + 1
  local value = f()

  if pos_ then
    return value, pos
  end

  -- whole-document mode: only whitespace may follow the value
  f, pos = find(json, '^[ \n\r\t]*', pos)
  if pos ~= #json then
    decode_error('json ended')
  end
  return value
end
-- Append the JSON text of a finite number to `builder`.
-- Infinities and NaN fail the range test and raise 'invalid number'.
-- When the current locale formats numbers with a grouping mark or a radix
-- mark other than '.', the formatted text is normalised first.
local function f_number(n)
  if not (tiny < n and n < huge) then
    error('invalid number')
  end

  local text = format("%.17g", n)
  if radixordelim then
    if delimmark then
      text = gsub(text, delimmark, '') -- drop grouping marks
    end
    if radixmark then
      text = gsub(text, radixmark, '.') -- force '.' as the radix
    end
  end

  builder[i] = text
  i = i + 1
end
-- Encoder entry point: serialise `v_` to a JSON string.
-- `nullv_` is the sentinel that `doencode` writes out as `null`.
local function encode(v_, nullv_)
  -- reset the shared encoder state for this run
  v = v_
  nullv = nullv_
  i = 1
  builder = {}
  visited = {}

  doencode(v)
  return concat(builder)
end
-- Slow-path matcher for the tail of a literal ('ull', 'alse', 'rue').
-- Bytes are read one at a time through `tellc`, so the literal may straddle
-- a chunk boundary.
--   target, targetlen : expected remaining characters and their count
--   ret               : value handed to the SAX callback on success
--   sax_f             : SAX callback to invoke
-- Raises a parse error on the first mismatching byte.
local function generic_constant(target, targetlen, ret, sax_f)
  local idx = 1
  while idx <= targetlen do
    local got = tellc()
    if got ~= byte(target, idx) then
      parse_error("invalid char")
    end
    pos = pos + 1
    idx = idx + 1
  end
  return sax_f(ret)
end
-- Deal with non-standard locales.
-- `tonumber` follows the C locale's radix character, while JSON always uses
-- '.'. Detect the radix mark the runtime prints and, if it is not '.',
-- rewrite the JSON radix before converting.
local radixmark = match(tostring(0.5), '[^0-9]')
local fixedtonumber = tonumber
if radixmark ~= '.' then
  -- `radixmark` is used as a gsub *replacement* string, where '%' is the only
  -- special character; escaping any other character with '%' is an error on
  -- Lua 5.2+.
  local radixrepl = gsub(radixmark, '%%', '%%%%')
  fixedtonumber = function(s)
    -- '%.' matches a literal dot (a bare '.' would match every character).
    -- Keep only gsub's first result so the match count is not passed to
    -- tonumber as a base.
    local localized = gsub(s, '%.', radixrepl)
    return tonumber(localized)
  end
end
-- Handle a leading minus sign: look at the byte after '-' and hand over to
-- the zero-prefixed or non-zero-prefixed number parser with `mns = true`.
-- Any other byte (or end of input) is an invalid number.
local function f_mns()
  local c = byte(json, pos) or tellc() -- tellc pulls the next chunk if needed
  if c then
    pos = pos + 1
    if c == 0x30 then                  -- '0'
      return f_zro(true)
    elseif 0x30 < c and c < 0x3A then  -- '1'..'9'
      return f_num(true)
    end
  end
  parse_error("invalid number")
end
-- gsub callback that expands one backslash escape inside a string.
--   ch    : the character following the backslash
--   ucode : up to five following non-backslash characters; for '\u' the first
--           four are the hex digits and the fifth (if any) is the character
--           right after the escape
-- Returns the replacement text (decoded character(s) plus any look-ahead
-- characters that were captured but not consumed by the escape).
-- `f_str_surrogate_prev` carries a pending first surrogate between calls.
local function f_str_subst(ch, ucode)
  if ch ~= 'u' then
    -- a simple escape may not follow a dangling first surrogate
    if f_str_surrogate_prev ~= 0 then
      f_str_surrogate_prev = 0
      surrogate_first_error()
    end
    return f_str_escapetbl[ch] .. ucode
  end

  local h1, h2, h3, h4, rest = byte(ucode, 1, 5)
  -- non-hex digits map to `inf`, which poisons the sum
  local code = f_str_hextbl[h1-47] * 0x1000 +
               f_str_hextbl[h2-47] * 0x100 +
               f_str_hextbl[h3-47] * 0x10 +
               f_str_hextbl[h4-47]
  if code == inf then
    parse_error("invalid unicode codepoint literal")
  end

  -- the fifth captured character is passed through unchanged
  local tail = rest and char(rest) or ''

  if code < 0x80 then -- 1 byte
    return char(code) .. tail
  end

  if code < 0x800 then -- 2 bytes
    local b1 = floor(code / 0x40)
    local b2 = code - b1 * 0x40
    return char(b1 + 0xC0, b2 + 0x80) .. tail
  end

  if code < 0xD800 or 0xE000 <= code then -- 3 bytes
    local b1 = floor(code / 0x1000)
    code = code - b1 * 0x1000
    local b2 = floor(code / 0x40)
    local b3 = code - b2 * 0x40
    return char(b1 + 0xE0, b2 + 0x80, b3 + 0x80) .. tail
  end

  if code < 0xDC00 then -- first half of a surrogate pair
    if f_str_surrogate_prev ~= 0 then
      -- two first halves in a row
      f_str_surrogate_prev = 0
      surrogate_first_error()
    end
    f_str_surrogate_prev = code
    if rest then
      -- something other than a backslash follows immediately
      surrogate_first_error()
    end
    return ''
  end

  -- second half of a surrogate pair
  if f_str_surrogate_prev == 0 then
    parse_error("2nd surrogate pair byte appeared without 1st")
  end
  code = 0x10000 +
         (f_str_surrogate_prev - 0xD800) * 0x400 +
         (code - 0xDC00)
  f_str_surrogate_prev = 0

  -- 4-byte UTF-8 sequence
  local b1 = floor(code / 0x40000)
  code = code - b1 * 0x40000
  local b2 = floor(code / 0x1000)
  code = code - b2 * 0x1000
  local b3 = floor(code / 0x40)
  local b4 = code - b3 * 0x40
  return char(b1 + 0xF0, b2 + 0x80, b3 + 0x80, b4 + 0x80) .. tail
end
-- Parse a JSON object in SAX mode. On entry `pos` is just past '{'.
-- Fires `sax_startobject`, then a key event and a value event per member,
-- and returns the result of `sax_endobject`.
-- Each separator is first searched for in the current chunk with a single
-- pattern match; if that fails, `spaces()` is used to continue into the
-- following chunk(s) and the separator is checked again byte-wise.
local function f_obj()
  rec_depth = rec_depth + 1
  if rec_depth > 1000 then
    parse_error('too deeply nested json (> 1000)')
  end
  sax_startobject()

  spaces()
  if byte(json, pos) == 0x7D then -- '}' right away: empty object
    pos = pos + 1
    rec_depth = rec_depth - 1
    return sax_endobject()
  end

  local closed = false
  repeat
    -- key: must start with '"'
    if byte(json, pos) ~= 0x22 then
      parse_error("not key")
    end
    pos = pos + 1
    f_str(true)

    -- colon
    local after = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos)
    if after then
      pos = after
    else
      spaces() -- the chunk may have ended; read spaces through chunks
      if byte(json, pos) ~= 0x3A then
        parse_error("no colon after a key")
      end
      pos = pos + 1
      spaces()
    end
    if pos > jsonlen then
      spaces()
    end

    -- value
    f = dispatcher[byte(json, pos)]
    pos = pos + 1
    f()

    -- comma or closing bracket
    after = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos)
    if after then
      pos = after
    else
      after = match(json, '^[ \n\r\t]*}()', pos)
      if after then
        pos = after
        closed = true
      else
        spaces() -- read spaces through chunks, then re-check byte-wise
        local c = byte(json, pos)
        pos = pos + 1
        if c == 0x2C then
          spaces()
        elseif c == 0x7D then
          closed = true
        else
          parse_error("no closing bracket of an object")
        end
      end
    end

    if not closed and pos > jsonlen then
      spaces()
    end
  until closed

  rec_depth = rec_depth - 1
  return sax_endobject()
end
-- Percent-encode a single character as '%XX'.
-- Two hex digits are always emitted: a one-digit form such as '%A' for "\n"
-- is not a valid percent-escape.
local function url_encode_char(chr)
  return string.format("%%%02X", string.byte(chr))
end

-- Percent-encode every character of `str` that is not alphanumeric (%w).
-- Returns the encoded string only.
function module.urlencode(str)
  -- parentheses drop gsub's second result (the substitution count)
  return (string.gsub(str, "[^%w]", url_encode_char))
end

-- Escape the characters that are special in XML text and attribute values.
-- Returns the escaped string only; gsub's substitution count is dropped so
-- the result is safe to pass as the last argument of another call.
function module.xmlescape(str)
  return (string.gsub(str, '["<>&]', {
    ['&'] = '&amp;',
    ['<'] = '&lt;',
    ['>'] = '&gt;',
    ['"'] = '&quot;',
  }))
end
do
local _ENV = _ENV
package.preload[ "zotero" ] = function( ... ) local arg = _G.arg;
-- Lookup of cited items: collects the requested citekeys, fetches them in one
-- request from `module.url`, and hands out the per-citekey results.
local module = {}

local utils = require('utils')
local json = require('lunajson')
-- local pl = require('pl.pretty') -- for pl.pretty.dump

local state = {
  reported = {}, -- citekeys for which a problem has already been printed
}

-- Set of citekeys to fetch; the keys of this table are the citekeys.
module.citekeys = {}

-- Build a display string from the authors of a CSL item.
-- Uses `literal` when present, otherwise `family`; authors with neither are
-- skipped. Returns nil when there is nothing to show, the single name when
-- there is one, and otherwise "A, B and C".
function module.authors(csl)
  if csl.author == nil then
    return nil
  end

  local names = {}
  for _, author in ipairs(csl.author) do
    if author.literal ~= nil then
      names[#names + 1] = author.literal
    elseif author.family ~= nil then
      names[#names + 1] = author.family
    end
  end

  if #names == 0 then
    return nil
  end
  local last = table.remove(names)
  if #names == 0 then
    return last
  end
  return table.concat({ table.concat(names, ', '), last }, ' and ')
end

-- Return `value` if it is a table, otherwise a fresh empty table.
local function table_or_empty(value)
  if type(value) == 'table' then
    return value
  end
  return {}
end

-- Fetch all collected citekeys once and cache the decoded response in
-- `state.fetched`. On any failure a message is printed and the cache stays an
-- empty result, so later lookups report the keys as missing instead of
-- crashing.
local function load_items()
  if state.fetched ~= nil then
    return
  end

  state.fetched = {
    items = {},
    errors = {},
    zotero = {},
  }

  local citekeys = {}
  for k, _ in pairs(module.citekeys) do
    citekeys[#citekeys + 1] = k
  end
  if #citekeys == 0 then
    return
  end

  local url = module.url .. utils.urlencode(table.concat(citekeys, ','))

  -- NOTE(review): fetch may raise rather than return nil on a failed
  -- request -- guarded with pcall either way; confirm against the pandoc docs.
  local fetch_ok, mt, contents = pcall(pandoc.mediabag.fetch, url, '.')
  if not fetch_ok then
    -- on failure pcall's second result (here `mt`) is the error value
    print('could not fetch Zotero items: ' .. tostring(mt))
    return
  end

  local ok, fetched = pcall(json.decode, contents)
  if not ok or type(fetched) ~= 'table' then
    -- `contents` can be nil here, so it must not be concatenated directly
    print('could not fetch Zotero items: ' .. tostring(contents))
    return
  end

  -- make sure the sections indexed by module.get exist
  fetched.items = table_or_empty(fetched.items)
  fetched.errors = table_or_empty(fetched.errors)
  fetched.zotero = table_or_empty(fetched.zotero)
  state.fetched = fetched
end

-- Look up one citekey.
-- Returns `items[citekey], zotero[citekey]` from the fetched response, or nil
-- when the key had an error or is unknown. Each problem is printed once per
-- citekey; later calls for that key return nil silently.
function module.get(citekey)
  load_items()

  if state.reported[citekey] ~= nil then
    return nil
  end

  local fetched = state.fetched

  if fetched.errors[citekey] ~= nil then
    state.reported[citekey] = true
    print('@' .. citekey .. ': ' .. tostring(fetched.errors[citekey]))
    return nil
  end

  if fetched.items[citekey] == nil then
    state.reported[citekey] = true
    print('@' .. citekey .. ' not in Zotero')
    return nil
  end

  return fetched.items[citekey], fetched.zotero[citekey]
end

return module
end
end