/
tomelr.el
486 lines (406 loc) · 18 KB
/
tomelr.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
;;; tomelr.el --- Convert S-expressions to TOML -*- lexical-binding: t -*-
;; Copyright (C) 2022 Free Software Foundation, Inc.
;; Author: Kaushal Modi <kaushal.modi@gmail.com>
;; Version: 0.4.1
;; Package-Requires: ((emacs "26.3") (map "3.2.1") (seq "2.23"))
;; Keywords: data, tools, toml, serialization, config
;; URL: https://github.com/kaushalmodi/tomelr/
;; This file is not part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; tomelr.el is a library for converting Lisp data expressions or
;; S-expressions to TOML format (https://toml.io/en/).
;; It has one entry point `tomelr-encode' which accepts a Lisp data
;; expression, usually in an alist or plist form, and return a string
;; representing the TOML serializaitno format.
;; Example using an alist as input:
;;
;; (tomelr-encode '((title . "My title")
;; (author . "Me")
;; (params . ((foo . 123)))))
;;
;; Output:
;;
;; title = "My title"
;; author = "Me"
;; [params]
;; foo = 123
;; Example using an plist as input:
;;
;; (tomelr-encode '(:title "My title"
;; :author "Me"
;; :params (:foo 123)))
;;
;; Above snippet will give as the same TOML output shown above.
;; See the README.org on https://github.com/kaushalmodi/tomelr/ for
;; more examples and package details.
;;; Code:
(require 'json)
(require 'map)
(require 'subr-x) ;For `string-trim' on Emacs versions 27.2 and older
;;; Variables
(defvar tomelr-false '(:false 'false)
"S-exp values to be interpreted as TOML `false'.")
(defvar tomelr-encoding-default-indentation " "
"String used for a single indentation level during encoding.
This value is repeated for each further nested element.")
(defvar tomelr-coerce-to-types '(boolean integer)
"List of TOML types to which the TOML strings will be attempted to be coerced.
Valid symbols that can be present in this list: boolean, integer, float
For example, if this list contains `boolean' and if a string
value is exactly \"true\", it will coerce to TOML boolean
`true'.")
(defvar tomelr-indent-multi-line-strings nil
"Indent the multi-line TOML strings when non-nil.
This option injects spaces after each newline to present the
multi-line strings in a more readable format.
*Note: This option should be set to non-nil only if the TOML
string data is insensitive to horizontal space. Good examples of
this would be Org, Markdown or HTML strings.")
;;;; Internal Variables
(defvar tomelr--print-indentation-prefix "\n"
"String used to start indentation during encoding.")
(defvar tomelr--print-indentation-depth -1
"Current indentation level during encoding.
Dictates repetitions of `tomelr-encoding-default-indentation'.")
(defvar tomelr--print-table-hierarchy ()
"Internal variable used to save TOML Table hierarchies.
This variable is used for both TOML Tables and Arrays of TOML
Tables.")
(defvar tomelr--print-keyval-separator " = "
"String used to separate key-value pairs during encoding.")
(defvar tomelr--date-time-regexp
(concat "\\`[[:digit:]]\\{4\\}-[[:digit:]]\\{2\\}-[[:digit:]]\\{2\\}"
"\\(?:[T ][[:digit:]]\\{2\\}:[[:digit:]]\\{2\\}:[[:digit:]]\\{2\\}\\(?:\\.[[:digit:]]+\\)*"
"\\(?:Z\\|[+-][[:digit:]]\\{2\\}:[[:digit:]]\\{2\\}\\)*\\)*\\'")
"Regexp to match RFC 3339 formatted date-time with offset.
- https://toml.io/en/v1.0.0#offset-date-time
- https://tools.ietf.org/html/rfc3339#section-5.8
Examples:
1979-05-27
1979-05-27T07:32:00Z
1979-05-27 07:32:00Z
1979-05-27T00:32:00-07:00
1979-05-27T00:32:00.999999+04:00.")
;;; Error conditions
(define-error 'tomelr-error "Unknown TOML error")
(define-error 'tomelr-key-format "Bad TOML object key" 'tomelr-error)
;;; Utilities
(defmacro tomelr--with-output-to-string (&rest body)
"Eval BODY in a temporary buffer bound to `standard-output'.
Return the resulting buffer contents as a string."
(declare (indent 0) (debug t))
`(with-output-to-string
(with-current-buffer standard-output
;; This affords decent performance gains.
(setq-local inhibit-modification-hooks t)
,@body)))
(defmacro tomelr--with-indentation (&rest body)
"Eval BODY with the TOML encoding nesting incremented by one step.
This macro sets up appropriate variable bindings for
`tomelr--print-indentation' to produce the correct indentation."
(declare (debug t) (indent 0))
`(let ((tomelr--print-indentation-depth (1+ tomelr--print-indentation-depth)))
,@body))
(defun tomelr--print-indentation ()
"Insert the current indentation for TOML encoding at point."
(insert tomelr--print-indentation-prefix)
(dotimes (_ tomelr--print-indentation-depth)
(insert tomelr-encoding-default-indentation)))
;;; Encoding
;;;; Booleans
(defun tomelr--print-boolean (object)
"Insert TOML boolean true or false at point if OBJECT is a boolean.
Return nil if OBJECT is not recognized as a TOML boolean."
(prog1 (setq object (cond ((or
(eq object t)
(and (member 'boolean tomelr-coerce-to-types)
(member object '("true" true))))
"true")
((or
(member object tomelr-false)
(and (member 'boolean tomelr-coerce-to-types)
(member object '("false" false))))
"false")))
(and object (insert object))))
;;;; Strings
(defun tomelr--print-string (string)
"Insert a TOML representation of STRING at point.
Return the same STRING passed as input."
;; (message "[tomelr--print-string DBG] string = `%s'" string)
(let ((special-chars '((?b . ?\b) ;U+0008
(?f . ?\f) ;U+000C
(?\\ . ?\\)))
(special-chars-re (rx (in ?\" ?\\ cntrl ?\u007F))) ;cntrl is same as (?\u0000 . ?\u001F)
;; Use multi-line string quotation if the string contains a "
;; char or a newline - """STRING""".
(multi-line (string-match-p "\n\\|\"" string))
begin-q end-q)
(cond
(multi-line
;; From https://toml.io/en/v1.0.0#string, Any Unicode
;; character may be used except those that must be escaped:
;; backslash and the control characters other than tab, line
;; feed, and carriage return (U+0000 to U+0008, U+000B,
;; U+000C, U+000E to U+001F, U+007F).
(setq special-chars-re (rx (in ?\\
(?\u0000 . ?\u0008)
?\u000B ?\u000C
(?\u000E . ?\u001F)
?\u007F)))
(setq begin-q "\"\"\"\n")
(setq end-q "\"\"\"")
(when tomelr-indent-multi-line-strings
(let ((indentation (let ((tmp ""))
(dotimes (_ (1+ tomelr--print-indentation-depth))
(setq tmp (concat tmp tomelr-encoding-default-indentation)))
tmp)))
(setq string
(concat
indentation ;Indent the first line in the multi-line string
(replace-regexp-in-string
"\\(\n\\)\\([^\n]\\)" ;Don't indent blank lines
(format "\\1%s\\2" indentation)
string)
"\n" indentation ;Indent the closing """ at the end of the multi-line string
)))))
(t ;Basic quotation "STRING"
(push '(?\" . ?\") special-chars)
(push '(?t . ?\t) special-chars) ;U+0009
(push '(?n . ?\n) special-chars) ;U+000A
(push '(?r . ?\r) special-chars) ;U+000D
(setq begin-q "\"")
(setq end-q begin-q)))
(and begin-q (insert begin-q))
(goto-char (prog1 (point) (princ string)))
(while (re-search-forward special-chars-re nil :noerror)
(let ((char (preceding-char)))
(delete-char -1)
(insert ?\\ (or
;; Escape special characters
(car (rassq char special-chars))
;; Fallback: UCS code point in \uNNNN form.
(format "u%04x" char)))))
(and end-q (insert end-q))
string))
(defun tomelr--print-stringlike (object &optional key-type)
"Insert OBJECT encoded as a TOML string at point.
Possible values of KEY-TYPE are `normal-key', `table-key',
`table-array-key', or nil.
Return nil if OBJECT cannot be encoded as a TOML string."
;; (message "[tomelr--print-stringlike DBG] object = %S (type = %S) key type = %S"
;; object (type-of object) key-type)
(let ((str (cond ;; Object is a normal, TT or TTA key
(key-type
(cond
((stringp object)
(if (string-match-p "\\`[A-Za-z0-9_-]+\\'" object)
;; https://toml.io/en/v1.0.0#keys
;; Bare keys may only contain ASCII letters, ASCII digits,
;; underscores, and dashes (A-Za-z0-9_-).
object
;; Wrap string in double-quotes if it
;; doesn't contain only A-Za-z0-9_- chars.
(format "\"%s\"" object)))
;; Plist keys as in (:foo 123)
((keywordp object)
(string-trim-left (symbol-name object) ":"))
;; Alist keys as in ((foo . 123))
((symbolp object)
(symbol-name object))
(t
(user-error "[tomelr--print-stringlike] Unhandled case of key-type"))))
;; Cases where object is a key value.
((symbolp object)
(symbol-name object))
((stringp object)
object))))
;; (message "[tomelr--print-stringlike DBG] str = %S" str)
(when (member key-type '(table-key table-array-key))
;; (message "[tomelr--print-stringlike DBG] %S is symbol, type = %S, depth = %d"
;; object key-type tomelr--print-indentation-depth)
(if (null (nth tomelr--print-indentation-depth tomelr--print-table-hierarchy))
(setq tomelr--print-table-hierarchy
(append tomelr--print-table-hierarchy (list str)))
;; Throw away table keys collected at higher depths, if
;; any, from earlier runs of this function.
(setq tomelr--print-table-hierarchy
(seq-take tomelr--print-table-hierarchy (1+ tomelr--print-indentation-depth)))
(setf (nth tomelr--print-indentation-depth tomelr--print-table-hierarchy) str))
;; (message "[tomelr--print-stringlike DBG] table hier: %S" tomelr--print-table-hierarchy)
)
(cond
;; TT keys
((equal key-type 'table-key)
(princ (format "[%s]" (string-join tomelr--print-table-hierarchy "."))))
;; TTA keys
((equal key-type 'table-array-key)
(princ (format "[[%s]]" (string-join tomelr--print-table-hierarchy "."))))
;; Normal keys (Alist and Plist keys)
((equal key-type 'normal-key)
(princ str))
(str
(cond
((or
;; RFC 3339 Date/Time
(string-match-p tomelr--date-time-regexp str)
;; Coercing
;; Integer that can be stored in the system as a fixnum.
;; For example, if `object' is "10040216507682529280" that
;; needs more than 64 bits to be stored as a signed
;; integer, it will be automatically stored as a float.
;; So (integerp (string-to-number object)) will return nil
;; [or `fixnump' instead of `integerp' in Emacs 27 or
;; newer].
;; https://github.com/toml-lang/toml#integer
;; Integer examples: 7, +7, -7, 7_000
(and (or (symbolp object)
(member 'integer tomelr-coerce-to-types))
(string-match-p "\\`[+-]?[[:digit:]_]+\\'" str)
(if (functionp #'fixnump) ;`fixnump' and `bignump' get introduced in Emacs 27.x
(fixnump (string-to-number str))
;; On older Emacsen, `integerp' behaved the same as the
;; new `fixnump'.
(integerp (string-to-number str)))))
(princ str))
(t
(tomelr--print-string str))))
(t
nil))))
(defun tomelr--print-key (key &optional key-type)
"Insert a TOML key representation of KEY at point.
KEY-TYPE represents the type of key: `normal-key', `table-key' or
`table-array-key'.
Signal `tomelr-key-format' if it cannot be encoded as a string."
(or (tomelr--print-stringlike key key-type)
(signal 'tomelr-key-format (list key))))
;;;; Objects
;; `tomelr-alist-p' is a slightly modified version of `json-alist-p'.
;; It fixes this scenario: (json-alist-p '((:a 1))) return t, which is wrong.
;; '((:a 1)) is an array of plist format maps, and not an alist.
;; (tomelr-alist-p '((:a 1))) returns nil as expected.
(defun tomelr-alist-p (list)
"Non-nil if and only if LIST is an alist with simple keys."
(declare (pure t) (side-effect-free error-free))
(while (and (consp (car-safe list))
(not (json-plist-p (car-safe list)))
(atom (caar list)))
;; (message "[tomelr-alist-p DBG] INSIDE list = %S, car = %S, caar = %S, atom of caar = %S"
;; list (car-safe list) (caar list) (atom (caar list)))
(setq list (cdr list)))
;; (message "[tomelr-alist-p DBG] out 2 list = %S, is alist? %S" list (null list))
(null list))
(defun tomelr-toml-table-p (object)
"Return non-nil if OBJECT can represent a TOML Table.
Recognize both alist and plist format maps as TOML Tables.
Examples:
- Alist format: \\='((a . 1) (b . \"foo\"))
- Plist format: \\='(:a 1 :b \"foo\")"
(or (tomelr-alist-p object)
(json-plist-p object)))
(defun tomelr--print-pair (key val)
"Insert TOML representation of KEY - VAL pair at point."
(let ((key-type (cond
((tomelr-toml-table-p val) 'table-key)
((tomelr-toml-table-array-p val) 'table-array-key)
(t 'normal-key))))
;; (message "[tomelr--print-pair DBG] key = %S, val = %S, key-type = %S"
;; key val key-type)
(when val ;Don't print the key if val is nil
(tomelr--print-indentation) ;Newline before each key in a key-value pair
(tomelr--print-key key key-type)
;; Skip putting the separator for table and table array keys.
(unless (member key-type '(table-key table-array-key))
(insert tomelr--print-keyval-separator))
(tomelr--print val))))
(defun tomelr--print-map (map)
"Insert a TOML representation of MAP at point.
This works for any MAP satisfying `mapp'."
;; (message "[tomelr--print-map DBG] map = %S" map)
(unless (map-empty-p map)
(tomelr--with-indentation
(map-do #'tomelr--print-pair map))))
;;;; Lists (including alists and plists)
(defun tomelr--print-list (list)
"Insert a TOML representation of LIST at point."
(cond ((tomelr-toml-table-p list)
(tomelr--print-map list))
((listp list)
(tomelr--print-array list))
((signal 'tomelr-error (list list)))))
;;;; Arrays
(defun tomelr-toml-table-array-p (object)
"Return non-nil if OBJECT can represent a TOML Table Array.
Definition of a TOML Table Array (TTA):
- OBJECT is TTA if it is of type ((TT1) (TT2) ..) where each element is a
TOML Table (TT)."
(when (or (listp object)
(vectorp object))
(seq-every-p
(lambda (elem) (tomelr-toml-table-p elem))
object)))
(defun tomelr--print-tta-key ()
"Print TOML Table Array key."
;; (message "[tomelr--print-array DBG] depth = %d" tomelr--print-indentation-depth)
;; Throw away table keys collected at higher depths, if
;; any, from earlier runs of this function.
(setq tomelr--print-table-hierarchy
(seq-take tomelr--print-table-hierarchy (1+ tomelr--print-indentation-depth)))
(tomelr--print-indentation)
(insert
(format "[[%s]]" (string-join tomelr--print-table-hierarchy "."))))
(defun tomelr--print-array (array)
"Insert a TOML representation of ARRAY at point."
;; (message "[tomelr--print-array DBG] array = %S, TTA = %S"
;; array (tomelr-toml-table-array-p array))
(cond
((tomelr-toml-table-array-p array)
(unless (= 0 (length array))
(let ((first t))
(mapc (lambda (elt)
(if first
(setq first nil)
(tomelr--print-tta-key))
(tomelr--print elt))
array))))
(t
(insert "[")
(unless (= 0 (length array))
(tomelr--with-indentation
(let ((first t))
(mapc (lambda (elt)
(if first
(setq first nil)
(insert ", "))
(tomelr--print elt))
array))))
(insert "]"))))
;;;; Print wrapper
(defun tomelr--print (object)
"Insert a TOML representation of OBJECT at point.
See `tomelr-encode' that returns the same as a string."
(cond ((tomelr--print-boolean object))
((listp object) (tomelr--print-list object))
((tomelr--print-stringlike object))
((numberp object) (prin1 object))
((arrayp object) (tomelr--print-array object))
((signal 'tomelr-error (list object)))))
;;; User API
(defun tomelr-encode (object)
"Return a TOML representation of OBJECT as a string.
If an error is detected during encoding, an error based on
`tomelr-error' is signaled."
(setq tomelr--print-table-hierarchy ())
(string-trim
(tomelr--with-output-to-string (tomelr--print object))))
(provide 'tomelr)
;;; tomelr.el ends here