From c3f741768249a07890c6185726fda19dfbbe6415 Mon Sep 17 00:00:00 2001 From: rcmerci Date: Mon, 20 Mar 2023 13:41:13 +0800 Subject: [PATCH] enhance: add mldoc ast schema (#8829) * enhance: add mldoc ast schema * Add bb task to validate mldoc Also move mldoc schema to make room for other schemas --------- Co-authored-by: Gabriel Horner Co-authored-by: Tienson Qin --- bb.edn | 3 + deps/graph-parser/.carve/ignore | 2 + .../src/logseq/graph_parser/mldoc.cljc | 4 +- .../src/logseq/graph_parser/schema/mldoc.cljc | 220 ++++++++++++++++++ scripts/src/logseq/tasks/malli.clj | 27 +++ src/main/frontend/handler/export/common.cljs | 9 +- src/main/frontend/handler/export/text.cljs | 3 +- 7 files changed, 262 insertions(+), 6 deletions(-) create mode 100644 deps/graph-parser/src/logseq/graph_parser/schema/mldoc.cljc diff --git a/bb.edn b/bb.edn index ef3226ba780..5e279fcc4c4 100644 --- a/bb.edn +++ b/bb.edn @@ -69,6 +69,9 @@ dev:validate-global-config-edn logseq.tasks.malli/validate-global-config-edn + dev:validate-ast + logseq.tasks.malli/validate-ast + dev:lint logseq.tasks.dev/lint diff --git a/deps/graph-parser/.carve/ignore b/deps/graph-parser/.carve/ignore index 327ee6c7a2d..1dd02aab9c3 100644 --- a/deps/graph-parser/.carve/ignore +++ b/deps/graph-parser/.carve/ignore @@ -44,3 +44,5 @@ logseq.graph-parser.util/remove-nils logseq.graph-parser.text/get-file-basename ;; API logseq.graph-parser.mldoc/mldoc-link? 
+;; public var
+logseq.graph-parser.schema.mldoc/block-ast-coll-schema
diff --git a/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc b/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc
index 676d55b8b26..221461a394b 100644
--- a/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc
+++ b/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc
@@ -13,7 +13,8 @@
             [logseq.graph-parser.utf8 :as utf8]
             [clojure.string :as string]
             [logseq.graph-parser.util :as gp-util]
-            [logseq.graph-parser.config :as gp-config]))
+            [logseq.graph-parser.config :as gp-config]
+            [logseq.graph-parser.schema.mldoc :as mldoc-schema]))
 
 (defonce parseJson (gobj/get Mldoc "parseJson"))
 (defonce parseInlineJson (gobj/get Mldoc "parseInlineJson"))
@@ -117,6 +118,7 @@
         original-ast))))
 
 (defn ->edn
+  {:malli/schema [:=> [:cat :string :string] mldoc-schema/block-ast-with-pos-coll-schema]} ; fn schema: (string, string) -> sequence of [block-ast pos] entries
   [content config]
   (if (string? content)
     (try
diff --git a/deps/graph-parser/src/logseq/graph_parser/schema/mldoc.cljc b/deps/graph-parser/src/logseq/graph_parser/schema/mldoc.cljc
new file mode 100644
index 00000000000..22afd0f1697
--- /dev/null
+++ b/deps/graph-parser/src/logseq/graph_parser/schema/mldoc.cljc
@@ -0,0 +1,220 @@
+(ns logseq.graph-parser.schema.mldoc
+  "Malli schema for mldoc AST")
+
+(defn- field-optional-and-maybe-nil ; map entry for a key that may be absent, or present with a nil value
+  [k v]
+  [k {:optional true} [:maybe v]])
+
+(def pos-schema ; map of integer :start_pos and :end_pos
+  [:map
+   [:start_pos :int]
+   [:end_pos :int]])
+
+(def nested-link-schema ; recursive: children are "Label" strings or further "Nested_link" maps
+  [:schema {:registry {::nested-link
+                       [:map
+                        [:content :string]
+                        [:children [:sequential [:or
+                                                 [:tuple [:= "Label"] :string]
+                                                 [:tuple [:= "Nested_link"] [:ref ::nested-link]]]]]]}}
+   ::nested-link])
+
+(def timestamp-schema ; :time and :repetition may be absent or nil; the other keys are required
+  [:map
+   [:date [:map
+           [:year :int]
+           [:month :int]
+           [:day :int]]]
+   [:wday :string]
+   (field-optional-and-maybe-nil
+    :time [:map
+           [:hour :int]
+           [:min :int]])
+   (field-optional-and-maybe-nil
+    :repetition
+    :any)
+   [:active :boolean]])
+
+(def ^:private time-range-schema ; refs ::timestamp, so it only resolves inside a registry that defines it
+  [:map
+   [:start [:ref ::timestamp]]
+   [:stop [:ref ::timestamp]]])
+
+(def ^:private link-schema ; refs ::inline; registered as ::link in inline-ast-schema
+  [:map
+   [:url [:or
+          [:cat [:= "File"] :string]
+          [:cat [:= "Search"] :string]
+          [:cat [:= "Complex"] [:map
+                                [:protocol :string]
+                                [:link :string]]]
+          [:cat [:= "Page_ref"] :string]
+          [:cat [:= "Block_ref"] :string]
+          [:cat [:= "Embed_data"] :string]]]
+   [:label [:sequential [:ref ::inline]]]
+   (field-optional-and-maybe-nil :title :string)
+   [:full_text :string]
+   [:metadata :string]])
+
+(def latex-fragment-schema ; ["Inline" s] or ["Displayed" s]
+  [:or
+   [:tuple [:= "Inline"] :string]
+   [:tuple [:= "Displayed"] :string]])
+
+(def inline-ast-schema ; inline nodes: tuples tagged by a leading string such as "Plain" or "Link"
+  [:schema {:registry {::timestamp timestamp-schema
+                       ::time-range time-range-schema
+                       ::link link-schema
+                       ::inline
+                       [:or
+                        [:tuple [:= "Emphasis"]
+                         [:tuple
+                          [:tuple [:enum "Italic" "Bold" "Underline" "Strike_through" "Highlight"]]
+                          [:sequential [:ref ::inline]]]]
+
+                        [:tuple [:= "Break_Line"]]
+                        [:tuple [:= "Hard_Break_Line"]]
+                        [:tuple [:= "Verbatim"] :string]
+                        [:tuple [:= "Code"] :string]
+                        [:tuple [:= "Tag"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Spaces"] :string]
+                        [:tuple [:= "Plain"] :string]
+                        [:tuple [:= "Link"] [:ref ::link]]
+                        [:tuple [:= "Nested_link"] nested-link-schema]
+                        [:tuple [:= "Target"] :string]
+                        [:tuple [:= "Subscript"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Superscript"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Footnote_Reference"] [:map
+                                                           [:id :int]
+                                                           [:name :string]
+                                                           (field-optional-and-maybe-nil
+                                                            :definition [:sequential [:ref ::inline]])]]
+                        [:tuple [:= "Cookie"] [:or
+                                               [:tuple [:= "Percent"] :int]
+                                               [:catn [:label [:= "Absolute"]] [:current :int] [:total :int]]]]
+                        [:tuple [:= "Latex_Fragment"] latex-fragment-schema]
+                        [:tuple [:= "Macro"] [:map
+                                              [:name :string]
+                                              [:arguments [:sequential :string]]]]
+                        [:tuple [:= "Entity"] [:map
+                                               [:name :string]
+                                               [:latex :string]
+                                               [:latex_mathp :boolean]
+                                               [:html :string]
+                                               [:ascii :string]
+                                               [:unicode :string]]]
+                        [:tuple [:= "Timestamp"] [:or
+                                                  [:tuple [:= "Scheduled"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Deadline"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Date"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Closed"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Clock"] [:or
+                                                                        [:tuple [:= "Started"] [:ref ::timestamp]]
+                                                                        [:tuple [:= "Stopped"] [:ref ::time-range]]]]
+                                                  [:tuple [:= "Range"] [:ref ::time-range]]]]
+                        [:tuple [:= "Radio_Target"] :string]
+                        [:tuple [:= "Export_Snippet"] :string :string]
+                        [:tuple [:= "Inline_Source_Block"] [:map
+                                                            [:language :string]
+                                                            [:options :string]
+                                                            [:code :string]]]
+                        [:tuple [:= "Email"] [:map
+                                              [:local_part :string]
+                                              [:domain :string]]]
+                        [:tuple [:= "Inline_Hiccup"] :string]
+                        [:tuple [:= "Inline_Html"] :string]]}}
+   ::inline])
+
+(def ^:private list-item-schema ; refs ::block, ::list-item and ::inline from block-ast-schema's registry
+  [:map
+   [:content [:sequential [:ref ::block]]]
+   [:items [:sequential [:ref ::list-item]]]
+   (field-optional-and-maybe-nil
+    :number :int)
+   [:name [:sequential [:ref ::inline]]]
+   (field-optional-and-maybe-nil
+    :checkbox :boolean)
+   [:indent :int]
+   [:ordered :boolean]])
+
+(def ^:private heading-schema ; refs ::inline; used for the "Heading" block variant
+  [:map
+   [:title [:sequential [:ref ::inline]]]
+   [:tags [:sequential :string]]
+   (field-optional-and-maybe-nil
+    :marker :string)
+   [:level :int]
+   (field-optional-and-maybe-nil
+    :numbering [:sequential :int])
+   (field-optional-and-maybe-nil
+    :priority :string)
+   [:anchor :string]
+   [:meta :map]
+   (field-optional-and-maybe-nil
+    :size :int)])
+
+(def block-ast-schema ; block nodes, each tagged by a leading string; ::block and ::list-item are mutually recursive
+  [:schema {:registry {::inline inline-ast-schema
+                       ::list-item list-item-schema
+                       ::block
+                       [:or
+                        [:tuple [:= "Paragraph"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Paragraph_Sep"] :int]
+                        [:tuple [:= "Heading"] heading-schema]
+                        [:tuple [:= "List"] [:sequential [:ref ::list-item]]]
+                        [:tuple [:= "Directive"] :string :string]
+                        [:tuple [:= "Results"]]
+                        [:tuple [:= "Example"] [:sequential :string]]
+                        [:tuple [:= "Src"] [:map
+                                            [:lines [:sequential :string]]
+                                            (field-optional-and-maybe-nil
+                                             :language :string)
+                                            (field-optional-and-maybe-nil
+                                             :options [:sequential :string])
+                                            [:pos_meta pos-schema]]]
+                        [:tuple [:= "Quote"] [:sequential [:ref ::block]]]
+                        [:catn
+                         [:label [:= "Export"]]
+                         [:type :string]
+                         [:options [:maybe [:sequential :string]]]
+                         [:content :string]]
+                        [:tuple [:= "CommentBlock"] [:sequential :string]]
+                        [:catn
+                         [:label [:= "Custom"]]
+                         [:type :string]
+                         [:options [:maybe :string]]
+                         [:result [:sequential [:ref ::block]]]
+                         [:content :string]]
+                        [:tuple [:= "Latex_Fragment"] latex-fragment-schema]
+                        [:catn
+                         [:label [:= "Latex_Environment"]]
+                         [:name :string]
+                         [:options [:maybe :string]]
+                         [:content :string]]
+                        [:tuple [:= "Displayed_Math"] :string]
+                        [:tuple [:= "Drawer"] :string [:sequential :string]]
+                        [:tuple [:= "Property_Drawer"]
+                         [:sequential
+                          [:catn [:k :string] [:v :string] [:other-info [:sequential [:ref ::inline]]]]]]
+                        [:tuple [:= "Footnote_Definition"] :string [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Horizontal_Rule"]]
+                        [:tuple [:= "Table"]
+                         [:map
+                          (field-optional-and-maybe-nil
+                           :header [:sequential [:sequential [:ref ::inline]]])
+                          [:groups [:sequential [:sequential [:sequential [:sequential [:ref ::inline]]]]]]
+                          [:col_groups [:sequential :int]]]]
+                        [:tuple [:= "Comment"] :string]
+                        [:tuple [:= "Raw_Html"] :string]
+                        [:tuple [:= "Hiccup"] :string]
+
+                        ;; this block type is not from mldoc,
+                        ;; but from `logseq.graph-parser.mldoc/collect-page-properties`
+                        [:tuple [:= "Properties"] [:sequential :any]]]}}
+   ::block])
+
+(def block-ast-with-pos-coll-schema ; sequence of [block-ast pos] entries; pos may be nil
+  [:sequential [:cat block-ast-schema [:maybe pos-schema]]])
+
+(def block-ast-coll-schema ; sequence of block asts without positions
+  [:sequential block-ast-schema])
diff --git a/scripts/src/logseq/tasks/malli.clj b/scripts/src/logseq/tasks/malli.clj
index 3dbaa7bc375..05051ec7a74 100644
--- a/scripts/src/logseq/tasks/malli.clj
+++ b/scripts/src/logseq/tasks/malli.clj
@@ -5,6 +5,8 @@
             [frontend.schema.handler.plugin-config :as plugin-config-schema]
             [frontend.schema.handler.global-config :as global-config-schema]
             [frontend.schema.handler.repo-config :as repo-config-schema]
+            [logseq.graph-parser.schema.mldoc :as mldoc-schema]
+            [babashka.fs :as fs]
             [clojure.pprint :as pprint]
             [clojure.edn :as edn]))
 
@@ -43,3 +45,28 @@
   "Validate a global config.edn"
   [file]
   (validate-file-with-schema file repo-config-schema/Config-edn))
+
+(defn validate-ast
+  "Validate mldoc ast(s) in a file or as an EDN arg"
+  [file-or-edn]
+  (let [edn (edn/read-string
+             (if (fs/exists? file-or-edn) (slurp file-or-edn) file-or-edn))] ; existing path => read the file, otherwise treat the arg itself as EDN text
+    (if (and (sequential? edn) (:ast (first edn)))
+      ;; Validate multiple asts in the format [{:file "" :ast []} ...]
+      ;; Produced by https://github.com/logseq/nbb-logseq/tree/main/examples/from-js#graph_astmjs
+      (do
+        (println "Validating" (count edn) "files...")
+        (if-let [errors-by-file (seq (keep
+                                      #(when-let [errors (m/explain mldoc-schema/block-ast-with-pos-coll-schema (:ast %))] ; keep + when-let: only entries with a non-nil explanation are collected
+                                         {:file (:file %)
+                                          :errors errors})
+                                      edn))]
+          (do
+            (println "Found errors:")
+            (pprint/pprint errors-by-file))
+          (println "All files valid!")))
+      (if-let [errors (m/explain mldoc-schema/block-ast-with-pos-coll-schema edn)] ; otherwise the whole input is validated as a single ast
+        (do
+          (println "Found errors:")
+          (pprint/pprint errors))
+        (println "Valid!")))))
diff --git a/src/main/frontend/handler/export/common.cljs b/src/main/frontend/handler/export/common.cljs
index 968cc389d45..5512fef6016 100644
--- a/src/main/frontend/handler/export/common.cljs
+++ b/src/main/frontend/handler/export/common.cljs
@@ -106,10 +106,11 @@
 (defn- update-level-in-block-ast-coll
   [block-ast-coll origin-level]
   (mapv
-   (fn [[ast-type ast-content]]
-     (if (= ast-type "Heading")
-       [ast-type (update ast-content :level #(+ (dec %) origin-level))]
-       [ast-type ast-content]))
+   (fn [block-ast]
+     (let [[ast-type ast-content] block-ast]
+       (if (= ast-type "Heading")
+         [ast-type (update ast-content :level #(+ (dec %) origin-level))]
+         block-ast))) ; return non-heading nodes as-is so elements beyond the first two are kept
    block-ast-coll))
 
 (defn- plain-indent-inline-ast
diff --git a/src/main/frontend/handler/export/text.cljs b/src/main/frontend/handler/export/text.cljs
index 24afbe6d87e..28fb94136d4 100644
--- a/src/main/frontend/handler/export/text.cljs
+++ b/src/main/frontend/handler/export/text.cljs
@@ -7,6 +7,7 @@
             [frontend.handler.export.common :as common :refer
              [*state* indent newline* raw-text simple-ast-malli-schema
               simple-asts->string space]]
+            [logseq.graph-parser.schema.mldoc :as mldoc-schema]
             [frontend.state :as state]
             [frontend.util :as util :refer [concatv mapcatv removev]]
             [goog.dom :as gdom]
@@ -320,7 +321,7 @@
                  (indent-with-2-spaces (dec current-level)))))])
 
 ;; {:malli/schema ...} only works on public vars, so use m/=> here
-(m/=> block-ast->simple-ast [:=> [:cat [:sequential :any]] [:sequential simple-ast-malli-schema]])
+(m/=> block-ast->simple-ast [:=> [:cat mldoc-schema/block-ast-schema] [:sequential simple-ast-malli-schema]]) ; the single argument must be one mldoc block ast
 (defn- block-ast->simple-ast
   [block]
   (let [newline-after-block? (get-in *state* [:export-options :newline-after-block])]