This repository has been archived by the owner on Apr 29, 2023. It is now read-only.
/
html.cljc
151 lines (143 loc) · 6.33 KB
/
html.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
(ns oc.lib.html
"Functions related to processing HTML."
(:require [cuerdas.core :as str]
#?(:clj [jsoup.soup :as soup]))
#?(:clj (:import [org.owasp.html HtmlPolicyBuilder Sanitizers])))
(defn- thumbnail-elements
  "Select the elements of `body` that can serve as a thumbnail source.

  Matches `img` tags that do not carry the `emojione` class, plus `iframe`s.
  When `exclude-gifs?` is truthy, images whose `data-media-type` is
  `image/gif` are left out as well.

  Returns a map with:
    :elements  the matched elements (result of jsoup `.select` on clj, of a
               jQuery lookup on cljs)
    :count     how many elements were matched"
  [body exclude-gifs?]
  (let [selector (if-not exclude-gifs?
                   "img:not(.emojione), iframe"
                   "img:not(.emojione):not([data-media-type='image/gif']), iframe")
        matched #?(:clj
                   (.select (soup/parse body) selector)
                   :cljs
                   ;; Wrap the body in a div so the selector is evaluated
                   ;; against a single jQuery context.
                   (js->clj (js/$ selector (js/$ (str "<div>" body "</div>")))))]
    {:elements matched
     :count #?(:clj (count matched)
               :cljs (.-length matched))}))
(defn- $el
  "Return `node` in a form that exposes an `attr` method.

  On clj the value is returned unchanged; on cljs it is wrapped with jQuery."
  [node]
  #?(:cljs (js/$ node)
     :clj node))
(defn- tag-name
  "Return the tag name of `node`.

  Calls the `tagName` method on clj and reads the `tagName` property on cljs."
  [node]
  #?(:cljs (.-tagName node)
     :clj (.tagName node)))
(defn first-body-thumbnail
  "Given an entry body, return the first thumbnail available, or nil when the
  body contains no thumbnail candidate.

  Candidates are the elements returned by `thumbnail-elements` (`img` and
  `iframe` tags). Only the first candidate is inspected:
    - for an `img`:  {:type \"image\" :thumbnail <data-thumbnail, else src>}
    - otherwise:     {:type <data-media-type> :thumbnail <data-thumbnail>}

  This relies on jQuery objects and jsoup elements both exposing an `attr`
  method.

  Arguments:
    html-body      the HTML string to inspect
    exclude-gifs?  when truthy, GIF images are not considered (defaults to true)"
  ([html-body] (first-body-thumbnail html-body true))
  ([html-body exclude-gifs?]
   (let [{els-count :count thumb-els :elements} (thumbnail-elements html-body exclude-gifs?)]
     ;; Every candidate produces a result map, so there is never a reason to
     ;; look past the first one.
     (when (pos? els-count)
       (let [el #?(:clj (nth thumb-els 0) :cljs (aget thumb-els 0))
             $el ($el el)]
         (if (= (str/lower (tag-name el)) "img")
           {:type "image"
            ;; jsoup's `attr` returns "" (not nil) for a missing attribute and
            ;; "" is truthy, so check for a non-empty value; otherwise the
            ;; `src` fallback would never be used on the JVM.
            :thumbnail (or (not-empty (.attr $el "data-thumbnail"))
                           (.attr $el "src"))}
           {:type (.attr $el "data-media-type")
            :thumbnail (.attr $el "data-thumbnail")}))))))
(def allowed-block-elements
  "Names of the HTML elements passed to `allowElements` when building
  `user-input-html-policy`."
  ["span"
   "img"
   "a"
   "iframe"
   "pre"
   "code"
   "div"
   "mark"])
#?(:clj
   (def user-input-html-policy
     "Policy factory (built with OWASP's `HtmlPolicyBuilder`) used to sanitize
     HTML authored by users.

     On top of the builder's common block/inline elements, styling and standard
     URL protocols, it allows the elements in `allowed-block-elements` with a
     per-element attribute allow-list. `iframe` `src` values are only accepted
     when they match an https URL on YouTube, Vimeo's player or Loom."
     (let [;; The builder methods take Java varargs, which need a String[].
           string-array (fn [sa] (into-array java.lang.String sa))
           ;; Dots in the host names are escaped so only the literal hosts
           ;; match. An unescaped `.` matches any character, which would let a
           ;; look-alike host such as `player-vimeo.com` satisfy the pattern.
           iframe-src-regex #"^https://((www\.)?youtube\.com|player\.vimeo\.com|(www\.)?loom\.com)/.*"]
       (.. (HtmlPolicyBuilder.)
           ;; -- common --
           (allowCommonBlockElements)
           (allowCommonInlineFormattingElements)
           (allowStyling)
           (allowStandardUrlProtocols)
           (allowElements (string-array allowed-block-elements))
           ;; -- span --
           (allowWithoutAttributes (string-array ["span"]))
           (allowAttributes (string-array ["class"
                                           "data-first-name"
                                           "data-last-name"
                                           "data-slack-username"
                                           "data-user-id"
                                           "data-email"
                                           "data-avatar-url"
                                           "data-found"
                                           "data-auto-link"
                                           "data-href"]))
           (onElements (string-array ["span"]))
           ;; -- images --
           (allowAttributes (string-array ["src"
                                           "alt"
                                           "class"
                                           "data-media-type"
                                           "data-thumbnail"]))
           (onElements (string-array ["img"]))
           ;; -- anchors / links --
           (allowAttributes (string-array ["href"
                                           "target"]))
           (onElements (string-array ["a"]))
           (requireRelNofollowOnLinks)
           ;; -- iframes (embeds) --
           ;; `src` is restricted to the allow-listed video hosts.
           (allowAttributes (string-array ["src"]))
           (matching iframe-src-regex)
           (onElements (string-array ["iframe"]))
           (allowAttributes (string-array ["class"
                                           "width"
                                           "height"
                                           "data-media-type"
                                           "frameborder"
                                           "webkitallowfullscreen"
                                           "mozallowfullscreen"
                                           "allowfullscreen"
                                           "data-thumbnail"
                                           "data-video-type"
                                           "data-video-id"]))
           (onElements (string-array ["iframe"]))
           ;; -- pre --
           (allowAttributes (string-array ["class"
                                           "data-disable-toolbar"]))
           (onElements (string-array ["pre"]))
           ;; -- code --
           (allowAttributes (string-array ["class"
                                           "data-disable-toolbar"]))
           (onElements (string-array ["code"]))
           ;; -- div for polls --
           (allowAttributes (string-array ["class"
                                           "contenteditable"
                                           "data-media-type"
                                           "data-poll-id"
                                           "data-question"
                                           "data-disable-toolbar"
                                           "id"]))
           (onElements (string-array ["div"]))
           ;; -- mark for highlighter button --
           (allowAttributes (string-array ["class"]))
           (onElements (string-array ["mark"]))
           (toFactory)))))
#?(:clj
   (defn sanitize-html
     "Run `html-str`, treated as markup written by an untrusted user, through
     `user-input-html-policy` and return the sanitized result."
     [html-str]
     (-> user-input-html-policy
         (.sanitize html-str))))
#?(:clj
   (defn strip-html-tags
     "Return only the textual content of `html-str`, with all tags removed.

     The string is sanitized with a policy built from a bare
     `HtmlPolicyBuilder`, i.e. one on which no elements have been allowed.

     Optional keyword args:
       - `:decode-entities?` when truthy, the sanitized output is additionally
         parsed with `soup/parse` and its `.text` is returned, so that HTML
         entities come back decoded."
     [html-str & {:keys [decode-entities?] :as opts}]
     (let [text-only (-> (HtmlPolicyBuilder.)
                         .toFactory
                         (.sanitize html-str))]
       (if decode-entities?
         (.text (soup/parse text-only))
         text-only))))