This repository has been archived by the owner on Apr 29, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
html.cljc
62 lines (57 loc) · 2.01 KB
/
html.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
(ns oc.lib.html
"Functions related to processing HTML."
(:require [cuerdas.core :as str]
#?(:clj [jsoup.soup :as soup])))
(defn- thumbnail-elements
  "Finds the thumbnail candidates inside `body`, an HTML string.

  Candidates are the elements matched by the selector below: `img` tags that
  neither carry the `emojione` class nor declare
  `data-media-type='image/gif'`, plus every `iframe`.

  Returns a map with:
    :elements  the matched collection (jsoup `.select` result on the JVM,
               jQuery lookup result in ClojureScript)
    :count     the number of matched elements"
  [body]
  (let [thumbnail-selector "img:not(.emojione):not([data-media-type='image/gif']), iframe"]
    #?(:clj
       (let [matches (-> body
                         soup/parse
                         (.select thumbnail-selector))]
         {:count (count matches)
          :elements matches})
       :cljs
       ;; The markup is wrapped in a <div> and used as the jQuery lookup context.
       (let [container (js/$ (str "<div>" body "</div>"))
             matches (js->clj (js/$ thumbnail-selector container))]
         {:count (.-length matches)
          :elements matches}))))
(defn- $el
  "Platform shim for element handles.

  On the JVM `el` is returned untouched; in ClojureScript it is wrapped with
  jQuery (`js/$`)."
  [el]
  #?(:cljs (js/$ el)
     :clj el))
(defn- tag-name
  "Returns the tag name of `el`: read through the `tagName` member on the JVM
  and through the `tagName` property in ClojureScript."
  [el]
  #?(:cljs (.-tagName el)
     :clj (. el tagName)))
(defn- read-size
  "Parses the leading integer of a dimension attribute value.

  `size` is the raw attribute text, e.g. \"640\" or \"640px\"; the result is
  the integer that prefixes it (640 in both examples).

  JVM: throws when `size` is nil or does not start with an optionally signed
  run of digits, because there is nothing to hand to `Integer/parseInt`.
  ClojureScript: the value is parsed with `js/parseInt` in base 10, so a
  number is returned instead of the raw string and \"640px\" yields 640 on
  both platforms; input without a leading integer yields NaN."
  [size]
  #?(:clj
     (Integer/parseInt (re-find #"\A-?\d+" size))
     :cljs
     ;; Explicit parse: returning the raw string made callers depend on
     ;; implicit JS coercion, which turns \"640px\" into NaN.
     (js/parseInt size 10)))
(defn- image-dimension
  "Reads the `attr-name` attribute of `$node` (jsoup element or jQuery object)
  as a size. Returns nil when the attribute is missing, blank, or does not
  start with an integer of at most 9 digits, so `read-size` is only ever
  called with input it can parse."
  [$node attr-name]
  (let [raw (.attr $node attr-name)]
    ;; jsoup answers \"\" and jQuery answers undefined for a missing attribute;
    ;; both fail this guard. The 9-digit cap keeps the value inside int range.
    (when (and (string? raw)
               (re-find #"^-?\d{1,9}(?!\d)" raw))
      (read-size raw))))

(defn first-body-thumbnail
  "Given an entry body, returns the first available thumbnail, or nil when
  there is none.

  Candidates come from `thumbnail-elements` and are examined in document
  order:
    - an `img` yields {:type \"image\" :thumbnail <url>}, where the url is the
      `data-thumbnail` attribute when it is set and the `src` attribute
      otherwise. Images whose `width` or `height` attribute is missing or not
      numeric are skipped.
    - any other candidate (an `iframe`) yields
      {:type <data-media-type attribute> :thumbnail <data-thumbnail attribute>}.

  This relies on the similarities between jQuery objects and jsoup elements,
  such as the shared `attr` function."
  [html-body]
  (let [{els-count :count thumb-els :elements} (thumbnail-elements html-body)]
    ;; `some` stops at the first element that produces a thumbnail.
    (some
      (fn [el-num]
        (let [el #?(:clj (nth thumb-els el-num) :cljs (aget thumb-els el-num))
              $node ($el el)]
          (if (= (str/lower (tag-name el)) "img")
            (let [width (image-dimension $node "width")
                  height (image-dimension $node "height")]
              ;; NOTE(review): for non-negative sizes this `or` is always true
              ;; (if width > 2*height then height <= 2*width). An aspect-ratio
              ;; filter would need `and` -- confirm the intent before changing.
              (when (and width
                         height
                         (or (<= width (* height 2))
                             (<= height (* width 2))))
                (let [data-thumbnail (.attr $node "data-thumbnail")]
                  {:type "image"
                   ;; A blank value counts as missing: jsoup returns \"\" (truthy
                   ;; in Clojure) for an absent attribute, which previously
                   ;; prevented the fallback to `src` on the JVM.
                   :thumbnail (if (seq data-thumbnail)
                                data-thumbnail
                                (.attr $node "src"))})))
            {:type (.attr $node "data-media-type")
             :thumbnail (.attr $node "data-thumbnail")})))
      (range els-count))))