This repository has been archived by the owner on Apr 29, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
text.clj
67 lines (57 loc) · 2.67 KB
/
text.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
(ns oc.lib.text
"Functions related to processing text."
(:require [clojure.string :as s]
[cuerdas.core :as str]
[jsoup.soup :as soup]))
(defn attribution
  "Given the number of distinct authors to mention, the number of items, what to call the
  item (needs to pluralize with just an 's'), and a sequence of authors of the items
  to attribute (sequence needs to be distinct'able, and have a `:name` property per author),
  return a text string that attributes the authors to the items.

  The item name is pluralized for every count other than 1, so 0 items reads
  \"0 comments\". When there is nobody to attribute, the \" by ...\" clause is left
  out entirely; when there are authors but none may be named (an `attribution-count`
  of 0), they are attributed as \"others\".

  E.g.

  (attribution 3 7 \"comment\" [{:name \"Joe\"} {:name \"Joe\"} {:name \"Moe\"}
                              {:name \"Flo\"} {:name \"Flo\"} {:name \"Sue\"}])

  \"7 comments by Joe, Moe, Flo and others\"
  "
  [attribution-count item-count item-name authors]
  (let [distinct-authors (distinct authors)
        author-names (map :name (take attribution-count distinct-authors))
        more-authors? (> (count distinct-authors) (count author-names))
        multiple-authors? (> (count author-names) 1)
        author-attribution (cond
                             ;; nobody can be named: either there are no authors at all
                             ;; (nil, so no " by " clause) or none may be mentioned
                             (empty? author-names)
                             (when more-authors? "others")

                             ;; more distinct authors than we are going to mention
                             more-authors?
                             (str (s/join ", " author-names) " and others")

                             ;; more than 1 author so last mention needs an "and", not a comma
                             multiple-authors?
                             (str (s/join ", " (butlast author-names))
                                  " and "
                                  (last author-names))

                             ;; just 1 author
                             :else
                             (first author-names))]
    (str item-count " " item-name
         ;; only a count of exactly 1 is singular; 0 items is plural as well
         (when (not= item-count 1) "s")
         ;; avoid a dangling " by " when there is nothing to attribute
         (when author-attribution (str " by " author-attribution)))))
(defn strip-xss-tags
  "
  Remove the opening and closing tags considered XSS vectors from `data`.
  Current xss tags are script, style, and input (matched case-insensitively).

  Only the tags themselves are removed; any text between them is kept.
  The replacement is repeated until the text stops changing, so a tag that is
  reassembled by removing a nested one (e.g. `<scr<script>ipt>`) is stripped too.

  Returns nil when `data` is nil.
  "
  [data]
  (when data
    (loop [text data]
      (let [stripped (s/replace text #"(?i)<\/?((script|style|input){1})(\s?[^<>]*)>" "")]
        ;; Every effective pass shortens the text, so this always terminates.
        (if (= stripped text)
          stripped
          (recur stripped))))))
(defn- clean-body-text
  "Normalize `body` for previewing: replace every match of the whitespace pattern
  with a plain space, then remove tags and newlines using cuerdas."
  [body]
  ;; NOTE(review): the pattern below is a single whitespace character as seen here;
  ;; it may be meant as a non-breaking space (or `&nbsp;`) -- confirm against the repo.
  (let [spaced (s/replace body #" " " ")
        untagged (str/strip-tags spaced)]
    (str/strip-newlines untagged)))
;; Word budget used by `truncated-body`: how many space-separated tokens of a
;; body are kept (20 words being taken as the length of an average sentence).
(def body-words 20)
(defn truncated-body
  "Return a plain-text preview of the HTML string `body`: at most `body-words`
  space-separated words, followed by \"...\" only when words were actually left out.

  A nil or blank `body` yields an empty string."
  [body]
  (let [clean-body (if (s/blank? body)
                     ""
                     (clean-body-text (.text (soup/parse body))))
        ;; Drop the empty tokens produced by consecutive/leading spaces BEFORE
        ;; taking the preview, so they do not use up the word budget.
        words (filter not-empty (s/split clean-body #" "))
        preview (take body-words words) ;; 20 words is the average sentence
        ;; Signal truncation only when something follows the preview; a body of
        ;; exactly `body-words` words is shown whole, without an ellipsis.
        truncated? (boolean (seq (drop body-words words)))]
    (str (s/join " " preview)
         (when truncated? "..."))))