-
Notifications
You must be signed in to change notification settings - Fork 0
/
io.clj
110 lines (99 loc) · 3.64 KB
/
io.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
(ns nd-db.io
(:require [clojure.java.io :as io]
[clojure.string :as s]
[taoensso.nippy :as nippy]
[buddy.core.codecs :as c]
digest
[nd-db.util :as ndut])
(:import [java.io File Writer]))
(defn ndfile-md5
  "Returns the MD5 digest of the first 10 lines of the file `filename`.
  The lines are concatenated without any separator before hashing; a file
  with fewer than 10 lines contributes all of its lines."
  [filename]
  (with-open [rdr (io/reader filename)]
    ;; `s/join` fully realizes the lazy line-seq while the reader is open.
    (->> (line-seq rdr)
         (take 10)
         s/join
         digest/md5)))
(defn ->str
  "Freezes `data` with nippy, Base64-encodes the resulting bytes and
  returns the encoded bytes converted to a string."
  [data]
  (c/bytes->str
   (c/bytes->b64
    (nippy/freeze data))))
(defn str->
  "Converts `data-str` to bytes, Base64-decodes them and thaws the result
  with nippy. Applies the steps of `->str` in reverse order."
  [data-str]
  (nippy/thaw
   (c/b64->bytes
    (c/str->bytes data-str))))
(defn serialize-db
  "Writes the nippy-frozen value of `@db` to the file `filename`.
  `db` must satisfy `ndut/db?` and be dereferenceable. Returns `db` itself
  (not the dereferenced value)."
  [filename db]
  {:pre [(ndut/db? db)]}
  ;; The stream is opened before `db` is dereferenced, and closed by
  ;; `with-open` even when freezing or writing throws.
  (with-open [out (io/output-stream filename)]
    (let [frozen (nippy/freeze @db)]
      (.write out frozen)))
  db)
(defn parse-db
  "Starts thawing the nippy file `filename` on a future and returns that
  future immediately. The postcondition requires the returned future to
  satisfy `ndut/db?`."
  [filename]
  {:post [(ndut/db? %)]}
  (future-call
   (fn []
     (nippy/thaw-from-file filename))))
(defn serialize-db-filename
  "Builds the path of the serialized index file (`.nddbmeta`) for a database.

  Takes a map with:
    :filename     - path of the source database file (a string)
    :idx-id       - index identifier, appended directly after the MD5
    :index-folder - optional target folder; when nil/absent the JVM
                    property `java.io.tmpdir` is used

  Result: <folder><separator><base-name>_<md5><idx-id>.nddbmeta
  where <base-name> is the file name up to its first dot and <md5> is
  `(ndfile-md5 filename)`."
  [{:keys [filename idx-id index-folder]}]
  (let [;; Use File#getName rather than splitting on
        ;; (re-pattern File/separator): where the separator is a lone
        ;; backslash that string is not a valid regex and compiling it
        ;; throws PatternSyntaxException.
        db-filename (.getName (io/file filename))
        db-md5      (ndfile-md5 filename)
        base-name   (first (s/split db-filename #"\."))
        folder      (or index-folder (System/getProperty "java.io.tmpdir"))]
    (str folder
         File/separator
         base-name
         "_" db-md5
         idx-id
         ".nddbmeta")))
(defn append+newline
  "Returns a function of one argument that appends a record to `writer`.
  The returned function encodes `data` with `->str`, adds a trailing
  newline, writes and flushes it, and returns the number of characters
  written (newline included).
  NOTE(review): no explicit locking happens in this function; any
  serialization of concurrent writes has to come from the caller or the
  writer itself."
  [^Writer writer]
  (fn [data]
    (let [line (str (->str data) "\n")]
      (.write writer line)
      (.flush writer)
      (count line))))
(defn name-type->id+fn
  "Generates valid :id-fn input based on :id-name and :id-type.

  Takes a map with:
    :id-name     - name of the id field; must be a string, otherwise nil
                   is returned
    :id-type     - type of the returned id, :integer or anything else for
                   string (defaults to :string when the key is absent)
    :source-type - how the value is written in the source line; :integer
                   matches bare digits, anything else a double-quoted
                   word. Defaults to :id-type.

  Returns {:idx-id <id-name followed by the name of id-type>
           :id-fn  <fn of one line (string) returning the extracted id>}.
  With :id-type :integer the id is a BigInteger, otherwise the matched
  string (nil when the line does not match)."
  [{:keys [id-name id-type source-type]
    :or   {id-type :string}}]
  (when (string? id-name)
    (let [source-type    (or source-type id-type)
          source-pattern (if (= :integer source-type)
                           "(\\d+)"
                           "\"(\\w+)\"")
          ;; Compile the pattern once here instead of on every call of
          ;; the returned :id-fn, which runs once per line of the file.
          rx             (re-pattern (format "%s\":%s" id-name source-pattern))
          extract        (fn [line] (second (re-find rx line)))]
      {:idx-id (str id-name (name id-type))
       :id-fn  (if (= :integer id-type)
                 (fn [line] (BigInteger. ^String (extract line)))
                 extract)})))
(defn path->id+fn
  "Generates valid :id-fn input based on :id-path (.ndnippy only!).
  `id-path` must be a vector. The :idx-id is the concatenation of the path
  elements, with keywords replaced by their names; the :id-fn decodes a
  line with `str->` and looks up `id-path` in the result via `get-in`."
  [id-path]
  {:pre [(vector? id-path)]}
  (let [part->str (fn [part]
                    (if (keyword? part)
                      (name part)
                      part))]
    {:idx-id (s/join (map part->str id-path))
     :id-fn  (fn [line]
               (get-in (str-> line) id-path))}))
(defn rx-str->id+fn
  "Generates valid :id-fn input based on a regular expression string.

  `rx-str` must contain a capturing group; the returned :id-fn parses the
  first group of the first match in a line as an Integer, so ids must fit
  in the 32-bit int range. A line without a match makes the :id-fn throw.
  The :idx-id is `(ndut/str->hash rx-str)`."
  [rx-str]
  ;; Compile once up front: the :id-fn runs once per line, and an invalid
  ;; regex now fails here instead of on the first line processed.
  (let [rx (re-pattern rx-str)]
    {:idx-id (ndut/str->hash rx-str)
     :id-fn  (fn [line]
               (Integer. ^String (second (re-find rx line))))}))
(defn parse-params
  "Parses input params for intake of raw-db.

  Exactly one id strategy is selected, in this order of precedence:
    :id-fn               - used as-is, with an empty :idx-id
    :id-rx-str           - via `rx-str->id+fn`
    :id-path             - via `path->id+fn`
    :id-name + :id-type  - via `name-type->id+fn` (receives all params)

  Returns a map with :filename, :id-fn, :idx-id, :index-persist? (true
  unless explicitly passed as false) and, when given, :index-folder. The
  map carries the metadata {:parsed? true}.

  Throws AssertionError when no id strategy is supplied, or when the
  result lacks :filename, :id-fn or :idx-id."
  [{:keys [filename
           id-fn id-rx-str
           id-path
           id-name id-type
           index-folder index-persist?] :as params}]
  {:pre  [(or (fn? id-fn)
              (string? id-rx-str)
              (vector? id-path)
              (and id-name id-type))]
   ;; Must be a plain expression over `%`. The previous `#(and ...)` form
   ;; was a function literal, which is always truthy, so the check never
   ;; ran.
   :post [(and (:filename %)
               (:id-fn %)
               (:idx-id %))]}
  (let [id-spec (cond
                  id-fn     {:id-fn  id-fn
                             :idx-id ""}
                  id-rx-str (rx-str->id+fn id-rx-str)
                  id-path   (path->id+fn id-path)
                  :else     (name-type->id+fn params))]
    (-> id-spec
        (merge (when index-folder {:index-folder index-folder}))
        (assoc :filename filename
               :index-persist? (not (false? index-persist?)))
        (with-meta {:parsed? true}))))