/
read.clj
173 lines (152 loc) · 5.77 KB
/
read.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
(ns clj-xlsxio.read
(:require [clj-xlsxio.low-level-read :refer :all])
(:import [com.sun.jna Pointer]
[java.util Date TimeZone]
[org.joda.time DateTime]
[java.time LocalDateTime Instant]
[java.io File]))
;; Bit flags accepted as the `:skip` option of `read-xlsx`; the value is passed
;; through unchanged to `sheet-open`.
;; NOTE(review): the values look like xlsxio's XLSXIOREAD_SKIP_* flags -- confirm
;; against the low-level binding.
(def ^:const skip-none
  "Flag value 0: no skip bit is set."
  0x00)

(def ^:const skip-empty-rows
  "Flag bit 0x01 (presumably: skip empty rows -- confirm against the native library)."
  0x01)

(def ^:const skip-empty-cells
  "Flag bit 0x02 (presumably: skip empty cells -- confirm against the native library)."
  0x02)

(def ^:const skip-all-empty
  "Combination of `skip-empty-rows` and `skip-empty-cells` (0x03)."
  (bit-or skip-empty-rows skip-empty-cells))

(def ^:const skip-extra-cells
  "Flag bit 0x04 (presumably: skip cells beyond the first row's width -- confirm)."
  0x04)
(defn read-row
  "Advances `sheet` to its next row and returns that row's cells as a vector.

  Cells are pulled with `sheet-next-cell` until it returns a falsy value, so a
  row without cells yields `[]`. Returns nil when `sheet-next-row` reports 0
  (no row available).

  The two-argument arity additionally closes `sheet` and then `xlsx` when no
  row is available, before returning nil."
  ([sheet]
   (when-not (zero? (sheet-next-row sheet))
     ;; `repeatedly` is unchunked, so exactly one native call is made per cell
     ;; plus the final call that returns the terminating falsy value.
     (into [] (take-while identity (repeatedly #(sheet-next-cell sheet))))))
  ([sheet xlsx]
   ;; A row is always a vector (truthy, even when empty), so `or` only falls
   ;; through to the cleanup branch when the sheet is exhausted.
   (or (read-row sheet)
       (do
         (sheet-close sheet)
         (close xlsx)
         nil))))
(defmulti read-xlsx
  "Reads spreadsheet rows from `source`. Dispatches on `(type source)`; any
  further arguments are ignored by the dispatch function and passed on to the
  selected method."
  (fn [source & _more] (type source)))
(defmethod read-xlsx Pointer
  ;; Lazily reads the rows of an already opened sheet handle.
  ;;
  ;; [sheet]       returns a lazy seq of row vectors (nil for an exhausted
  ;;               sheet). The caller keeps ownership of the handle and must
  ;;               close it.
  ;; [sheet xlsx]  same, but once the last row has been consumed the sheet and
  ;;               then the workbook handle are closed.
  ;;
  ;; The first row is read eagerly; each later row is read when the preceding
  ;; element of the sequence is realized.
  ([sheet]
   (when-let [row (read-row sheet)]
     (lazy-seq (cons row (read-xlsx sheet)))))
  ([sheet xlsx]
   (if-let [row (read-row sheet)]
     ;; Recurse with `xlsx` so that the cleanup branch below is reached when
     ;; the rows run out. Recursing into the one-argument arity would only
     ;; ever close the handles for a sheet with no rows at all, leaking them
     ;; for every non-empty sheet.
     (lazy-seq (cons row (read-xlsx sheet xlsx)))
     (do
       (sheet-close sheet)
       (close xlsx)
       nil))))
(defmethod read-xlsx String
  ;; Opens the workbook at path `filename`, opens one sheet of it and returns
  ;; its rows via the Pointer method (two-argument arity), which is also
  ;; responsible for closing the handles.
  ;;
  ;; Options (keyword arguments):
  ;;   :skip  - skip flags handed to `sheet-open`; defaults to `skip-none`.
  ;;   :sheet - sheet selector handed to `sheet-open`; defaults to nil
  ;;            (presumably the first sheet -- confirm against `sheet-open`).
  [filename & {:keys [skip sheet] :or {skip skip-none sheet nil}}]
  (let [workbook (open filename)]
    (read-xlsx (sheet-open workbook sheet skip) workbook)))
(defmethod read-xlsx File
  ;; Convenience method for java.io.File: resolves the file's absolute path
  ;; and delegates to the String method, forwarding :sheet and :skip
  ;; (:skip defaults to `skip-none`, :sheet to nil).
  [^File file & {:keys [skip sheet] :or {skip skip-none sheet nil}}]
  (read-xlsx (.getAbsolutePath file) :sheet sheet :skip skip))
(defn xlsx->enumerated-maps
  "Returns a lazy sequence with one map per row of `lz-seq`, keyed by the
  zero-based column index (0, 1, 2, ...).

  The number of keys is taken from the first row: cells beyond that width are
  dropped, and shorter rows produce maps with fewer entries."
  [lz-seq]
  (let [column-indexes (range (count (first lz-seq)))]
    (map #(zipmap column-indexes %) lz-seq)))
(defn- int->excel-column
  "Converts the zero-based column index `n` to spreadsheet column letters:
  0 -> \"A\", 25 -> \"Z\", 26 -> \"AA\", ... Returns \"\" for negative `n`."
  [^Long n]
  (loop [remaining (inc n)
         letters   ()]
    (let [idx (dec remaining)]
      (if-not (>= idx 0)
        (apply str letters)
        ;; Letters are produced least-significant first; conj onto a list
        ;; prepends, so the result is already in reading order.
        (recur (quot idx 26)
               (conj letters (char (+ (int \A) (mod idx 26)))))))))
(defn xlsx->excel-enumerated-maps
  "Returns a lazy sequence with one map per row of `lz-seq`, keyed by the
  spreadsheet column letter as a keyword (:A, :B, ..., :Z, :AA, ...).

  The number of keys is taken from the first row: cells beyond that width are
  dropped, and shorter rows produce maps with fewer entries."
  [lz-seq]
  (let [width       (count (first lz-seq))
        column-keys (map #(keyword (int->excel-column %)) (range width))]
    (map #(zipmap column-keys %) lz-seq)))
(defn xlsx->column-title-maps
  "Uses the first row of `lz-seq` as column titles and returns a lazy sequence
  with one map per remaining row, keyed by those titles.

  Keyword arguments:
    :column-fn - one-argument function applied to every title before it is
                 used as a key (default: titles are used as read).
    :str-keys  - when true the titles are used as they are; otherwise
                 (default) each title is converted with `keyword`.

  `:column-fn` is applied before the keyword conversion."
  [lz-seq & {:keys [str-keys column-fn] :or {str-keys false column-fn nil}}]
  (let [titles (cond->> (first lz-seq)
                 column-fn      (map column-fn)
                 (not str-keys) (map keyword))]
    (map #(zipmap titles %) (rest lz-seq))))
(defn coerce
  "Coerces every row of `lz-seq` by applying the functions in `fs`
  position-wise: the first function to the first cell, the second to the
  second cell, and so on. Each coerced row is a vector whose length is that of
  the shorter of `fs` and the row.

  With `:skip-first-row true` the first row (e.g. a header) is passed through
  unchanged and only the remaining rows are coerced."
  [lz-seq fs & {:keys [skip-first-row] :or {skip-first-row false}}]
  (let [coerce-row (fn [row]
                     (mapv (fn [f cell] (f cell)) fs row))]
    (if-not skip-first-row
      (map coerce-row lz-seq)
      (cons (first lz-seq)
            (map coerce-row (next lz-seq))))))
(defn coerce-map
  "Coerces one map `m` using the coercion map `fs` (key -> one-argument
  function). For every key of `fs` the function is applied to the value stored
  in `m` under that key and the result replaces it; keys of `m` that do not
  appear in `fs` are left untouched.

  A key of `fs` that is missing from `m` has its function called with nil and
  the result is added to the returned map.

  Keys may be of any type (keywords, strings, integers, ...): values are
  looked up with `get` rather than by calling the key as a function, so the
  integer-keyed and string-keyed row maps are supported as well."
  [m fs]
  (reduce (fn [acc [k f]]
            ;; Always read from the original `m`, never from `acc`.
            (assoc acc k (f (get m k))))
          m
          fs))

(comment
  (coerce-map {:d "extra key" :a "1" :b "10" :c "doasdjasodjas"}
              {:a #(Long/parseLong %) :b excel-date->java-date :c #(count %)})
  ;; integer keys work as well
  (coerce-map {0 "1" 1 "untouched"}
              {0 #(Long/parseLong %)}))
(defn coerce-maps
  "Coerces every map of `lz-seq` with `coerce-map` and the coercion map `fs`
  (key -> one-argument function); keys that are not in `fs` are left
  untouched. Returns a lazy sequence.

  With `:skip-first-row true` the first element is passed through unchanged
  and only the remaining maps are coerced."
  [lz-seq fs & {:keys [skip-first-row] :or {skip-first-row false}}]
  (if-not skip-first-row
    (map coerce-map lz-seq (repeat fs))
    (cons (first lz-seq)
          (map coerce-map (next lz-seq) (repeat fs)))))

(comment
  (coerce-maps (repeat 5 {:d "an extra key"
                          :another-extra-key-with-a-bizare-class-that-we-dont-want-to-mess-with-or-apply-a-coercion (Thread.)
                          :a "1" :b "10" :c "doasdjasodjas"})
               {:a #(Long/parseLong %) :b excel-date->java-date :c #(count %)}))
(defn excel-date->unix-timestamp
  "Converts an Excel serial date, given as the string `n-str`, to a Unix
  timestamp in whole seconds.

  The serial number counts days, with 25569 corresponding to 1970-01-01; the
  fractional part is the time of day. No time-zone adjustment is applied.

  The result is rounded to the nearest second. Serial dates are binary
  floating-point numbers, so a time such as 0.7 days is stored slightly below
  its exact value; truncating would then report one second too few.

  Throws NumberFormatException when `n-str` is not a parseable number."
  ^Long
  [^String n-str]
  (let [n (Double/parseDouble n-str)]
    (Math/round (double (* 86400 (- n 25569))))))
(defn excel-date->java-date
  "Converts an Excel serial date, given as the string `n-str`, to a
  java.util.Date. The serial date is first converted to Unix seconds with
  `excel-date->unix-timestamp` and then scaled to milliseconds."
  ^Date
  [^String n-str]
  (let [epoch-millis (* 1000 (excel-date->unix-timestamp n-str))]
    (Date. (long epoch-millis))))
(defn excel-date->joda-date
  "Converts an Excel serial date, given as the string `n-str`, to a Joda-Time
  DateTime. The serial date is first converted to Unix seconds with
  `excel-date->unix-timestamp` and then scaled to milliseconds."
  ^DateTime
  [^String n-str]
  (let [epoch-millis (* 1000 (excel-date->unix-timestamp n-str))]
    (DateTime. epoch-millis)))
(defn excel-date->java-localdatetime
  "Converts an Excel serial date, given as the string `n-str`, to a
  java.time.LocalDateTime. The serial date is converted to Unix seconds with
  `excel-date->unix-timestamp`, and the resulting instant is expressed in the
  JVM's default time zone."
  ^LocalDateTime
  [^String n-str]
  (let [instant (Instant/ofEpochSecond (excel-date->unix-timestamp n-str))
        zone    (.toZoneId (TimeZone/getDefault))]
    (LocalDateTime/ofInstant instant zone)))
(defprotocol ListSheets
  "Enumeration of the sheet names of a workbook."
  (list-sheets [this]
    "Returns a vector with the names of the sheets of `this`, in the order the
    underlying sheet list yields them. `this` may be an open workbook handle
    (Pointer), a file path (String) or a java.io.File."))

(extend-protocol ListSheets
  ;; Open workbook handle: the caller keeps ownership of `xlsx`; only the
  ;; temporary sheet list is opened and closed here.
  Pointer
  (list-sheets [xlsx]
    (let [sheetlist (sheetlist-open xlsx)]
      (try
        (loop [sheets []]
          (if-let [sheetname (sheetlist-next sheetlist)]
            (recur (conj sheets sheetname))
            sheets))
        ;; Release the native sheet list even when enumeration throws.
        (finally
          (sheetlist-close sheetlist)))))

  ;; File path: the workbook is opened for the duration of the call only.
  String
  (list-sheets [filename]
    (let [^Pointer xlsx (open filename)]
      (try
        (list-sheets xlsx)
        ;; Release the workbook even when listing throws. The nil guard keeps
        ;; `close` from being called when `open` produced no handle, matching
        ;; the previous behaviour in which `close` was never reached then.
        (finally
          (when xlsx
            (close xlsx))))))

  ;; java.io.File: delegates to the String implementation via the absolute path.
  File
  (list-sheets [file]
    (list-sheets (.getAbsolutePath file))))