/
readers.clj
56 lines (48 loc) · 2.13 KB
/
readers.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
(ns ^:no-doc tech.v3.dataset.readers
(:require [tech.v3.datatype :as dtype]
[tech.v3.protocols.dataset :as ds-proto])
(:import [tech.v3.datatype ObjectReader Buffer]
[tech.v3.dataset FastStruct]
[org.roaringbitmap RoaringBitmap]
[java.util List HashMap Collections ArrayList]))
(defn dataset->column-readers
  "Build one reader per column of `dataset`.

  Returns a vector, in the order given by `ds-proto/columns`, where each
  element is the result of calling `dtype/->reader` on that column.

  NOTE(review): earlier documentation described a `:missing-nil?` option, but
  this arity accepts no options map. How missing values appear is whatever
  `dtype/->reader` produces for the column - confirm against the column
  implementation."
  (^List [dataset]
   (mapv dtype/->reader (ds-proto/columns dataset))))
(defn value-reader
  "Row-oriented view over `dataset`.

  Returns a reader whose size is the second element of `(dtype/shape dataset)`
  (used here as the row count).  Reading entry `idx` yields a fresh reader
  whose size is the first element of the shape (used as the column count) and
  whose entry `inner-idx` is the value at position `idx` of column reader
  `inner-idx`.  Values are looked up in the column readers at read time; no
  row data is copied up front.

  NOTE(review): earlier documentation described a `:missing-nil?` option, but
  this arity accepts no options map - missing-value handling is inherited
  from `dataset->column-readers`."
  (^Buffer [dataset]
   (let [col-rdrs (dataset->column-readers dataset)
         row-count (-> dataset dtype/shape second long)
         col-count (-> dataset dtype/shape first long)]
     (reify ObjectReader
       (lsize [_outer] row-count)
       (readObject [_outer idx]
         ;; Each row is its own lightweight reader closing over the row index.
         (reify ObjectReader
           (lsize [_row] col-count)
           (readObject [_row inner-idx]
             (let [^List column (.get col-rdrs inner-idx)]
               (.get column idx)))))))))
(defn mapseq-reader
  "Reader that yields one `FastStruct` per row of `dataset`.

  A single unmodifiable map of column name -> column position is built once,
  taking each name from the `:name` key of the column's metadata and each
  position from the column's index in `ds-proto/columns`.  Entry `idx` of the
  returned reader is `(FastStruct. name->position row)`, where `row` is entry
  `idx` of `(value-reader dataset)`.  The reader's size equals that of the
  value reader.

  NOTE(review): `FastStruct` presumably presents the row as a map of
  column-name -> column-value; confirm against its implementation.  Earlier
  documentation described a `:missing-nil?` option, but this arity accepts no
  options map."
  (^Buffer [dataset]
   (let [col-names (map (comp :name meta) (ds-proto/columns dataset))
         name->idx (HashMap.)]
     ;; Populate the lookup table eagerly; positions are stored as ints.
     (dorun
      (map-indexed (fn [position col-name]
                     (.put name->idx col-name (int position)))
                   col-names))
     (let [slots (Collections/unmodifiableMap name->idx)
           rows (value-reader dataset)]
       (reify ObjectReader
         (lsize [_] (.lsize rows))
         (readObject [_ idx]
           (FastStruct. slots (rows idx))))))))