-
Notifications
You must be signed in to change notification settings - Fork 4
/
projections.clj
139 lines (115 loc) · 4.79 KB
/
projections.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
(ns scicloj.ml.smile.projections
(:require [tablecloth.api.utils :refer [column-names]]
[tablecloth.api.dataset :refer [rows dataset columns]]
[tablecloth.api.columns :refer [select-columns drop-columns add-or-replace-columns]]
[fastmath.kernel :as k])
(:import [smile.projection PCA ProbabilisticPCA KPCA GHA RandomProjection Projection]
[smile.math.kernel MercerKernel]))
(set! *warn-on-reflection* true)
(defn- pca
([rows target-dims] (pca rows target-dims false))
([rows ^long target-dims cor?]
(let [^PCA model (if cor? (PCA/cor rows) (PCA/fit rows))]
(.setProjection model target-dims)
model)))
(defn- pca-prob
[rows target-dims]
(ProbabilisticPCA/fit rows target-dims))
(defn- build-smile-kernel
[kernel kernel-params]
(cond
(instance? MercerKernel kernel) kernel
(fn? kernel) (k/smile-mercer kernel)
:else (k/smile-mercer (apply k/kernel kernel kernel-params))))
(defn- kpca
[rows target-dims kernel kernel-params threshold]
(KPCA/fit rows (build-smile-kernel kernel kernel-params) target-dims threshold))
(defn- gha
[rows target-dims learning-rate decay]
(let [^GHA model (GHA. (count (first rows)) target-dims learning-rate)]
(doseq [row rows]
(.setLearningRate model (* decay (.getLearningRate model)))
(.update model row))
model))
(defn- random
[rows target-dims]
(let [cnt (count (first rows))]
(RandomProjection/of cnt target-dims)))
(defn- build-model
[rows algorithm target-dims {:keys [kernel kernel-params
threshold learning-rate decay]
:or {kernel (k/kernel :gaussian)
threshold 0.0001
learning-rate 0.0001
decay 0.995}}]
(case algorithm
:pca-cov (pca rows target-dims)
:pca-cor (pca rows target-dims true)
:pca-prob (pca-prob rows target-dims)
:kpca (kpca rows target-dims kernel kernel-params threshold)
:gha (gha rows target-dims learning-rate decay)
:random (random rows target-dims)
(pca rows target-dims)))
(defn- rows->array
[ds names]
(-> ds
(select-columns names)
(rows :as-double-arrays)))
(defn- array->ds
[arr target-columns]
(->> arr
(map (partial zipmap target-columns))
(dataset)))
(defn process-reduction-fit
[ds algorithm target-dims cnames opts]
(let [target-columns (map #(str (name algorithm) "-" %) (range target-dims))
rows (rows->array ds cnames)
^Projection model (build-model rows algorithm target-dims opts)
ds-res (array->ds (.project model #^"[[D" rows) target-columns)]
{:dataset
(-> ds
(add-or-replace-columns (columns ds-res :as-map)))
:model model
:cnames cnames
:target-columns target-columns
}))
(defn process-reduction-transform
[ds model cnames target-columns]
(let [rows (rows->array ds cnames)
ds-res (array->ds (.project model #^"[[D" rows) target-columns)]
(-> ds
(add-or-replace-columns (columns ds-res :as-map)))
))
(defn reduce-dimensions
"Metamorph transformer, which reduces the dimensions of a given dataset.
`algorithm` can be any of:
* :pca-cov
* :pca-cor
* :pca-prob
* :kpca
* :gha
* :random
`target-dims` is number of dimensions to reduce to.
`cnames` is a sequence of column names on which the reduction get performed
`opts` are the options of the algorithm
metamorph | .
-------------------------------------|----------------------------------------------------------------------------
Behaviour in mode :fit | Reduces dimensions of the dataset at key `:metamorph/data` and stores the trained model in ctx under key at `:metamorph/id`
Behaviour in mode :transform | Reads trained reduction model from ctx and applies it to data in `:metamorph/data`
Reads keys from ctx | In mode `:transform` : Reads trained model to use from ctx at key in `:metamorph/id`.
Writes keys to ctx | In mode `:fit` : Stores trained model in ctx under key in `:metamorph/id`.
"
[algorithm target-dims cnames opts]
(fn [{:metamorph/keys [data id mode] :as ctx}]
(case mode
:fit
(let [fit-result (process-reduction-fit data algorithm target-dims cnames opts)]
(assoc ctx
id {:fit-result (dissoc fit-result :dataset)}
:metamorph/data (:dataset fit-result)))
:transform
(let [fit-result (get-in ctx [id :fit-result])]
(assoc ctx :metamorph/data (process-reduction-transform
(:metamorph/data ctx)
(:cnames fit-result)
(:target-columns fit-result)))))))