-
Notifications
You must be signed in to change notification settings - Fork 4
/
protocols.clj
84 lines (67 loc) · 2.84 KB
/
protocols.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
(ns scicloj.ml.smile.protocols
(:require [tech.v3.libs.smile.data :as smile-data]
[tech.v3.datatype :as dtype]
[tech.v3.dataset.utils :as ds-utils])
(:import [smile.data.formula Formula]
[smile.data.type StructType]
[smile.regression DataFrameRegression]
[smile.classification DataFrameClassifier]
[java.util Properties List]
[smile.data.formula Formula TechFactory Variable]))
;; Ask the compiler to emit a warning, from this point on while this file is
;; being compiled, for every Java interop call it cannot resolve statically
;; and would therefore perform via reflection.
(set! *warn-on-reflection* true)
;; Protocol for values from which a formula can be obtained.
;; `get-model-formula` takes the value (`item`) and returns its formula.
;; In this file it is implemented for Smile's DataFrameRegression and
;; DataFrameClassifier, and `->formula` relies on it for any argument that is
;; not already a Formula.
(defprotocol PToFormula
(get-model-formula [item]))
;; PToFormula implementations for the two Smile DataFrame-based model types.
;; Each one simply delegates to the model's own `.formula` accessor.
(extend-type DataFrameRegression
  PToFormula
  (get-model-formula [model]
    (.formula model)))

(extend-type DataFrameClassifier
  PToFormula
  (get-model-formula [model]
    (.formula model)))
(defn ->formula
  "Coerce `item` to a smile.data.formula.Formula.

  A Formula instance is returned unchanged; anything else is handed to
  `get-model-formula` (protocol PToFormula), so `item` must then be of a type
  that protocol has been extended to."
  ^Formula [item]
  (if-not (instance? Formula item)
    (get-model-formula item)
    item))
(defn initialize-model-formula!
  "Bind the formula of `model` to a StructType describing `feature-ds`.

  `model` is anything accepted by `->formula`. One struct field is created per
  value of `feature-ds` (obtained via `vals`), using the `:name` and
  `:datatype` entries of that value's metadata; the name is first passed
  through `ds-utils/column-safe-name`. Returns whatever `Formula.bind`
  returns."
  [model feature-ds]
  (let [;; Resolve the formula first so a bad `model` fails before any
        ;; field construction is attempted.
        model-formula (->formula model)
        column->field (fn [column]
                        (let [{col-name :name
                               col-dtype :datatype} (meta column)]
                          (smile-data/smile-struct-field
                           (ds-utils/column-safe-name col-name)
                           col-dtype)))
        ;; The ^List hint selects the StructType(List) constructor.
        ^List struct-fields (mapv column->field (vals feature-ds))
        schema (StructType. struct-fields)]
    (.bind model-formula schema)))
(defn- resolve-default
  "Return the concrete default for an option.

  When `item` is a function (per `fn?`, so keywords, maps and other
  non-function invokables do not count) it is called with `dataset` and its
  result is returned; any other value is returned as-is."
  [item dataset]
  (if-not (fn? item)
    item
    (item dataset)))
(defn options->properties
  "Build a java.util.Properties from user `options`, driven by the option
  specs found under `(:options metadata)`.

  Each spec is a map with keys `:name`, `:default` and (optionally)
  `:lookup-table`. For every spec one property is written:

  * key   - `<property-name-stem>.<option name>` where the stem comes from
            `(:property-name-stem metadata)` and every \"-\" in the option
            name is replaced by \".\".
  * value - the string form of the chosen value after casting it (via
            `dtype/cast`) to the datatype of the resolved default.

  The chosen value is the entry of `options` under the spec's `:name`,
  translated through `:lookup-table` when it has a matching key. If the
  option was not supplied (nil) the default is used instead. A default that
  is a function is called with `dataset` and the Properties accumulated so
  far; otherwise it is translated through `:lookup-table` when possible.

  An explicitly supplied `false` is honoured rather than being replaced by
  the default.

  Returns the populated Properties."
  ^Properties [metadata dataset options]
  (let [pname-stem (:property-name-stem metadata)]
    (reduce
     (fn [^Properties props {:keys [name default lookup-table]}]
       (let [default (if (fn? default)
                       (default dataset props)
                       (or (get lookup-table default)
                           default))
             supplied (get options name)
             ;; Fall back to the raw value when the table has no entry for it.
             supplied (get lookup-table supplied supplied)
             ;; `some?` instead of `or`: only a missing (nil) option falls
             ;; back to the default, so a user-supplied `false` is kept.
             chosen (if (some? supplied)
                      supplied
                      (resolve-default default dataset))
             ;; `name` is shadowed by the destructured option name, hence the
             ;; fully qualified call to clojure.core/name.
             prop-key (format "%s.%s"
                              pname-stem
                              (.replace ^String (clojure.core/name name)
                                        "-" "."))]
         (.put props
               prop-key
               (str (dtype/cast chosen (dtype/get-datatype default))))
         props))
     (Properties.)
     (:options metadata))))
(defn make-formula
  "Construct a Smile Formula from the name of the response variable and an
  optional sequence of feature (predictor) names. Every name is turned into
  a formula variable through `TechFactory/variable`; omitting `features`
  yields a formula with an empty predictor array."
  [^String response & [features]]
  (let [response-var (TechFactory/variable response)
        ;; The array type hint picks the Formula constructor without reflection.
        ^"[Lsmile.data.formula.Variable;" predictor-vars
        (into-array Variable
                    (map (fn [feature] (TechFactory/variable feature))
                         features))]
    (Formula. response-var predictor-vars)))