-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_util.clj
82 lines (72 loc) · 3.14 KB
/
run_util.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
(ns louna.q.run-util)
(defn add-header
  "Calls `toDF` on `dataset` with `col-names` converted to a String array
  and returns whatever `toDF` returns.

  dataset   - any object exposing a `toDF(String[])` method
              (presumably a Spark Dataset; confirm against callers).
  col-names - a collection of strings; may be empty."
  [dataset col-names]
  ;; An explicit element type is required: a bare `into-array` infers the type
  ;; from the first element and produces Object[] for an empty collection,
  ;; which cannot be passed where a String[] is expected.
  (.toDF dataset (into-array String col-names)))
(defn add-indexed-qvars
  "Writes each entry of `indexed-qvars` into the vector `qvars` and returns
  the updated vector.

  Every entry has the shape `?<target>:<name>`. The text between the first
  character and the first `:` selects the slot:
    - an all-digit target is a 1-based position (so `?2:x` writes slot 1);
    - any other target is looked up in `header` with `.indexOf`.
  The selected slot is overwritten with `?<name>`.

  qvars         - vector that already has a slot for every target position.
  header        - ordered collection of column names supporting `.indexOf`.
  indexed-qvars - collection of `?<target>:<name>` strings."
  [qvars header indexed-qvars]
  (reduce (fn [acc indexed-qvar]
            (let [[target new-name] (clojure.string/split indexed-qvar #":")
                  token (subs target 1)
                  ;; Parse digits as plain decimal. The Clojure reader would
                  ;; treat a leading zero as octal (010 -> 8, 08 -> error) and
                  ;; would run arbitrary reader syntax found in the query text.
                  position (if (re-matches #"\d+" token)
                             (dec (Long/parseLong token))
                             (.indexOf header token))]
              (assoc acc position (str "?" new-name))))
          qvars
          indexed-qvars))
(defn add-named-qvars
  "Writes each entry of `named-qvars` into the vector `qvars` and returns the
  updated vector.

  Every entry has the shape `?<column>:<name>`. `<column>` is looked up in
  `header` with `.indexOf`, and the slot at that position is overwritten with
  `?<name>`.

  qvars       - vector with one slot per header column.
  header      - ordered collection of column names supporting `.indexOf`.
  named-qvars - collection of `?<column>:<name>` strings."
  [qvars header named-qvars]
  (letfn [(place [acc named-qvar]
            (let [[prefixed-column new-name] (clojure.string/split named-qvar #":")
                  ;; drop the leading character to get the header column name
                  position (.indexOf header (subs prefixed-column 1))]
              (assoc acc position (str "?" new-name))))]
    (reduce place qvars named-qvars)))
(defn separate-constants-qvars
  "Walks `qvars` left to right alongside `header` and replaces every constant
  with a variable named after the header column at the same position.

  A qvar is kept as-is when `louna.q.transform/unnamed-qvar?` accepts it or
  when it is text starting with `?`. Anything else is treated as a constant.

  Returns a three-element vector `[constants qvars index]`:
    constants - map from the generated `?<column>` variable to the constant
                it replaced;
    qvars     - vector of the qvars with constants replaced;
    index     - number of qvars processed.

  header - indexable collection of column names (read with `get`).
  qvars  - collection of qvars and constants."
  [header qvars]
  (reduce (fn [[constants out index] qvar]
            (if (or (louna.q.transform/unnamed-qvar? qvar)
                    ;; `starts-with?` only accepts character sequences; the
                    ;; guard lets non-text constants (numbers, keywords, ...)
                    ;; fall through to the constant branch instead of raising
                    ;; a ClassCastException.
                    (and (instance? CharSequence qvar)
                         (clojure.string/starts-with? qvar "?")))
              [constants (conj out qvar) (inc index)]
              (let [header-var (str "?" (get header index))]
                [(assoc constants header-var qvar)
                 (conj out header-var)
                 (inc index)])))
          [{} [] 0]
          qvars))
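;; By default missing vars are not added: the qvars are padded with "-" unless the last qvar is "__", in which case they are padded with "_".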
(defn add-out-of-order-vars
  "Rebuilds `qvars` as a vector aligned with `header`.

  Steps:
    1. A trailing `__` or `--` marker is removed. `__` selects `_` as the
       padding value; otherwise the padding value is `-`.
    2. Qvars containing `?...:` are set aside. The remaining qvars keep their
       order and are padded up to the header length.
    3. The set-aside qvars are written into their slots by
       `add-indexed-qvars` (`?index:name`) and then by `add-named-qvars`
       (`?name:name1`).

  qvars  - vector of qvar strings, optionally ending in `__` or `--`.
  header - ordered collection of column names."
  [qvars header]
  (let [marker        (peek qvars)
        add-missing?  (= marker "__")
        body          (if (contains? #{"__" "--"} marker)
                        (pop qvars)
                        qvars)
        ;; ?:name ?name:name1 ?index:name
        out-of-order? (fn [qvar] (some? (re-find #"\?.*:" qvar)))
        relocated     (filter out-of-order? body)
        ;; ?name:name1
        ;; NOTE(review): the `?:name` form matches neither regex below, so it
        ;; is removed from the positional qvars and never re-added - confirm
        ;; whether that form is still meant to be supported.
        named         (filter #(re-find #"\?\D.*:" %) relocated)
        ;; ?index:name
        indexed       (filter #(re-find #"\?\d+:" %) relocated)
        ;; ?qvar
        positional    (remove out-of-order? body)
        filler        (if add-missing? "_" "-")
        padding       (take (- (count header) (count positional))
                            (repeat filler))]
    (-> (into [] (concat positional padding))
        (add-indexed-qvars header indexed)
        (add-named-qvars header named))))
(defn get-var-position
  "Returns the 0-based position of the first element of `sorted-vars` equal
  to `var-name`.

  Throws `clojure.lang.ExceptionInfo` (with `:var-name` and `:sorted-vars` in
  its data) when `var-name` is absent. This replaces terminating the JVM, so
  the failure is visible to callers and does not kill a REPL or host process
  with a success exit status.

  sorted-vars - ordered collection of vars (nil is treated as empty).
  var-name    - the var to locate."
  [sorted-vars var-name]
  (let [position (.indexOf ^java.util.List (vec sorted-vars) var-name)]
    (if (neg? position)
      (throw (ex-info (str "Var " (pr-str var-name)
                           " Not found in " (pr-str sorted-vars))
                      {:var-name var-name
                       :sorted-vars sorted-vars}))
      position)))
(defn sort-vars
  "Returns `vars` as a vector ordered by each var's position in
  `sorted-vars`, as reported by `get-var-position`.

  sorted-vars - ordered collection defining the reference order.
  vars        - collection of vars to sort."
  [sorted-vars vars]
  (->> vars
       (sort-by (fn [v] (get-var-position sorted-vars v)))
       vec))
;; The join type, when given, is always the last element, for example (mydf ?x ?y :outer)
(defn get-join-type
  "Splits an optional trailing join-type keyword off `qvars`.

  Returns `[qvars join-type]`. When the last element of `qvars` is a keyword
  it is removed and its name becomes the join type; otherwise `qvars` is
  returned unchanged with the join type \"inner\".

  qvars - vector of qvars, optionally ending in a keyword such as :outer."
  [qvars]
  (let [tail (peek qvars)]
    (if-not (keyword? tail)
      [qvars "inner"]
      [(pop qvars) (name tail)])))