Skip to content

Commit

Permalink
Fixes #273
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Nov 7, 2021
1 parent 5ff85c3 commit 7ae0c78
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
14 changes: 14 additions & 0 deletions src/tech/v3/dataset/missing.clj
Expand Up @@ -79,6 +79,18 @@
(missing-map-fn secondary-f missing2))
(replace-missing-with-value step1 missing2 value)))))

(defn- replace-missing-with-abb
  "Impute the missing entries of `col` using an approximate bayesian
  bootstrap (ABB).

  Arguments:
  - `col` - column to impute.
  - `missing` - RoaringBitmap holding the indexes of the missing entries of `col`.

  A first bootstrap round draws, with replacement, as many values as there
  are non-missing entries, taken from the non-missing entries of `col`.  A
  second round draws one value per missing index, again with replacement,
  from the first round's sample.  The second-round draws are handed to
  `replace-missing-with-value`.

  When nothing is missing, or when there are no non-missing values to sample
  from, `col` is returned unchanged."
  [col ^RoaringBitmap missing]
  (let [n-elems (long (dtype/ecount col))
        ;; indexes in [0, n-elems) that are not flagged as missing
        ^RoaringBitmap non-missing (doto (RoaringBitmap.)
                                     (.add 0 n-elems)
                                     (.andNot missing))
        non-missing-cnt (.getCardinality non-missing)
        missing-cnt (.getCardinality missing)]
    (if (or (zero? missing-cnt) (zero? non-missing-cnt))
      ;; Nothing to impute, or nothing to draw from: without this guard the
      ;; second round would index position 0 of an empty first-round sample.
      col
      (let [;; round 1: bootstrap `non-missing-cnt` values from the column;
            ;; `.select` maps a rank in [0, non-missing-cnt) to a row index
            samples1 (col/select col (repeatedly non-missing-cnt
                                                 #(.select non-missing (rand-int non-missing-cnt))))
            samples1-cnt (dtype/ecount samples1)
            ;; round 2: bootstrap `missing-cnt` imputation values from round 1
            samples2 (col/select samples1 (repeatedly missing-cnt
                                                      #(rand-int samples1-cnt)))]
        (replace-missing-with-value col missing samples2)))))

;; mid and range

(defn- find-missing-ranges
Expand Down Expand Up @@ -198,6 +210,7 @@
[missing-direction-prev missing-direction-next] col missing value)
:up (replace-missing-with-direction
[missing-direction-next missing-direction-prev] col missing value)
:abb (replace-missing-with-abb col missing)
:lerp (replace-missing-with-lerp :lerp col missing)
:nearest (replace-missing-with-nearest col missing)
:midpoint (replace-missing-with-lerp :midpoint col missing)
Expand All @@ -221,6 +234,7 @@
- `:nearest` - Use nearest of next or previous values. `:mid` is an alias for `:nearest`.
- `:midpoint` - Use midpoint of averaged values between previous and next nonmissing
rows.
- `:abb` - Impute missing values using the approximate Bayesian bootstrap. See [R's ABB](https://search.r-project.org/CRAN/refmans/LaplacesDemon/html/ABB.html).
- `:lerp` - Linearly interpolate values between previous and next nonmissing rows.
- `:value` - Value will be provided - see below.
Expand Down
9 changes: 9 additions & 0 deletions test/tech/v3/dataset_test.clj
Expand Up @@ -864,6 +864,15 @@
(java.time.LocalDateTime/of 2020 10 1 1 1 1)]))))


(deftest replace-missing-abb
  ;; Both columns contain runs of nils; after replacing with the `:abb`
  ;; strategy the dataset should report no missing entries at all.
  (let [sparse-ds (ds/->dataset {:a [nil nil nil 1.0 2 nil nil nil
                                     nil nil 4 nil 11 nil nil]
                                 :b [2 2 2 nil nil nil nil nil
                                     nil 13 nil 3 4 5 5]})
        missing-after (-> sparse-ds
                          (ds/replace-missing :abb)
                          (ds/missing))]
    (is (= 0 (dtype/ecount missing-after)))))


(deftest dataset-column-nippy
(let [ds (ds/->dataset {:a [1 2]
:datasets [(ds/->dataset [{:a 1}])
Expand Down

0 comments on commit 7ae0c78

Please sign in to comment.