Skip to content

Commit

Permalink
Fixes #259, Fixes #311
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Jul 21, 2022
1 parent 9cef4dc commit aa36d63
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# Changelog
# 8.093
* Fixes for issue [259](https://github.com/techascent/tech.ml.dataset/issues/259/), which is the same as new issue 311. `:key-fn` should only be applied once per column and does not have to
be idempotent.

# 8.092
* Fixes for issues [312](https://github.com/techascent/tech.ml.dataset/issues/312/), [315](https://github.com/techascent/tech.ml.dataset/issues/315/), [316](https://github.com/techascent/tech.ml.dataset/issues/316/)

Expand Down
3 changes: 2 additions & 1 deletion src/tech/v3/dataset/io/context.clj
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@
(mapv (fn [{:keys [column-name column-parser]}]
(assoc (column-parsers/finalize! column-parser row-count)
:tech.v3.dataset/name column-name)))
(ds-impl/new-dataset options))))
;;key-fn has already been applied
(ds-impl/new-dataset (assoc options :key-fn nil)))))
([options parsers]
(parsers->dataset options parsers
(apply max 0 (map (comp dtype/ecount :column-parser)
Expand Down
3 changes: 2 additions & 1 deletion src/tech/v3/dataset/io/mapseq_colmap.clj
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
(recur (.hasNext iter)
(unchecked-inc row-idx)))
row-idx))]
(parse-context/parsers->dataset options parsers n-rows)))
;;key-fn has already been applied
(parse-context/parsers->dataset (assoc options :key-fn nil) parsers n-rows)))
([mapseq]
(mapseq->dataset {} mapseq)))

Expand Down
18 changes: 18 additions & 0 deletions test/tech/v3/dataset_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,24 @@
(ds/drop-rows (ds/->dataset [{:a 1 :c3 2}]) [0]))))))


(deftest issue-259
  ;; Regression test: a user-supplied :key-fn must yield the expected column
  ;; names for every supported input shape (sequence of maps, map of columns,
  ;; CSV stream). Both key-fns below take a string and return a keyword, so
  ;; they are not idempotent.
  (let [column-names (fn [dataset]
                       (set (map (comp :name meta) (vals dataset))))
        spaces->dashes (fn [k]
                         (keyword (clojure.string/replace k " " "-")))
        lower-case-kw (fn [k]
                        (keyword (.toLowerCase k)))
        csv-stream (fn [text]
                     (java.io.ByteArrayInputStream. (.getBytes text)))]
    ;; sequence-of-maps input
    (is (= #{:b-o :a-o}
           (column-names
            (ds/->dataset [{"a o" 1 "b o" 2} {"a o" 5 "b o" 3}]
                          {:key-fn spaces->dashes}))))
    ;; map-of-columns input
    (is (= #{:b-o :a-o}
           (column-names
            (ds/->dataset {"a o" [1 5] "b o" [2 3]}
                          {:key-fn spaces->dashes}))))
    ;; single-row sequence of maps with a case-folding key-fn
    (is (= #{:foo :bar}
           (column-names
            (ds/->dataset [{"Foo" 1 , "Bar" 2}]
                          {:key-fn lower-case-kw}))))
    ;; CSV read from an in-memory stream
    (is (= #{:foo :bar}
           (column-names
            (ds/->dataset (csv-stream "Foo,Bar\n1,2")
                          {:key-fn lower-case-kw
                           :file-type :csv}))))))


(comment

(def test-ds (ds/->dataset
Expand Down

0 comments on commit aa36d63

Please sign in to comment.