Skip to content

Commit

Permalink
row-map
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Aug 21, 2021
1 parent facb7ab commit c4073d0
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## 6.015
* row-map - map a function across the rows of the dataset (each row is represented as a map).
The function should itself return a map for each row; the dataset created from these maps is
merged back into the original dataset.

## 6.014
* tech.v3.dataset.reductions/group-by-column-agg can take a tuple of column names in addition
to a single column name. In the case of a tuple the grouping will be the vector of column
Expand Down
51 changes: 51 additions & 0 deletions src/tech/v3/dataset.clj
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,17 @@
((rows ds) idx))


(defn rowvecs
"Return a randomly addressable list of rows in persistent vector-like form.
Delegates directly to `value-reader` on `ds`."
[ds]
(value-reader ds))


(defn rowvec-at
"Return the row at index `idx` of `ds` in the vector-like form produced by
[[rowvecs]]. The row list is obtained via `(rowvecs ds)` and then invoked
with `idx`."
[ds idx]
((rowvecs ds) idx))


(export-symbols tech.v3.dataset.io
->dataset
->>dataset
Expand Down Expand Up @@ -539,6 +550,46 @@ _unnamed [3 3]:
~src-colnames)))


(defn row-map
  "Apply `map-fn` to every row of `ds` (each row is presented as a map), build a
  dataset out of the returned maps and merge that dataset back into `ds`.  Columns
  produced by `map-fn` that share a name with an existing column replace it; new
  names are added as new columns.

  `options` is optional and is handed to [[->>dataset]] when the intermediate dataset
  is constructed, so the usual parsing options described for [[->dataset]] can be
  used to control the resulting column types.

  Examples:

```clojure
user> (def stocks (ds/->dataset \"test/data/stocks.csv\"))
#'user/stocks
user> (ds/head stocks)
test/data/stocks.csv [5 3]:
| symbol |       date | price |
|--------|------------|------:|
|   MSFT | 2000-01-01 | 39.81 |
|   MSFT | 2000-02-01 | 36.35 |
|   MSFT | 2000-03-01 | 43.22 |
|   MSFT | 2000-04-01 | 28.37 |
|   MSFT | 2000-05-01 | 25.45 |
user> (ds/head (ds/row-map stocks (fn [row]
                                    {\"symbol\" (keyword (row \"symbol\"))
                                     :price2 (* (row \"price\")(row \"price\"))})))
test/data/stocks.csv [5 4]:
| symbol |       date | price |   :price2 |
|--------|------------|------:|----------:|
|  :MSFT | 2000-01-01 | 39.81 | 1584.8361 |
|  :MSFT | 2000-02-01 | 36.35 | 1321.3225 |
|  :MSFT | 2000-03-01 | 43.22 | 1867.9684 |
|  :MSFT | 2000-04-01 | 28.37 |  804.8569 |
|  :MSFT | 2000-05-01 | 25.45 |  647.7025 |
```"
  [ds map-fn & [options]]
  (let [;; Lazy sequence of the maps returned by map-fn, one per input row.
        mapped-rows (map map-fn (rows ds))
        ;; Dataset holding only the columns produced by map-fn.
        mapped-ds   (->>dataset options mapped-rows)]
    ;; Right-hand side wins on column-name collisions.
    (merge ds mapped-ds)))


(defn column-cast
"Cast a column to a new datatype. This is never a lazy operation. If the old
and new datatypes match and no cast-fn is provided then dtype/clone is called
Expand Down
10 changes: 4 additions & 6 deletions src/tech/v3/dataset/io.clj
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,10 @@
(data->dataset dataset options))
(instance? Map dataset)
(parse-mapseq-colmap/column-map->dataset options dataset)
;;Not everything has a conversion to seq.
(instance? Map (try (first (seq dataset))
(catch Throwable e nil)))
(parse-mapseq-colmap/mapseq->dataset options dataset)
(nil? (seq dataset))
(ds-impl/new-dataset options nil))]
:else
(if (nil? (seq dataset))
(ds-impl/new-dataset options nil)
(parse-mapseq-colmap/mapseq->dataset options dataset)))]
(if dataset-name
(ds-proto/set-dataset-name dataset dataset-name)
dataset)))
Expand Down
8 changes: 8 additions & 0 deletions test/tech/v3/dataset_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -1272,6 +1272,14 @@
(is (#{:local-date :packed-local-date} (dtype/elemwise-datatype (res-cp :a))))))


(deftest row-map-test
  (let [ds (ds/->dataset "test/data/stocks.csv")
        ;; Builds a row function that squares the value found under `colname`
        ;; and returns it under :price2.
        square-of (fn [colname]
                    (fn [row]
                      (hash-map :price2 (* (row colname) (row colname)))))]
    ;; Looking the column up by keyword instead of string makes the row function throw.
    (is (thrown? Exception (ds/row-map ds (square-of :price))))
    ;; With the correct string column name the new column equals price squared.
    (let [mapped (ds/row-map ds (square-of "price"))]
      (is (dfn/equals (dfn/sq (ds "price"))
                      (ds/column mapped :price2))))))


(comment

(def test-ds (ds/->dataset
Expand Down

0 comments on commit c4073d0

Please sign in to comment.