Skip to content

Commit

Permalink
Generic datasets are types now.
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Aug 22, 2019
1 parent 4b461c7 commit f0a11cc
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 31 deletions.
26 changes: 18 additions & 8 deletions src/tech/libs/tablesaw.clj
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
NumericColumn DoubleColumn
StringColumn BooleanColumn]
[tech.tablesaw.columns Column]
[tech.tablesaw.io.csv CsvReadOptions]
[tech.tablesaw.io.csv CsvReadOptions
CsvReadOptions$Builder]
[java.util UUID]
[java.io InputStream]
[org.apache.commons.math3.stat.descriptive.moment Skewness]))


Expand Down Expand Up @@ -196,13 +198,21 @@


(defn ^tech.tablesaw.io.csv.CsvReadOptions$Builder
  ->csv-builder
  "Create a tablesaw CsvReadOptions builder for `path`.

  `path` must be either a String or a java.io.InputStream; any other value
  throws an ex-info whose data map records the offending value and its type.

  Keyword options:
    :separator   - when truthy, handed to the builder's `.separator`;
                   otherwise the builder's own default is left untouched.
    :header?     - coerced with `boolean` and handed to `.header`, so an
                   omitted value configures header=false.
    :date-format - accepted so existing call sites keep working, but it is
                   not applied to the builder here.
                   NOTE(review): confirm whether it should be."
  [path & {:keys [separator header? date-format]}]
  (let [^CsvReadOptions$Builder builder
        (cond
          (instance? InputStream path)
          (CsvReadOptions/builder ^InputStream path)

          (string? path)
          (CsvReadOptions/builder ^String path)

          :else
          ;; Carry the bad input in ex-data so the failure is diagnosable.
          (throw (ex-info "Failed to make builder"
                          {:path path
                           :path-type (type path)})))]
    ;; Separator is optional; header is always set. Order matches the
    ;; original (separator first, then header).
    (when separator
      (.separator builder separator))
    (doto builder
      (.header (boolean header?)))))


(defn tablesaw-columns->tablesaw-dataset
Expand Down
45 changes: 22 additions & 23 deletions src/tech/ml/dataset/generic_columnar_dataset.clj
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@
(declare make-dataset)


(defrecord GenericColumnarDataset [table-name
column-names
colmap
metadata]
(deftype GenericColumnarDataset [table-name
column-names
colmap
metadata]
ds-proto/PColumnarDataset
(dataset-name [dataset] table-name)
(maybe-column [dataset column-name]
(get colmap column-name))

(metadata [dataset] metadata)
(set-metadata [dataset meta-map]
(->GenericColumnarDataset table-name column-names colmap
meta-map))
(GenericColumnarDataset. table-name column-names colmap
meta-map))

(columns [dataset] (mapv (partial get colmap) column-names))

Expand Down Expand Up @@ -56,7 +56,7 @@
:col-names (keys colmap)})))
(let [col (get colmap col-name)
new-col-data (col-fn col)]
(->GenericColumnarDataset
(GenericColumnarDataset.
table-name
column-names
(assoc colmap col-name
Expand Down Expand Up @@ -126,23 +126,22 @@
dtype-proto/PCopyRawData
(copy-raw->item! [raw-data ary-target target-offset options]
(dtype-proto/copy-raw->item! (ds/columns raw-data) ary-target
target-offset options)))
target-offset options))

Object
(toString [item]
(format "%s %s:\n%s"
(ds-proto/dataset-name item)
;;make row major shape to avoid confusion
(vec (reverse (dtype/shape item)))
(ds/dataset->string item))))


(defn make-dataset
[table-name column-seq ds-metadata]
(->GenericColumnarDataset table-name
(map ds-col/column-name column-seq)
(->> column-seq
(map (juxt ds-col/column-name identity))
(into {}))
ds-metadata))


(defmethod print-method GenericColumnarDataset
  [ds w]
  ;; Writes "<dataset name> <shape>:\n<dataset string>" to the supplied writer.
  (let [ds-name (ds/dataset-name ds)
        ;; make row major shape to avoid confusion
        row-major-shape (vec (reverse (dtype/shape ds)))
        table-str (ds/dataset->string ds)
        ^String rendered (format "%s %s:\n%s" ds-name row-major-shape table-str)]
    (.write ^Writer w rendered)))
(GenericColumnarDataset. table-name
(map ds-col/column-name column-seq)
(->> column-seq
(map (juxt ds-col/column-name identity))
(into {}))
ds-metadata))

0 comments on commit f0a11cc

Please sign in to comment.