Skip to content

Commit

Permalink
Slightly faster intlist
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Mar 11, 2024
1 parent 0a2b29c commit b1c380d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Changelog
# 7.029
* large parquet files can be read without OOM exceptions.

# 7.028
* [issue 400](https://github.com/techascent/tech.ml.dataset/issues/400) - CSV parser issue and upgrade.
* [issue 401](https://github.com/techascent/tech.ml.dataset/issues/401) - parquet file failed to parse - missing columns.
Expand Down
2 changes: 1 addition & 1 deletion deps.edn
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{:paths ["src" "resources" "target/classes"]
:deps {;;org.clojure/clojure {:mvn/version "1.11.1"}
cnuernber/dtype-next {:mvn/version "10.111"}
cnuernber/dtype-next {:mvn/version "10.112"}
techascent/tech.io {:mvn/version "4.31"
:exclusions [org.apache.commons/commons-compress]}
org.apache.datasketches/datasketches-java {:mvn/version "4.2.0"}
Expand Down
12 changes: 12 additions & 0 deletions java/tech/v3/dataset/IntRanges.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package tech.v3.dataset;



/**
 * Static range predicates that report whether a {@code long} value can be
 * represented by a narrower signed integer type without losing information.
 */
public class IntRanges {
    /**
     * Tests whether {@code v} lies within the inclusive range
     * [{@link Byte#MIN_VALUE}, {@link Byte#MAX_VALUE}].
     *
     * @param v value to test
     * @return {@code true} when {@code v} fits in a signed 8-bit integer
     */
    public static boolean byteRange(long v) {
        final boolean aboveFloor = Byte.MIN_VALUE <= v;
        final boolean belowCeiling = v <= Byte.MAX_VALUE;
        return aboveFloor && belowCeiling;
    }

    /**
     * Tests whether {@code v} lies within the inclusive range
     * [{@link Short#MIN_VALUE}, {@link Short#MAX_VALUE}].
     *
     * @param v value to test
     * @return {@code true} when {@code v} fits in a signed 16-bit integer
     */
    public static boolean shortRange(long v) {
        // Narrowing to short keeps only the low 16 bits; widening back to long
        // sign-extends, so the round trip reproduces v exactly when v is in range.
        final long roundTripped = (short) v;
        return roundTripped == v;
    }
}
20 changes: 5 additions & 15 deletions src/tech/v3/dataset/dynamic_int_list.clj
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,13 @@
[tech.v3.parallel.for :as parallel-for]
[clj-commons.primitive-math :as pmath])
(:import [tech.v3.datatype LongBuffer]
[tech.v3.dataset IntRanges]
[ham_fisted IMutList]))

(set! *warn-on-reflection* true)
(set! *unchecked-math* :warn-on-boxed)


;; Private macro: expands to an inclusive bounds check of `number` against
;; Byte/MIN_VALUE and Byte/MAX_VALUE, yielding a truthy value only when the
;; number lies within that range.
;; The `number` form is spliced into the expansion twice, so pass a simple
;; symbol or literal; an arbitrary expression may be evaluated more than once.
(defmacro ^:private byte-range?
[number]
`(and (<= ~number Byte/MAX_VALUE)
(>= ~number Byte/MIN_VALUE)))


;; Private macro: expands to an inclusive bounds check of `number` against
;; Short/MIN_VALUE and Short/MAX_VALUE, yielding a truthy value only when the
;; number lies within that range.
;; The `number` form is spliced into the expansion twice, so pass a simple
;; symbol or literal; an arbitrary expression may be evaluated more than once.
(defmacro ^:private short-range?
[number]
`(and (<= ~number Short/MAX_VALUE)
(>= ~number Short/MIN_VALUE)))


(deftype DynamicIntList [^:unsynchronized-mutable ^IMutList backing-store
^:unsynchronized-mutable ^long int-width]
Expand All @@ -53,9 +43,9 @@
(addLong [_this value]
;;perform container conversion
(cond
(byte-range? value)
(IntRanges/byteRange value)
nil
(short-range? value)
(IntRanges/shortRange value)
(when (pmath/< int-width 16)
(set! backing-store (dtype/make-list :int16 backing-store))
(set! int-width 16))
Expand All @@ -73,9 +63,9 @@
(writeLong [this idx value]
(locking this
(cond
(byte-range? value)
(IntRanges/byteRange value)
nil
(short-range? value)
(IntRanges/shortRange value)
(when (pmath/< int-width 16)
(set! backing-store (dtype/make-list :int16 backing-store))
(set! int-width 16))
Expand Down

0 comments on commit b1c380d

Please sign in to comment.