Skip to content

Commit

Permalink
More missing numpy and matlab (#649)
Browse files Browse the repository at this point in the history
* Add a `union` procedure

`union` returns the unique, unsorted Tensor of values that are found in either of the two input Tensors.

Note that an equivalent function exists both in `numpy` (where it is called `union1d`) and in `Matlab` (where it is called `union`). However, those functions always sort the output, while Arraymancer's version does not. To replicate the same behavior, simply apply `sort` to the output of this function.

* Add a version of `toTensor` that takes SomeSet as its input

This will let us avoid having to convert HashSets into seqs before converting them into tensors.

Note that this also improves a little the docstrings of a couple of the existing `toTensor` procedures.

* Add an `intersection` procedure

`intersection` returns the "intersection" of 2 Tensors as an unsorted rank-1 Tensor.

Note that an equivalent function exists both in `numpy` (where it is called `intersect1d`) and in `Matlab` (where it is called `intersect`). However, those functions always sort the output, while Arraymancer's version does not. To replicate the same behavior, simply apply `sort` to the output of this function.

Also note that to implement this feature we moved (and made public) the existing, private toHashSet procedure from spatial/distances.nim into tensor/initialization.nim.

* Add a `setDiff` procedure

`setDiff` returns the (symmetric or non symmetric) "difference" between 2 Tensors as an unsorted rank-1 Tensor.

Note that an equivalent function exists both in `numpy` (where it is called `setdiff1d`) and in `Matlab` (where it is called `setdiff`). However, those functions always sort the output, while Arraymancer's version does not. To replicate the same behavior, simply apply `sort` to the output of this function.

* Add a `contains` function (and thus add support for `in` and `notin`)

`find` (which is used to implement `contains`) was already supported (since `system.find` is generic and works with Tensors) but was untested, so this also adds a test for it.

* Add support for `almostEqual`

This was a useful std/math function that we did not support yet.

* Update src/arraymancer/laser/tensor/initialization.nim

Fix typo in export comment & add alternative for reader

---------

Co-authored-by: Vindaar <basti90@gmail.com>
  • Loading branch information
AngelEzquerra and Vindaar committed May 12, 2024
1 parent b202709 commit 2c4f2cd
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 16 deletions.
37 changes: 33 additions & 4 deletions src/arraymancer/laser/tensor/initialization.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ import
../private/nested_containers,
./datatypes
# Standard library
import std / [typetraits, sequtils]
import std / [typetraits, sequtils, sets]

# The following export is needed to avoid a compilation error in
# algorithms.nim/intersection() when running the test_algorithms test:
# `Error: type mismatch - Expression: items(s1)`
# (Alternative: could use `bind sets.items` in `intersection` and `setDiff`)
export sets

# Third-party
import nimblas

Expand Down Expand Up @@ -210,11 +217,11 @@ proc newTensor*[T](shape: Metadata): Tensor[T] =

proc toTensor[T](a: openArray[T], shape: Metadata): Tensor[T] =
## Convert an openArray to a Tensor
##
## Input:
## - An array or a seq, must be flattened. Called by `toTensor` below.
## Result:
## - A Tensor of the same shape
##
var data = @a
if unlikely(shape.product != data.len):
raise newException(
Expand All @@ -235,13 +242,14 @@ proc toTensor[T](a: openArray[T], shape: Metadata): Tensor[T] =
shallowCopy(result.storage.raw_buffer, data)

proc toTensor*[T](a: openArray[T]): auto =
## Convert an openArray to a Tensor
## Convert an openArray into a Tensor
##
## Input:
## - An array or a seq (can be nested)
## Result:
## - A Tensor of the same shape
##
# Note: we removed the dummy static bugfixe related to Nim issue
# Note: we removed the dummy static bugfix related to Nim issue
# https://github.com/nim-lang/Nim/issues/6343
# motivated by
# https://github.com/nim-lang/Nim/issues/20993
Expand All @@ -250,6 +258,18 @@ proc toTensor*[T](a: openArray[T]): auto =
let data = toSeq(flatIter(a))
result = toTensor(data, shape)

proc toTensor*[T](a: SomeSet[T]): auto =
  ## Convert a HashSet or an OrderedSet into a rank-1 Tensor
  ##
  ## Input:
  ##      - A HashSet or an OrderedSet
  ## Result:
  ##      - A rank-1 Tensor with one element per item in the input set
  ##
  ## Notes:
  ##      - The iteration order of a `HashSet` is undefined, so the order of
  ##        the elements in the resulting Tensor is unspecified in that case.
  ##        For an `OrderedSet` the insertion order is preserved.
  # Use the same `Metadata` spelling as the rest of this module
  # (Nim identifiers are style-insensitive, but consistency helps readers)
  var shape = Metadata()
  shape.add(a.len)
  let data = toSeq(a)
  result = toTensor(data, shape)

proc fromBuffer*[T](rawBuffer: ptr UncheckedArray[T], shape: varargs[int], layout: static OrderType): Tensor[T] =
## Creates a `Tensor[T]` from a raw buffer, cast as `ptr UncheckedArray[T]`. The
## size derived from the given shape must match the size of the buffer!
Expand Down Expand Up @@ -288,6 +308,15 @@ func toUnsafeView*[T: KnownSupportsCopyMem](t: Tensor[T], aligned: static bool =
## Unsafe: the pointer can outlive the input tensor.
unsafe_raw_offset(t, aligned).distinctBase()

proc toHashSet*[T](t: Tensor[T]): HashSet[T] =
  ## Convert a Tensor into a `HashSet`
  ##
  ## Note that this is a lossy operation: a `HashSet` keeps a single copy of
  ## each distinct value and does not preserve the element order.
  # Pre-size the set to the tensor size to avoid rehashing while filling it
  result = initHashSet[T](t.size)
  for element in t.items:
    result.incl(element)

func item*[T_IN, T_OUT](t: Tensor[T_IN], _: typedesc[T_OUT]): T_OUT =
## Returns the value of the input Tensor as a scalar of the selected type.
## This only works for Tensors (of any rank) that contain one single element.
Expand Down
9 changes: 0 additions & 9 deletions src/arraymancer/spatial/distances.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,6 @@ type

AnyMetric* = Euclidean | Manhattan | Minkowski | Jaccard | CustomMetric

when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
# have to export sets for 1.0, because `bind` didn't exist apparently
export sets

proc toHashSet[T](t: Tensor[T]): HashSet[T] =
result = initHashSet[T](t.size)
for x in t:
result.incl x

proc distance*(metric: typedesc[Manhattan], v, w: Tensor[float]): float =
## Computes the Manhattan distance between points `v` and `w`. Both need to
## be rank 1 tensors with `k` elements, where `k` is the dimensionality
Expand Down
130 changes: 127 additions & 3 deletions src/arraymancer/tensor/algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import ./data_structure,
./init_cpu,
./init_copy_cpu

import std / [algorithm, sequtils]
import std / [algorithm, sequtils, sets]
export SortOrder

proc sort*[T](t: var Tensor[T], order = SortOrder.Ascending) =
Expand Down Expand Up @@ -109,10 +109,12 @@ proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
##
## Inputs:
## - t: The input Tensor
## - order: The order in which elements are sorted (`SortOrder.Ascending` or `SortOrder.Descending`)
## - order: The order in which elements are sorted (`SortOrder.Ascending`
## or `SortOrder.Descending`)
##
## Result:
## - A new Tensor with the unique elements of the input Tensor sorted in the specified order.
## - A new Tensor with the unique elements of the input Tensor sorted in
## the specified order.
##
## Examples:
## ```nim
Expand All @@ -134,3 +136,125 @@ proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
# We need to clone the tensor in order to make it C continuous
# and then we can make it unique assuming that it is already sorted
sorted(t, order = order).unique(isSorted = true)

proc union*[T](t1, t2: Tensor[T]): Tensor[T] =
  ## Return the unsorted "union" of two Tensors as a rank-1 Tensor
  ##
  ## The result contains each value that appears in at least one of the two
  ## input Tensors, without duplicates and in no particular order.
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ##
  ## Result:
  ## - A rank-1 Tensor containing the (unsorted) union of the two input Tensors.
  ##
  ## Notes:
  ## - The equivalent `numpy` function is called `union1d`, while the
  ##   equivalent `Matlab` function is called `union`. However, both of
  ##   those functions always sort the output. To replicate the same
  ##   behavior, simply apply `sort` to the output of this function.
  ##
  ## Example:
  ## ```nim
  ## let t1 = [3, 1, 3, 2, 1, 0].toTensor
  ## let t2 = [4, 2, 2, 3].toTensor
  ## echo union(t1, t2)
  ## # Tensor[system.int] of shape "[5]" on backend "Cpu"
  ## #     3     1     2     0     4
  ## ```
  # Stack both inputs into a single rank-1 tensor, then drop duplicates
  let stacked = concat([t1, t2], axis = 0)
  result = stacked.unique()

proc intersection*[T](t1, t2: Tensor[T]): Tensor[T] =
  ## Return the "intersection" of 2 Tensors as an unsorted rank-1 Tensor
  ##
  ## The result contains the unique values that are present in *both* input
  ## Tensors, in no particular order.
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ##
  ## Result:
  ## - An unsorted rank-1 Tensor containing the intersection of
  ##   the input Tensors.
  ##
  ## Note:
  ## - The equivalent `numpy` function is called `intersect1d`, while the
  ##   equivalent `Matlab` function is called `intersect`. However, both of
  ##   those functions always sort the output. To replicate the same
  ##   behavior, simply apply `sort` to the output of this function.
  ##
  ## Example:
  ## ```nim
  ## let t1 = arange(0, 5)
  ## let t2 = arange(3, 8)
  ##
  ## echo intersection(t1, t2)
  ## # Tensor[system.int] of shape "[3]" on backend "Cpu"
  ## #     4     3
  ## ```
  # `*` is std/sets' intersection operator
  result = (toHashSet(t1) * toHashSet(t2)).toTensor

proc setDiff*[T](t1, t2: Tensor[T], symmetric = false): Tensor[T] =
  ## Return the (symmetric or non symmetric) "difference" between 2 Tensors as an unsorted rank-1 Tensor
  ##
  ## By default (i.e. when `symmetric` is `false`) return all the elements in
  ## `t1` that are ``not`` found in `t2`.
  ##
  ## If `symmetric` is true, the "symmetric" difference of the Tensors is
  ## returned instead, i.e. the elements which are either not in `t1` ``or``
  ## not in `t2`.
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ## - symmetric: Whether to return a symmetric or non symmetric difference.
  ##   Defaults to `false`.
  ##
  ## Result:
  ## - An unsorted rank-1 Tensor containing the selected "difference" between
  ##   the input Tensors.
  ##
  ## Note:
  ## - The equivalent `numpy` function is called `setdiff1d`, while the
  ##   equivalent `Matlab` function is called `setdiff`. However, both of
  ##   those functions always sort the output. To replicate the same
  ##   behavior, simply apply `sort` to the output of this function.
  ##
  ## Examples:
  ## ```nim
  ## let t1 = arange(0, 5)
  ## let t2 = arange(3, 8)
  ##
  ## echo setDiff(t1, t2)
  ## # Tensor[system.int] of shape "[3]" on backend "Cpu"
  ## #     2     1     0
  ##
  ## echo setDiff(t1, t2, symmetric = true)
  ## # Tensor[system.int] of shape "[6]" on backend "Cpu"
  ## #     5     2     6     1     7     0
  ## ```
  let
    s1 = toHashSet(t1)
    s2 = toHashSet(t2)
  # `-+-` is std/sets' symmetric difference operator
  if symmetric:
    result = (s1 -+- s2).toTensor
  else:
    result = difference(s1, s2).toTensor

proc contains*[T](t: Tensor[T], item: T): bool {.inline.}=
  ## Returns true if `item` is in the input Tensor `t` or false if not found.
  ## This is a shortcut for `find(t, item) >= 0`.
  ##
  ## This allows the `in` and `notin` operators, i.e.:
  ## `t.contains(item)` is the same as `item in t`.
  ##
  ## Examples:
  ## ```nim
  ## var t = [1, 3, 5].toTensor
  ## assert t.contains(5)
  ## assert 3 in t
  ## assert 99 notin t
  ## ```
  return find(t, item) >= 0

proc ismember*[T](t1, t2: Tensor[T]): Tensor[bool] {.noinit.} =
  ## Element-wise check of whether each element of `t1` is present in `t2`
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ##
  ## Result:
  ## - A rank-1 boolean Tensor with `t1.len` elements, where each element
  ##   indicates whether the corresponding element of `t1` (in iteration
  ##   order) is found anywhere in `t2`.
  ##
  ## Note:
  ## - The equivalent `numpy` function is called `isin`, while the
  ##   equivalent `Matlab` function is called `ismember`.
  # Build a HashSet of `t2` once, so that each membership check is O(1) on
  # average instead of scanning `t2` linearly for every element of `t1`
  # (which would be O(t1.len * t2.len)). This matches the hashability
  # requirement that `intersection` and `setDiff` already place on `T`.
  let lookup = toHashSet(t2)
  result = newTensor[bool](t1.len)
  for n, it in t1.enumerate():
    result[n] = it in lookup
34 changes: 34 additions & 0 deletions src/arraymancer/tensor/math_functions.nim
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,40 @@ proc classify*[T: SomeFloat](t: Tensor[T]): Tensor[FloatClass] {.noinit.} =
## - fcNegInf: value is negative infinity
t.map_inline(classify(x))

proc almostEqual*[T: SomeFloat | Complex32 | Complex64](t1, t2: Tensor[T],
    unitsInLastPlace: Natural = 4): Tensor[bool] {.noinit.} =
  ## Element-wise almostEqual function
  ##
  ## Compares the elements of two tensors pair by pair, marking each pair as
  ## "almost equal" based on the
  ## [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon).
  ##
  ## For more details check the section covering the `almostEqual` procedure in
  ## nim's standard library documentation.
  ##
  ## Inputs:
  ## - t1, t2: Input (floating point or complex) tensors of the same shape.
  ## - unitsInLastPlace: The max number of
  ##   [units in the last place](https://en.wikipedia.org/wiki/Unit_in_the_last_place)
  ##   difference tolerated when comparing two numbers. The
  ##   larger the value, the more error is allowed. A `0`
  ##   value means that two numbers must be exactly the
  ##   same to be considered equal.
  ##
  ## Result:
  ## - A new boolean tensor of the same shape as the inputs, in which elements
  ##   are true if the two values in the same position on the two input tensors
  ##   are almost equal (and false if they are not).
  ##
  ## Note:
  ## - You can combine this function with `all` to check if two real tensors
  ##   are almost equal.
  map2_inline(t1, t2):
    when T is SomeFloat:
      almostEqual(x, y, unitsInLastPlace = unitsInLastPlace)
    else:
      # Two complex values are almost equal when both their real and their
      # imaginary parts are almost equal
      almostEqual(x.re, y.re, unitsInLastPlace = unitsInLastPlace) and
        almostEqual(x.im, y.im, unitsInLastPlace = unitsInLastPlace)

type ConvolveMode* = enum full, same, valid

proc convolveImpl[T: SomeNumber | Complex32 | Complex64](
Expand Down
33 changes: 33 additions & 0 deletions tests/tensor/test_algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,36 @@ suite "[Core] Testing algorithm functions":
check unique_sorted_descending == [8, 4, 3, 2, 1].toTensor
check unique_not_c_continuous == [1, 2, 4].toTensor
check unique_sorted_not_c_continuous == [4, 2, 1].toTensor

test "Union":
  # union must deduplicate and combine both inputs; sort to make the
  # comparison order-independent
  let ta = [3, 1, 3, 2, 1, 0].toTensor
  let tb = [4, 2, 2, 3].toTensor
  check: sorted(union(ta, tb)) == [0, 1, 2, 3, 4].toTensor

test "Intersection":
  # only the values present in both inputs should remain; sort to make the
  # comparison order-independent
  let ta = [3, 1, 3, 2, 1, 0].toTensor
  let tb = [4, 2, 2, 3].toTensor
  check: sorted(intersection(ta, tb)) == [2, 3].toTensor

test "setDiff":
  let lhs = arange(0, 5)
  let rhs = arange(3, 8)

  # non-symmetric: elements of lhs that are not in rhs
  check: sorted(setDiff(lhs, rhs)) == [0, 1, 2].toTensor
  # symmetric: elements found in exactly one of the two inputs
  check: sorted(setDiff(lhs, rhs, symmetric = true)) == [0, 1, 2, 5, 6, 7].toTensor

test "Find and Contains":
  let t = arange(-2, 5)

  block:
    check: t.find(3) == 5
    check: t.find(-6) == -1

  block:
    check: 3 in t
    # Parenthesize explicitly: `3 notin t == false` relies on the
    # left-associativity of same-precedence operators (`in`/`notin` and
    # `==` share a precedence level in Nim), which is fragile and hard
    # to read.
    check: (3 notin t) == false
    check: (-6 in t) == false
    check: -6 notin t
18 changes: 18 additions & 0 deletions tests/tensor/test_math_functions.nim
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,24 @@ proc main() =
check: expected_isNaN == a.isNaN
check: expected_classification == a.classify

test "almostEqual":
  block: # Real-valued tensors
    let base = arange(1.0, 5.0)
    # a clone must compare almost equal to the original
    check: all(almostEqual(base, base.clone()))
    # perturb one element by a couple of ULPs
    var perturbed = base.clone()
    perturbed[0] += 2e-15
    check: almostEqual(base, perturbed) == [false, true, true, true].toTensor()
    # a larger ULP tolerance makes the perturbed tensor almost equal again
    check: all(almostEqual(base, perturbed, unitsInLastPlace = 5))
  block: # Complex-valued tensors
    let base = complex(arange(1.0, 5.0), arange(1.0, 5.0))
    check: all(almostEqual(base, base.clone()))
    var perturbed = base.clone()
    perturbed[0] += complex(2e-15)
    check: almostEqual(base, perturbed) == [false, true, true, true].toTensor()
    check: all(almostEqual(base, perturbed, unitsInLastPlace = 5))

test "1-D convolution":
block:
let a = arange(4)
Expand Down

0 comments on commit 2c4f2cd

Please sign in to comment.