Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ name = "lance"
crate-type = ["cdylib"]

[dependencies]
arrow-array = "33.0"
arrow-data = "33.0"
arrow-schema = "33.0"
arrow-array = "37.0"
arrow-data = "37.0"
arrow-schema = "37.0"
chrono = "0.4.23"
tokio = { version = "1.23", features = ["rt-multi-thread"] }
futures = "0.3"
pyo3 = { version = "0.18.1", features = ["extension-module", "abi3-py38"] }
arrow = { version = "33.0.0", features = ["pyarrow"] }
arrow = { version = "37.0.0", features = ["pyarrow"] }
lance = { path = "../rust"}
uuid = "1.3.0"

Expand Down
20 changes: 10 additions & 10 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ categories = [

[dependencies]
bytes = "1.3"
arrow-arith = "33.0"
arrow-array = "33.0"
arrow-buffer = "33.0"
arrow-cast = "33.0.0"
arrow-data = "33.0"
arrow-ord = "33.0"
arrow-schema = "33.0"
arrow-select = "33.0"
arrow-arith = "37.0"
arrow-array = "37.0"
arrow-buffer = "37.0"
arrow-cast = "37.0.0"
arrow-data = "37.0"
arrow-ord = "37.0"
arrow-schema = "37.0"
arrow-select = "37.0"
async-recursion = "1.0"
async-trait = "0.1.60"
byteorder = "1.4.3"
Expand All @@ -51,11 +51,11 @@ futures = "0.3"
uuid = { version = "1.2", features = ["v4"] }
path-absolutize = "3.0.14"
shellexpand = "3.0.0"
arrow = { version = "33.0.0", features = ["prettyprint"] }
arrow = { version = "37.0.0", features = ["prettyprint"] }
num_cpus = "1.0"
sqlparser-lance = "0.32.0"
# TODO: use datafusion sub-modules to reduce build size?
datafusion = { version = "19.0.0", default-features = false }
datafusion = { version = "23.0.0", default-features = false }
faiss = { version = "0.11.0", features = ["gpu"], optional = true }
lapack = "0.19.0"
cblas = "0.4.0"
Expand Down
22 changes: 11 additions & 11 deletions rust/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use arrow_array::{
OffsetSizeTrait, PrimitiveArray, RecordBatch, UInt8Array,
};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType, Field, Schema};
use arrow_schema::{DataType, Field, FieldRef, Fields, Schema};

mod kernels;
pub mod linalg;
Expand Down Expand Up @@ -169,13 +169,13 @@ where
{
fn try_new<T: Array>(values: T, offsets: &PrimitiveArray<Offset>) -> Result<Self> {
let data_type = if Offset::Native::IS_LARGE {
DataType::LargeList(Box::new(Field::new(
DataType::LargeList(Arc::new(Field::new(
"item",
values.data_type().clone(),
true,
)))
} else {
DataType::List(Box::new(Field::new(
DataType::List(Arc::new(Field::new(
"item",
values.data_type().clone(),
true,
Expand Down Expand Up @@ -216,12 +216,12 @@ pub trait FixedSizeListArrayExt {
impl FixedSizeListArrayExt for FixedSizeListArray {
fn try_new<T: Array>(values: T, list_size: i32) -> Result<Self> {
let list_type = DataType::FixedSizeList(
Box::new(Field::new("item", values.data_type().clone(), true)),
Arc::new(Field::new("item", values.data_type().clone(), true)),
list_size,
);
let data = ArrayDataBuilder::new(list_type)
.len(values.len() / list_size as usize)
.add_child_data(values.data().clone())
.add_child_data(values.into_data())
.build()?;

Ok(Self::from(data))
Expand Down Expand Up @@ -261,7 +261,7 @@ impl FixedSizeBinaryArrayExt for FixedSizeBinaryArray {
let data_type = DataType::FixedSizeBinary(stride);
let data = ArrayDataBuilder::new(data_type)
.len(values.len() / stride as usize)
.add_buffer(values.data().buffers()[0].clone())
.add_buffer(values.into_data().buffers()[0].clone())
.build()?;
Ok(Self::from(data))
}
Expand Down Expand Up @@ -353,10 +353,10 @@ pub trait RecordBatchExt {

impl RecordBatchExt for RecordBatch {
fn try_with_column(&self, field: Field, arr: ArrayRef) -> Result<Self> {
let mut new_fields = self.schema().fields.clone();
new_fields.push(field);
let mut new_fields: Vec<FieldRef> = self.schema().fields.iter().cloned().collect();
new_fields.push(FieldRef::new(field));
let new_schema = Arc::new(Schema::new_with_metadata(
new_fields,
Fields::from(new_fields.as_slice()),
self.schema().metadata.clone(),
));
let mut new_columns = self.columns().to_vec();
Expand All @@ -373,9 +373,9 @@ impl RecordBatchExt for RecordBatch {
)));
}

let mut fields = self.schema().fields.clone();
let mut fields: Vec<FieldRef> = self.schema().fields.iter().cloned().collect();
let mut columns = Vec::from(self.columns());
for field in other.schema().fields.as_slice() {
for field in other.schema().fields.iter() {
if !fields.iter().any(|f| f.name() == field.name()) {
fields.push(field.clone());
columns.push(
Expand Down
44 changes: 29 additions & 15 deletions rust/src/arrow/linalg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@

use std::sync::Arc;

use arrow::{
array::{as_primitive_array, Float32Builder},
datatypes::Float32Type,
};
use arrow::array::{as_primitive_array, Float32Builder};
use arrow_array::{Array, FixedSizeListArray, Float32Array};
use arrow_schema::DataType;
use rand::{distributions::Standard, rngs::SmallRng, seq::IteratorRandom, Rng, SeedableRng};
Expand Down Expand Up @@ -254,7 +251,7 @@ impl MatrixView {
let mut builder = Float32Builder::with_capacity(n * dim);
for idx in chosen.iter() {
let s = self.data.slice(idx * dim, dim);
builder.append_slice(as_primitive_array::<Float32Type>(s.as_ref()).values());
builder.append_slice(s.values());
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is `slice` strongly typed now?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this must be part of the effort towards merging arrow-rs and arrow2?

}
let data = Arc::new(builder.finish());
Self {
Expand Down Expand Up @@ -447,13 +444,18 @@ mod tests {
-0.6525516,
0.10910681,
];
assert_relative_eq!(u.data().values(), expected_u.as_slice(), epsilon = 0.0001,);

assert_relative_eq!(
sigma.values(),
vec![27.46873242, 22.64318501, 8.55838823, 5.9857232, 2.01489966].as_slice(),
epsilon = 0.0001,
);
u.data()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we just use slice comparison here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It complains about `ScalarBuffer` not implementing `AbsDiffEq`.

.values()
.iter()
.zip(expected_u.iter())
.for_each(|(a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});

let expected = vec![27.46873242, 22.64318501, 8.55838823, 5.9857232, 2.01489966];
sigma.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});

// Obtained from `numpy.linalg.svd()`.
let expected_vt = vec![
Expand Down Expand Up @@ -483,7 +485,13 @@ mod tests {
-0.62652825,
-0.43955169,
];
assert_relative_eq!(vt.data().values(), expected_vt.as_slice(), epsilon = 0.0001,)
vt.data()
.values()
.iter()
.zip(expected_vt)
.for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
Expand All @@ -499,7 +507,10 @@ mod tests {
let b = MatrixView::new(b_data, 2);

let c = a.dot(&b).unwrap();
assert_relative_eq!(c.data.values(), vec![44.0, 50.0, 98.0, 113.0].as_slice(),);
let expected = vec![44.0, 50.0, 98.0, 113.0];
c.data.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
Expand All @@ -515,7 +526,10 @@ mod tests {
let b = MatrixView::new(b_data, 2);

let c_t = b.transpose().dot(&a.transpose()).unwrap();
assert_relative_eq!(c_t.data.values(), vec![44.0, 98.0, 50.0, 113.0].as_slice(),);
let expected = vec![44.0, 98.0, 50.0, 113.0];
c_t.data.values().iter().zip(expected).for_each(|(&a, b)| {
assert_relative_eq!(a, b, epsilon = 0.0001);
});
}

#[test]
Expand Down
6 changes: 3 additions & 3 deletions rust/src/arrow/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

//! Extension to arrow schema

use arrow_schema::{ArrowError, Field, Schema};
use arrow_schema::{ArrowError, Field, FieldRef, Schema};

/// Extends the functionality of [arrow_schema::Schema].
pub trait SchemaExt {
Expand All @@ -33,8 +33,8 @@ impl SchemaExt for Schema {
self
)));
};
let mut fields = self.fields.clone();
fields.push(field);
let mut fields: Vec<FieldRef> = self.fields().iter().cloned().collect();
fields.push(FieldRef::new(field));
Ok(Schema::new_with_metadata(fields, self.metadata.clone()))
}

Expand Down
28 changes: 14 additions & 14 deletions rust/src/datafusion/physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ impl Column {
}
}

impl PartialEq<dyn Any> for Column {
fn eq(&self, other: &dyn Any) -> bool {
other
.downcast_ref::<Self>()
.map(|x| self == x)
.unwrap_or(false)
}
}

impl PhysicalExpr for Column {
fn as_any(&self) -> &dyn Any {
self
Expand Down Expand Up @@ -89,15 +98,6 @@ impl PhysicalExpr for Column {
}
}

impl PartialEq<dyn Any> for Column {
fn eq(&self, other: &dyn Any) -> bool {
other
.downcast_ref::<Self>()
.map(|x| self == x)
.unwrap_or(false)
}
}

struct ColumnVisitor {
columns: Vec<String>,
}
Expand Down Expand Up @@ -129,7 +129,7 @@ mod tests {
use super::*;

use arrow_array::{ArrayRef, Float32Array, Int32Array, StringArray, StructArray};
use arrow_schema::Field;
use arrow_schema::{Field, Fields};

#[test]
fn test_simple_column() {
Expand All @@ -138,10 +138,10 @@ mod tests {
Field::new("s", DataType::Utf8, true),
Field::new(
"st",
DataType::Struct(vec![
DataType::Struct(Fields::from(vec![
Field::new("x", DataType::Float32, false),
Field::new("y", DataType::Float32, false),
]),
])),
true,
),
]));
Expand Down Expand Up @@ -169,10 +169,10 @@ mod tests {
Field::new("s", DataType::Utf8, true),
Field::new(
"st",
DataType::Struct(vec![
DataType::Struct(Fields::from(vec![
Field::new("x", DataType::Float32, false),
Field::new("y", DataType::Float32, false),
]),
])),
true,
),
]));
Expand Down
2 changes: 1 addition & 1 deletion rust/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,7 @@ mod tests {
let schema = Arc::new(ArrowSchema::new(vec![Field::new(
"embeddings",
DataType::FixedSizeList(
Box::new(Field::new("item", DataType::Float32, true)),
Arc::new(Field::new("item", DataType::Float32, true)),
dimension,
),
false,
Expand Down
8 changes: 4 additions & 4 deletions rust/src/dataset/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ mod test {
ArrowField::new(
"vec",
DataType::FixedSizeList(
Box::new(ArrowField::new("item", DataType::Float32, true)),
Arc::new(ArrowField::new("item", DataType::Float32, true)),
32,
),
true,
Expand Down Expand Up @@ -843,7 +843,7 @@ mod test {
ArrowField::new(
"vec",
DataType::FixedSizeList(
Box::new(ArrowField::new("item", DataType::Float32, true)),
Arc::new(ArrowField::new("item", DataType::Float32, true)),
32,
),
true,
Expand Down Expand Up @@ -894,7 +894,7 @@ mod test {
ArrowField::new(
"vec",
DataType::FixedSizeList(
Box::new(ArrowField::new("item", DataType::Float32, true)),
Arc::new(ArrowField::new("item", DataType::Float32, true)),
32,
),
true,
Expand Down Expand Up @@ -944,7 +944,7 @@ mod test {
ArrowField::new(
"vec",
DataType::FixedSizeList(
Box::new(ArrowField::new("item", DataType::Float32, true)),
Arc::new(ArrowField::new("item", DataType::Float32, true)),
32,
),
true,
Expand Down
Loading