From 2aabed1a8102bbad0833216cab576dd60121cfc8 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 28 Apr 2021 10:26:26 +0200 Subject: [PATCH 1/2] remove unneeded collect in ChunkedArray::null_bits --- polars/polars-core/src/chunked_array/mod.rs | 11 ++++------- .../polars-core/src/series/implementations/dates.rs | 2 +- polars/polars-core/src/series/implementations/mod.rs | 2 +- .../polars-core/src/series/implementations/object.rs | 2 +- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/polars/polars-core/src/chunked_array/mod.rs b/polars/polars-core/src/chunked_array/mod.rs index ff98c9e1940e..f52f12805c2d 100644 --- a/polars/polars-core/src/chunked_array/mod.rs +++ b/polars/polars-core/src/chunked_array/mod.rs @@ -182,8 +182,8 @@ impl ChunkedArray { Some(0) } else { let mut offset = 0; - for (idx, (null_count, null_bit_buffer)) in self.null_bits().iter().enumerate() { - if *null_count == 0 { + for (idx, (null_count, null_bit_buffer)) in self.null_bits().enumerate() { + if null_count == 0 { return Some(offset); } else { let arr = &self.chunks[idx]; @@ -206,11 +206,8 @@ impl ChunkedArray { } /// Get the null count and the buffer of bits representing null values - pub fn null_bits(&self) -> Vec<(usize, Option)> { - self.chunks - .iter() - .map(|arr| get_bitmap(arr.as_ref())) - .collect() + pub fn null_bits(&self) -> impl Iterator)> + '_ { + self.chunks.iter().map(|arr| get_bitmap(arr.as_ref())) } /// Unpack a Series to the same physical type. diff --git a/polars/polars-core/src/series/implementations/dates.rs b/polars/polars-core/src/series/implementations/dates.rs index fcef1540603e..57c1e068117e 100644 --- a/polars/polars-core/src/series/implementations/dates.rs +++ b/polars/polars-core/src/series/implementations/dates.rs @@ -558,7 +558,7 @@ macro_rules! 
impl_dyn_series { } fn null_bits(&self) -> Vec<(usize, Option)> { - self.0.null_bits() + self.0.null_bits().collect() } fn reverse(&self) -> Series { diff --git a/polars/polars-core/src/series/implementations/mod.rs b/polars/polars-core/src/series/implementations/mod.rs index d3bbba79943f..1d04d00e4d27 100644 --- a/polars/polars-core/src/series/implementations/mod.rs +++ b/polars/polars-core/src/series/implementations/mod.rs @@ -725,7 +725,7 @@ macro_rules! impl_dyn_series { } fn null_bits(&self) -> Vec<(usize, Option)> { - self.0.null_bits() + self.0.null_bits().collect() } fn reverse(&self) -> Series { diff --git a/polars/polars-core/src/series/implementations/object.rs b/polars/polars-core/src/series/implementations/object.rs index 45511c9b6fc6..759641f6550b 100644 --- a/polars/polars-core/src/series/implementations/object.rs +++ b/polars/polars-core/src/series/implementations/object.rs @@ -190,7 +190,7 @@ where } fn null_bits(&self) -> Vec<(usize, Option)> { - ObjectChunked::null_bits(&self.0) + ObjectChunked::null_bits(&self.0).collect() } fn reverse(&self) -> Series { From ba2e58bc512157a41f75bce932ecc642e80e1325 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 28 Apr 2021 10:50:38 +0200 Subject: [PATCH 2/2] Fix covariance of arrays with null values The covariance formula divides by 'n - 1'. For arrays with null values, 'n' should be the number of non-null values rather than the array length. --- polars/polars-core/src/functions.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/polars/polars-core/src/functions.rs b/polars/polars-core/src/functions.rs index 1cb0dc3d8931..126ed6925c19 100644 --- a/polars/polars-core/src/functions.rs +++ b/polars/polars-core/src/functions.rs @@ -2,7 +2,6 @@ use crate::prelude::*; use num::{Float, NumCast}; use std::ops::Div; -// todo! 
make numerical stable from catastrophic cancellation pub fn cov(a: &ChunkedArray, b: &ChunkedArray) -> Option where T: PolarsFloatType, @@ -11,7 +10,9 @@ where if a.len() != b.len() { None } else { - Some((&(a - a.mean()?) * &(b - b.mean()?)).sum()? / NumCast::from(a.len() - 1).unwrap()) + let tmp = (a - a.mean()?) * (b - b.mean()?); + let n = tmp.len() - tmp.null_count(); + Some(tmp.sum()? / NumCast::from(n - 1).unwrap()) } } pub fn pearson_corr(a: &ChunkedArray, b: &ChunkedArray) -> Option