Skip to content

Commit

Permalink
fix(rust, python): fix bugs in skew and kurtosis (#5484)
Browse files Browse the repository at this point in the history
  • Loading branch information
sorhawell committed Nov 11, 2022
1 parent 9074f48 commit 0c84d41
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 15 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ name: Benchmark
on:
pull_request:
paths:
- 'polars/**'
- 'py-polars/tests/db-benchmark/**'
- '.github/workflows/benchmark.yaml'
- "polars/**"
- "py-polars/tests/db-benchmark/**"
- ".github/workflows/benchmark.yaml"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand All @@ -20,9 +20,9 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
cache: 'pip'
cache-dependency-path: 'py-polars/requirements-dev.txt'
python-version: "3.10"
cache: "pip"
cache-dependency-path: "py-polars/requirements-dev.txt"

- name: Create virtual environment
working-directory: py-polars
Expand All @@ -39,12 +39,12 @@ jobs:
- name: Set up R
uses: r-lib/actions/setup-r@v2
with:
r-version: '3.5.3'
r-version: "3.5.3"

- name: Generate data
working-directory: py-polars/tests/db-benchmark
run: |
Rscript -e 'install.packages("data.table", repos="https://Rdatatable.github.io/data.table")'
Rscript -e 'install.packages("data.table"); data.table::update_dev_pkg()'
Rscript groupby-datagen.R 1e7 1e2 5 0
- name: Set up Rust
Expand Down
33 changes: 26 additions & 7 deletions polars/polars-core/src/series/ops/moment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ impl Series {
let out = m3 / m2.powf(1.5);

if !bias {
let n = self.len() as f64;
let n = (self.len() - self.null_count()) as f64;
Ok(Some(((n - 1.0) * n).sqrt() / (n - 2.0) * out))
} else {
Ok(Some(out))
Expand All @@ -88,8 +88,8 @@ impl Series {
let m4 = moment_precomputed_mean(self, 4, mean)?.unwrap();

let out = if !bias {
let n = self.len() as f64;
1.0 / (n - 2.0) / (n - 3.0)
let n = (self.len() - self.null_count()) as f64;
3.0 + 1.0 / (n - 2.0) / (n - 3.0)
* ((n.powf(2.0) - 1.0) * m4 / m2.powf(2.0) - 3.0 * (n - 1.0).powf(2.0))
} else {
m4 / m2.powf(2.0)
Expand Down Expand Up @@ -130,16 +130,35 @@ mod test {
#[test]
fn test_skew() -> PolarsResult<()> {
    // Checks `Series::skew` on a fully populated series and on one that
    // contains a null, for both values of its boolean argument.
    let s = Series::new("", &[1, 2, 3, 4, 5, 23]);
    let s2 = Series::new("", &[Some(1), Some(2), Some(3), None, Some(1)]);

    // Every comparison is two-sided (`.abs()`): a one-sided `actual - expected < tol`
    // would also pass for any result that is far too small.
    assert!((s.skew(false)?.unwrap() - 2.2905330058490514).abs() < 0.0001);
    assert!((s.skew(true)?.unwrap() - 1.6727687946848508).abs() < 0.0001);

    // Series containing a null entry.
    // NOTE(review): expected values presumably assume the null is excluded
    // from the sample size -- confirm against the reference implementation.
    assert!((s2.skew(false)?.unwrap() - 0.8545630383279711).abs() < 0.0001);
    assert!((s2.skew(true)?.unwrap() - 0.49338220021815865).abs() < 0.0001);

    Ok(())
}

#[test]
fn test_kurtosis() -> PolarsResult<()> {
    // Checks `Series::kurtosis` for all four combinations of its two boolean
    // arguments, on a fully populated series and on one that contains a null.
    // NOTE(review): the arguments look like `(fisher, bias)` -- the expected
    // values differ by exactly 3.0 when only the first flag flips -- confirm
    // against the method signature.
    let s = Series::new("", &[1, 2, 3, 4, 5, 23]);

    // Every comparison is two-sided (`.abs()`): a one-sided `actual - expected < tol`
    // would also pass for any result that is far too small.
    assert!((s.kurtosis(true, true)?.unwrap() - 0.9945668771797536).abs() < 0.0001);
    assert!((s.kurtosis(true, false)?.unwrap() - 5.400820058440946).abs() < 0.0001);
    assert!((s.kurtosis(false, true)?.unwrap() - 3.994566877179754).abs() < 0.0001);
    assert!((s.kurtosis(false, false)?.unwrap() - 8.400820058440946).abs() < 0.0001);

    // Series containing a null entry.
    let s2 = Series::new(
        "",
        &[Some(1), Some(2), Some(3), None, Some(1), Some(2), Some(3)],
    );
    assert!((s2.kurtosis(true, true)?.unwrap() - (-1.5)).abs() < 0.0001);
    assert!((s2.kurtosis(true, false)?.unwrap() - (-1.875)).abs() < 0.0001);
    assert!((s2.kurtosis(false, true)?.unwrap() - 1.5).abs() < 0.0001);
    assert!((s2.kurtosis(false, false)?.unwrap() - 1.125).abs() < 0.0001);

    Ok(())
}
}

0 comments on commit 0c84d41

Please sign in to comment.