Skip to content

Commit

Permalink
add more distributions for random sampling
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 9, 2020
1 parent 4e5901a commit 0cf3678
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 10 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,5 +192,5 @@ Additional cargo features:
- Conversions between Chrono and Polars for temporal data
* `simd`
- SIMD operations
* `parquet_ser`
* `parquet`
- Read Apache Parquet format
36 changes: 34 additions & 2 deletions polars/src/chunked_array/random.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::prelude::*;
use num::{Float, NumCast};
use rand::distributions::Bernoulli;
use rand::prelude::*;
use rand_distr::{Distribution, Normal, StandardNormal};
use rand_distr::{Distribution, Normal, StandardNormal, Uniform};

impl<T> ChunkedArray<T>
where
Expand All @@ -10,7 +11,10 @@ where
{
/// Create `ChunkedArray` with samples from a Normal distribution.
pub fn rand_normal(name: &str, length: usize, mean: f64, std_dev: f64) -> Result<Self> {
let normal = Normal::new(mean, std_dev)?;
let normal = match Normal::new(mean, std_dev) {
Ok(dist) => dist,
Err(e) => return Err(PolarsError::RandError(format!("{:?}", e))),
};
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, length);
for _ in 0..length {
let smpl = normal.sample(&mut rand::thread_rng());
Expand All @@ -30,4 +34,32 @@ where
}
builder.finish()
}

/// Create `ChunkedArray` with samples from a Uniform distribution.
///
/// Draws `length` values from `Uniform::new(low, high)`, casts each sample to the
/// array's native type and appends it to a new array called `name`.
///
/// # Panics
///
/// Panics if a sample cannot be cast to the target type, i.e. when
/// `NumCast::from` returns `None`.
/// NOTE(review): `rand` documents `Uniform::new` as panicking when `low >= high`;
/// confirm against the pinned `rand` version.
pub fn rand_uniform(name: &str, length: usize, low: f64, high: f64) -> Self {
    let uniform = Uniform::new(low, high);
    let mut builder = PrimitiveChunkedBuilder::<T>::new(name, length);
    // Fetch the thread-local generator once instead of on every iteration.
    let mut rng = rand::thread_rng();
    for _ in 0..length {
        let smpl = uniform.sample(&mut rng);
        // The target type is inferred from `append_value`.
        let smpl = NumCast::from(smpl).unwrap();
        builder.append_value(smpl);
    }
    builder.finish()
}
}

impl BooleanChunked {
    /// Create `ChunkedArray` with samples from a Bernoulli distribution.
    ///
    /// Builds a boolean array called `name` holding `length` samples drawn from
    /// `Bernoulli::new(p)`.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::RandError` carrying the `Debug` rendering of the
    /// underlying error when `Bernoulli::new` rejects `p`.
    /// NOTE(review): per the `rand` docs this happens when `p` is outside `[0, 1]`.
    pub fn rand_bernoulli(name: &str, length: usize, p: f64) -> Result<Self> {
        let dist =
            Bernoulli::new(p).map_err(|e| PolarsError::RandError(format!("{:?}", e)))?;
        let mut builder = BooleanChunkedBuilder::new(name, length);
        // Fetch the thread-local generator once instead of on every iteration.
        let mut rng = rand::thread_rng();
        for _ in 0..length {
            builder.append_value(dist.sample(&mut rng));
        }
        Ok(builder.finish())
    }
}
10 changes: 6 additions & 4 deletions polars/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub enum PolarsError {
ArrowError(#[from] arrow::error::ArrowError),
#[error("Invalid operation")]
InvalidOperation,
#[error("Chunk don't match")]
#[error("Chunks don't match")]
ChunkMisMatch,
#[error("Data types don't match")]
DataTypeMisMatch,
Expand All @@ -26,12 +26,14 @@ pub enum PolarsError {
NoData,
#[error("Memory should be 64 byte aligned")]
MemoryNotAligned,
#[cfg(feature = "parquet_ser")]
#[cfg(feature = "parquet")]
#[error(transparent)]
ParquetError(#[from] parquet::errors::ParquetError),
#[cfg(feature = "random")]
#[error(transparent)]
RandError(#[from] rand_distr::NormalError),
#[error("{0}")]
RandError(String),
#[error("This operation requires data without None values")]
HasNullValues,
}

pub type Result<T> = std::result::Result<T, PolarsError>;
4 changes: 2 additions & 2 deletions polars/src/frame/ser/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
pub mod csv;
pub mod ipc;
pub mod json;
#[cfg(feature = "parquet_ser")]
#[doc(cfg(feature = "parquet_ser"))]
#[cfg(feature = "parquet")]
#[doc(cfg(feature = "parquet"))]
pub mod parquet;
use crate::prelude::*;
use arrow::{
Expand Down
4 changes: 3 additions & 1 deletion polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,12 @@
//! - Conversions between Chrono and Polars for temporal data
//! * `simd`
//! - SIMD operations
//! * `parquet_ser`
//! * `parquet`
//! - Read Apache Parquet format
//! * `random`
//! - Generate arrays with randomly sampled values
//! * `ndarray`
//! - Convert from `DataFrame` to `ndarray`
#![allow(dead_code)]
#![feature(iterator_fold_self)]
#![feature(doc_cfg)]
Expand Down

0 comments on commit 0cf3678

Please sign in to comment.