Commit

Docs (#1400)
elbaro committed Sep 20, 2021
1 parent cd81f5e commit 86515ca
Showing 20 changed files with 32 additions and 32 deletions.
14 changes: 7 additions & 7 deletions examples/aggregate_multiple_files_in_chunks/src/main.rs
@@ -175,12 +175,12 @@ fn compute_mean(
.drop_in_place(count_column_name)?
.cast_with_dtype(&DataType::Float64)?;

- // Compute the mean serie and rename to the `mean_column_name` provided
+ // Compute the mean series and rename to the `mean_column_name` provided
// as input.
let mut mean_column = &sum_column / &count_column;
mean_column.rename(mean_column_name);

- // Return successfully the serie.
+ // Return successfully the series.
Ok(mean_column)
}
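
Aside: the pattern this hunk documents — divide the sum column by the count column, then rename the result — can be sketched on its own. This is a minimal illustration against the same 2021-era Polars API the example uses (cast_with_dtype, elementwise Series division, rename), with hypothetical column names:

fn mean_from_sum_and_count(df: &DataFrame) -> Result<Series> {
    // Cast both columns to Float64 so the division yields a float mean.
    let sum_column = df.column("calories_sum")?.cast_with_dtype(&DataType::Float64)?;
    let count_column = df.column("calories_count")?.cast_with_dtype(&DataType::Float64)?;
    // Elementwise division of two Series produces the mean Series.
    let mut mean_column = &sum_column / &count_column;
    mean_column.rename("calories_mean");
    Ok(mean_column)
}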

@@ -275,7 +275,7 @@ fn main() -> Result<(), Box<dyn Error>> {
// aggregation of the `FILES_IN_PARALLEL`. At this point the
// schema will change to ['category', 'calories_sum', 'calories_count',
// 'fats_g_sum', 'fats_g_count', 'sugars_g_sum', 'sugars_g_count'].
- // 3. Append sequencially each partial dataframe to the final dataframe.
+ // 3. Append sequentially each partial dataframe to the final dataframe.
// 4. Group by category.
// 5. Aggregate summing the sums and the counts of the partial aggregation
// to get the global sums '${field}_sum_sum' and the global counts
@@ -307,17 +307,17 @@ fn main() -> Result<(), Box<dyn Error>> {
// - calories_mean from calories_sum_sum and calories_count_sum
// - fats_g_mean from fats_g_sum_sum and fats_g_count_sum
// - sugars_g_mean from sugars_g_sum_sum and sugars_g_count_sum
- // The ${field}_sum_sum and ${field}_count_sum colums will be
- // droped after computing the mean as they are not needed anymore.
+ // The ${field}_sum_sum and ${field}_count_sum columns will be
+ // dropped after computing the mean as they are not needed anymore.
let mean_series = compute_all_means(&mut main_df)?;

// Add the computed mean series to the main dataframe.
// The schema at this point is ['category', 'calories_mean', 'fats_g_mean',
// 'sugars_g_mean']
mean_series
.into_iter()
- .try_for_each(|serie| -> PolarResult<()> {
- main_df.with_column(serie)?;
+ .try_for_each(|series| -> PolarResult<()> {
+ main_df.with_column(series)?;
Ok(())
})?;

2 changes: 1 addition & 1 deletion polars/polars-arrow/src/trusted_len/mod.rs
@@ -75,7 +75,7 @@ pub trait PushUnchecked<T> {
/// Caller must ensure the array has enough capacity to hold `T`.
unsafe fn push_unchecked(&mut self, value: T);

- /// Will push an item and not check if there is enough capacity nor update the array's lenght
+ /// Will push an item and not check if there is enough capacity nor update the array's length
/// # Safety
/// Caller must ensure the array has enough capacity to hold `T`.
/// Caller must update the length when its done updating the vector.
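
Aside: a hedged sketch of the pattern this trait abstracts, shown on a plain Vec rather than the trait's actual implementation — reserve capacity up front, write without per-push checks, then fix up the length once at the end:

fn fill_unchecked(n: usize) -> Vec<u32> {
    let mut v: Vec<u32> = Vec::with_capacity(n);
    let ptr = v.as_mut_ptr();
    unsafe {
        for i in 0..n {
            // Safety: i < n <= capacity, so each write is in bounds.
            ptr.add(i).write(i as u32);
        }
        // Safety: the first n elements are now initialized; set the length once.
        v.set_len(n);
    }
    v
}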
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/iterator/par/numeric.rs
@@ -13,7 +13,7 @@ use rayon::prelude::*;
///
/// # Input
///
- /// seq_iter: The sequential iterator to cast the parallel iterator once it is splitted in threads.
+ /// seq_iter: The sequential iterator to cast the parallel iterator once it is split in threads.
macro_rules! impl_numeric_parallel_iterator_body {
($seq_iter:ty) => {
type Item = <$seq_iter as Iterator>::Item;
@@ -66,7 +66,7 @@ macro_rules! impl_numeric_indexed_parallel_iterator_body {
///
/// # Input
///
- /// seq_iter: The sequential iterator this producer cast after spliting.
+ /// seq_iter: The sequential iterator this producer cast after splitting.
macro_rules! impl_numeric_producer_body {
($seq_iter:ty) => {
type Item = <$seq_iter as Iterator>::Item;
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/mod.rs
@@ -217,7 +217,7 @@ impl<T> ChunkedArray<T> {
///
/// # Safety
///
- /// This is unsafe as the dtype may be uncorrect and
+ /// This is unsafe as the dtype may be incorrect and
/// is assumed to be correct in other safe code.
pub(crate) unsafe fn unpack_series_matching_physical_type(
&self,
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/explode.rs
@@ -36,7 +36,7 @@ where

// in the case that the value array has got null values, we need to check every validity
// value and collect the indices.
- // because the length of the array is not known, we first collect the null indexes, ofsetted
+ // because the length of the array is not known, we first collect the null indexes, offsetted
// with the insertion of empty rows (as None) and later create a validity bitmap
if arr.null_count() > 0 {
let validity_values = arr.validity().as_ref().unwrap();
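
Aside: the visible hunk ends here, but the bookkeeping the comment describes can be sketched standalone. This illustration (a hypothetical helper with simplified types, not Polars' actual code) collects the output positions of null lists — each of which still emits one placeholder row — and builds the validity mask afterwards:

fn explode_with_validity(lists: &[Option<Vec<i64>>]) -> (Vec<i64>, Vec<bool>) {
    let mut values = Vec::new();
    let mut null_idx = Vec::new();
    for list in lists {
        match list {
            Some(vs) if !vs.is_empty() => values.extend_from_slice(vs),
            // A null (or empty) list still emits a single row; record its
            // offsetted output position for the validity bitmap.
            _ => {
                null_idx.push(values.len());
                values.push(0); // placeholder value, masked out below
            }
        }
    }
    let mut validity = vec![true; values.len()];
    for &i in &null_idx {
        validity[i] = false;
    }
    (values, validity)
}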
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/unique/mod.rs
@@ -72,7 +72,7 @@ macro_rules! is_unique_duplicated {
($ca:expr, $inverse:expr) => {{
let mut idx_key = PlHashMap::new();

- // instead of grouptuples, wich allocates a full vec per group, we now just toggle a boolean
+ // instead of grouptuples, which allocates a full vec per group, we now just toggle a boolean
// that's false if a group has multiple entries.
$ca.into_iter().enumerate().for_each(|(idx, key)| {
idx_key
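
Aside: the hunk is truncated here, but the toggle idea from the comment can be shown as a self-contained sketch (hypothetical helper): keep the first index per key plus a boolean that flips to false when the key recurs, instead of allocating a Vec of indices per group:

use std::collections::HashMap;

fn first_unique_flags<K: std::hash::Hash + Eq>(
    keys: impl Iterator<Item = K>,
) -> HashMap<K, (usize, bool)> {
    let mut idx_key = HashMap::new();
    for (idx, key) in keys.enumerate() {
        idx_key
            .entry(key)
            .and_modify(|e| e.1 = false) // seen again: group is not unique
            .or_insert((idx, true)); // first occurrence: tentatively unique
    }
    idx_key
}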
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/random.rs
@@ -77,7 +77,7 @@ impl DataFrame {
false => create_rand_index_no_replacement(n, self.height()).1,
};
// Safety:
- // indices are withing bounds
+ // indices are within bounds
Ok(unsafe { self.take_unchecked(&idx) })
}
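
Aside: "random indices without replacement, within bounds" can be illustrated with the rand crate. Polars uses its own create_rand_index_no_replacement helper, so this is only a sketch of the idea:

use rand::seq::index::sample;

fn sample_indices(height: usize, n: usize) -> Vec<usize> {
    // Draw n distinct indices from 0..height, so a subsequent
    // unchecked take on the frame can never go out of bounds.
    sample(&mut rand::thread_rng(), height, n).into_vec()
}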

10 changes: 5 additions & 5 deletions polars/polars-core/src/frame/groupby/mod.rs
@@ -135,10 +135,10 @@ impl IntoGroupTuples for Utf8Chunked {
if multithreaded {
let n_partitions = set_partition_size();

- let splitted = split_ca(self, n_partitions).unwrap();
+ let split = split_ca(self, n_partitions).unwrap();

let str_hashes = POOL.install(|| {
- splitted
+ split
.par_iter()
.map(|ca| {
ca.into_iter()
@@ -1485,12 +1485,12 @@ mod test {
vec![1, 2, 3, 4, 4, 4],
] {
let ca = UInt32Chunked::new_from_slice("", &slice);
- let splitted = split_ca(&ca, 4).unwrap();
+ let split = split_ca(&ca, 4).unwrap();

let a = groupby(ca.into_iter()).into_iter().sorted().collect_vec();

- let keys = splitted.iter().map(|ca| ca.cont_slice().unwrap()).collect();
- let b = groupby_threaded_num(keys, 0, splitted.len() as u64)
+ let keys = split.iter().map(|ca| ca.cont_slice().unwrap()).collect();
+ let b = groupby_threaded_num(keys, 0, split.len() as u64)
.into_iter()
.sorted()
.collect_vec();
2 changes: 1 addition & 1 deletion polars/polars-core/src/frame/mod.rs
@@ -179,7 +179,7 @@ impl DataFrame {

/// Create a new `DataFrame` but does not check the length or duplicate occurrence of the `Series`.
///
- /// It is adviced to use [Series::new](Series::new) in favor of this method.
+ /// It is advised to use [Series::new](Series::new) in favor of this method.
///
/// # Panic
/// It is the callers responsibility to uphold the contract of all `Series`
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/mod.rs
@@ -1001,7 +1001,7 @@ pub trait SeriesTrait:

#[cfg(feature = "random")]
#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
- /// Sample a fraction between 0.0-1.0 of this ChunkedArray.
+ /// Sample a fraction between 0.0-1.0 of this Series.
fn sample_frac(&self, frac: f64, with_replacement: bool) -> Result<Series>;

#[cfg(feature = "object")]
2 changes: 1 addition & 1 deletion polars/polars-io/src/csv.rs
@@ -361,7 +361,7 @@ where
self
}

- /// Automatically try to parse dates/ datetimes. If parsing failes, columns remain of dtype Utf8.
+ /// Automatically try to parse dates/ datetimes. If parsing fails, columns remain of dtype Utf8.
#[cfg(feature = "temporal")]
pub fn with_parse_dates(mut self, toggle: bool) -> Self {
self.parse_dates = toggle;
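
Aside: a hedged usage sketch of the option documented above, against the 2021-era reader API (the path and Result alias are illustrative; with_parse_dates is the builder method this diff shows):

fn read_csv_with_dates(path: &str) -> Result<DataFrame> {
    CsvReader::from_path(path)?
        .with_parse_dates(true) // columns fall back to Utf8 if parsing fails
        .finish()
}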
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/lib.rs
@@ -1,7 +1,7 @@
//! Lazy API of Polars
//!
//! *Credits to the work of Andy Grove and Ballista/ DataFusion / Apache Arrow, which served as
- //! insipration for the lazy API.*
+ //! inspiration for the lazy API.*
//!
//! The lazy api of Polars supports a subset of the eager api. Apart from the distributed compute,
//! it is very similar to [Apache Spark](https://spark.apache.org/). You write queries in a
@@ -116,7 +116,7 @@ pub(super) fn is_pushdown_boundary(node: Node, expr_arena: &Arena<AExpr>) -> bool
AExpr::Shift { .. } | AExpr::Sort { .. } | AExpr::SortBy { .. }
| AExpr::Agg(_) // an aggregation needs all rows
| AExpr::Reverse(_)
- // everyting that works on groups likely changes to order of elements w/r/t the other columns
+ // everything that works on groups likely changes to order of elements w/r/t the other columns
| AExpr::Function {options: FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, .. }, ..}
| AExpr::Function {options: FunctionOptions { collect_groups: ApplyOptions::ApplyList, .. }, ..}
// Could be fine, could be not, for now let's be conservative on this one
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/physical_plan/executors/groupby.rs
@@ -136,7 +136,7 @@ fn run_partitions(
) -> Result<Vec<DataFrame>> {
// We do a partitioned groupby.
// Meaning that we first do the groupby operation arbitrarily
- // splitted on several threads. Than the final result we apply the same groupby again.
+ // split on several threads. Than the final result we apply the same groupby again.
let dfs = split_df(df, n_threads)?;

POOL.install(|| {
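
Aside: the hunk is cut off here, but the two-phase strategy the comment describes — aggregate each partition independently, then apply the same groupby once more to the partial results — can be sketched self-contained with plain threads and hash maps (hypothetical helper, sums standing in for the aggregation):

use std::collections::HashMap;
use std::thread;

fn partitioned_sum(partitions: Vec<Vec<(u32, i64)>>) -> HashMap<u32, i64> {
    // Phase 1: each thread groups and sums its own partition.
    let partials: Vec<HashMap<u32, i64>> = thread::scope(|s| {
        let handles: Vec<_> = partitions
            .iter()
            .map(|part| {
                s.spawn(move || {
                    let mut acc = HashMap::new();
                    for (key, val) in part {
                        *acc.entry(*key).or_insert(0) += *val;
                    }
                    acc
                })
            })
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });
    // Phase 2: the same groupby applied again, over the partial results.
    let mut out = HashMap::new();
    for partial in partials {
        for (key, val) in partial {
            *out.entry(key).or_insert(0) += val;
        }
    }
    out
}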
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/physical_plan/planner.rs
@@ -253,7 +253,7 @@ impl DefaultPlanner {
// checks:
// 1. complex expressions in the groupby itself are also not partitionable
// in this case anything more than col("foo")
- // 2. a custom funciton cannot be partitioned
+ // 2. a custom function cannot be partitioned
// 3. maintain order is likely cheaper in default groupby
if keys.len() == 1 && apply.is_none() && !maintain_order {
// complex expressions in the groupby itself are also not partitionable
2 changes: 1 addition & 1 deletion polars/src/docs/lazy.rs
@@ -198,7 +198,7 @@
//! ## Conditionally apply
//! If we want to create a new column based on some condition, we can use the `.when()/.then()/.otherwise()` expressions.
//!
- //! * `when` - accpets a predicate epxression
+ //! * `when` - accepts a predicate epxression
//! * `then` - expression to use when `predicate == true`
//! * `otherwise` - expression to use when `predicate == false`
//!
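
Aside: a minimal sketch of the expression the bullets above describe, using the polars-lazy when/then/otherwise builders with hypothetical column names ("value", "is_large") and threshold:

fn flag_large(df: DataFrame) -> Result<DataFrame> {
    df.lazy()
        .with_column(
            when(col("value").gt(lit(100)))
                .then(lit(1)) // used when the predicate is true
                .otherwise(lit(0)) // used when the predicate is false
                .alias("is_large"),
        )
        .collect()
}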
2 changes: 1 addition & 1 deletion py-polars/polars/convert.py
@@ -81,7 +81,7 @@ def from_records(
If not specified, columns will be named `column_0`, `column_1`, etc.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
- the orientation is infered by matching the columns and data dimensions. If
+ the orientation is inferred by matching the columns and data dimensions. If
this does not yield conclusive results, column orientation is used.
nullable : bool, default True
If your data does not contain null values, set to False to speed up
4 changes: 2 additions & 2 deletions py-polars/polars/eager/frame.py
@@ -90,7 +90,7 @@ class DataFrame:
labels already present in the data. Must match data dimensions.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
- the orientation is infered by matching the columns and data dimensions. If
+ the orientation is inferred by matching the columns and data dimensions. If
this does not yield conclusive results, column orientation is used.
nullable : bool, default True
If your data does not contain null values, set to False to speed up
@@ -286,7 +286,7 @@ def _from_records(
If not specified, columns will be named `column_0`, `column_1`, etc.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
- the orientation is infered by matching the columns and data dimensions. If
+ the orientation is inferred by matching the columns and data dimensions. If
this does not yield conclusive results, column orientation is used.
nullable : bool, default True
If your data does not contain null values, set to False to speed up
2 changes: 1 addition & 1 deletion py-polars/polars/eager/series.py
@@ -2441,7 +2441,7 @@ def rolling_apply(
) -> "pl.Series":
"""
Allows a custom rolling window function.
- Prefer the specific rolling window fucntions over this one, as they are faster.
+ Prefer the specific rolling window functions over this one, as they are faster.
Prefer:
* rolling_min
2 changes: 1 addition & 1 deletion py-polars/tests/test_df.py
@@ -18,7 +18,7 @@ def test_version():


def test_init_empty():
- # Empty intialization
+ # Empty initialization
df1 = pl.DataFrame()
assert df1.shape == (0, 0)

