Commit

Docs (#1400)
elbaro committed Sep 20, 2021
1 parent cd81f5e commit 86515ca
Showing 20 changed files with 32 additions and 32 deletions.
14 changes: 7 additions & 7 deletions examples/aggregate_multiple_files_in_chunks/src/main.rs
@@ -175,12 +175,12 @@ fn compute_mean(
.drop_in_place(count_column_name)?
.cast_with_dtype(&DataType::Float64)?;

- // Compute the mean serie and rename to the `mean_column_name` provided
+ // Compute the mean series and rename to the `mean_column_name` provided
// as input.
let mut mean_column = &sum_column / &count_column;
mean_column.rename(mean_column_name);

- // Return successfully the serie.
+ // Return successfully the series.
Ok(mean_column)
}
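
Aside: the pattern this hunk documents — divide the sum column by the count column, then rename the result — can be sketched on its own. This is a minimal illustration against the same 2021-era Polars API the example uses (cast_with_dtype, elementwise Series division, rename), with hypothetical column names:

fn mean_from_sum_and_count(df: &DataFrame) -> Result<Series> {
    // Cast both columns to Float64 so the division yields a float mean.
    let sum_column = df.column("calories_sum")?.cast_with_dtype(&DataType::Float64)?;
    let count_column = df.column("calories_count")?.cast_with_dtype(&DataType::Float64)?;
    // Elementwise division of two Series produces the mean Series.
    let mut mean_column = &sum_column / &count_column;
    mean_column.rename("calories_mean");
    Ok(mean_column)
}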

@@ -275,7 +275,7 @@ fn main() -> Result<(), Box<dyn Error>> {
// aggregation of the `FILES_IN_PARALLEL`. At this point the
// schema will change to ['category', 'calories_sum', 'calories_count',
// 'fats_g_sum', 'fats_g_count', 'sugars_g_sum', 'sugars_g_count'].
- // 3. Append sequencially each partial dataframe to the final dataframe.
+ // 3. Append sequentially each partial dataframe to the final dataframe.
// 4. Group by category.
// 5. Aggregate summing the sums and the counts of the partial aggregation
// to get the global sums '${field}_sum_sum' and the global counts
@@ -307,17 +307,17 @@ fn main() -> Result<(), Box<dyn Error>> {
// - calories_mean from calories_sum_sum and calories_count_sum
// - fats_g_mean from fats_g_sum_sum and fats_g_count_sum
// - sugars_g_mean from sugars_g_sum_sum and sugars_g_count_sum
- // The ${field}_sum_sum and ${field}_count_sum colums will be
- // droped after computing the mean as they are not needed anymore.
+ // The ${field}_sum_sum and ${field}_count_sum columns will be
+ // dropped after computing the mean as they are not needed anymore.
let mean_series = compute_all_means(&mut main_df)?;

// Add the computed mean series to the main dataframe.
// The schema at this point is ['category', 'calories_mean', 'fats_g_mean',
// 'sugars_g_mean']
mean_series
.into_iter()
- .try_for_each(|serie| -> PolarResult<()> {
- main_df.with_column(serie)?;
+ .try_for_each(|series| -> PolarResult<()> {
+ main_df.with_column(series)?;
Ok(())
})?;

2 changes: 1 addition & 1 deletion polars/polars-arrow/src/trusted_len/mod.rs
@@ -75,7 +75,7 @@ pub trait PushUnchecked<T> {
/// Caller must ensure the array has enough capacity to hold `T`.
unsafe fn push_unchecked(&mut self, value: T);

- /// Will push an item and not check if there is enough capacity nor update the array's lenght
+ /// Will push an item and not check if there is enough capacity nor update the array's length
/// # Safety
/// Caller must ensure the array has enough capacity to hold `T`.
/// Caller must update the length when its done updating the vector.
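
Aside: a hedged sketch of the pattern this trait abstracts, shown on a plain Vec rather than the trait's actual implementation — reserve capacity up front, write without per-push checks, then fix up the length once at the end:

fn fill_unchecked(n: usize) -> Vec<u32> {
    let mut v: Vec<u32> = Vec::with_capacity(n);
    let ptr = v.as_mut_ptr();
    unsafe {
        for i in 0..n {
            // Safety: i < n <= capacity, so each write is in bounds.
            ptr.add(i).write(i as u32);
        }
        // Safety: the first n elements are now initialized; set the length once.
        v.set_len(n);
    }
    v
}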
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/iterator/par/numeric.rs
@@ -13,7 +13,7 @@ use rayon::prelude::*;
///
/// # Input
///
- /// seq_iter: The sequential iterator to cast the parallel iterator once it is splitted in threads.
+ /// seq_iter: The sequential iterator to cast the parallel iterator once it is split in threads.
macro_rules! impl_numeric_parallel_iterator_body {
($seq_iter:ty) => {
type Item = <$seq_iter as Iterator>::Item;
@@ -66,7 +66,7 @@ macro_rules! impl_numeric_indexed_parallel_iterator_body {
///
/// # Input
///
- /// seq_iter: The sequential iterator this producer cast after spliting.
+ /// seq_iter: The sequential iterator this producer cast after splitting.
macro_rules! impl_numeric_producer_body {
($seq_iter:ty) => {
type Item = <$seq_iter as Iterator>::Item;
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/mod.rs
@@ -217,7 +217,7 @@ impl<T> ChunkedArray<T> {
///
/// # Safety
///
- /// This is unsafe as the dtype may be uncorrect and
+ /// This is unsafe as the dtype may be incorrect and
/// is assumed to be correct in other safe code.
pub(crate) unsafe fn unpack_series_matching_physical_type(
&self,
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/explode.rs
@@ -36,7 +36,7 @@ where

// in the case that the value array has got null values, we need to check every validity
// value and collect the indices.
- // because the length of the array is not known, we first collect the null indexes, ofsetted
+ // because the length of the array is not known, we first collect the null indexes, offsetted
// with the insertion of empty rows (as None) and later create a validity bitmap
if arr.null_count() > 0 {
let validity_values = arr.validity().as_ref().unwrap();
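
Aside: the visible hunk ends here, but the bookkeeping the comment describes can be sketched standalone. This illustration (a hypothetical helper with simplified types, not Polars' actual code) collects the output positions of null lists — each of which still emits one placeholder row — and builds the validity mask afterwards:

fn explode_with_validity(lists: &[Option<Vec<i64>>]) -> (Vec<i64>, Vec<bool>) {
    let mut values = Vec::new();
    let mut null_idx = Vec::new();
    for list in lists {
        match list {
            Some(vs) if !vs.is_empty() => values.extend_from_slice(vs),
            // A null (or empty) list still emits a single row; record its
            // offsetted output position for the validity bitmap.
            _ => {
                null_idx.push(values.len());
                values.push(0); // placeholder value, masked out below
            }
        }
    }
    let mut validity = vec![true; values.len()];
    for &i in &null_idx {
        validity[i] = false;
    }
    (values, validity)
}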
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/unique/mod.rs
@@ -72,7 +72,7 @@ macro_rules! is_unique_duplicated {
($ca:expr, $inverse:expr) => {{
let mut idx_key = PlHashMap::new();

- // instead of grouptuples, wich allocates a full vec per group, we now just toggle a boolean
+ // instead of grouptuples, which allocates a full vec per group, we now just toggle a boolean
// that's false if a group has multiple entries.
$ca.into_iter().enumerate().for_each(|(idx, key)| {
idx_key
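
Aside: the hunk is truncated here, but the toggle idea from the comment can be shown as a self-contained sketch (hypothetical helper): keep the first index per key plus a boolean that flips to false when the key recurs, instead of allocating a Vec of indices per group:

use std::collections::HashMap;

fn first_unique_flags<K: std::hash::Hash + Eq>(
    keys: impl Iterator<Item = K>,
) -> HashMap<K, (usize, bool)> {
    let mut idx_key = HashMap::new();
    for (idx, key) in keys.enumerate() {
        idx_key
            .entry(key)
            .and_modify(|e| e.1 = false) // seen again: group is not unique
            .or_insert((idx, true)); // first occurrence: tentatively unique
    }
    idx_key
}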
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/random.rs
@@ -77,7 +77,7 @@ impl DataFrame {
false => create_rand_index_no_replacement(n, self.height()).1,
};
// Safety:
- // indices are withing bounds
+ // indices are within bounds
Ok(unsafe { self.take_unchecked(&idx) })
}
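
Aside: "random indices without replacement, within bounds" can be illustrated with the rand crate. Polars uses its own create_rand_index_no_replacement helper, so this is only a sketch of the idea:

use rand::seq::index::sample;

fn sample_indices(height: usize, n: usize) -> Vec<usize> {
    // Draw n distinct indices from 0..height, so a subsequent
    // unchecked take on the frame can never go out of bounds.
    sample(&mut rand::thread_rng(), height, n).into_vec()
}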

10 changes: 5 additions & 5 deletions polars/polars-core/src/frame/groupby/mod.rs
@@ -135,10 +135,10 @@ impl IntoGroupTuples for Utf8Chunked {
if multithreaded {
let n_partitions = set_partition_size();

- let splitted = split_ca(self, n_partitions).unwrap();
+ let split = split_ca(self, n_partitions).unwrap();

let str_hashes = POOL.install(|| {
- splitted
+ split
.par_iter()
.map(|ca| {
ca.into_iter()
@@ -1485,12 +1485,12 @@ mod test {
vec![1, 2, 3, 4, 4, 4],
] {
let ca = UInt32Chunked::new_from_slice("", &slice);
- let splitted = split_ca(&ca, 4).unwrap();
+ let split = split_ca(&ca, 4).unwrap();

let a = groupby(ca.into_iter()).into_iter().sorted().collect_vec();

- let keys = splitted.iter().map(|ca| ca.cont_slice().unwrap()).collect();
- let b = groupby_threaded_num(keys, 0, splitted.len() as u64)
+ let keys = split.iter().map(|ca| ca.cont_slice().unwrap()).collect();
+ let b = groupby_threaded_num(keys, 0, split.len() as u64)
.into_iter()
.sorted()
.collect_vec();
2 changes: 1 addition & 1 deletion polars/polars-core/src/frame/mod.rs
@@ -179,7 +179,7 @@ impl DataFrame {

/// Create a new `DataFrame` but does not check the length or duplicate occurrence of the `Series`.
///
- /// It is adviced to use [Series::new](Series::new) in favor of this method.
+ /// It is advised to use [Series::new](Series::new) in favor of this method.
///
/// # Panic
/// It is the callers responsibility to uphold the contract of all `Series`
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/mod.rs
@@ -1001,7 +1001,7 @@ pub trait SeriesTrait:

#[cfg(feature = "random")]
#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
- /// Sample a fraction between 0.0-1.0 of this ChunkedArray.
+ /// Sample a fraction between 0.0-1.0 of this Series.
fn sample_frac(&self, frac: f64, with_replacement: bool) -> Result<Series>;

#[cfg(feature = "object")]
2 changes: 1 addition & 1 deletion polars/polars-io/src/csv.rs
@@ -361,7 +361,7 @@ where
self
}

- /// Automatically try to parse dates/ datetimes. If parsing failes, columns remain of dtype Utf8.
+ /// Automatically try to parse dates/ datetimes. If parsing fails, columns remain of dtype Utf8.
#[cfg(feature = "temporal")]
pub fn with_parse_dates(mut self, toggle: bool) -> Self {
self.parse_dates = toggle;
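
Aside: a hedged usage sketch of the option documented above, against the 2021-era reader API (the path and Result alias are illustrative; with_parse_dates is the builder method this diff shows):

fn read_csv_with_dates(path: &str) -> Result<DataFrame> {
    CsvReader::from_path(path)?
        .with_parse_dates(true) // columns fall back to Utf8 if parsing fails
        .finish()
}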
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/lib.rs
@@ -1,7 +1,7 @@
//! Lazy API of Polars
//!
//! *Credits to the work of Andy Grove and Ballista/ DataFusion / Apache Arrow, which served as
- //! insipration for the lazy API.*
+ //! inspiration for the lazy API.*
//!
//! The lazy api of Polars supports a subset of the eager api. Apart from the distributed compute,
//! it is very similar to [Apache Spark](https://spark.apache.org/). You write queries in a
@@ -116,7 +116,7 @@ pub(super) fn is_pushdown_boundary(node: Node, expr_arena: &Arena<AExpr>) -> bool
AExpr::Shift { .. } | AExpr::Sort { .. } | AExpr::SortBy { .. }
| AExpr::Agg(_) // an aggregation needs all rows
| AExpr::Reverse(_)
- // everyting that works on groups likely changes to order of elements w/r/t the other columns
+ // everything that works on groups likely changes to order of elements w/r/t the other columns
| AExpr::Function {options: FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, .. }, ..}
| AExpr::Function {options: FunctionOptions { collect_groups: ApplyOptions::ApplyList, .. }, ..}
// Could be fine, could be not, for now let's be conservative on this one
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/physical_plan/executors/groupby.rs
@@ -136,7 +136,7 @@ fn run_partitions(
) -> Result<Vec<DataFrame>> {
// We do a partitioned groupby.
// Meaning that we first do the groupby operation arbitrarily
- // splitted on several threads. Than the final result we apply the same groupby again.
+ // split on several threads. Than the final result we apply the same groupby again.
let dfs = split_df(df, n_threads)?;

POOL.install(|| {
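
Aside: the hunk is cut off here, but the two-phase strategy the comment describes — aggregate each partition independently, then apply the same groupby once more to the partial results — can be sketched self-contained with plain threads and hash maps (hypothetical helper, sums standing in for the aggregation):

use std::collections::HashMap;
use std::thread;

fn partitioned_sum(partitions: Vec<Vec<(u32, i64)>>) -> HashMap<u32, i64> {
    // Phase 1: each thread groups and sums its own partition.
    let partials: Vec<HashMap<u32, i64>> = thread::scope(|s| {
        let handles: Vec<_> = partitions
            .iter()
            .map(|part| {
                s.spawn(move || {
                    let mut acc = HashMap::new();
                    for (key, val) in part {
                        *acc.entry(*key).or_insert(0) += *val;
                    }
                    acc
                })
            })
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });
    // Phase 2: the same groupby applied again, over the partial results.
    let mut out = HashMap::new();
    for partial in partials {
        for (key, val) in partial {
            *out.entry(key).or_insert(0) += val;
        }
    }
    out
}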
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/physical_plan/planner.rs
@@ -253,7 +253,7 @@ impl DefaultPlanner {
// checks:
// 1. complex expressions in the groupby itself are also not partitionable
// in this case anything more than col("foo")
- // 2. a custom funciton cannot be partitioned
+ // 2. a custom function cannot be partitioned
// 3. maintain order is likely cheaper in default groupby
if keys.len() == 1 && apply.is_none() && !maintain_order {
// complex expressions in the groupby itself are also not partitionable
2 changes: 1 addition & 1 deletion polars/src/docs/lazy.rs
@@ -198,7 +198,7 @@
//! ## Conditionally apply
//! If we want to create a new column based on some condition, we can use the `.when()/.then()/.otherwise()` expressions.
//!
- //! * `when` - accpets a predicate epxression
+ //! * `when` - accepts a predicate epxression
//! * `then` - expression to use when `predicate == true`
//! * `otherwise` - expression to use when `predicate == false`
//!
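
Aside: a minimal sketch of the expression the bullets above describe, using the polars-lazy when/then/otherwise builders with hypothetical column names ("value", "is_large") and threshold:

fn flag_large(df: DataFrame) -> Result<DataFrame> {
    df.lazy()
        .with_column(
            when(col("value").gt(lit(100)))
                .then(lit(1)) // used when the predicate is true
                .otherwise(lit(0)) // used when the predicate is false
                .alias("is_large"),
        )
        .collect()
}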
2 changes: 1 addition & 1 deletion py-polars/polars/convert.py
@@ -81,7 +81,7 @@ def from_records(
If not specified, columns will be named `column_0`, `column_1`, etc.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
- the orientation is infered by matching the columns and data dimensions. If
+ the orientation is inferred by matching the columns and data dimensions. If
this does not yield conclusive results, column orientation is used.
nullable : bool, default True
If your data does not contain null values, set to False to speed up
4 changes: 2 additions & 2 deletions py-polars/polars/eager/frame.py
@@ -90,7 +90,7 @@ class DataFrame:
labels already present in the data. Must match data dimensions.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
- the orientation is infered by matching the columns and data dimensions. If
+ the orientation is inferred by matching the columns and data dimensions. If
this does not yield conclusive results, column orientation is used.
nullable : bool, default True
If your data does not contain null values, set to False to speed up
@@ -286,7 +286,7 @@ def _from_records(
If not specified, columns will be named `column_0`, `column_1`, etc.
orient : {'col', 'row'}, default None
Whether to interpret two-dimensional data as columns or as rows. If None,
- the orientation is infered by matching the columns and data dimensions. If
+ the orientation is inferred by matching the columns and data dimensions. If
this does not yield conclusive results, column orientation is used.
nullable : bool, default True
If your data does not contain null values, set to False to speed up
2 changes: 1 addition & 1 deletion py-polars/polars/eager/series.py
@@ -2441,7 +2441,7 @@ def rolling_apply(
) -> "pl.Series":
"""
Allows a custom rolling window function.
- Prefer the specific rolling window fucntions over this one, as they are faster.
+ Prefer the specific rolling window functions over this one, as they are faster.
Prefer:
* rolling_min
2 changes: 1 addition & 1 deletion py-polars/tests/test_df.py
@@ -18,7 +18,7 @@ def test_version():


def test_init_empty():
- # Empty intialization
+ # Empty initialization
df1 = pl.DataFrame()
assert df1.shape == (0, 0)

