Skip to content

Commit

Permalink
remove prettytable.rs (#2827)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 4, 2022
1 parent 7ab9bda commit e1609c8
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 112 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/build-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ jobs:
- name: Install Rust
run: rustup update stable
- uses: Swatinem/rust-cache@v1
- name: Install cargo-hack
run: |
cargo install cargo-hack
- name: "check"
run: cd examples && cargo check

Expand Down
9 changes: 8 additions & 1 deletion examples/python_rust_compiled_function/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
dist = hamming_distance(a, b)
expected = pl.Series("", [None, 2], dtype=pl.UInt32)

print(hamming_distance(a, b))
# run on 2 Series
print("hamming distance: ", hamming_distance(a, b))
assert dist.series_equal(expected, null_equal=True)

# or use in polars expressions
print(
pl.DataFrame([a, b]).select(
pl.map(["a", "b"], lambda series: hamming_distance(series[0], series[1]))
)
)
2 changes: 1 addition & 1 deletion examples/read_csv/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ write_output = ["polars/ipc", "polars/parquet"]
default = ["write_output"]

[dependencies]
polars = { path = "../../polars", features = ["lazy", "csv-file", "pretty_fmt"] }
polars = { path = "../../polars", features = ["lazy", "csv-file"] }
2 changes: 1 addition & 1 deletion examples/read_parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
polars = { path = "../../polars", features = ["lazy", "parquet", "pretty_fmt"] }
polars = { path = "../../polars", features = ["lazy", "parquet"] }
11 changes: 4 additions & 7 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ default = [
"csv-file",
"temporal",
"performant",
"plain_fmt",
"fmt",
"dtype-slim",
]
ndarray = ["polars-core/ndarray"]
Expand Down Expand Up @@ -55,11 +55,8 @@ csv-file = ["polars-io", "polars-io/csv-file", "polars-lazy/csv-file"]
# that fits a TrustedLen iterator.
performant = ["polars-core/performant"]

# Choose one of those features for dataframe formatting.
# - plain_fmt: less compile times, simpler formatting
# - pretty_fmt: better looking dataframes at increased compile times
plain_fmt = ["polars-core/plain_fmt"]
pretty_fmt = ["polars-core/pretty_fmt"]
# Dataframe formatting.
fmt = ["polars-core/fmt"]

# sort by multiple columns
sort_multiple = ["polars-core/sort_multiple"]
Expand Down Expand Up @@ -115,7 +112,7 @@ test = [
"csv-file",
"dtype-categorical",
"cum_agg",
"polars-core/plain_fmt",
"fmt",
"diff",
"abs",
"parquet",
Expand Down
4 changes: 1 addition & 3 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ string_encoding = ["base64", "hex"]
# support for ObjectChunked<T> (downcastable Series of any type)
object = ["serde_json"]

# Cannot have mutually exclusive types. User should choose one of:
pretty_fmt = ["comfy-table"]
plain_fmt = ["prettytable-rs"]
fmt = ["comfy-table"]

# opt-in features
# sort by multiple columns
Expand Down
121 changes: 33 additions & 88 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ use std::{
const LIMIT: usize = 25;

use arrow::temporal_conversions::{timestamp_ms_to_datetime, timestamp_us_to_datetime};
#[cfg(feature = "pretty_fmt")]
#[cfg(feature = "fmt")]
use comfy_table::presets::{ASCII_FULL, UTF8_FULL};
#[cfg(feature = "pretty_fmt")]
#[cfg(feature = "fmt")]
use comfy_table::*;
#[cfg(all(feature = "plain_fmt", not(feature = "pretty_fmt")))]
use prettytable::{Cell, Row, Table};
#[cfg(any(feature = "plain_fmt", feature = "pretty_fmt"))]
#[cfg(feature = "fmt")]
use std::borrow::Cow;

macro_rules! format_array {
Expand Down Expand Up @@ -297,7 +295,7 @@ impl Debug for DataFrame {
Display::fmt(self, f)
}
}
#[cfg(any(feature = "plain_fmt", feature = "pretty_fmt"))]
#[cfg(feature = "fmt")]
fn make_str_val(v: &str) -> String {
let string_limit = 32;
let v_trunc = &v[..v
Expand All @@ -313,7 +311,7 @@ fn make_str_val(v: &str) -> String {
}
}

#[cfg(any(feature = "plain_fmt", feature = "pretty_fmt"))]
#[cfg(feature = "fmt")]
fn prepare_row(row: Vec<Cow<'_, str>>, n_first: usize, n_last: usize) -> Vec<String> {
let reduce_columns = n_first + n_last < row.len();
let mut row_str = Vec::with_capacity(n_first + n_last + reduce_columns as usize);
Expand All @@ -331,55 +329,39 @@ fn prepare_row(row: Vec<Cow<'_, str>>, n_first: usize, n_last: usize) -> Vec<Str

impl Display for DataFrame {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let height = self.height();
assert!(
self.columns.iter().all(|s| s.len() == height),
"The columns lengths in the DataFrame are not equal."
);

let max_n_cols = std::env::var("POLARS_FMT_MAX_COLS")
.unwrap_or_else(|_| "8".to_string())
.parse()
.unwrap_or(8);
#[cfg(feature = "fmt")]
{
let height = self.height();
assert!(
self.columns.iter().all(|s| s.len() == height),
"The columns lengths in the DataFrame are not equal."
);

#[cfg(any(feature = "plain_fmt", feature = "pretty_fmt"))]
let max_n_rows = {
let max_n_rows = std::env::var("POLARS_FMT_MAX_ROWS")
let max_n_cols = std::env::var("POLARS_FMT_MAX_COLS")
.unwrap_or_else(|_| "8".to_string())
.parse()
.unwrap_or(8);
if max_n_rows < 2 {
2

let max_n_rows = {
let max_n_rows = std::env::var("POLARS_FMT_MAX_ROWS")
.unwrap_or_else(|_| "8".to_string())
.parse()
.unwrap_or(8);
if max_n_rows < 2 {
2
} else {
max_n_rows
}
};
let (n_first, n_last) = if self.width() > max_n_cols {
((max_n_cols + 1) / 2, max_n_cols / 2)
} else {
max_n_rows
}
};
let (n_first, n_last) = if self.width() > max_n_cols {
((max_n_cols + 1) / 2, max_n_cols / 2)
} else {
(self.width(), 0)
};
let reduce_columns = n_first + n_last < self.width();
(self.width(), 0)
};
let reduce_columns = n_first + n_last < self.width();

let mut names = Vec::with_capacity(n_first + n_last + reduce_columns as usize);
let mut names = Vec::with_capacity(n_first + n_last + reduce_columns as usize);

#[cfg(not(feature = "pretty_fmt"))]
{
let field_to_str = |f: &Field| format!("{}\n---\n{}", f.name(), f.data_type());
let fields = self.fields();
for field in fields[0..n_first].iter() {
names.push(field_to_str(field));
}
if reduce_columns {
names.push("...".into());
}
for field in fields[self.width() - n_last..].iter() {
names.push(field_to_str(field));
}
}

#[cfg(feature = "pretty_fmt")]
{
let field_to_str = |f: &Field| {
let name = make_str_val(f.name());
let lower_bounds = std::cmp::max(5, std::cmp::min(12, name.len()));
Expand Down Expand Up @@ -465,7 +447,8 @@ impl Display for DataFrame {

write!(f, "shape: {:?}\n{}", self.shape(), table)?;
}
#[cfg(not(any(feature = "plain_fmt", feature = "pretty_fmt")))]

#[cfg(not(feature = "fmt"))]
{
write!(
f,
Expand All @@ -474,44 +457,6 @@ impl Display for DataFrame {
)?;
}

#[cfg(all(feature = "plain_fmt", not(feature = "pretty_fmt")))]
{
let mut table = Table::new();
table.set_titles(Row::new(names.into_iter().map(|s| Cell::new(&s)).collect()));
let mut rows = Vec::with_capacity(max_n_rows);
if self.height() > max_n_rows {
for i in 0..(max_n_rows / 2) {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
rows.push(prepare_row(row, n_first, n_last));
}
let dots = rows[0].iter().map(|_| "...".to_string()).collect();
rows.push(dots);
for i in (self.height() - (max_n_rows + 1) / 2)..self.height() {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
rows.push(prepare_row(row, n_first, n_last));
}
for row in rows {
table.add_row(Row::new(row.into_iter().map(|s| Cell::new(&s)).collect()));
}
} else {
for i in 0..self.height() {
if self.width() > 0 {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
table.add_row(Row::new(
prepare_row(row, n_first, n_last)
.into_iter()
.map(|s| Cell::new(&s))
.collect(),
));
} else {
break;
}
}
}

write!(f, "shape: {:?}\n{}", self.shape(), table)?;
}

Ok(())
}
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dtype-date = ["polars-core/dtype-date"]
dtype-time = ["polars-core/dtype-time", "polars-core/temporal"]
dtype-categorical = ["polars-core/dtype-categorical"]
csv-file = ["csv-core", "memmap", "lexical", "arrow/io_csv_write"]
fmt = ["polars-core/plain_fmt"]
fmt = ["polars-core/fmt"]
decompress = ["flate2/miniz_oxide"]
decompress-fast = ["flate2/zlib-ng-compat"]
temporal = ["polars-core/dtype-date", "polars-core/dtype-datetime"]
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ipc = ["polars-io/ipc"]
csv-file = ["polars-io/csv-file"]
temporal = ["polars-core/temporal", "polars-time", "dtype-datetime"]
# debugging purposes
fmt = ["polars-core/plain_fmt"]
fmt = ["polars-core/fmt"]
strings = ["polars-core/strings"]
future = []
dtype-u8 = ["polars-core/dtype-u8"]
Expand Down Expand Up @@ -79,7 +79,7 @@ test = [
"dtype-categorical",
"cum_agg",
"regex",
"polars-core/plain_fmt",
"polars-core/fmt",
"diff",
"abs",
"parquet",
Expand Down
12 changes: 8 additions & 4 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@
//! These are downcastable from Series through the [Any](https://doc.rust-lang.org/std/any/index.html) trait.
//! * Performance related:
//! - `simd` - SIMD operations _(nightly only)_
//! - `performant` - ~40% faster chunkedarray creation but may lead to unexpected panic if iterator incorrectly sets a size_hint
//! - `bigidx` - Activate this feature if you expect >> 2^32 rows. This has not been needed by anyone.
//! This allows polars to scale up way beyond that by using `u64` as an index.
//! Polars will be a bit slower with this feature activated as many data structures
//! are less cache efficient.
//! * IO related:
//! - `serde` - Support for [serde](https://crates.io/crates/serde) serialization and deserialization.
//! Can be used for JSON and more serde supported serialization formats.
Expand All @@ -143,7 +146,7 @@
//!
//! * `DataFrame` operations:
//! - `dynamic_groupby` - Groupby based on a time window instead of predefined keys.
//! - `pivot` - [pivot operation](crate::frame::groupby::GroupBy::pivot) on `DataFrame`s
//! Also activates rolling window group by operations.
//! - `sort_multiple` - Allow sorting a `DataFrame` on multiple columns
//! - `rows` - Create `DataFrame` from rows and extract rows from `DataFrames`.
//! And activates `pivot` and `transpose` operations
Expand Down Expand Up @@ -178,9 +181,10 @@
//! - `abs` - Get absolute values of Series
//! - `arange` - Range operation on Series
//! - `product` - Compute the product of a Series.
//! - `diff` - [`diff`] operation.
//! - `pct_change` - Compute change percentages.
//! * `DataFrame` pretty printing (Choose one or none, but not both):
//! - `plain_fmt` - no overflowing (less compilation times)
//! - `pretty_fmt` - cell overflow (increased compilation times)
//! - `fmt` - Activate DataFrame formatting
//!
//! ## Compile times and opt-in data types
//! As mentioned above, Polars `Series` are wrappers around
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ features = [
"random",
"object",
"csv-file",
"pretty_fmt",
"fmt",
"performant",
"dtype-full",
"rows",
Expand Down

0 comments on commit e1609c8

Please sign in to comment.