In [24]:
:dep ndarray = {version = "0.15.6"}
:dep polars = {version = "0.35.4", features = ["describe", "lazy", "ndarray"]}
:dep plotters = { version = "0.3.5", default_features = false, features = ["evcxr", "all_series", "all_elements"] }

In [25]:
:show_deps

ndarray = {version = "0.15.6"}
plotters = { version = "0.3.5", default_features = false, features = ["evcxr", "all_series", "all_elements"] }
polars = {version = "0.35.4", features = ["describe", "lazy", "ndarray"]}


In [26]:
use polars::prelude::*;
use polars::frame::DataFrame;
use std::path::Path;

In [29]:
/// Loads a CSV file that has a header row into a polars [`DataFrame`].
///
/// # Panics
///
/// Panics if the file cannot be opened or if reading it into a frame fails.
/// The panic message names the offending path and includes the underlying
/// polars error so the failure can be diagnosed from the notebook output.
fn read_data_frame_from_csv(
    csv_file_path: &Path,
) -> DataFrame {
    CsvReader::from_path(csv_file_path)
        .unwrap_or_else(|err| {
            panic!("Cannot open file {}: {}", csv_file_path.display(), err)
        })
        // The first line of the file holds the column names.
        .has_header(true)
        .finish()
        .unwrap_or_else(|err| {
            panic!("Cannot read CSV file {}: {}", csv_file_path.display(), err)
        })
}

// Load the Titanic passenger data (relative path) into the frame used by the cells below.
let titanic_file_path: &Path = Path::new("dataset/titanic.csv");
let titanic_df: DataFrame = read_data_frame_from_csv(titanic_file_path);

In [30]:
// Dimensions of the frame as (rows, columns).
titanic_df.shape()

(891, 12)

In [33]:
// Per-column summary statistics (count, null_count, mean, std, min, ...).
// The call returns a Result, hence the `Ok(...)` wrapper in the output.
// NOTE(review): `None` presumably selects the default percentiles — confirm in the polars docs.
titanic_df.describe(None)

Ok(shape: (9, 13)
┌────────────┬─────────────┬──────────┬──────────┬───┬───────────┬───────────┬───────┬──────────┐
│ describe   ┆ PassengerId ┆ Survived ┆ Pclass   ┆ … ┆ Ticket    ┆ Fare      ┆ Cabin ┆ Embarked │
│ ---        ┆ ---         ┆ ---      ┆ ---      ┆   ┆ ---       ┆ ---       ┆ ---   ┆ ---      │
│ str        ┆ f64         ┆ f64      ┆ f64      ┆   ┆ str       ┆ f64       ┆ str   ┆ str      │
╞════════════╪═════════════╪══════════╪══════════╪═══╪═══════════╪═══════════╪═══════╪══════════╡
│ count      ┆ 891.0       ┆ 891.0    ┆ 891.0    ┆ … ┆ 891       ┆ 891.0     ┆ 891   ┆ 891      │
│ null_count ┆ 0.0         ┆ 0.0      ┆ 0.0      ┆ … ┆ 0         ┆ 0.0       ┆ 687   ┆ 2        │
│ mean       ┆ 446.0       ┆ 0.383838 ┆ 2.308642 ┆ … ┆ null      ┆ 32.204208 ┆ null  ┆ null     │
│ std        ┆ 257.353842  ┆ 0.486592 ┆ 0.836071 ┆ … ┆ null      ┆ 49.693429 ┆ null  ┆ null     │
│ min        ┆ 1.0         ┆ 0.0      ┆ 1.0      ┆ … ┆ 110152    ┆ 0.0       ┆ A10   ┆ C        │
│ 

In [31]:
// First five rows of the frame.
titanic_df.head(Some(5))

shape: (5, 12)
┌─────────────┬──────────┬────────┬───────────────────┬───┬───────────┬─────────┬───────┬──────────┐
│ PassengerId ┆ Survived ┆ Pclass ┆ Name              ┆ … ┆ Ticket    ┆ Fare    ┆ Cabin ┆ Embarked │
│ ---         ┆ ---      ┆ ---    ┆ ---               ┆   ┆ ---       ┆ ---     ┆ ---   ┆ ---      │
│ i64         ┆ i64      ┆ i64    ┆ str               ┆   ┆ str       ┆ f64     ┆ str   ┆ str      │
╞═════════════╪══════════╪════════╪═══════════════════╪═══╪═══════════╪═════════╪═══════╪══════════╡
│ 1           ┆ 0        ┆ 3      ┆ Braund, Mr. Owen  ┆ … ┆ A/5 21171 ┆ 7.25    ┆ null  ┆ S        │
│             ┆          ┆        ┆ Harris            ┆   ┆           ┆         ┆       ┆          │
│ 2           ┆ 1        ┆ 1      ┆ Cumings, Mrs.     ┆ … ┆ PC 17599  ┆ 71.2833 ┆ C85   ┆ C        │
│             ┆          ┆        ┆ John Bradley      ┆   ┆           ┆         ┆       ┆          │
│             ┆          ┆        ┆ (Flor…            ┆   ┆           ┆     

In [34]:
// Last five rows of the frame.
titanic_df.tail(Some(5))

shape: (5, 12)
┌─────────────┬──────────┬────────┬────────────────────┬───┬────────────┬───────┬───────┬──────────┐
│ PassengerId ┆ Survived ┆ Pclass ┆ Name               ┆ … ┆ Ticket     ┆ Fare  ┆ Cabin ┆ Embarked │
│ ---         ┆ ---      ┆ ---    ┆ ---                ┆   ┆ ---        ┆ ---   ┆ ---   ┆ ---      │
│ i64         ┆ i64      ┆ i64    ┆ str                ┆   ┆ str        ┆ f64   ┆ str   ┆ str      │
╞═════════════╪══════════╪════════╪════════════════════╪═══╪════════════╪═══════╪═══════╪══════════╡
│ 887         ┆ 0        ┆ 2      ┆ Montvila, Rev.     ┆ … ┆ 211536     ┆ 13.0  ┆ null  ┆ S        │
│             ┆          ┆        ┆ Juozas             ┆   ┆            ┆       ┆       ┆          │
│ 888         ┆ 1        ┆ 1      ┆ Graham, Miss.      ┆ … ┆ 112053     ┆ 30.0  ┆ B42   ┆ S        │
│             ┆          ┆        ┆ Margaret Edith     ┆   ┆            ┆       ┆       ┆          │
│ 889         ┆ 0        ┆ 3      ┆ Johnston, Miss.    ┆ … ┆ W./C. 6607 ┆ 23

In [16]:
// Column names together with their data types.
titanic_df.schema()

Schema:
name: PassengerId, data type: Int64
name: Survived, data type: Int64
name: Pclass, data type: Int64
name: Name, data type: String
name: Sex, data type: String
name: Age, data type: Float64
name: SibSp, data type: Int64
name: Parch, data type: Int64
name: Ticket, data type: String
name: Fare, data type: Float64
name: Cabin, data type: String
name: Embarked, data type: String


In [69]:
// Frequency of each `Survived` value; the output lists the most frequent value first.
// NOTE(review): the two flags are presumably (sort, parallel) — confirm against the polars 0.35 API.
titanic_df["Survived"].value_counts(true, true)

Ok(shape: (2, 2)
┌──────────┬────────┐
│ Survived ┆ counts │
│ ---      ┆ ---    │
│ i64      ┆ u32    │
╞══════════╪════════╡
│ 0        ┆ 549    │
│ 1        ┆ 342    │
└──────────┴────────┘)

In [90]:
// Frequency of each `Sex` value; the output lists the most frequent value first.
// NOTE(review): the two flags are presumably (sort, parallel) — confirm against the polars 0.35 API.
titanic_df["Sex"].value_counts(true, true)

Ok(shape: (2, 2)
┌────────┬────────┐
│ Sex    ┆ counts │
│ ---    ┆ ---    │
│ str    ┆ u32    │
╞════════╪════════╡
│ male   ┆ 577    │
│ female ┆ 314    │
└────────┴────────┘)

In [38]:
use plotters::prelude::*;

In [89]:
// Bar chart of the `Survived` value counts, rendered inline as a 640x480 figure.
// Every fallible step propagates its error with `?` through the closure's
// `Result` return value instead of mixing `?` with `.unwrap()`.
evcxr_figure((640, 480), |root| {
    root.fill(&WHITE)?;

    // Segmented x axis over 0..1 gives one slot per `Survived` value;
    // the y range 0..800 is hard-coded to fit the counts of this dataset.
    let mut chart = ChartBuilder::on(&root)
        .caption("Titanic Dataset", ("Arial", 30).into_font())
        .x_label_area_size(40)
        .y_label_area_size(40)
        .build_cartesian_2d((0..1).into_segmented(), 0..800)?;

    chart
        .configure_mesh()
        .x_desc("Survived?")
        .y_desc("Number")
        .draw()?;

    // Keep only the `counts` column and flatten it into plain i32 values
    // so they match the i32 y axis.
    let counts = titanic_df["Survived"]
        .value_counts(true, true)?
        .select(vec!["counts"])?
        .to_ndarray::<Int32Type>(IndexOrder::Fortran)?
        .into_raw_vec();

    // NOTE(review): bars are placed by row position in the value_counts result,
    // not by the `Survived` value itself. That lines up here only because the
    // row for 0 comes first in the output; revisit if the ordering can change.
    chart.draw_series((0..).zip(counts).map(|(slot, count)| {
        let left = SegmentValue::Exact(slot);
        let right = SegmentValue::Exact(slot + 1);
        let mut bar = Rectangle::new([(left, 0), (right, count)], BLUE.filled());
        // Horizontal margin keeps the two bars visually separated.
        bar.set_margin(0, 0, 30, 30);
        bar
    }))?;

    Ok(())
}).style("width:60%")


In [91]:
// Two columns with different element types: u32 and f64.
let a = UInt32Chunked::new("a", &[1, 2, 3]).into_series();
let b = Float64Chunked::new("b", &[10., 8., 6.]).into_series();

// Convert the frame to a 2-D ndarray of Float64Type, so the u32 column prints as floats.
// With IndexOrder::Fortran the printed array reports strides=[1, 3] and layout=Ff.
let df = DataFrame::new(vec![a, b]).unwrap();
let ndarray = df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
println!("{:?}", ndarray);

[[1.0, 10.0],
 [2.0, 8.0],
 [3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2


In [92]:
use ndarray::*;

In [93]:
// Build a 2x3 array of floats with the `array!` macro.
array![[1.,2.,3.], [4.,5.,6.]]

[[1.0, 2.0, 3.0],
 [4.0, 5.0, 6.0]], shape=[2, 3], strides=[3, 1], layout=Cc (0x5), const ndim=2

In [26]:
// Build the same 2x3 array with `arr2`, from a reference to a nested fixed-size array.
arr2(&[[1.,2.,3.], [4.,5.,6.]])

[[1.0, 2.0, 3.0],
 [4.0, 5.0, 6.0]], shape=[2, 3], strides=[3, 1], layout=Cc (0x5), const ndim=2

In [27]:
// Values from 0 up to, but excluding, 10 in steps of 0.5 (20 elements in the output).
Array::range(0., 10., 0.5)

[0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5], shape=[20], strides=[1], layout=CFcf (0xf), const ndim=1

In [29]:
// 18 evenly spaced values from 0 to 10, with both endpoints included.
Array::linspace(0., 10., 18)

[0.0, 0.5882352941176471, 1.1764705882352942, 1.7647058823529411, 2.3529411764705883, 2.9411764705882355, 3.5294117647058822, 4.11764705882353, 4.705882352941177, 5.294117647058823, 5.882352941176471, 6.470588235294118, 7.0588235294117645, 7.647058823529412, 8.23529411764706, 8.823529411764707, 9.411764705882353, 10.0], shape=[18], strides=[1], layout=CFcf (0xf), const ndim=1

In [33]:
// 3x4x5 array filled with ones; the element type is spelled out as f64.
Array::<f64, _>::ones((3, 4, 5))

[[[1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0]],

 [[1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0]],

 [[1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0],
  [1.0, 1.0, 1.0, 1.0, 1.0]]], shape=[3, 4, 5], strides=[20, 5, 1], layout=Cc (0x5), const ndim=3

In [34]:
// 3x4x5 array filled with zeros; the element type is spelled out as f64.
Array::<f64, _>::zeros((3, 4, 5))

[[[0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0]],

 [[0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0]],

 [[0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0],
  [0.0, 0.0, 0.0, 0.0, 0.0]]], shape=[3, 4, 5], strides=[20, 5, 1], layout=Cc (0x5), const ndim=3

In [36]:
// 3x3 identity matrix.
Array::<f64, _>::eye(3)

[[1.0, 0.0, 0.0],
 [0.0, 1.0, 0.0],
 [0.0, 0.0, 1.0]], shape=[3, 3], strides=[3, 1], layout=Cc (0x5), const ndim=2

In [97]:
// 2x3 array used by the indexing, reduction and linear-algebra cells below.
let arr = array![[1.,2.,3.], [4.,5.,6.]];

In [42]:
// Element at row 1, column 2 (zero-based indices).
arr[[1, 2]]

6.0

In [43]:
// Rows 0..1 (only the first row) and all columns; the result stays 2-D with shape [1, 3].
arr.slice(s![0..1, ..])

[[1.0, 2.0, 3.0]], shape=[1, 3], strides=[0, 1], layout=CFcf (0xf), const ndim=2

In [44]:
// Sum of all elements.
arr.sum()

21.0

In [45]:
// Sum along axis 0 (down each column), giving one value per column.
arr.sum_axis(Axis(0))

[5.0, 7.0, 9.0], shape=[3], strides=[1], layout=CFcf (0xf), const ndim=1

In [46]:
// Sum along axis 1 (across each row), giving one value per row.
arr.sum_axis(Axis(1))

[6.0, 15.0], shape=[2], strides=[1], layout=CFcf (0xf), const ndim=1

In [47]:
// Mean of all elements. `mean()` returns a wrapped value, which is unwrapped here;
// NOTE(review): presumably it is an Option that is None for an empty array — confirm in the ndarray docs.
arr.mean().unwrap()

3.5

In [49]:
// Display the array again for reference.
arr

[[1.0, 2.0, 3.0],
 [4.0, 5.0, 6.0]], shape=[2, 3], strides=[3, 1], layout=Cc (0x5), const ndim=2

In [98]:
// Transpose: the output has shape [3, 2] and the strides are swapped to [1, 3].
arr.t()

[[1.0, 4.0],
 [2.0, 5.0],
 [3.0, 6.0]], shape=[3, 2], strides=[1, 3], layout=Ff (0xa), const ndim=2

In [99]:
// Matrix product of arr (2x3) with its transpose (3x2), yielding a 2x2 matrix.
arr.dot(&arr.t())

[[14.0, 32.0],
 [32.0, 77.0]], shape=[2, 2], strides=[2, 1], layout=Cc (0x5), const ndim=2

In [102]:
// Apply f64::sqrt to every element and collect the results into a new array.
arr.mapv(f64::sqrt)

[[1.0, 1.4142135623730951, 1.7320508075688772],
 [2.0, 2.23606797749979, 2.449489742783178]], shape=[2, 3], strides=[3, 1], layout=Cc (0x5), const ndim=2

In [77]:
// 2x2 f32 array that deliberately includes a negative entry.
let a = arr2(&[[ 0f32, 1.],
               [-1., 2.]]);

In [79]:
// Element-wise f32 square root; the negative entry produces NaN rather than an error.
a.mapv(f32::sqrt)

[[0.0, 1.0],
 [NaN, 1.4142135]], shape=[2, 2], strides=[2, 1], layout=Cc (0x5), const ndim=2

In [83]:
// Cast each f64 element to f32 before taking the square root, so the result is an f32 array.
arr.mapv(|a|f32::sqrt(a as f32))

[[1.0, 1.4142135, 1.7320508],
 [2.0, 2.236068, 2.4494898]], shape=[2, 3], strides=[3, 1], layout=Cc (0x5), const ndim=2