# Polars(いわゆるPandas)

In [3]:
:timing
:sccache 1

Timing: true
sccache: true


In [4]:
:dep polars = { version = "0.14.7", features = ["ndarray", "random"]}

In [5]:
#[macro_use]
extern crate polars;
use polars::prelude::*;

## 演算

In [6]:
let s: Series = [1, 2, 3].iter().collect();
s

shape: (3,)
Series: '' [i32]
[
	1
	2
	3
]

In [7]:
Series::new("A", &[1, 2, 3])

shape: (3,)
Series: 'A' [i32]
[
	1
	2
	3
]

In [8]:
println!("add:\n{:?}", &s + 1);
println!("sub:\n{:?}", &s - 1);
println!("mul:\n{:?}", &s * 2);
println!("div:\n{:?}", &s / 2);

add:
shape: (3,)
Series: '' [i32]
[
	2
	3
	4
]
sub:
shape: (3,)
Series: '' [i32]
[
	0
	1
	2
]
mul:
shape: (3,)
Series: '' [i32]
[
	2
	4
	6
]
div:
shape: (3,)
Series: '' [i32]
[
	0
	1
	1
]


In [9]:
println!("add:\n{:?}", &s + 1);
println!("sub:\n{:?}", &s - 1);
println!("mul:\n{:?}", &s * 2);
println!("div:\n{:?}", &s / 2);

add:
shape: (3,)
Series: '' [i32]
[
	2
	3
	4
]
sub:
shape: (3,)
Series: '' [i32]
[
	0
	1
	2
]
mul:
shape: (3,)
Series: '' [i32]
[
	2
	4
	6
]
div:
shape: (3,)
Series: '' [i32]
[
	0
	1
	1
]


## 列選択

In [21]:
let mut df = df!("A" => &["a", "b", "a"],
             "B" => &[1, 3, 5],
             "C" => &[10, 11, 12],
             "D" => &[2, 4, 6],
            "E" => &[1, 3, 5]
    )?;

In [22]:
let selectA = df.select("A")?;
selectA

shape: (3, 1)
+-----+
| A   |
| --- |
| str |
+=====+
| "a" |
+-----+
| "b" |
+-----+
| "a" |
+-----+


In [23]:
let selectB = df.select(("A", "B"))?;
selectB

shape: (3, 2)
+-----+-----+
| A   | B   |
| --- | --- |
| str | i32 |
+=====+=====+
| "a" | 1   |
+-----+-----+
| "b" | 3   |
+-----+-----+
| "a" | 5   |
+-----+-----+


In [24]:
let selectC = df.select(vec!["A", "B", "C"])?;
selectC

shape: (3, 3)
+-----+-----+-----+
| A   | B   | C   |
| --- | --- | --- |
| str | i32 | i32 |
+=====+=====+=====+
| "a" | 1   | 10  |
+-----+-----+-----+
| "b" | 3   | 11  |
+-----+-----+-----+
| "a" | 5   | 12  |
+-----+-----+-----+


## 重複行の抽出

In [25]:
let dup = df.filter(&df.is_duplicated()?)?;
dup

shape: (0, 5)
++
||
||
||
++
++


In [26]:
df.drop_duplicates(true, None)? 

shape: (3, 5)
+-----+-----+-----+-----+-----+
| A   | B   | C   | D   | E   |
| --- | --- | --- | --- | --- |
| str | i32 | i32 | i32 | i32 |
+=====+=====+=====+=====+=====+
| "a" | 1   | 10  | 2   | 1   |
+-----+-----+-----+-----+-----+
| "b" | 3   | 11  | 4   | 3   |
+-----+-----+-----+-----+-----+
| "a" | 5   | 12  | 6   | 5   |
+-----+-----+-----+-----+-----+


## READ CSV

pandasといえばこれ
```python
df = pd.read_csv(path)

```

In [30]:
let csv = CsvReader::from_path("sample.csv")?
        .infer_schema(None)
        .with_delimiter(b',')
        .has_header(true)
        .finish()?;

In [31]:
csv

shape: (1, 3)
+-----+-----+------+
| 1   | 2   | 3    |
| --- | --- | ---  |
| i64 | i64 | i64  |
| 4   | 5   | null |
+-----+-----+------+


In [40]:
let df1: DataFrame = df!("Fruit" => &["Apple", "Banana", "Pear"])?;
let df2: DataFrame = df!("Name" => &["Apple", "Banana", "Pear"])?;

In [43]:
df1.join(&df2, "Fruit", "Name", DataFrame::joinType::Inner, None)

Error: this function takes 4 arguments but 5 arguments were supplied

Error: ambiguous associated type

## 公式による詳細コードはこちら

https://github.com/LaurentMazare/tch-rs/tree/main/examples/mnist

In [33]:
use anyhow::Result;

mod mnist_conv;
mod mnist_linear;
mod mnist_nn;

fn main() -> Result<()> {
    let args: Vec<String> = std::env::args().collect();
    let model = if args.len() < 2 {
        None
    } else {
        Some(args[1].as_str())
    };
    match model {
        None => mnist_nn::run(),
        Some("linear") => mnist_linear::run(),
        Some("conv") => mnist_conv::run(),
        Some(_) => mnist_nn::run(),
    }
}

Error: file not found for module `mnist_conv`

Error: file not found for module `mnist_linear`

Error: file not found for module `mnist_nn`

Error: unresolved import `anyhow`

Error: cannot find function `run` in module `mnist_nn`

Error: cannot find function `run` in module `mnist_linear`

Error: cannot find function `run` in module `mnist_conv`

Error: cannot find function `run` in module `mnist_nn`

In [34]:
// CNN model. This should rearch 99.1% accuracy.

use anyhow::Result;
use tch::{nn, nn::ModuleT, nn::OptimizerConfig, Device, Tensor};

#[derive(Debug)]
struct Net {
    conv1: nn::Conv2D,
    conv2: nn::Conv2D,
    fc1: nn::Linear,
    fc2: nn::Linear,
}

impl Net {
    fn new(vs: &nn::Path) -> Net {
        let conv1 = nn::conv2d(vs, 1, 32, 5, Default::default());
        let conv2 = nn::conv2d(vs, 32, 64, 5, Default::default());
        let fc1 = nn::linear(vs, 1024, 1024, Default::default());
        let fc2 = nn::linear(vs, 1024, 10, Default::default());
        Net {
            conv1,
            conv2,
            fc1,
            fc2,
        }
    }
}

impl nn::ModuleT for Net {
    fn forward_t(&self, xs: &Tensor, train: bool) -> Tensor {
        xs.view([-1, 1, 28, 28])
            .apply(&self.conv1)
            .max_pool2d_default(2)
            .apply(&self.conv2)
            .max_pool2d_default(2)
            .view([-1, 1024])
            .apply(&self.fc1)
            .relu()
            .dropout_(0.5, train)
            .apply(&self.fc2)
    }
}

pub fn run() -> Result<()> {
    let m = tch::vision::mnist::load_dir("data")?;
    let vs = nn::VarStore::new(Device::cuda_if_available());
    let net = Net::new(&vs.root());
    let mut opt = nn::Adam::default().build(&vs, 1e-4)?;
    for epoch in 1..100 {
        for (bimages, blabels) in m.train_iter(256).shuffle().to_device(vs.device()) {
            let loss = net
                .forward_t(&bimages, true)
                .cross_entropy_for_logits(&blabels);
            opt.backward_step(&loss);
        }
        let test_accuracy =
            net.batch_accuracy_for_logits(&m.test_images, &m.test_labels, vs.device(), 1024);
        println!("epoch: {:4} test acc: {:5.2}%", epoch, 100. * test_accuracy,);
    }
    Ok(())
}

Error: unresolved import `tch`

Error: failed to resolve: use of undeclared crate or module `tch`

Error: failed to resolve: use of undeclared crate or module `tch`

Error: unresolved import `tch`

Error: unresolved import `tch`

Error: unresolved import `anyhow`

Error: failed to resolve: use of undeclared crate or module `tch`

In [35]:
// This should rearch 91.5% accuracy.

use anyhow::Result;
use tch::{kind, no_grad, vision, Kind, Tensor};

const IMAGE_DIM: i64 = 784;
const LABELS: i64 = 10;

pub fn run() -> Result<()> {
    let m = vision::mnist::load_dir("data")?;
    println!("train-images: {:?}", m.train_images.size());
    println!("train-labels: {:?}", m.train_labels.size());
    println!("test-images: {:?}", m.test_images.size());
    println!("test-labels: {:?}", m.test_labels.size());
    let mut ws = Tensor::zeros(&[IMAGE_DIM, LABELS], kind::FLOAT_CPU).set_requires_grad(true);
    let mut bs = Tensor::zeros(&[LABELS], kind::FLOAT_CPU).set_requires_grad(true);
    for epoch in 1..200 {
        let logits = m.train_images.mm(&ws) + &bs;
        let loss = logits
            .log_softmax(-1, Kind::Float)
            .nll_loss(&m.train_labels);
        ws.zero_grad();
        bs.zero_grad();
        loss.backward();
        no_grad(|| {
            ws += ws.grad() * (-1);
            bs += bs.grad() * (-1);
        });
        let test_logits = m.test_images.mm(&ws) + &bs;
        let test_accuracy = test_logits
            .argmax(Some(-1), false)
            .eq_tensor(&m.test_labels)
            .to_kind(Kind::Float)
            .mean(Kind::Float)
            .double_value(&[]);
        println!(
            "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%",
            epoch,
            loss.double_value(&[]),
            100. * test_accuracy
        );
    }
    Ok(())
}

Error: unresolved import `tch`

Error: unresolved import `tch`

Error: unresolved import `tch`

Error: unresolved import `tch`

Error: unresolved import `anyhow`

Error: unresolved import `tch`

In [36]:
// This should rearch 97% accuracy.

use anyhow::Result;
use tch::{nn, nn::Module, nn::OptimizerConfig, Device};

const IMAGE_DIM: i64 = 784;
const HIDDEN_NODES: i64 = 128;
const LABELS: i64 = 10;

fn net(vs: &nn::Path) -> impl Module {
    nn::seq()
        .add(nn::linear(
            vs / "layer1",
            IMAGE_DIM,
            HIDDEN_NODES,
            Default::default(),
        ))
        .add_fn(|xs| xs.relu())
        .add(nn::linear(vs, HIDDEN_NODES, LABELS, Default::default()))
}

pub fn run() -> Result<()> {
    let m = tch::vision::mnist::load_dir("data")?;
    let vs = nn::VarStore::new(Device::Cpu);
    let net = net(&vs.root());
    let mut opt = nn::Adam::default().build(&vs, 1e-3)?;
    for epoch in 1..200 {
        let loss = net
            .forward(&m.train_images)
            .cross_entropy_for_logits(&m.train_labels);
        opt.backward_step(&loss);
        let test_accuracy = net
            .forward(&m.test_images)
            .accuracy_for_logits(&m.test_labels);
        println!(
            "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%",
            epoch,
            f64::from(&loss),
            100. * f64::from(&test_accuracy),
        );
    }
    Ok(())
}

Error: unresolved import `tch`

Error: failed to resolve: use of undeclared crate or module `tch`

Error: failed to resolve: use of undeclared crate or module `tch`

Error: unresolved import `tch`

Error: unresolved import `anyhow`

Error: failed to resolve: use of undeclared crate or module `tch`

Error: cannot find trait `Module` in this scope