In [3]:
//Install plotters
:dep plotters = { git = "https://github.com/38/plotters", default_features = false, features = ["evcxr"] }
:dep dashmap = {version = "3.11.1"}
// Load external source code
:dep e4_7 = {path = "e4_7"}

Since Python is too slow to run this kind of policy iteration algorithm in a reasonable time frame, here is a Rust implementation.

The implementation here has a few extra aspects not seen in the pseudocode in RL 2018 (Sutton & Barto). There are two cases in which we want to stop:
1. if the policy is stable
2. if we are cycling through policies with the same value.

Since we can only approximate the value, we write a check to see if the value has approximately stopped changing.
This introduces two new hyperparameters, $\alpha$ and $\epsilon$. Here $\alpha$ weights the changes in value from past iterations, and $\epsilon$ measures how close to "no change" we must get before we stop the algorithm.

## Replicating example 4.2

In [None]:
:opt 2
use e4_7::*;

// Environment used to replicate Example 4.2.
let car_env = SimpleCarEnv::new();

// Run policy iteration on the simple environment. The hyper-parameters are
// named inside a block expression so that only `policy_simple` and
// `value_simple` are added to the notebook scope.
let (policy_simple, value_simple) = {
    // NOTE(review): the cell output prints `-(delta > theta)` lines, so theta
    // looks like the policy-evaluation threshold — confirm in `e4_7`.
    let theta = 1.0;
    // Per the notes above: alpha weights value changes from past iterations,
    // epsilon is how close to "no change" is required before stopping.
    let alpha = 0.4;
    let epsilon = 1e-4;
    // Action every state starts with.
    let start_action = Some(0);
    // Cut off after 10 iterations.
    let max_iterations = Some(10);

    Environment::policy_iteration(
        &car_env,
        theta,
        alpha,
        epsilon,
        start_action,
        max_iterations,
    )
    .unwrap()
};

Optimization: 2


Starting evaluation
-(193.3490492751235 > 1)
-(134.00426086449968 > 1)
-(90.45243773749041 > 1)
-(67.59779106615545 > 1)
-(53.584930617584405 > 1)
-(41.720862911286815 > 1)
-(32.72010939685845 > 1)


### Visualize  actions

In [None]:
use plotters::prelude::*;

// Render the policy computed for Example 4.2 as a grid: one filled unit
// square per state, coloured by the action the policy picks there.
evcxr_figure((500, 500), |area| {
    area.fill(&WHITE)?;

    // Both axes cover 0..21; the y range is passed reversed (21 down to 0)
    // and the x labels are placed in the top label area.
    let mut grid = ChartBuilder::on(&area)
        .caption("Actions Example 4.2", ("sans-serif", 40))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels only; the mesh lines are switched off.
    grid.configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    let states = car_env.states();
    grid.draw_series(states.iter().map(|&(x, y)| {
        let action = *policy_simple.get(&(x, y)).unwrap();
        // Shift the action by 5 and scale by 1/11 to obtain the hue.
        // NOTE(review): assumes actions lie in -5..=5 so the hue stays in [0, 1) — confirm.
        let hue = (action + 5) as f64 / 11.0;
        Rectangle::new([(x, y), (x + 1, y + 1)], HSLColor(hue, 1.0, 0.5).filled())
    }))?;

    // Report success back to plotters.
    Ok(())
}).style("width: 400px")

### Visualize value

In [None]:
// Render the state values computed for Example 4.2 as a grid: one filled
// unit square per state, coloured by that state's value.
evcxr_figure((500, 500), |root| {
    root.fill(&WHITE)?;

    // Both axes cover 0..21; the y range is passed reversed (21 down to 0)
    // and the x labels are placed in the top label area.
    let mut chart = ChartBuilder::on(&root)
        .caption("Values Example 4.2", ("sans-serif", 60))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels only; the mesh lines are switched off.
    chart
        .configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        // Was truncated to "cars at lo"; now matches the actions plot.
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    chart.draw_series(
        car_env.states().iter().map(|&(x, y)| {
            let v = *value_simple.get(&(x, y)).unwrap();
            // Linear value-to-hue map: 400 -> 0.0, 700 -> 1.0.
            // NOTE(review): values outside 400..700 give a hue outside [0, 1];
            // confirm how HSLColor treats that.
            let hue = (v - 400.0) / 300.0;
            Rectangle::new(
                [(x, y), (x + 1, y + 1)],
                HSLColor(hue, 1.0, 0.5).filled(),
            )
        })
    )?;

    // Tell plotters that everything is ok
    Ok(())
}).style("width: 400px")

## Exercise 4.7

In [4]:
:opt 2
use e4_7::*;

// Environment for Exercise 4.7.
let car_env_complex = MoreComplexCarEnv::new();

// Run policy iteration on the more complex environment. The hyper-parameters
// are named inside a block expression so that only `policy_complex` and
// `value_complex` are added to the notebook scope.
let (policy_complex, value_complex) = {
    // NOTE(review): the cell output prints `-(delta > theta)` lines, so theta
    // looks like the policy-evaluation threshold — confirm in `e4_7`.
    let theta = 0.1;
    // Per the notes above: alpha weights value changes from past iterations,
    // epsilon is how close to "no change" is required before stopping.
    let alpha = 0.9;
    let epsilon = 0.2;
    // Action every state starts with.
    let start_action = Some(0);
    // Cut off after 10 iterations.
    let max_iterations = Some(10);

    Environment::policy_iteration(
        &car_env_complex,
        theta,
        alpha,
        epsilon,
        start_action,
        max_iterations,
    )
    .unwrap()
};

Optimization: 2


Starting evaluation
-(147.74599361137163 > 0.1)
-(100.99435152911305 > 0.1)
-(71.01022866709806 > 0.1)
-(61.666735804165285 > 0.1)
-(49.866520200879165 > 0.1)
-(39.093249845824005 > 0.1)
-(30.628757855547406 > 0.1)
-(24.38219773992637 > 0.1)
-(20.347092345418957 > 0.1)
-(16.951101456055426 > 0.1)
-(14.102710708830614 > 0.1)
-(11.717939659599665 > 0.1)
-(9.72422288193627 > 0.1)
-(8.05992904764446 > 0.1)
-(6.672833530570415 > 0.1)
-(5.518631699961645 > 0.1)
-(4.559720780563509 > 0.1)
-(3.7642198965631906 > 0.1)
-(3.105159199954585 > 0.1)
-(2.5597875551185325 > 0.1)
-(2.1089699978332987 > 0.1)
-(1.736659727296228 > 0.1)
-(1.4294359164464936 > 0.1)
-(1.1761011695624575 > 0.1)
-(0.9673331990455267 > 0.1)
-(0.7953854144943762 > 0.1)
-(0.6538311458190265 > 0.1)
-(0.5373463463552639 > 0.1)
-(0.4415258750000248 > 0.1)
-(0.36272881271753477 > 0.1)
-(0.297948688131612 > 0.1)
-(0.2447049323968713 > 0.1)
-(0.20095232691227238 > 0.1)
-(0.16500562975153343 > 0.1)
-(0.13547695646155944 > 0.1)
-(0.1112

### Visualize Actions

In [6]:
use plotters::prelude::*;

// Render the policy computed for Exercise 4.7 as a grid: one filled unit
// square per state, coloured by the action the policy picks there.
evcxr_figure((500, 500), |area| {
    area.fill(&WHITE)?;

    // Both axes cover 0..21; the y range is passed reversed (21 down to 0)
    // and the x labels are placed in the top label area.
    let mut grid = ChartBuilder::on(&area)
        .caption("Actions Exercise 4.7", ("sans-serif", 60))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels only; the mesh lines are switched off.
    grid.configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    let states = car_env_complex.states();
    grid.draw_series(states.iter().map(|&(x, y)| {
        let action = *policy_complex.get(&(x, y)).unwrap();
        // Shift the action by 5 and scale by 1/11 to obtain the hue.
        // NOTE(review): assumes actions lie in -5..=5 so the hue stays in [0, 1) — confirm.
        let hue = (action + 5) as f64 / 11.0;
        Rectangle::new([(x, y), (x + 1, y + 1)], HSLColor(hue, 1.0, 0.5).filled())
    }))?;

    // Report success back to plotters.
    Ok(())
}).style("width: 400px")

Error: cannot find value `complex_car_env` in this scope

Error: mismatched types

### Visualize Values

In [None]:
// Render the state values computed for Exercise 4.7 as a grid: one filled
// unit square per state, coloured by that state's value.
evcxr_figure((500, 500), |root| {
    root.fill(&WHITE)?;

    // Both axes cover 0..21; the y range is passed reversed (21 down to 0)
    // and the x labels are placed in the top label area.
    let mut chart = ChartBuilder::on(&root)
        .caption("Values Exercise 4.7", ("sans-serif", 60))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels only; the mesh lines are switched off.
    chart
        .configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        // Was truncated to "cars at lo"; now matches the actions plot.
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    chart.draw_series(
        car_env_complex.states().iter().map(|&(x, y)| {
            let v = *value_complex.get(&(x, y)).unwrap();
            // Linear value-to-hue map: 400 -> 0.0, 700 -> 1.0.
            // NOTE(review): values outside 400..700 give a hue outside [0, 1];
            // confirm how HSLColor treats that.
            let hue = (v - 400.0) / 300.0;
            Rectangle::new(
                [(x, y), (x + 1, y + 1)],
                HSLColor(hue, 1.0, 0.5).filled(),
            )
        })
    )?;

    // Tell plotters that everything is ok
    Ok(())
}).style("width: 400px")