In [24]:
//Install plotters
:dep plotters = { git = "https://github.com/38/plotters", default_features = false, features = ["evcxr"] }
:dep dashmap = {version = "3.11.1"}
// Load external source code
:dep e4_7 = {path = "e4_7"}

Since Python is too slow to run this kind of policy iteration algorithm in a reasonable time frame, here is a Rust implementation.

The implementation here has a few extra aspects not seen in the pseudocode in RL 2018. There are two cases in which we want to stop:
1. if the policy is stable;
2. if we are cycling through policies with the same value.

Since we can only approximate the value, we add a check to see whether the value has approximately stopped changing.
This introduces two new hyperparameters, $\alpha$ and $\epsilon$. Here $\alpha$ weights the changes in value from past iterations, and $\epsilon$ measures how close to "no change" we must be before we stop the algorithm.

In [62]:
// Ask evcxr to compile the following code at optimization level 2.
:opt 2
// Glob-import the locally developed e4_7 crate (declared via `:dep` with a path).
use e4_7::*;
// Environment instance used by this cell and by the plotting cells below.
// NOTE(review): SimpleCarEnv is presumably exported by e4_7 — confirm.
let car_env = SimpleCarEnv::new();
// Run policy iteration and keep the resulting policy and value tables.
// `.unwrap()` panics if the call does not yield a result.
let (policy, value) = Environment::policy_iteration(
    &car_env,
    0.1, // theta: threshold shown in the evaluation log lines `-(delta > 0.1)`; presumably the policy-evaluation tolerance — confirm
    0.4, // alpha: weights changes in value from past iterations
    1e-4, // epsilon: how close to "no change" in value is required before stopping
    Some(0), // start action
    Some(10) // cut off after 10 iterations
).unwrap();

Optimization: 2


Starting evaluation
-(193.3490492751235 > 0.1)
-(134.00426086449968 > 0.1)
-(90.45243773749041 > 0.1)
-(67.5977910661554 > 0.1)
-(53.58493061758446 > 0.1)
-(41.720862911286815 > 0.1)
-(32.72010939685845 > 0.1)
-(26.08755613817783 > 0.1)
-(21.77814339269986 > 0.1)
-(18.318059790332995 > 0.1)
-(15.3674063477024 > 0.1)
-(12.86102712361702 > 0.1)
-(10.73928669081539 > 0.1)
-(8.949004145272909 > 0.1)
-(7.443128433756499 > 0.1)
-(6.180206266078358 > 0.1)
-(5.123897292162326 > 0.1)
-(4.24254034724521 > 0.1)
-(3.508736868355925 > 0.1)
-(2.8989345554258534 > 0.1)
-(2.3930120440829796 > 0.1)
-(1.9738735796704532 > 0.1)
-(1.6270638320042963 > 0.1)
-(1.3404108007531477 > 0.1)
-(1.103701663739173 > 0.1)
-(0.9083935811094079 > 0.1)
-(0.7473592909606737 > 0.1)
-(0.6146658513113152 > 0.1)
-(0.5053839897120724 > 0.1)
-(0.4154250710587917 > 0.1)
-(0.3414025532433129 > 0.1)
-(0.2805148621152398 > 0.1)
-(0.2304468020143986 > 0.1)
-(0.1892868693016112 > 0.1)
-(0.15545811562623157 > 0.1)
-(0.127660490264702

In [61]:
use plotters::prelude::*;
// Draw the policy as a colour grid: one filled rectangle per state returned by
// `car_env.states()`, with the hue encoding the action stored in `policy`.
evcxr_figure((500,500), |root| {
    // Start from a white canvas.
    root.fill(&WHITE)?;

    // Both axes cover 0..21. The y range is written in descending order
    // (21..0), and the x labels are placed along the top edge.
    let mut chart = ChartBuilder::on(&root)
        .caption("Actions", ("sans-serif", 80))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels only; the mesh lines are disabled so they do not cut through
    // the coloured cells.
    chart
        .configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    chart.draw_series(
        car_env.states().iter().map(|s| {
            let (x, y) = s;
            // Panics if a state has no entry in the policy table.
            let a = *policy.get(s).unwrap();
            // Shift the action by 5 and scale by 1/11 to obtain the hue.
            // NOTE(review): presumably actions span -5..=5, giving 11 distinct hues — confirm.
            let hue = (a+5) as f64 / 11.0;
            Rectangle::new(
                [(*x, *y), (*x + 1, *y + 1)],
                HSLColor(hue, 1.0, 0.5).filled(),
            )
        })
    )?;

    // Tell plotters that everything is ok
    Ok(())
}).style("width: 400px")

## Visualizing value

In [59]:
// Draw the value function as a colour grid: one filled rectangle per state
// returned by `car_env.states()`, with the hue encoding the entry in `value`.
evcxr_figure((500,500), |root| {
    // Start from a white canvas.
    root.fill(&WHITE)?;

    // Both axes cover 0..21. The y range is written in descending order
    // (21..0), and the x labels are placed along the top edge.
    let mut chart = ChartBuilder::on(&root)
        .caption("Values", ("sans-serif", 80))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels and descriptions only; the mesh lines are disabled so they
    // do not cut through the coloured cells.
    chart
        .configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    chart.draw_series(
        car_env.states().iter().map(|&(x, y)| {
            // Panics if a state has no entry in the value table.
            let v = *value.get(&(x,y)).unwrap();
            // Linear map from value to hue: offset 400, scale 300.
            // NOTE(review): constants look hand-tuned to the observed value
            // range (about 639 at state (20, 20)) — confirm if the environment changes.
            let hue = (v - 400.0) / 300.0;
            Rectangle::new(
                [(x, y), (x + 1, y + 1)],
                HSLColor(hue, 1.0, 0.5).filled(),
            )
        })
    )?;

    // Tell plotters that everything is ok
    Ok(())
}).style("width: 400px")

In [60]:
println!("{:#?}", *value.get(&(20,20)).unwrap())

638.7627040602802


()

In [52]:
println!("{:#?}", *policy.get(&(20,0)).unwrap())

5


()