In [2]:
//Install plotters
:dep plotters = { git = "https://github.com/38/plotters", default_features = false, features = ["evcxr"] }
// Load external source code
:dep e4_7 = {path = "e4_7"}

Since Python is too slow to run this kind of policy iteration algorithm in a reasonable time frame, here is a Rust implementation.

The implementation here has a few extra aspects not seen in the pseudocode in RL 2018. There are two cases in which we want to stop:
1. if the policy is stable
2. if we are cycling through policies with the same value.

Since we can only approximate the value, we write a check to see if the value has approximately stopped changing.
This introduces two new hyper-parameters $\alpha$ and $\epsilon$. Here $\alpha$ weights changes in value in past iterations and $\epsilon$ measures how close to no change is needed before we stop the algorithm.

In [19]:
// evcxr command: set the compiler optimization level to 2 for the cells that follow.
:opt 2
use e4_7::*;
// Run policy iteration on the environment returned by `car_env()` (from the local
// `e4_7` crate) and keep the resulting policy and value tables for the cells below.
// NOTE(review): `.unwrap()` panics if `policy_iteration` reports a failure; its
// exact return type is defined in `e4_7` and is not visible from this notebook.
let (policy, value) = car_env().policy_iteration(
    0.1, // theta: threshold the evaluation log compares each printed change against
    0.4, // alpha: weight given to value changes from past iterations
    1e-4, // epsilon: how close to "no change" the value must get before stopping
    Some(0), // start action
    Some(10) // cut off after 10 iterations
).unwrap();

Optimization: 2


Starting evaluation
-(131.13392504059757 > 0.1)
-(94.54245687794872 > 0.1)
-(62.20094644952451 > 0.1)
-(37.737737293921555 > 0.1)
-(21.03208151152495 > 0.1)
-(10.988540657823194 > 0.1)
-(5.4191223215061655 > 0.1)
-(2.5378347718998953 > 0.1)
-(1.1436516815742266 > 0.1)
-(0.4952235144068595 > 0.1)
-(0.2093248570794799 > 0.1)
Starting improvement
-(1/441)
-(31/441)
-(61/441)
-(91/441)
-(121/441)
-(151/441)
-(181/441)
-(211/441)
-(241/441)
-(271/441)
-(301/441)
-(331/441)
-(361/441)
-(391/441)
-(421/441)
Completed 1 iteration(s).
Starting evaluation
-(331.2169755034793 > 0.1)
-(52.247061650934086 > 0.1)
-(23.46741069832251 > 0.1)
-(14.18420465899942 > 0.1)
-(8.921920069641146 > 0.1)
-(5.425394417237953 > 0.1)
-(3.1942514366742785 > 0.1)
-(1.8412451490851254 > 0.1)
-(1.0526828169266764 > 0.1)
-(0.5952305099934847 > 0.1)
-(0.33386244926083464 > 0.1)
-(0.18624316886206316 > 0.1)
-(0.10334290555374537 > 0.1)
Starting improvement
-(1/441)
-(31/441)
-(61/441)
-(91/441)
-(121/441)
-(151/441)
-(18

## visualizing the actions

In [20]:
use plotters::prelude::*;
// Render the policy as a coloured grid: one unit square per state, with the hue
// chosen by the action stored for that state.
evcxr_figure((500,500), |root| {
    // Start from a white canvas.
    root.fill(&WHITE)?;

    // The y range is given as 21..0 (reversed) and the x labels sit in the top
    // label area, so the origin ends up in the top-left corner of the chart.
    let mut chart = ChartBuilder::on(&root)
        .caption("Actions", ("sans-serif", 80))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels and descriptions only; grid lines are disabled because the
    // filled squares already form the grid.
    chart
        .configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    // Each policy entry `((x, y), a)` becomes the square [(x, y), (x + 1, y + 1)].
    // The action is shifted by 5 and divided by 11 to produce the hue.
    // NOTE(review): assumes actions lie in -5..=5 so the hue stays in [0, 1) — confirm.
    chart.draw_series(
        policy.iter().map(|((x, y), a)| {
            Rectangle::new(
                [(*x, *y), (*x + 1, *y + 1)],
                HSLColor((a+5) as f64 / 11.0, 1.0, 0.5).filled(),
            )
        })
    )?;

    // Tell plotters that everything is ok
    Ok(())
}).style("width: 400px")

## visualizing value

In [21]:
// Render the value table as a coloured grid: one unit square per state, with the
// hue chosen by the value stored for that state.
evcxr_figure((500,500), |root| {
    // Start from a white canvas.
    root.fill(&WHITE)?;

    // The y range is given as 21..0 (reversed) and the x labels sit in the top
    // label area, so the origin ends up in the top-left corner of the chart.
    let mut chart = ChartBuilder::on(&root)
        .caption("Values", ("sans-serif", 80))
        .margin(5)
        .top_x_label_area_size(40)
        .y_label_area_size(40)
        .build_ranged(0i32..21i32, 21i32..0i32)?;

    // Axis labels and descriptions only; grid lines are disabled because the
    // filled squares already form the grid. The axis descriptions match the
    // ones used by the "Actions" figure so both plots read the same way.
    chart
        .configure_mesh()
        .x_labels(20)
        .y_labels(20)
        .x_desc("cars at location 1")
        .y_desc("cars at location 2")
        .disable_x_mesh()
        .disable_y_mesh()
        .label_style(("sans-serif", 20))
        .draw()?;

    // Each value entry `((x, y), v)` becomes the square [(x, y), (x + 1, y + 1)].
    // The value is divided by 600 to produce the hue.
    // NOTE(review): assumes values fall within 0..600 so the hue stays in [0, 1) — confirm.
    chart.draw_series(
        value.iter().map(|((x, y), v)| {
            Rectangle::new(
                [(*x, *y), (*x + 1, *y + 1)],
                HSLColor(v / 600.0, 1.0, 0.5).filled(),
            )
        })
    )?;

    // Tell plotters that everything is ok
    Ok(())
}).style("width: 400px")

In [22]:
// Print the value estimate stored for state (19, 19).
// There is no trailing `;`, so the notebook also echoes the expression's `()` result.
println!("{:#?}", value[&(19,19)])

333.09847712126054


()

In [7]:
// Dump the whole policy table in pretty debug format, one `(x, y): action` entry
// per state. The entries are printed in the map's iteration order, which is not sorted.
println!("{:#?}", policy)

{
    (
        3,
        15,
    ): -3,
    (
        11,
        0,
    ): 0,
    (
        12,
        3,
    ): 3,
    (
        7,
        18,
    ): -2,
    (
        19,
        10,
    ): 1,
    (
        0,
        4,
    ): 0,
    (
        18,
        6,
    ): 2,
    (
        16,
        5,
    ): 4,
    (
        19,
        12,
    ): 1,
    (
        3,
        10,
    ): -2,
    (
        6,
        15,
    ): -2,
    (
        6,
        17,
    ): -3,
    (
        16,
        1,
    ): 1,
    (
        9,
        11,
    ): 1,
    (
        17,
        0,
    ): 0,
    (
        7,
        20,
    ): 0,
    (
        12,
        13,
    ): 1,
    (
        19,
        9,
    ): 1,
    (
        8,
        6,
    ): 3,
    (
        9,
        3,
    ): 3,
    (
        20,
        6,
    ): 0,
    (
        8,
        12,
    ): 0,
    (
        2,
        3,
    ): 1,
    (
        13,
        11,
    ): 3,
    (
        17,
        12,
    ): 3,
    (
        18,

()

        19,
        8,
    ): 1,
    (
        13,
        19,
    ): -1,
    (
        5,
        1,
    ): 1,
    (
        5,
        10,
    ): -1,
    (
        10,
        3,
    ): 3,
    (
        6,
        1,
    ): 1,
    (
        6,
        13,
    ): -2,
    (
        18,
        7,
    ): 2,
    (
        0,
        15,
    ): 0,
    (
        4,
        3,
    ): 2,
    (
        10,
        19,
    ): -1,
    (
        1,
        12,
    ): -1,
    (
        11,
        7,
    ): 4,
    (
        6,
        9,
    ): 0,
    (
        15,
        1,
    ): 1,
    (
        11,
        17,
    ): -1,
    (
        16,
        16,
    ): 1,
    (
        18,
        19,
    ): 1,
    (
        19,
        4,
    ): 1,
    (
        20,
        20,
    ): 0,
    (
        5,
        14,
    ): -3,
    (
        10,
        8,
    ): 3,
    (
        11,
        5,
    ): 5,
    (
        16,
        10,
    ): 4,
    (
        10,
        0,
    ): 0,
    (
        18,
   

    ): -2,
    (
        2,
        5,
    ): 0,
    (
        0,
        9,
    ): 0,
    (
        4,
        20,
    ): 0,
    (
        1,
        8,
    ): -1,
    (
        5,
        11,
    ): -1,
    (
        7,
        7,
    ): 2,
    (
        1,
        1,
    ): 1,
    (
        10,
        9,
    ): 2,
    (
        11,
        4,
    ): 4,
    (
        11,
        19,
    ): -1,
    (
        11,
        3,
    ): 3,
    (
        15,
        2,
    ): 2,
    (
        1,
        10,
    ): -1,
    (
        20,
        5,
    ): 0,
    (
        7,
        16,
    ): -2,
    (
        7,
        19,
    ): -1,
    (
        10,
        2,
    ): 2,
    (
        10,
        12,
    ): 1,
    (
        4,
        1,
    ): 1,
    (
        10,
        18,
    ): -2,
    (
        11,
        9,
    ): 3,
    (
        8,
        7,
    ): 2,
    (
        13,
        20,
    ): 0,
    (
        14,
        14,
    ): 2,
    (
        20,
        7,
    ): 0,
    (
   