In [None]:
;;
#require "pkp"

In [None]:
open Pkp.Reinforcement_learning

In [None]:
(* Bundle of the helper functions that is handed to the [Make] functor below.
   Every entry is currently a plain alias of the value of the same name in
   [Solution].
   NOTE(review): presumably each alias is meant to be swapped for your own
   implementation as you work through the exercises -- confirm against the
   course instructions. *)
module MySolution = struct
  let other_mark = Solution.other_mark
  let empty_board = Solution.empty_board
  let transpose = Solution.transpose
  let is_full = Solution.is_full
  let mean = Solution.mean
  let play_game = Solution.play_game
end

In [None]:
(* Apply the [Make] functor to [MySolution]. The resulting module [M] provides
   the [M.play], [M.random] and [M.optimal] values used in the cells below. *)
module M = Make (MySolution)

Let's run an example game between the optimal X player and a random O player:

In [None]:
(* Play a single game between a random O player and the optimal X player and
   bind its outcome to [result]. Elsewhere in this notebook the value returned
   by [M.play] is compared against [Some X] and [None].
   NOTE(review): the game is presumably displayed as it is played, since the
   [~display:false] flag used further down is not passed here -- confirm. *)
let result = M.play (M.random O, M.optimal X)

Try running it several times to convince yourself that the optimal player does pretty well! Try also reversing the order of the players, so that the O player gets to start. What do you notice? Does the optimal player ever lose?

-----

Let us now define an O-player that interpolates between the optimal player (which is as strong as our optimal X-player) and a random player: every time it needs to choose a new board, it makes a random choice with probability $p$, and the optimal choice with probability $(1-p)$:

In [None]:
(** [playerO p] is an O player that mixes two strategies. Each time its [play]
    function is called, it delegates to the random O player when a fresh draw
    of [Random.float 1.] is strictly below [p], and to the optimal O player
    otherwise. With [p = 0.] it therefore always plays optimally. *)
let playerO p =
  (* Build both underlying players once, so they are shared across moves. *)
  let random_player = M.random O in
  let optimal_player = M.optimal O in
  let play board =
    (* One independent draw per move decides who picks the next board. *)
    let delegate =
      if Random.float 1. < p then random_player else optimal_player
    in
    delegate.play board
  in
  { mark = O; play }

Let us try out such an intermediate player -- you may want to play with parameter $p$, and swap the player ordering:

In [None]:
(* Play a single game between the half-random O player ([p = 0.5]) and
   [playerX], and bind the outcome to [result].
   NOTE(review): [playerX] is not defined in any cell visible in this notebook.
   It presumably stands for the optimal X player ([M.optimal X]) -- confirm that
   it is provided by the opened library or define it before running this cell. *)
let result = M.play (playerO 0.5, playerX)

Let us now run many games, collect some statistics, and make pretty plots!

In [None]:
(** [stats p] plays 10000 undisplayed games between [playerO p] and [playerX]
    and returns the pair [(win_fraction, tie_fraction)]. A "win" is a game
    whose outcome is [Some X]; a "tie" is a game whose outcome is [None]. *)
let stats p =
  let opponent = playerO p in
  let n_games = 10000 in
  (* Play every game up front; only the outcomes are kept. *)
  let outcomes =
    List.init n_games (fun _ -> M.play ~display:false (opponent, playerX))
  in
  (* Single pass over the outcomes, counting X wins and ties together. *)
  let tally (wins, ties) = function
    | Some X -> wins + 1, ties
    | None -> wins, ties + 1
    | Some _ -> wins, ties
  in
  let n_wins, n_ties = List.fold_left tally (0, 0) outcomes in
  let fraction count = float count /. float n_games in
  fraction n_wins, fraction n_ties

In [None]:
(* Sweep the opponent's randomness [p] over 20 evenly spaced values in [0, 1],
   gather the win and tie fractions from [stats] at each value, and draw the
   win / tie / lose curves as an SVG figure. *)
let () =
  let open Owl in
  let ps = Mat.linspace 0. 1. 20 in
  let results = Array.map stats (Mat.to_array ps) in
  (* Extract one component of every result pair as a single-row matrix. *)
  let row_of component = Mat.of_array (Array.map component results) 1 (-1) in
  let wins = row_of fst in
  let ties = row_of snd in
  (* [$-] binds more loosely than [+], so the original unparenthesised
     expression already meant 1 - (wins + ties); the parentheses only make
     that explicit. *)
  let lose = Mat.(1. $- (wins + ties)) in
  let open Gp in
  let figure (module P : Plot) =
    let series =
      [ item (L [ ps; wins ]) ~style:"lp pt 7 lc 7 ps 0.6" ~legend:"win"
      ; item (L [ ps; ties ]) ~style:"lp pt 7 lc 8 ps 0.6" ~legend:"tie"
      ; item (L [ ps; lose ]) ~style:"lp pt 7 lc 3 ps 0.6" ~legend:"lose"
      ]
    in
    let properties =
      [ barebone
      ; set "key at graph 1.1, graph 1 top left"
      ; tics "out nomirror"
      ; borders [ `bottom; `left ]
      ; xlabel "probability of opponent playing randomly"
      ; ylabel "win / tie probabilities"
      ; margins [ `right 0.6 ]
      ]
    in
    P.plots series properties
  in
  Juplot.draw ~fmt:`svg ~size:(500, 200) figure

Take-home exercise: redo the stats above, in the case where the order of play gets decided randomly (50-50) at the beginning of every game!