# Dynamic Programming and Optimal Control
## Problem Set 2, Problem 3.b

**Python script that solves Problem 3.b of Problem Set 2 by applying value iteration. The problem is taken from the book "Dynamic Programming and Optimal Control", Vol. 1, by D. Bertsekas (page 445, Problem 7.1b).**

**We use the [NumPy](https://numpy.org/) and [matplotlib](https://matplotlib.org/) packages. You can install them with the `pip install` or `conda install` command, depending on your package manager. Installation guides are also available on the package websites and in their documentation.**

In [None]:
import numpy as np
import matplotlib.pyplot as plt

### Parameters

In [None]:
# Serve types are indexed 0 = fast serve, 1 = slow serve in both arrays.

# Landing probability of each serve.
# p[0] (fast serve) is only a placeholder here: it is overwritten with each
# value of p_incr_vec before the problem is solved.  np.array is used instead
# of the low-level np.ndarray constructor, which would leave the entries
# uninitialized.
p = np.array([0.0, 0.95])

# Probability of winning the point given that the serve landed
q = np.array([0.6, 0.4])

# Define value iteration error bound (relative change between two sweeps).
# It has to stay above float64 machine epsilon (~2.2e-16): a smaller bound
# can only be met when two successive iterates are bitwise identical, which
# floating-point arithmetic does not guarantee.
err = 1e-12

# Define vector of incremental values for p[0]
p_incr_vec = np.linspace(0, 1, 21)

# Winning probability obtained for each entry of p_incr_vec (filled in later)
prob_win_vec = np.zeros(p_incr_vec.shape)

### Value Iteration

In [None]:
# For every candidate fast-serve landing probability, solve the game by value
# iteration and record the probability that player 1 wins from the start.
for idx, p_incr in enumerate(p_incr_vec):
    # PARAMETERS
    # Landing probability of the fast serve for this run
    p[0] = p_incr

    # Per-serve outcome probabilities of a single serve (index 0 = fast,
    # 1 = slow).  They stay constant during value iteration, so they are
    # computed once per run instead of inside every state update.
    win = q * p  # serve lands and player 1 wins the point
    lose = (1 - q) * p  # serve lands and player 1 loses the point
    fault = 1 - p  # serve does not land
    not_win = 1 - win  # second serve: a fault also loses the point

    # INITIALIZE PROBLEM
    # Our state space is S = {0,1,2,3}x{0,1,2,3}x{1,2}
    # i.e. x_k = [score player 1, score player 2, serve],
    # stored as J[i, j, s] with s = 0 for the first and s = 1 for the
    # second serve.

    # Initialize costs to 1
    # (although any value would do)
    J = np.ones([4, 4, 2])

    # Initialize the optimal control policy
    # 0 represents Fast serve, 1 represents Slow serve
    FVal = np.zeros([4, 4, 2])

    # Initialize cost-to-go
    cost_to_go = np.zeros([4, 4, 2])

    # Iterate until cost has converged
    # (the counter is not called `iter`, which would shadow the builtin)
    n_iter = 0

    while True:
        # Increase counter
        n_iter += 1

        # Update the value of every state.  All reads go to J and all writes
        # to cost_to_go, so the order in which states are visited is
        # irrelevant.
        for i in range(4):
            for j in range(4):
                # Value of the situation reached after player 1 wins / loses
                # the point.
                if i == 3 and j == 3:
                    # (3, 3) behaves like deuce: the next point leads to
                    # (3, 2) or (2, 3) rather than ending the game.
                    v_win, v_lose = J[3, 2, 0], J[2, 3, 0]
                else:
                    # Winning a point at i == 3 wins the game (value 1);
                    # losing a point at j == 3 loses it (value 0).
                    v_win = 1.0 if i == 3 else J[i + 1, j, 0]
                    v_lose = 0.0 if j == 3 else J[i, j + 1, 0]

                # First serve: a fault moves to the second serve of the
                # same score.  Entry 0 / 1 is the value of serving fast /
                # slow.
                first_serve = win * v_win + lose * v_lose + fault * J[i, j, 1]
                cost_to_go[i, j, 0] = np.max(first_serve)
                FVal[i, j, 0] = np.argmax(first_serve)

                # Second serve: everything except winning the point counts
                # as a lost point.
                second_serve = win * v_win + not_win * v_lose
                cost_to_go[i, j, 1] = np.max(second_serve)
                FVal[i, j, 1] = np.argmax(second_serve)

        # Check if cost has converged (relative change of the largest entry)
        converged = (
            np.max(np.abs(J - cost_to_go)) / np.max(np.abs(cost_to_go)) < err
        )

        # Update cost
        # PS: use np.copy() to assign value of cost_to_go to J,
        # otherwise J will only be a reference of cost_to_go.
        J = np.copy(cost_to_go)
        if converged:
            break

    # Probability of player 1 winning the game.  Store by position instead
    # of through a float-equality mask on p_incr_vec.
    prob_win_vec[idx] = J[0, 0, 0]

    # Display
    print(
        "Terminated after",
        n_iter,
        "iterations:",
        " For p_F = ",
        format(p_incr, ".2f"),
        ", probability of winning is",
        format(J[0, 0, 0], ".2f"),
    )

### Plot results

In [None]:
# Draw the winning probability against the fast-serve landing probability
# on the current axes, then display the figure.
curve_style = {"linestyle": "-", "marker": "*", "color": "b"}

axes = plt.gca()
axes.plot(p_incr_vec, prob_win_vec, **curve_style)
axes.set_title("Probability of the server winning a game")
axes.set_xlabel("p_F")
axes.set_ylabel("Probability of winning")

plt.show()