# Dynamic Programming Algorithms - Experiments - Chapter 5 and 6.2

In [None]:
import time
import pickle
import numpy as np
import matplotlib.pyplot as plt

from typing import List, Optional
from shapely.geometry import Polygon, MultiPolygon, Point

from experimental.utils.dynamic_system import DynamicSystem
from experimental.utils.markov_decision_process import MarkovDecisionProcess

In [None]:
# Integer 2x2 matrix of the linear map x -> A x that phi applies modulo 1.
A = np.array(
    (
        (1, 1),
        (1, 0),
    )
)

In [None]:
def phi(x: np.ndarray) -> np.ndarray:
    """Apply the linear map ``A`` to ``x`` and reduce the result modulo 1.

    Args:
        x: Point to map; anything ``np.dot(A, x)`` accepts.

    Returns:
        ``np.dot(A, x) % 1``, i.e. the image of ``x`` under the module-level
        matrix ``A`` with every component reduced modulo 1.
    """
    return np.dot(A, x) % 1

In [None]:
def q(x: np.ndarray) -> np.ndarray:
    """Reduce every component of ``x`` modulo 1.

    Args:
        x: Array (or array-like such as a list or tuple) of coordinates.

    Returns:
        ``x mod 1`` computed element-wise with ``np.mod``; the sign of the
        result follows the divisor, so negative inputs map to non-negative
        remainders (e.g. ``-0.25 -> 0.75``).
    """
    # np.mod is what ``ndarray % 1`` dispatches to, but it also accepts plain
    # Python sequences, for which ``x % 1`` would raise a TypeError.
    return np.mod(x, 1)

In [None]:
# np.linalg.eig returns the eigenvalues together with a matrix whose COLUMNS
# are the corresponding (right) eigenvectors of A.
eig_vals, eig_vects = np.linalg.eig(A)
# Transpose so that each ROW of eig_vects is one eigenvector; row i belongs
# to eig_vals[i].
eig_vects = eig_vects.T

In [None]:
# Show the eigenvalues of A computed above.
print(f"Eigenvalues: {eig_vals}")

In [None]:
# Show the eigenvectors of A; after the transpose above each row is one eigenvector.
print(f"Eigenvectors: {eig_vects}")

In [None]:
# v1 and v2 are the eigenvectors belonging to eig_vals[0] and eig_vals[1].
v1, v2 = eig_vects[0], eig_vects[1]

In [None]:
def perp(a):
    """Return the 2-vector ``a`` rotated by 90 degrees counter-clockwise.

    Args:
        a: Array-like whose first two entries are the x and y components.

    Returns:
        Array with the shape and dtype of ``a`` holding ``(-a[1], a[0])``,
        which is orthogonal to ``a``.
    """
    b = np.empty_like(a)
    b[0] = -a[1]
    b[1] = a[0]
    return b


def seg_intersect(a, b):
    """Return the intersection point of the lines through ``a`` and ``b``.

    Despite the name, the two inputs are treated as infinite lines: the
    returned point is not required to lie between the given endpoints.

    Args:
        a: Pair of endpoints ``(a1, a2)`` of the first line; an ndarray of
            shape (2, 2) or any nested sequence convertible to one.
        b: Pair of endpoints ``(b1, b2)`` of the second line, same format.

    Returns:
        Float array with the coordinates of the intersection point. If the
        lines are parallel the denominator below is zero and the result
        contains non-finite values (NumPy emits a RuntimeWarning).
    """
    # Coerce to float arrays so that lists/tuples work and the division below
    # is always a floating-point division.
    a1, a2 = np.asarray(a, dtype=float)
    b1, b2 = np.asarray(b, dtype=float)

    da = a2 - a1
    db = b2 - b1
    dp = a1 - b1
    # Projecting onto the normal of line a removes the unknown parameter of
    # line a and leaves a scalar equation for the parameter along line b.
    dap = perp(da)
    denom = np.dot(dap, db)
    num = np.dot(dap, dp)

    return (num / denom) * db + b1

In [None]:
# Corners of the unit square, listed clockwise starting at the origin.
u1, u2, u3, u4 = (
    np.array(corner) for corner in ((0, 0), (0, 1), (1, 1), (1, 0))
)

In [None]:
# Edges of the unit square as (start, end) point pairs.
lu1, lu2, lu3, lu4 = (
    np.array([[0, 0], [0, 1]]),  # left edge
    np.array([[0, 1], [1, 1]]),  # top edge
    np.array([[1, 1], [1, 0]]),  # right edge
    np.array([[1, 0], [0, 0]]),  # bottom edge
)

In [None]:
# Second support point for the line through each corner along an eigenvector.
# v1 is rescaled so that its first component is 1 and v2 so that its second
# component is 1; the rescaled vector is added to u1/u4 and subtracted from
# u2/u3.
p1 = u1 + (1/v1[0]) * v1
p2 = u2 - (1/v2[1]) * v2
p3 = u3 - (1/v1[0]) * v1
p4 = u4 + (1/v2[1]) * v2

In [None]:
# Lines through the corners of the unit square, each stored as a
# (corner, support point) pair: l_00 starts at u1=(0,0), l_01 at u2=(0,1),
# l_10 at u4=(1,0) and l_11 at u3=(1,1).
l_00, l_01, l_10, l_11 = (
    np.array([u1, p1]),
    np.array([u2, p2]),
    np.array([u4, p4]),
    np.array([u3, p3]),
)

In [None]:
# Pairwise intersections of the lines that start at the corners.
P1a = seg_intersect(l_00, l_01)
P1b = seg_intersect(l_10, l_00)
P3a = seg_intersect(l_10, l_11)

# Point where l_10 meets the top edge lu2, and the same point shifted down by
# one unit in y; the pair is kept as a helper segment.
symm_helper_x = seg_intersect(lu2, l_10)
symm_helper_y = symm_helper_x - np.array([0, 1])
symm_helper_l11 = np.array([symm_helper_x, symm_helper_y])

# Continue from the shifted point in direction v2 until the line meets l_00;
# the extension is stored as the segment from the shifted point to that hit.
p1_symm = symm_helper_y + v2
P3b = seg_intersect(np.array([symm_helper_y, p1_symm]), l_00)
l_10_symm_extension = np.array([symm_helper_y, P3b])

In [None]:
# Re-define the corner lines so that l_00, l_01 and l_11 end at the
# intersection points computed above; l_10 keeps its support point p4.
l_00, l_01, l_10, l_11 = (
    np.array([u1, P1b]),
    np.array([u2, P1a]),
    np.array([u4, p4]),
    np.array([u3, P3a]),
)

In [None]:
# Every segment is a (2, 2) array of points, so its transpose unpacks into the
# x-coordinates and the y-coordinates expected by plt.plot.

# unit square (red)
plt.plot(*lu1.T, "r-")
plt.plot(*lu2.T, "r-")
plt.plot(*lu3.T, "r-")
plt.plot(*lu4.T, "r-")

# step 1: l_10 in contracting direction
plt.plot(*l_10.T, "b-")

# step 2: l_00 and l_11 in expanding directions
plt.plot(*l_00.T, "b-")
plt.plot(*l_11.T, "b-")

# step 3: l_01 in contracting direction
plt.plot(*l_01.T, "b-")

# step 4: helper segment (dashed magenta) and the symmetric extension of the
# l_10 line
plt.plot(*symm_helper_l11.T, "m--")
plt.plot(*l_10_symm_extension.T, "b-")

# intersection points, one colour each
plt.plot(*P1a, "bo")
plt.plot(*P1b, "ro")
plt.plot(*P3a, "go")
plt.plot(*P3b, "yo")

plt.show()

In [None]:
# P1: two triangles, one on the left side (u1, u2) and one on the right side
# (u3, u4) of the unit square, combined into a single partition element.
P1A, P1B = Polygon([P1a, u1, u2]), Polygon([P3a, u3, u4])
P1 = MultiPolygon([P1A, P1B])

In [None]:
# P2: a triangle at the corner u1 and a quadrilateral at the corner u2; the
# helper segment symm_helper_l11 supplies one vertex for each piece.
P2A, P2B = (
    Polygon([P3b, u1, symm_helper_l11[1]]),
    Polygon([symm_helper_l11[0], u2, P1a, P1b]),
)
P2 = MultiPolygon([P2A, P2B])

In [None]:
# P3: a quadrilateral at the corner u4 and a triangle at the corner u3; the
# helper segment symm_helper_l11 supplies one vertex for each piece.
P3A, P3B = (
    Polygon([P1b, u4, symm_helper_l11[1], P3b]),
    Polygon([symm_helper_l11[0], u3, P3a]),
)
P3 = MultiPolygon([P3A, P3B])

In [None]:
# Vertex lists for the three partition elements, in the same order as the
# elements themselves (P1, P2, P3).
P1_vertices, P2_vertices, P3_vertices = (
    [P1a, u1, u2, P3a],
    [P3b, u1, P1a, P1b],
    [P1b, u4, P3b, P3a],
)
partition_vertices = [P1_vertices, P2_vertices, P3_vertices]

In [None]:
# The partition handed to MarkovDecisionProcess: one MultiPolygon per element.
partition = [P1, P2, P3]

In [None]:
# Bare expression: in a notebook the cell displays all six pieces of the
# partition as one geometry.
MultiPolygon([P1A, P1B, P2A, P2B, P3A, P3B])

In [None]:
# First element of partition_phi: a triangle on the bottom side (u4, u1) and a
# triangle on the top side (u2, u3) whose third vertex is phi(P3a).
P1A_phi, P1B_phi = Polygon([P1b, u4, u1]), Polygon([u2, u3, phi(P3a)])
P1_phi = MultiPolygon([P1A_phi, P1B_phi])

In [None]:
# Point where l_00 meets the right edge lu3, and the same point with its
# coordinates reduced modulo 1 by q; the pair is kept as a helper segment.
# NOTE: this overwrites the symm_helper_x / symm_helper_y computed earlier.
symm_helper_x = seg_intersect(lu3, l_00)
symm_helper_y = q(symm_helper_x)
symm_helper_l00 = np.array((symm_helper_x, symm_helper_y))

In [None]:
# Second element of partition_phi: a quadrilateral at the corner u1 and a
# triangle at the corner u4.
P2A_phi, P2B_phi = (
    Polygon([symm_helper_l00[1], phi(P1b), P1a, u1]),
    Polygon([symm_helper_l00[0], u4, P1b]),
)
P2_phi = MultiPolygon([P2A_phi, P2B_phi])

In [None]:
# Third element of partition_phi: a quadrilateral at the corner u3 and a
# triangle at the corner u2.
P3A_phi, P3B_phi = (
    Polygon([phi(P3a), P1a, symm_helper_l00[0], u3]),
    Polygon([u2, phi(P1b), symm_helper_l00[1]]),
)
P3_phi = MultiPolygon([P3A_phi, P3B_phi])

In [None]:
# Bare expression: in a notebook the cell displays all six *_phi pieces as one
# geometry.
MultiPolygon([P1A_phi, P1B_phi, P2A_phi, P2B_phi, P3A_phi, P3B_phi])

In [None]:
# The *_phi MultiPolygons in the same order as `partition`; passed to
# ground_truth_probability_matrix below.
partition_phi = [P1_phi, P2_phi, P3_phi]

In [None]:
def phi_non_identified(x: np.ndarray) -> np.ndarray:
    """Apply the linear map ``A`` to ``x`` without reducing modulo 1.

    Args:
        x: Point to map; anything ``np.dot(A, x)`` accepts.

    Returns:
        ``np.dot(A, x)`` for the module-level matrix ``A``.
    """
    return np.dot(A, x)

In [None]:
def d_phi(x: np.ndarray) -> np.ndarray:
    """Return the transpose of the module-level matrix ``A``.

    Args:
        x: Evaluation point. It is not used: the returned matrix is the same
            for every ``x``.

    Returns:
        ``np.transpose(A)``.
    """
    return np.transpose(A)

In [None]:
# Experiment parameters.
m_id = 1        # third positional argument of DynamicSystem
gamma = 0.8     # forwarded as `gamma=`; presumably the discount factor -- confirm
c = 3000        # used by estimate_probability_matrix_pi_method below
tau = 0.0001    # used by estimate_probability_matrix_pi_method below
target_state = np.array([0.5, 0.5])  # centre of the unit square

# The dynamic system is built from the map without the modulo-1 reduction and
# from d_phi; the MDP is then set up on the three-element partition.
dynamic_system = DynamicSystem(phi_non_identified, d_phi, m_id)
markov_decision_process = MarkovDecisionProcess(
    dynamic_system,
    partition,
    partition_vertices=partition_vertices,
    target_state=target_state,
    gamma=gamma,
)

In [None]:
# Estimate the transition probabilities with the "pi method" of the MDP
# helper, using the parameters c and tau defined above.
markov_decision_process.estimate_probability_matrix_pi_method(c=c, tau=tau, max_sample_trials=1000)

In [None]:
# Reference transition matrix computed by the MDP helper from partition_phi.
prob_mat_ground_truth = markov_decision_process.ground_truth_probability_matrix(partition_phi)

In [None]:
# Bare expression: displays the ground-truth matrix in the notebook.
prob_mat_ground_truth

In [None]:
# Bare expression: displays the MDP's transition_prob_matrix attribute in the
# notebook, for comparison with the ground truth above.
markov_decision_process.transition_prob_matrix

In [None]:
# Tolerance for policy evaluation: twice the machine epsilon of Python floats.
epsilon = np.finfo(float).eps * 2

# Evaluate the policy stored on the MDP as `g`; the call returns the value
# function and the number of iterations it took.
V, num_iters = markov_decision_process.policy_evaluation(
    markov_decision_process.g,
    epsilon=epsilon,
)
print(f"Value function: {V}")
print(f"Number of iterations until convergence: {num_iters}")