# Dynamic Programming Algorithms - Experiments - Chapter 5 and 6.2

In [None]:
import os
import time
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from typing import List, Optional
from shapely.geometry import Polygon, MultiPolygon, Point

from experimental.utils.partition import Partition
from experimental.utils.dynamic_system import DynamicSystem
from experimental.utils.markov_decision_process import MarkovDecisionProcess

In [None]:
# Global figure styling: "paper" context scaling on a white grid background.
sns.set_context(context="paper")
sns.set_style(style="whitegrid")

## Build equally-spaced state space partitions

In [None]:
# Reuse previously pickled 3-subset partitions when the file exists; otherwise
# start with an empty registry that the following cells fill in.
# NOTE(review): pickle.load can execute arbitrary code - only load files that
# were produced by this notebook.
baseline_3_path = "results/baseline_3_partitions.pkl"
if not os.path.isfile(baseline_3_path):
    baseline_partitions = {}
else:
    with open(baseline_3_path, "rb") as cache_file:
        baseline_partitions = pickle.load(cache_file)

baseline_partitions.keys()

In [None]:
# number of equally wide strips per baseline partition
n = 3
# NOTE(review): m_id is later passed to DynamicSystem; presumably the side
# length of the square state space - confirm.
m_id = 1
# Step between two neighbouring strip borders along x and y. Derived from n
# (instead of a second hard-coded 3) so both values cannot drift apart.
equally_spaced_offset_x = np.array([m_id / n, 0])
equally_spaced_offset_y = np.array([0, m_id / n])

In [None]:
# Corners of the unit square, starting at the origin:
# u1=(0,0), u2=(0,1), u3=(1,1), u4=(1,0).
u1, u2, u3, u4 = (np.array(corner) for corner in ((0, 0), (0, 1), (1, 1), (1, 0)))

In [None]:
# Border points at one and two x-offsets from u1 (x1, x2) and from u2 (x3, x4).
x1, x2 = (u1 + steps * equally_spaced_offset_x for steps in (1, 2))
x3, x4 = (u2 + steps * equally_spaced_offset_x for steps in (1, 2))

In [None]:
# Corner lists of the three strips separated by the borders x1-x3 and x2-x4.
X1_vertices = [u1, u2, x3, x1]
X2_vertices = [x3, x4, x2, x1]
X3_vertices = [x4, u3, u4, x2]

# Every partition element is stored as a MultiPolygon holding one polygon.
X1, X2, X3 = (
    MultiPolygon([Polygon(ring)])
    for ring in (X1_vertices, X2_vertices, X3_vertices)
)

In [None]:
# Notebook preview of the three strips X1, X2, X3.
MultiPolygon([Polygon(ring) for ring in (X1_vertices, X2_vertices, X3_vertices)])

In [None]:
horizontal_partition = [X1, X2, X3]
horizontal_partition_vertices = [X1_vertices, X2_vertices, X3_vertices]

# NOTE(review): these strips are separated along the x-axis, whereas the
# 7-subset section of this notebook stores strips separated along the y-axis
# under the same key "horizontal" - confirm which naming is intended.
baseline_partitions["horizontal"] = {
    "partition": horizontal_partition,
    "partition_vertices": horizontal_partition_vertices,
}

In [None]:
# Border points at one and two y-offsets from u1 (y1, y2) and from u4 (y3, y4).
y1, y2 = (u1 + steps * equally_spaced_offset_y for steps in (1, 2))
y3, y4 = (u4 + steps * equally_spaced_offset_y for steps in (1, 2))

In [None]:
# Corner lists of the three strips separated by the borders y1-y3 and y2-y4.
Y1_vertices = [u1, y1, y3, u4]
Y2_vertices = [y1, y2, y4, y3]
Y3_vertices = [y2, u2, u3, y4]

# Every partition element is stored as a MultiPolygon holding one polygon.
Y1, Y2, Y3 = (
    MultiPolygon([Polygon(ring)])
    for ring in (Y1_vertices, Y2_vertices, Y3_vertices)
)

In [None]:
# Notebook preview of the three strips Y1, Y2, Y3.
MultiPolygon([Polygon(ring) for ring in (Y1_vertices, Y2_vertices, Y3_vertices)])

In [None]:
vertical_partition = [Y1, Y2, Y3]
vertical_partition_vertices = [Y1_vertices, Y2_vertices, Y3_vertices]

# store the y-separated strips under the key "vertical"
baseline_partitions["vertical"] = {
    "partition": vertical_partition,
    "partition_vertices": vertical_partition_vertices,
}

In [None]:
x_halved = np.array([m_id/2, 0])

In [None]:
# A1: top strip above the line y2-y4. A2 / A3: left and right half of the
# remaining area, split at x = m_id / 2 (x_halved).
A1_vertices = [u2, u3, y4, y2]
A2_vertices = [u1, y2, y2+x_halved, u1+x_halved]
A3_vertices = [u1+x_halved, y2+x_halved, y4, u4]

A1, A2, A3 = (
    MultiPolygon([Polygon(ring)])
    for ring in (A1_vertices, A2_vertices, A3_vertices)
)

In [None]:
# Notebook preview of the cells A1, A2, A3.
MultiPolygon([Polygon(ring) for ring in (A1_vertices, A2_vertices, A3_vertices)])

In [None]:
complex_partition_1 = [A1, A2, A3]
complex_partition_vertices_1 = [A1_vertices, A2_vertices, A3_vertices]

# register the first mixed strip/halves partition
baseline_partitions["complex_1"] = {
    "partition": complex_partition_1,
    "partition_vertices": complex_partition_vertices_1,
}

In [None]:
y_halved = np.array([0, m_id/2])

In [None]:
# B3: right strip beyond the line x2-x4. B1 / B2: lower and upper half of the
# remaining area, split at y = m_id / 2 (y_halved).
B1_vertices = [u1, u1+y_halved, x2+y_halved, x2]
B2_vertices = [u1+y_halved, u2, x4, x2+y_halved]
B3_vertices = [x4, u3, u4, x2]

B1, B2, B3 = (
    MultiPolygon([Polygon(ring)])
    for ring in (B1_vertices, B2_vertices, B3_vertices)
)

In [None]:
# Notebook preview of the cells B1, B2, B3.
MultiPolygon([Polygon(ring) for ring in (B1_vertices, B2_vertices, B3_vertices)])

In [None]:
complex_partition_2 = [B1, B2, B3]
complex_partition_vertices_2 = [B1_vertices, B2_vertices, B3_vertices]

# register the second mixed strip/halves partition
baseline_partitions["complex_2"] = {
    "partition": complex_partition_2,
    "partition_vertices": complex_partition_vertices_2,
}

In [None]:
baseline_partitions.keys()

## Test Policy Evaluation Algorithm with standard example dynamic system

In [None]:
# Symmetric 2x2 integer matrix of the linear map used by phi and d_phi below.
A = np.array([[1,1], [1,0]])

In [None]:
def phi(x: np.ndarray) -> np.ndarray:
    """Apply the linear map ``A`` to ``x`` and reduce the result modulo 1.

    Args:
        x: Point to map; must be compatible with ``np.dot(A, x)``.

    Returns:
        ``np.dot(A, x) % 1``, i.e. every coordinate lies in ``[0, 1)``.
    """
    # A is the module-level matrix defined in the cell above.
    return np.dot(A, x) % 1

In [None]:
def q(x: np.ndarray) -> np.ndarray:
    """Reduce every coordinate of ``x`` modulo 1.

    Args:
        x: Array of coordinates.

    Returns:
        ``x % 1``; with Python/NumPy modulo semantics negative inputs are
        mapped into ``[0, 1)`` as well (e.g. ``-0.25 -> 0.75``).
    """
    return x % 1

In [None]:
# np.linalg.eig returns the eigenvalues and a matrix whose COLUMNS are the
# corresponding (right) eigenvectors
eig_vals, eig_vects = np.linalg.eig(A)
# transpose so that each ROW eig_vects[i] is the eigenvector of eig_vals[i]
eig_vects = np.transpose(eig_vects)

In [None]:
print(f"Eigenvalues: {eig_vals}")

In [None]:
print(f"Eigenvectors: {eig_vects}")

In [None]:
# eigenvectors belonging to eig_vals[0] and eig_vals[1]
v1, v2 = eig_vects[0], eig_vects[1]

In [None]:
def perp(a):
    """Return the 2D vector ``a`` rotated by 90 degrees: ``(-a[1], a[0])``.

    The result has the same shape and dtype as ``a``; only the first two
    entries are written.
    """
    rotated = np.empty_like(a)
    rotated[0], rotated[1] = -a[1], a[0]
    return rotated

# line a given by two points a1, a2
# line b given by two points b1, b2
def seg_intersect(a, b):
    """Return the intersection point of the lines through ``a`` and ``b``.

    Despite the name, the two end points only fix the direction of each line:
    the lines are treated as infinite, so the returned point may lie outside
    either segment.

    Args:
        a: Pair ``(a1, a2)`` of 2D points (array-like of shape ``(2, 2)``).
        b: Pair ``(b1, b2)`` of 2D points (array-like of shape ``(2, 2)``).

    Returns:
        The intersection point as a float array of shape ``(2,)``.

    Raises:
        ValueError: If the lines are parallel (or one of them is degenerate),
            so that no unique intersection point exists.
    """
    a1, a2 = np.asarray(a, dtype=float)
    b1, b2 = np.asarray(b, dtype=float)

    da = a2 - a1
    db = b2 - b1
    dp = a1 - b1
    # da rotated by 90 degrees, i.e. a normal vector of line a
    dap = np.array([-da[1], da[0]])
    denom = np.dot(dap, db)
    if denom == 0:
        # previously this silently produced nan/inf coordinates
        raise ValueError("lines are parallel; no unique intersection point")
    num = np.dot(dap, dp)

    return (num / denom) * db + b1

In [None]:
# Corners of the unit square (same values as defined at the top of the notebook):
# u1=(0,0), u2=(0,1), u3=(1,1), u4=(1,0).
u1, u2, u3, u4 = (np.array(corner) for corner in ((0, 0), (0, 1), (1, 1), (1, 0)))

In [None]:
# Edges of the unit square as (start, end) point pairs; edge k runs from
# corner k to the next corner in the order (0,0) -> (0,1) -> (1,1) -> (1,0).
_square_corners = [[0, 0], [0, 1], [1, 1], [1, 0]]
lu1, lu2, lu3, lu4 = (
    np.array([_square_corners[k], _square_corners[(k + 1) % 4]])
    for k in range(4)
)

In [None]:
# v1 rescaled to x-component 1 and v2 rescaled to y-component 1
v1_unit_x = (1 / v1[0]) * v1
v2_unit_y = (1 / v2[1]) * v2

# second point of the line through each corner along the eigen directions
p1 = u1 + v1_unit_x
p2 = u2 - v2_unit_y
p3 = u3 - v1_unit_x
p4 = u4 + v2_unit_y

In [None]:
# Lines through the four corners, each stored as a (start, end) point pair.
l_00 = np.stack((u1, p1))
l_01 = np.stack((u2, p2))
l_10 = np.stack((u4, p4))
l_11 = np.stack((u3, p3))

In [None]:
# pairwise intersections of the corner lines
P1a = seg_intersect(l_00, l_01)
P1b = seg_intersect(l_10, l_00)
P3a = seg_intersect(l_10, l_11)

# point where l_10 meets the square edge lu2, and the same point shifted by
# one unit in negative y-direction
symm_helper_x = seg_intersect(lu2, l_10)
symm_helper_y = symm_helper_x - np.array([0, 1])
symm_helper_l11 = np.array([symm_helper_x, symm_helper_y])

# continue from the shifted point in direction v2 until l_00 is hit
p1_symm = symm_helper_y + v2
P3b = seg_intersect(np.array([symm_helper_y, p1_symm]), l_00)
l_10_symm_extension = np.array([symm_helper_y, P3b])

In [None]:
# Shorten the corner lines so that they end at the computed intersection
# points (l_10 keeps its original end point p4).
l_00 = np.stack((u1, P1b))
l_01 = np.stack((u2, P1a))
l_10 = np.stack((u4, p4))
l_11 = np.stack((u3, P3a))

In [None]:
# plot unit square
for square_edge in (lu1, lu2, lu3, lu4):
    plt.plot(square_edge[:, 0], square_edge[:, 1], "r-")

# step 1: l_10 in contracting direction
# step 2: l_00 and l_11 in expanding directions
# step 3: l_01 in contracting direction
for branch in (l_10, l_00, l_11, l_01):
    plt.plot(branch[:, 0], branch[:, 1], "b-")

# step 4: symmetric extension of the l_10 line; the dashed segment joins the
# point on the square edge with its copy shifted down by one unit
plt.plot(symm_helper_l11[:, 0], symm_helper_l11[:, 1], "m--")
plt.plot(l_10_symm_extension[:, 0], l_10_symm_extension[:, 1], "b-")

# plot intersection points
for point, marker in ((P1a, "bo"), (P1b, "ro"), (P3a, "go"), (P3b, "yo")):
    plt.plot(point[0], point[1], marker)

plt.show()

In [None]:
# first partition element: two triangles at opposite sides of the square
P1A, P1B = Polygon([P1a, u1, u2]), Polygon([P3a, u3, u4])
P1 = MultiPolygon([P1A, P1B])

In [None]:
# second partition element, split at the square border into two pieces
P2A, P2B = (
    Polygon([P3b, u1, symm_helper_l11[1]]),
    Polygon([symm_helper_l11[0], u2, P1a, P1b]),
)
P2 = MultiPolygon([P2A, P2B])

In [None]:
# third partition element, split at the square border into two pieces
P3A, P3B = (
    Polygon([P1b, u4, symm_helper_l11[1], P3b]),
    Polygon([symm_helper_l11[0], u3, P3a]),
)
P3 = MultiPolygon([P3A, P3B])

In [None]:
# four corner points per partition element, in the order P1, P2, P3
partition_vertices = [
    [P1a, u1, u2, P3a],
    [P3b, u1, P1a, P1b],
    [P1b, u4, P3b, P3a],
]
P1_vertices, P2_vertices, P3_vertices = partition_vertices

In [None]:
partition = [P1, P2, P3]

In [None]:
MultiPolygon([P1A, P1B, P2A, P2B, P3A, P3B])

In [None]:
# polygons of the first element after applying phi (hand-constructed)
P1A_phi, P1B_phi = Polygon([P1b, u4, u1]), Polygon([u2, u3, phi(P3a)])
P1_phi = MultiPolygon([P1A_phi, P1B_phi])

In [None]:
# point where l_00 meets the square edge lu3 and its representative modulo 1
symm_helper_x = seg_intersect(lu3, l_00)
symm_helper_y = q(symm_helper_x)
symm_helper_l00 = np.stack((symm_helper_x, symm_helper_y))

In [None]:
# polygons of the second element after applying phi (hand-constructed)
P2A_phi, P2B_phi = (
    Polygon([symm_helper_l00[1], phi(P1b), P1a, u1]),
    Polygon([symm_helper_l00[0], u4, P1b]),
)
P2_phi = MultiPolygon([P2A_phi, P2B_phi])

In [None]:
# polygons of the third element after applying phi (hand-constructed)
P3A_phi, P3B_phi = (
    Polygon([phi(P3a), P1a, symm_helper_l00[0], u3]),
    Polygon([u2, phi(P1b), symm_helper_l00[1]]),
)
P3_phi = MultiPolygon([P3A_phi, P3B_phi])

In [None]:
MultiPolygon([P1A_phi, P1B_phi, P2A_phi, P2B_phi, P3A_phi, P3B_phi])

In [None]:
partition_phi = [P1_phi, P2_phi, P3_phi]

In [None]:
def phi_non_identified(x: np.ndarray) -> np.ndarray:
    """Apply the linear map ``A`` to ``x`` without the modulo-1 reduction.

    Args:
        x: Point to map; must be compatible with ``np.dot(A, x)``.

    Returns:
        ``np.dot(A, x)``.
    """
    # A is the module-level matrix defined earlier in the notebook.
    return np.dot(A, x)

In [None]:
def d_phi(x: np.ndarray) -> np.ndarray:
    """Return the transpose of ``A``; the result does not depend on ``x``.

    Args:
        x: Unused; kept so the function has the same call signature as the map.

    Returns:
        ``np.transpose(A)``. For the symmetric ``A`` defined in this notebook
        this equals ``A`` itself.
    """
    # NOTE(review): presumably used as the derivative of phi_non_identified by
    # DynamicSystem - confirm that the transposed layout is what it expects.
    return np.transpose(A)

In [None]:
m_id, gamma = 1, 0.8
# c and tau are forwarded to estimate_probability_matrix_pi_method below
c, tau = 3000, 0.0001
target_state = np.array([0.5, 0.5])

dynamic_system = DynamicSystem(phi_non_identified, d_phi, m_id)
markov_decision_process = MarkovDecisionProcess(
    dynamic_system,
    partition,
    partition_vertices=partition_vertices,
    target_state=target_state,
    gamma=gamma,
)

In [None]:
# add first Markov partition to overall experiment dictionary
baseline_partitions["markov_1"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

In [None]:
print("Estimate transition probabilities")
start_time_estimation = time.time()
_, num_iters_per_state = markov_decision_process.estimate_probability_matrix_pi_method(
    c=c,
    tau=tau,
    max_sample_trials=1000,
)
total_time_estimation = time.time() - start_time_estimation
print(f"Transition probability estimation took {round(total_time_estimation, 2)}s")
print(f"Number of iterates per state: {num_iters_per_state}")

print("Evaluate policy")
# same float as the literal 10e-6, i.e. 1e-5 (not 1e-6)
epsilon = 1e-5
start_time_evaluation = time.time()
V, num_iters, convergence_info = markov_decision_process.policy_evaluation(
    markov_decision_process.g,
    epsilon=epsilon,
)
total_time_evaluation = time.time() - start_time_evaluation
print(f"Policy evaluation took {total_time_evaluation}s")
print(f"Value function: {V}")
print(f"Number of iterations until convergence: {num_iters}")

In [None]:
# convergence curve without the entry at index 0
x_values = list(range(1, num_iters))
plt.plot(x_values, convergence_info["max_dist"][1:])
plt.gca().set(
    xlabel="Iteration",
    ylabel="MAX-DIST",
    title="Maximal convergence distance over iterations",
)
plt.show()

In [None]:
# NOTE(review): the plotted series is "avg_dist" while the labels say L2 -
# confirm that both refer to the same quantity.
x_values = list(range(1, num_iters))
plt.plot(x_values, convergence_info["avg_dist"][1:])
plt.gca().set(
    xlabel="Iteration",
    ylabel="L2-DIST",
    title="L2 convergence distance over iterations",
)
plt.show()

## Build second baseline partition

In [None]:
# v1 rescaled to x-component 1 and v2 rescaled to y-component 1
v1_unit_x = (1 / v1[0]) * v1
v2_unit_y = (1 / v2[1]) * v2

# second point of the line through each corner along the eigen directions
p1 = u1 + v1_unit_x
p2 = u2 - v2_unit_y
p3 = u3 - v1_unit_x
p4 = u4 + v2_unit_y

In [None]:
# Lines through the four corners, each stored as a (start, end) point pair.
l_00 = np.stack((u1, p1))
l_01 = np.stack((u2, p2))
l_10 = np.stack((u4, p4))
l_11 = np.stack((u3, p3))

In [None]:
# pairwise intersections of the corner lines
P1a = seg_intersect(l_00, l_01)
P1b = seg_intersect(l_01, l_11)
P3a = seg_intersect(l_10, l_00)

# point where l_00 meets the square edge lu3 and its representative modulo 1
symm_helper_x = seg_intersect(lu3, l_00)
symm_helper_y = q(symm_helper_x)
symm_helper_l00 = np.array([symm_helper_x, symm_helper_y])

# continue from the reduced point in direction v1 until l_01 is hit
p1_symm = symm_helper_y + v1
P3b = seg_intersect(np.array([symm_helper_y, p1_symm]), l_01)
l_00_symm_extension = np.array([symm_helper_y, P3b])

In [None]:
# Shorten the corner lines so that they end at the computed intersection
# points (l_00 keeps its original end point p1).
l_00 = np.stack((u1, p1))
l_01 = np.stack((u2, P1a))
l_10 = np.stack((u4, P3a))
l_11 = np.stack((u3, P1b))

In [None]:
# plot unit square
for square_edge in (lu1, lu2, lu3, lu4):
    plt.plot(square_edge[:, 0], square_edge[:, 1], "r-")

# step 1: l_00 in expanding direction
# step 2: l_01 and l_10 in contracting directions
# step 3: l_11 in expanding direction
for branch in (l_00, l_01, l_10, l_11):
    plt.plot(branch[:, 0], branch[:, 1], "b-")

# step 4: symmetric extension of the l_00 line; the dashed segment joins the
# point on the square edge with its representative modulo 1
plt.plot(symm_helper_l00[:, 0], symm_helper_l00[:, 1], "m--")
plt.plot(l_00_symm_extension[:, 0], l_00_symm_extension[:, 1], "b-")

# plot intersection points
for point, marker in ((P1a, "bo"), (P1b, "ro"), (P3a, "go"), (P3b, "yo")):
    plt.plot(point[0], point[1], marker)

plt.show()

In [None]:
# first partition element, split at the square border into two pieces
P1A, P1B = (
    Polygon([P3b, symm_helper_l00[1], u1, P1a]),
    Polygon([symm_helper_l00[0], u4, P3a]),
)
P1 = MultiPolygon([P1A, P1B])

In [None]:
# second partition element, split at the square border into two pieces
P2A, P2B = (
    Polygon([P1b, u3, symm_helper_l00[0], P1a]),
    Polygon([symm_helper_l00[1], u2, P3b]),
)
P2 = MultiPolygon([P2A, P2B])

In [None]:
# third partition element: two triangles at opposite sides of the square
P3A, P3B = Polygon([P1b, u2, u3]), Polygon([u1, P3a, u4])
P3 = MultiPolygon([P3A, P3B])

In [None]:
# four corner points per partition element, in the order P1, P2, P3
partition_vertices = [
    [u1, P1a, P3b, P3a],
    [P1b, P1a, P3b, u3],
    [P1b, u3, u2, P3a],
]
P1_vertices, P2_vertices, P3_vertices = partition_vertices

In [None]:
partition = [P1, P2, P3]

In [None]:
MultiPolygon([P1A, P1B, P2A, P2B, P3A, P3B])

In [None]:
# add second Markov partition to overall experiment dictionary
baseline_partitions["markov_2"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

In [None]:
# Persist all 3-subset partitions so that later sessions can reload them.
# The target directory is created first; open(..., "wb") does not create it
# and would fail on a fresh checkout.
os.makedirs("results", exist_ok=True)
with open("results/baseline_3_partitions.pkl", "wb") as file:
    pickle.dump(baseline_partitions, file, protocol=pickle.HIGHEST_PROTOCOL)

## Test policy evaluation for a baseline partition

In [None]:
# pick the "horizontal" baseline and wrap it in a fresh decision process
baseline_under_test = baseline_partitions["horizontal"]
test_baseline_partition = baseline_under_test["partition"]
test_baseline_partition_vertices = baseline_under_test["partition_vertices"]
markov_decision_process = MarkovDecisionProcess(
    dynamic_system,
    test_baseline_partition,
    partition_vertices=test_baseline_partition_vertices,
    target_state=target_state,
    gamma=gamma,
)

In [None]:
print("Estimate transition probabilities")
start_time_estimation = time.time()
_, num_iters_per_state = markov_decision_process.estimate_probability_matrix_pi_method(
    c=c,
    tau=tau,
    max_sample_trials=1000,
)
total_time_estimation = time.time() - start_time_estimation
print(f"Transition probability estimation took {round(total_time_estimation, 2)}s")
print(f"Number of iterates per state: {num_iters_per_state}")

print("Evaluate policy")
# same float as the literal 10e-6, i.e. 1e-5 (not 1e-6)
epsilon = 1e-5
start_time_evaluation = time.time()
V, num_iters, convergence_info = markov_decision_process.policy_evaluation(
    markov_decision_process.g,
    epsilon=epsilon,
)
total_time_evaluation = time.time() - start_time_evaluation
print(f"Policy evaluation took {total_time_evaluation}s")
print(f"Value function: {V}")
print(f"Number of iterations until convergence: {num_iters}")

In [None]:
# convergence curve without the entry at index 0
x_values = list(range(1, num_iters))
plt.plot(x_values, convergence_info["max_dist"][1:])
plt.gca().set(
    xlabel="Iteration",
    ylabel="MAX-DIST",
    title="Maximal convergence distance over iterations",
)
plt.show()

In [None]:
# NOTE(review): the plotted series is "avg_dist" while the labels say L2 -
# confirm that both refer to the same quantity.
x_values = list(range(1, num_iters))
plt.plot(x_values, convergence_info["avg_dist"][1:])
plt.gca().set(
    xlabel="Iteration",
    ylabel="L2-DIST",
    title="L2 convergence distance over iterations",
)
plt.show()

## Experiment with baseline and Markov partitions of 3 subsets

In [None]:
# Discount factors to evaluate and number of repeated probability estimations.
gamma_candidates = np.linspace(start=0.05, stop=0.95, num=2)
estimation_repetitions = 1
experiment_results = {}

for experiment_run, baseline in baseline_partitions.items():
    print("\n---------------------------------------\n")
    print(f"Experiment run with {experiment_run} partition")

    partition = baseline["partition"]
    partition_vertices = baseline["partition_vertices"]
    # gamma is not passed here; it is assigned per candidate further below
    markov_decision_process = MarkovDecisionProcess(dynamic_system, partition, partition_vertices=partition_vertices, target_state=target_state)

    print("Estimate transition probabilities")
    n = len(partition)
    # one row per repetition; the scalar run time is broadcast over all n columns
    total_time_estimation_results = np.zeros((estimation_repetitions, n))
    num_iters_per_state_results = np.zeros((estimation_repetitions, n))

    for i in range(estimation_repetitions):
        start_time_estimation = time.time()
        _, num_iters_per_state = markov_decision_process.estimate_probability_matrix_pi_method(c=c, tau=tau, max_sample_trials=1000)
        total_time_estimation = time.time() - start_time_estimation
        total_time_estimation_results[i, :] = total_time_estimation
        num_iters_per_state_results[i, :] = num_iters_per_state

    print(f"Transition probability estimation took {round(np.mean(total_time_estimation_results), 2)}s on average with {round(np.var(total_time_estimation_results), 2)}s variance.")
    print(f"Number of iterates per state {np.mean(num_iters_per_state_results, axis=0)} on average with variance {np.var(num_iters_per_state_results, axis=0)}")

    policy_evaluation_results = {}
    experiment_results[experiment_run] = {
        "partition": partition,
        "partition_vertices": partition_vertices,
        "estimation_run_time": total_time_estimation_results,
        "num_iters_per_state": num_iters_per_state_results,
        "policy_evaluation": policy_evaluation_results,
    }

    for gamma in gamma_candidates:
        print(f"\nEvaluate policy with gamma={gamma}")
        markov_decision_process.gamma = gamma

        epsilon = 10e-6  # NOTE: 10e-6 equals 1e-5, not 1e-6
        start_time_evaluation = time.time()
        V, num_iters, convergence_info = markov_decision_process.policy_evaluation(markov_decision_process.g, epsilon=epsilon)
        total_time_evaluation = time.time() - start_time_evaluation
        print(f"Policy evaluation took {total_time_evaluation}s")
        print(f"Value function: {V}")
        print(f"Number of iterations until convergence: {num_iters}")

        # keyed by the (float) discount factor itself
        policy_evaluation_results[gamma] = {
            "run_time": total_time_evaluation,
            "value_function": V,
            "num_iters": num_iters,
            "convergence_info": convergence_info,
        }

# Create the output directory first so that a missing folder cannot discard
# the results of the whole run at the very last step.
os.makedirs("results", exist_ok=True)
with open("results/policy_evaluation_3_partition_results.pkl", "wb") as file:
    pickle.dump(experiment_results, file, protocol=pickle.HIGHEST_PROTOCOL)

## Visualize experiment results

In [None]:
# Reload the results written by the experiment cell above.
# NOTE(review): pickle.load can execute arbitrary code - only load files that
# were produced by this notebook.
with open("results/policy_evaluation_3_partition_results.pkl", "rb") as file:
    experiment_results = pickle.load(file)

In [None]:
# One MAX-DIST and one L2-DIST convergence figure per partition and gamma.
for experiment_run, run_results in experiment_results.items():
    print("\n---------------------------------------\n")
    print(f"Experiment run with {experiment_run} partition")

    for gamma, evaluation in run_results["policy_evaluation"].items():
        num_iters = evaluation["num_iters"]
        convergence_info = evaluation["convergence_info"]
        x_values = list(range(1, num_iters))

        for metric, axis_label, title_prefix in (
            ("max_dist", "MAX-DIST", "Maximal convergence distance"),
            ("avg_dist", "L2-DIST", "L2 convergence distance"),
        ):
            # the entry at index 0 is left out of every curve
            plt.plot(x_values, convergence_info[metric][1:])
            plt.xlabel("Iteration")
            plt.ylabel(axis_label)
            plt.title(f"{title_prefix} for gamma={round(gamma, 2)}")
            plt.show()

In [None]:
# Summarise run time and iteration counts of the probability estimation,
# one table row per partition.
estimation_convergence_results = []
for experiment_run, run_results in experiment_results.items():
    run_times = run_results["estimation_run_time"]
    iters_per_state = run_results["num_iters_per_state"]

    # reduce over axis 1 first, giving one value per row of the stored array
    # (presumably one row per estimation repetition - see the experiment cell)
    iters_sum = np.sum(iters_per_state, axis=1)
    iters_mean = np.mean(iters_per_state, axis=1)
    iters_max = np.max(iters_per_state, axis=1)

    estimation_convergence_results.append(
        {
            "partition_method": experiment_run,
            "estimation_run_time_avg": np.mean(run_times),
            "estimation_run_time_var": np.var(run_times),
            "sum_estimation_iters_avg": np.mean(iters_sum),
            "sum_estimation_iters_var": np.var(iters_sum),
            "avg_estimation_iters_avg": np.mean(iters_mean),
            "avg_estimation_iters_var": np.var(iters_mean),
            "max_estimation_iters_avg": np.mean(iters_max),
            "max_estimation_iters_var": np.var(iters_max),
        }
    )

estimation_convergence_results_df = pd.DataFrame(estimation_convergence_results)

In [None]:
estimation_convergence_results_df

In [None]:
# Compare the number of policy-evaluation iterations at one fixed gamma.
fixed_gamma = 0.95
num_iters_list = []

for experiment_run, run_results in experiment_results.items():
    evaluations = run_results["policy_evaluation"]
    # The keys are floats generated by np.linspace; match with a tolerance
    # instead of relying on exact float equality of the dictionary key.
    gamma_key = next((g for g in evaluations if np.isclose(g, fixed_gamma)), None)
    if gamma_key is None:
        raise KeyError(f"no policy evaluation stored for gamma={fixed_gamma}")
    num_iters = evaluations[gamma_key]["num_iters"]
    num_iters_list.append({"partition": experiment_run, "num_iters": num_iters})

num_iters_df = pd.DataFrame(num_iters_list)

# saturation is a proportion; the former value 8 was outside [0, 1].
# NOTE(review): 1 keeps the palette colours unchanged - use 0.8 if a slight
# desaturation was intended.
ax = sns.barplot(x="partition", y="num_iters", data=num_iters_df, hue="partition", saturation=1, dodge=False)
ax.set(xlabel="partition method", ylabel="Number of Iterations")
plt.title("Policy Evaluation Convergence")
plt.show()

In [None]:
# exported data frame column -> header text used in the LaTeX table
column_labels = {
    "partition_method": "Partition",
    "sum_estimation_iters_avg": "Sum of Iters",
    "avg_estimation_iters_avg": "Average Iters",
    "max_estimation_iters_avg": "Max Iters",
}
relevant_cols = list(column_labels)
cols_naming = list(column_labels.values())
latex_table_str = estimation_convergence_results_df.to_latex(
    index=False,
    columns=relevant_cols,
    header=cols_naming,
    float_format="%.2f",
    bold_rows=True,
)

with open("results/latex_table_3_partitions_export.txt", "w") as file:
    file.write(latex_table_str)

In [None]:
# one record per (partition, gamma) pair with the number of iterations needed
num_iters_list = [
    {"partition": experiment_run, "gamma": gamma, "num_iters": evaluation["num_iters"]}
    for experiment_run, run_results in experiment_results.items()
    for gamma, evaluation in run_results["policy_evaluation"].items()
]

num_iters_df = pd.DataFrame(num_iters_list)

ax = sns.lineplot(data=num_iters_df, x="gamma", y="num_iters", hue="partition")
ax.set(xlabel="discount factor gamma", ylabel="Number of Iterations")
plt.title("Policy Evaluation Convergence")
plt.show()

## Build Markov partition with 7 subsets

In [None]:
# Reuse previously pickled 7-subset partitions when the file exists; otherwise
# start with an empty registry.
# NOTE(review): pickle.load can execute arbitrary code - only load files that
# were produced by this notebook.
baseline_7_path = "results/baseline_7_partitions.pkl"
if not os.path.isfile(baseline_7_path):
    baseline_partitions = {}
else:
    with open(baseline_7_path, "rb") as cache_file:
        baseline_partitions = pickle.load(cache_file)

baseline_partitions.keys()

In [None]:
# NOTE(review): this unconditionally discards whatever the previous cell just
# loaded from results/baseline_7_partitions.pkl - skip this cell if the cached
# partitions should be reused.
baseline_partitions = {}

In [None]:
# matrix of the linear map (same values as earlier in the notebook)
A = np.array([[1, 1], [1, 0]])
# delta passed to Partition.compute_partition; same float as the literal 10e-3
default_delta = 1e-2
# num_iters is passed to compute_partition, m_id to DynamicSystem
num_iters, m_id = 2, 1

In [None]:
def phi_non_identified(x: np.ndarray) -> np.ndarray:
    """Apply the linear map ``A`` to ``x`` without any modulo reduction.

    Args:
        x: Point to map; must be compatible with ``np.dot(A, x)``.

    Returns:
        ``np.dot(A, x)``.
    """
    # A is the module-level matrix defined in the cell above.
    return np.dot(A, x)

In [None]:
def d_phi(x: np.ndarray) -> np.ndarray:
    """Return the transpose of ``A``; the result does not depend on ``x``.

    Args:
        x: Unused; kept so the function has the same call signature as the map.

    Returns:
        ``np.transpose(A)``. For the symmetric ``A`` defined in this notebook
        this equals ``A`` itself.
    """
    # NOTE(review): presumably used as the derivative of phi_non_identified by
    # DynamicSystem - confirm that the transposed layout is what it expects.
    return np.transpose(A)

In [None]:
dynamic_system = DynamicSystem(phi_non_identified, d_phi, m_id)
partition = Partition(dynamic_system)

In [None]:
branches, intersection_points = partition.compute_partition(num_iters, delta=default_delta)

In [None]:
partition.plot_partition()

In [None]:
# all intersection points followed by the four corners of the unit square,
# stacked into one array (intersection_points itself is left untouched)
vertices = np.array([*intersection_points, u1, u2, u3, u4])

In [None]:
vertices

In [None]:
# Rows of `vertices` forming the four corners of each of the seven elements.
# NOTE(review): the indices were chosen by hand and depend on the order in
# which compute_partition returns its intersection points.
(
    P1_vertices,
    P2_vertices,
    P3_vertices,
    P4_vertices,
    P5_vertices,
    P6_vertices,
    P7_vertices,
) = (
    vertices[corner_ids]
    for corner_ids in (
        [0, 1, 3, 2],
        [0, 2, 5, 8],
        [1, 3, 6, 10],
        [2, 3, 4, 11],
        [0, 1, 7, 8],
        [0, 3, 5, 6],
        [1, 2, 4, 7],
    )
)

In [None]:
partition_vertices = [P1_vertices, P2_vertices, P3_vertices, P4_vertices, P5_vertices, P6_vertices, P7_vertices]

In [None]:
# Border coordinates taken from the helper points of the 3-subset Markov
# partitions built earlier in this notebook.
c1 = symm_helper_l11[0, 0]
c2 = symm_helper_l00[0, 1]

# points on the edges y=0 (x1, x2) and y=1 (x3, x4)
x1, x2, x3, x4 = (np.array([c, y_edge]) for y_edge in (0, 1) for c in (c1, c2))

# points on the edges x=0 (y1, y2) and x=1 (y3, y4)
y1, y2, y3, y4 = (np.array([x_edge, c]) for x_edge in (0, 1) for c in (c1, c2))

In [None]:
# P1 is a single polygon; P2..P7 each consist of two pieces (A and B).
P1A = Polygon(P1_vertices)
P1 = MultiPolygon([P1A])

P2A, P2B = (
    Polygon([vertices[0], vertices[2], u1, y1]),
    Polygon([u4, y3, vertices[5]]),
)
P2 = MultiPolygon([P2A, P2B])

P3A, P3B = (
    Polygon([vertices[1], vertices[3], y4, u3]),
    Polygon([u2, y2, vertices[6]]),
)
P3 = MultiPolygon([P3A, P3B])

P4A, P4B = (
    Polygon([vertices[3], vertices[2], x2, u4]),
    Polygon([u3, x4, vertices[4]]),
)
P4 = MultiPolygon([P4A, P4B])

P5A, P5B = (
    Polygon([vertices[0], vertices[1], x3, u2]),
    Polygon([u1, x1, vertices[7]]),
)
P5 = MultiPolygon([P5A, P5B])

P6A, P6B = (
    Polygon([vertices[0], y1, y2, vertices[6]]),
    Polygon([y3, y4, vertices[3], vertices[5]]),
)
P6 = MultiPolygon([P6A, P6B])

P7A, P7B = (
    Polygon([x3, x4, vertices[4], vertices[1]]),
    Polygon([x1, x2, vertices[2], vertices[7]]),
)
P7 = MultiPolygon([P7A, P7B])

In [None]:
MultiPolygon([P1A, P2A, P2B, P3A, P3B, P4A, P4B, P5A, P5B, P6A, P6B, P7A, P7B])

In [None]:
partition = [P1, P2, P3, P4, P5, P6, P7]

In [None]:
# add the 7-subset Markov partition to overall experiment dictionary
baseline_partitions["markov_1"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

## Build baseline partitions with 7 subsets

In [None]:
n = 7
y_offset = np.array([0, m_id / n])
partition = []
partition_vertices = []
polygons = []

# n strips of height m_id / n spanning from u1 to u4, ordered by increasing y
for i in range(n):
    p1 = u1 + i*y_offset
    p2 = u1 + (i+1)*y_offset
    p3 = u4 + (i+1)*y_offset
    p4 = u4 + i*y_offset

    strip_corners = [p1, p2, p3, p4]
    strip = Polygon(strip_corners)
    P = MultiPolygon([strip])
    polygons.append(strip)
    partition.append(P)
    partition_vertices.append(strip_corners)

# add the y-separated strip partition to overall experiment dictionary
baseline_partitions["horizontal"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

MultiPolygon(polygons)

In [None]:
n = 7
x_offset = np.array([m_id / n, 0])
partition = []
partition_vertices = []
polygons = []

# n strips of width m_id / n spanning from u1 to u2, ordered by increasing x
for i in range(n):
    p1 = u1 + i*x_offset
    p2 = u1 + (i+1)*x_offset
    p3 = u2 + (i+1)*x_offset
    p4 = u2 + i*x_offset

    strip_corners = [p1, p2, p3, p4]
    strip = Polygon(strip_corners)
    P = MultiPolygon([strip])
    polygons.append(strip)
    partition.append(P)
    partition_vertices.append(strip_corners)

# add the x-separated strip partition to overall experiment dictionary
baseline_partitions["vertical"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

MultiPolygon(polygons)

In [None]:
n1 = 2
n2 = 3
x_offset = np.array([m_id / n1, 0])
y_offset = np.array([0, m_id / n2])

partition = []
partition_vertices = []
polygons = []

# regular grid with n1 columns and n2 rows
# NOTE(review): this yields n1 * n2 = 6 cells although the section is about
# partitions with 7 subsets - confirm this is intended.
for i in range(n1):
    for j in range(n2):
        p1 = u1 + i*x_offset + j*y_offset
        p2 = p1 + x_offset
        p3 = p2 + y_offset
        p4 = p3 - x_offset

        cell_corners = [p1, p2, p3, p4]
        cell = Polygon(cell_corners)
        P = MultiPolygon([cell])
        polygons.append(cell)
        partition.append(P)
        partition_vertices.append(cell_corners)

# add the 2x3 grid partition to overall experiment dictionary
baseline_partitions["complex_1"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

MultiPolygon(polygons)

In [None]:
n1 = 3
n2 = 2
x_offset = np.array([m_id / n1, 0])
y_offset = np.array([0, m_id / n2])

partition = []
partition_vertices = []
polygons = []

# regular grid with n1 columns and n2 rows
# NOTE(review): this yields n1 * n2 = 6 cells although the section is about
# partitions with 7 subsets - confirm this is intended.
for i in range(n1):
    for j in range(n2):
        p1 = u1 + i*x_offset + j*y_offset
        p2 = p1 + x_offset
        p3 = p2 + y_offset
        p4 = p3 - x_offset

        cell_corners = [p1, p2, p3, p4]
        cell = Polygon(cell_corners)
        P = MultiPolygon([cell])
        polygons.append(cell)
        partition.append(P)
        partition_vertices.append(cell_corners)

# add the 3x2 grid partition to overall experiment dictionary
baseline_partitions["complex_2"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

MultiPolygon(polygons)

In [None]:
# Persist all 7-subset partitions so that later sessions can reload them.
# The target directory is created first; open(..., "wb") does not create it
# and would fail on a fresh checkout.
os.makedirs("results", exist_ok=True)
with open("results/baseline_7_partitions.pkl", "wb") as file:
    pickle.dump(baseline_partitions, file, protocol=pickle.HIGHEST_PROTOCOL)

## Experiment with all 7 subset partitions

In [None]:
# Discount factors to evaluate and number of repeated probability estimations.
gamma_candidates = np.linspace(start=0.05, stop=0.95, num=2)
estimation_repetitions = 1
experiment_results = {}

# NOTE: target_state, c and tau are reused from the 3-subset section above.
for experiment_run, baseline in baseline_partitions.items():
    print("\n---------------------------------------\n")
    print(f"Experiment run with {experiment_run} partition")

    partition = baseline["partition"]
    partition_vertices = baseline["partition_vertices"]
    # gamma is not passed here; it is assigned per candidate further below
    markov_decision_process = MarkovDecisionProcess(dynamic_system, partition, partition_vertices=partition_vertices, target_state=target_state)

    print("Estimate transition probabilities")
    n = len(partition)
    # one row per repetition; the scalar run time is broadcast over all n columns
    total_time_estimation_results = np.zeros((estimation_repetitions, n))
    num_iters_per_state_results = np.zeros((estimation_repetitions, n))

    for i in range(estimation_repetitions):
        start_time_estimation = time.time()
        _, num_iters_per_state = markov_decision_process.estimate_probability_matrix_pi_method(c=c, tau=tau, max_sample_trials=1000)
        total_time_estimation = time.time() - start_time_estimation
        total_time_estimation_results[i, :] = total_time_estimation
        num_iters_per_state_results[i, :] = num_iters_per_state

    print(f"Transition probability estimation took {round(np.mean(total_time_estimation_results), 2)}s on average with {round(np.var(total_time_estimation_results), 2)}s variance.")
    print(f"Number of iterates per state {np.mean(num_iters_per_state_results, axis=0)} on average with variance {np.var(num_iters_per_state_results, axis=0)}")

    policy_evaluation_results = {}
    experiment_results[experiment_run] = {
        "partition": partition,
        "partition_vertices": partition_vertices,
        "estimation_run_time": total_time_estimation_results,
        "num_iters_per_state": num_iters_per_state_results,
        "policy_evaluation": policy_evaluation_results,
    }

    for gamma in gamma_candidates:
        print(f"\nEvaluate policy with gamma={gamma}")
        markov_decision_process.gamma = gamma

        epsilon = 10e-6  # NOTE: 10e-6 equals 1e-5, not 1e-6
        start_time_evaluation = time.time()
        V, num_iters, convergence_info = markov_decision_process.policy_evaluation(markov_decision_process.g, epsilon=epsilon)
        total_time_evaluation = time.time() - start_time_evaluation
        print(f"Policy evaluation took {total_time_evaluation}s")
        print(f"Value function: {V}")
        print(f"Number of iterations until convergence: {num_iters}")

        # keyed by the (float) discount factor itself
        policy_evaluation_results[gamma] = {
            "run_time": total_time_evaluation,
            "value_function": V,
            "num_iters": num_iters,
            "convergence_info": convergence_info,
        }

# Create the output directory first so that a missing folder cannot discard
# the results of the whole run at the very last step.
os.makedirs("results", exist_ok=True)
with open("results/policy_evaluation_7_partition_results.pkl", "wb") as file:
    pickle.dump(experiment_results, file, protocol=pickle.HIGHEST_PROTOCOL)

## Visualize experiment results

In [None]:
# Load the policy-evaluation results that the experiment cell wrote to disk.
results_path = "results/policy_evaluation_7_partition_results.pkl"
with open(results_path, "rb") as results_file:
    experiment_results = pickle.load(results_file)

In [None]:
# For every partition and every discount factor, draw one figure per
# convergence distance recorded during policy evaluation.
for experiment_run, run_results in experiment_results.items():
    print("\n---------------------------------------\n")
    print(f"Experiment run with {experiment_run} partition")

    for gamma, evaluation in run_results["policy_evaluation"].items():
        convergence_info = evaluation["convergence_info"]
        # Entry 0 of each distance series is skipped, so the x-axis starts at 1.
        iterations = list(range(1, evaluation["num_iters"]))

        figure_specs = (
            ("max_dist", "MAX-DIST", f"Maximal convergence distance for gamma={round(gamma, 2)}"),
            ("avg_dist", "L2-DIST", f"L2 convergence distance for gamma={round(gamma, 2)}"),
        )
        for series_key, y_label, figure_title in figure_specs:
            plt.plot(iterations, convergence_info[series_key][1:])
            plt.xlabel("Iteration")
            plt.ylabel(y_label)
            plt.title(figure_title)
            plt.show()

In [None]:
# Build one summary row per partition: mean/variance of the estimation run
# time and of the per-row sum, mean and maximum of the iteration counts.
estimation_convergence_results = []
for experiment_run, run_results in experiment_results.items():
    run_times = run_results["estimation_run_time"]
    iters_per_state = run_results["num_iters_per_state"]

    # Reduce along axis=1 first (presumably the state axis, one row per
    # repetition — verify against the experiment cell), then aggregate.
    iters_sum = np.sum(iters_per_state, axis=1)
    iters_mean = np.mean(iters_per_state, axis=1)
    iters_max = np.max(iters_per_state, axis=1)

    estimation_convergence_results.append(
        {
            "partition_method": experiment_run,
            "estimation_run_time_avg": np.mean(run_times),
            "estimation_run_time_var": np.var(run_times),
            "sum_estimation_iters_avg": np.mean(iters_sum),
            "sum_estimation_iters_var": np.var(iters_sum),
            "avg_estimation_iters_avg": np.mean(iters_mean),
            "avg_estimation_iters_var": np.var(iters_mean),
            "max_estimation_iters_avg": np.mean(iters_max),
            "max_estimation_iters_var": np.var(iters_max),
        }
    )

estimation_convergence_results_df = pd.DataFrame(estimation_convergence_results)

In [None]:
# Cell output: show the per-partition estimation summary table.
estimation_convergence_results_df

In [None]:
# Export the average iteration statistics as a LaTeX table. The mapping pairs
# each exported dataframe column with the header printed in the table.
latex_columns = {
    "partition_method": "Partition",
    "sum_estimation_iters_avg": "Sum of Iters",
    "avg_estimation_iters_avg": "Average Iters",
    "max_estimation_iters_avg": "Max Iters",
}
relevant_cols = list(latex_columns.keys())
cols_naming = list(latex_columns.values())
latex_table_str = estimation_convergence_results_df.to_latex(
    index=False,
    columns=relevant_cols,
    header=cols_naming,
    float_format="%.2f",
    bold_rows=True,
)

with open("results/latex_table_7_partitions_export.txt", "w") as file:
    file.write(latex_table_str)

In [None]:
# Bar chart: iterations policy evaluation needed per partition at one fixed
# discount factor.
fixed_gamma = 0.95  # used as a dict key, so it must match a stored gamma exactly
num_iters_list = [
    {
        "partition": experiment_run,
        "num_iters": run_results["policy_evaluation"][fixed_gamma]["num_iters"],
    }
    for experiment_run, run_results in experiment_results.items()
]
num_iters_df = pd.DataFrame(num_iters_list)

# NOTE(review): seaborn documents `saturation` as a proportion of the original
# colour saturation; 8 may have been meant as 0.8 — confirm.
ax = sns.barplot(
    data=num_iters_df,
    x="partition",
    y="num_iters",
    hue="partition",
    saturation=8,
    dodge=False,
)
ax.set(xlabel="partition method", ylabel="Number of Iterations")
plt.title("Policy Evaluation Convergence")
plt.show()

In [None]:
# Line chart: iterations until convergence as a function of the discount
# factor, one line per partition.
num_iters_list = [
    {"partition": experiment_run, "gamma": gamma, "num_iters": evaluation["num_iters"]}
    for experiment_run, run_results in experiment_results.items()
    for gamma, evaluation in run_results["policy_evaluation"].items()
]
num_iters_df = pd.DataFrame(num_iters_list)

ax = sns.lineplot(data=num_iters_df, x="gamma", y="num_iters", hue="partition")
ax.set(xlabel="discount factor gamma", ylabel="Number of Iterations")
plt.title("Policy Evaluation Convergence")
plt.show()

## Build Markov partition with 11 subsets

In [None]:
# Reuse the baseline partitions stored on disk when the file exists;
# otherwise start with an empty collection.
baseline_partitions_path = "results/baseline_11_partitions.pkl"
if not os.path.isfile(baseline_partitions_path):
    baseline_partitions = {}
else:
    with open(baseline_partitions_path, "rb") as cache_file:
        baseline_partitions = pickle.load(cache_file)

# Cell output: names of the partitions that are currently available.
baseline_partitions.keys()

In [None]:
# NOTE(review): this rebinds baseline_partitions to an empty dict and thereby
# discards anything the previous cell loaded from
# results/baseline_11_partitions.pkl — confirm that forcing a rebuild of all
# partitions is intended.
baseline_partitions = {}

In [None]:
# Parameters for building the Markov partition.
A = np.array([(1, 1), (1, 0)])  # matrix applied by phi_non_identified
default_delta = 1e-2  # same value as 10e-3; passed as `delta` to compute_partition
num_iters = 3  # first argument of compute_partition
m_id = 1  # third argument of DynamicSystem; also scales the baseline offsets

In [None]:
def phi_non_identified(x: np.ndarray) -> np.ndarray:
    """Return the product of the module-level matrix ``A`` with ``x``.

    The result of ``np.dot(A, x)`` is returned as is; no further reduction
    or identification of the result is applied here.

    Args:
        x: Point (or array of points) to which ``A`` is applied.

    Returns:
        The array ``np.dot(A, x)``.
    """
    return np.dot(A, x)

In [None]:
def d_phi(x: np.ndarray) -> np.ndarray:
    """Return the transpose of the module-level matrix ``A``.

    The result does not depend on ``x``; the parameter is accepted only so
    the function can be passed where a point-dependent callable is expected
    (presumably the derivative of ``phi_non_identified`` — confirm against
    ``DynamicSystem``).

    Args:
        x: Ignored.

    Returns:
        ``np.transpose(A)``.
    """
    return np.transpose(A)

In [None]:
# Bundle the map, its companion function d_phi and m_id into a DynamicSystem,
# then create the Partition helper that operates on that system.
dynamic_system = DynamicSystem(phi_non_identified, d_phi, m_id)
partition = Partition(dynamic_system)

In [None]:
# Run the partition computation with num_iters and delta=default_delta; the
# two returned values are kept as `branches` and `intersection_points`.
branches, intersection_points = partition.compute_partition(num_iters, delta=default_delta)

In [None]:
# Visual check of the computed partition.
partition.plot_partition()

In [None]:
# Gather all candidate polygon corners in one array: the intersection points
# come first, followed by u1..u4 in the last four rows.
vertices = np.array([*intersection_points, u1, u2, u3, u4])

In [None]:
# Cell output: number of collected vertices and their dimension.
vertices.shape

In [None]:
# Cell output: the vertex coordinates; the row index is what the selections
# in the next cell refer to.
vertices

In [None]:
# For each of the 11 subsets, pick four rows of `vertices` by index.
# NOTE(review): the indices are hard-coded and presumably read off the printed
# `vertices` array / the partition plot; they are only valid for this exact
# vertex ordering — re-check them whenever compute_partition's output changes.
P1_vertices = vertices[[0,1,3,2]]
P2_vertices = vertices[[0,2,7,9]]
P3_vertices = vertices[[0,1,11,6]]
P4_vertices = vertices[[1,3,10,4]]
P5_vertices = vertices[[3,2,8,5]]
P6_vertices = vertices[[3,5,0,6]]
P7_vertices = vertices[[2,7,1,4]]
P8_vertices = vertices[[8,5,15,4]]
P9_vertices = vertices[[4,14,6,10]]
P10_vertices = vertices[[7,12,6,11]]
P11_vertices = vertices[[7,12,5,9]]

In [None]:
# Vertex selections of all 11 subsets, ordered P1..P11.
partition_vertices = [P1_vertices, P2_vertices, P3_vertices, P4_vertices, P5_vertices, P6_vertices, P7_vertices, P8_vertices, P9_vertices, P10_vertices, P11_vertices]

In [None]:
# Build the shapely geometry of every subset. Each subset is stored as a
# MultiPolygon, even when it consists of a single polygon.

# P1-P5: one polygon each, taken directly from the vertex selections.
P1A = Polygon(P1_vertices)
P1 = MultiPolygon([P1A])

P2A = Polygon(P2_vertices)
P2 = MultiPolygon([P2A])

P3A = Polygon(P3_vertices)
P3 = MultiPolygon([P3A])

P4A = Polygon(P4_vertices)
P4 = MultiPolygon([P4A])

P5A = Polygon(P5_vertices)
P5 = MultiPolygon([P5A])

# P6-P11: two polygons each (suffixes A and B). Their corners are listed
# explicitly from entries of `vertices`, the corner points u1..u4 and the
# points x1..x4 / y1..y4 defined in earlier cells; the P6_vertices ..
# P11_vertices arrays are not used here.
# NOTE(review): presumably these subsets cross the boundary of the square and
# therefore split into two pieces — confirm against the partition plot.
P6A = Polygon([vertices[3], vertices[5], y3, y4])
P6B = Polygon([y1, y2, vertices[6], vertices[0]])
P6 = MultiPolygon([P6A, P6B])

P7A = Polygon([vertices[2], vertices[7], x1, x2])
P7B = Polygon([x3, x4, vertices[4], vertices[1]])
P7 = MultiPolygon([P7A, P7B])

P8A = Polygon([vertices[8], vertices[5], vertices[15], x2])
P8B = Polygon([u3, x4, vertices[4]])
P8 = MultiPolygon([P8A, P8B])

P9A = Polygon([vertices[4], u3, y4, vertices[10]])
P9B = Polygon([u2, y2, vertices[6]])
P9 = MultiPolygon([P9A, P9B])

P10A = Polygon([u1, vertices[7], x1])
P10B = Polygon([u2, x3, vertices[11], vertices[6]])
P10 = MultiPolygon([P10A, P10B])

P11A = Polygon([u1, y1, vertices[9], vertices[7]])
P11B = Polygon([u4, y3, vertices[5]])
P11 = MultiPolygon([P11A, P11B])

In [None]:
# Cell output: all 17 polygon pieces in one geometry, as a visual check.
MultiPolygon([P1A, P2A, P3A, P4A, P5A, P6A, P6B, P7A, P7B, P8A, P8B, P9A, P9B, P10A, P10B, P11A, P11B])

In [None]:
# From here on `partition` is the list of subset geometries P1..P11 (it
# replaces the Partition helper object bound to this name earlier).
partition = [P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]

In [None]:
# Register the Markov partition under the key "markov_1" in the overall
# experiment dictionary.
baseline_partitions["markov_1"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

## Build baseline partitions for comparison with the 11-subset Markov partition (11 strips and 3x3, 4x3, 3x4 grids)

In [None]:
# Baseline "horizontal": n strips of equal height, stacked along the y-axis.
n = 11
y_offset = np.array([0, m_id / n])
partition, partition_vertices, polygons = [], [], []

for i in range(n):
    bottom = i * y_offset
    top = (i + 1) * y_offset
    # Corner order: up the u1 side, then back down the u4 side.
    corners = [u1 + bottom, u1 + top, u4 + top, u4 + bottom]

    polygons.append(Polygon(corners))
    partition.append(MultiPolygon([Polygon(corners)]))
    partition_vertices.append(corners)

# register the strips as the "horizontal" baseline in the overall experiment dictionary
baseline_partitions["horizontal"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

# Cell output: all strips in one geometry, as a visual check.
MultiPolygon(polygons)

In [None]:
# Baseline "vertical": n strips of equal width, lined up along the x-axis.
n = 11
x_offset = np.array([m_id / n, 0])
partition, partition_vertices, polygons = [], [], []

for i in range(n):
    left = i * x_offset
    right = (i + 1) * x_offset
    # Corner order: along the u1 side, then back along the u2 side.
    corners = [u1 + left, u1 + right, u2 + right, u2 + left]

    polygons.append(Polygon(corners))
    partition.append(MultiPolygon([Polygon(corners)]))
    partition_vertices.append(corners)

# register the strips as the "vertical" baseline in the overall experiment dictionary
baseline_partitions["vertical"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

# Cell output: all strips in one geometry, as a visual check.
MultiPolygon(polygons)

In [None]:
# Baseline "equally_9": regular grid with n1 columns and n2 rows of equally
# sized rectangles.
n1, n2 = 3, 3
x_offset = np.array([m_id / n1, 0])
y_offset = np.array([0, m_id / n2])
partition, partition_vertices, polygons = [], [], []

for i in range(n1):
    for j in range(n2):
        # Start at the cell corner nearest to u1 and walk around the cell:
        # one step in +x, one in +y, one in -x.
        corner_a = u1 + i*x_offset + j*y_offset
        corner_b = corner_a + x_offset
        corner_c = corner_b + y_offset
        corner_d = corner_c - x_offset
        corners = [corner_a, corner_b, corner_c, corner_d]

        polygons.append(Polygon(corners))
        partition.append(MultiPolygon([Polygon(corners)]))
        partition_vertices.append(corners)

# register the grid as the "equally_9" baseline in the overall experiment dictionary
baseline_partitions["equally_9"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

# Cell output: all grid cells in one geometry, as a visual check.
MultiPolygon(polygons)

In [None]:
# Baseline "equally_12_1": regular grid with n1 = 4 columns and n2 = 3 rows
# of equally sized rectangles.
n1, n2 = 4, 3
x_offset = np.array([m_id / n1, 0])
y_offset = np.array([0, m_id / n2])
partition, partition_vertices, polygons = [], [], []

for i in range(n1):
    for j in range(n2):
        # Start at the cell corner nearest to u1 and walk around the cell:
        # one step in +x, one in +y, one in -x.
        corner_a = u1 + i*x_offset + j*y_offset
        corner_b = corner_a + x_offset
        corner_c = corner_b + y_offset
        corner_d = corner_c - x_offset
        corners = [corner_a, corner_b, corner_c, corner_d]

        polygons.append(Polygon(corners))
        partition.append(MultiPolygon([Polygon(corners)]))
        partition_vertices.append(corners)

# register the grid as the "equally_12_1" baseline in the overall experiment dictionary
baseline_partitions["equally_12_1"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

# Cell output: all grid cells in one geometry, as a visual check.
MultiPolygon(polygons)

In [None]:
# Baseline "equally_12_2": regular grid with n1 = 3 columns and n2 = 4 rows
# of equally sized rectangles.
n1, n2 = 3, 4
x_offset = np.array([m_id / n1, 0])
y_offset = np.array([0, m_id / n2])
partition, partition_vertices, polygons = [], [], []

for i in range(n1):
    for j in range(n2):
        # Start at the cell corner nearest to u1 and walk around the cell:
        # one step in +x, one in +y, one in -x.
        corner_a = u1 + i*x_offset + j*y_offset
        corner_b = corner_a + x_offset
        corner_c = corner_b + y_offset
        corner_d = corner_c - x_offset
        corners = [corner_a, corner_b, corner_c, corner_d]

        polygons.append(Polygon(corners))
        partition.append(MultiPolygon([Polygon(corners)]))
        partition_vertices.append(corners)

# register the grid as the "equally_12_2" baseline in the overall experiment dictionary
baseline_partitions["equally_12_2"] = {
    "partition": partition,
    "partition_vertices": partition_vertices,
}

# Cell output: all grid cells in one geometry, as a visual check.
MultiPolygon(polygons)

In [None]:
# Store all partitions built above so later sessions can load them from disk.
# (Plain string: the path has no placeholders to format.)
with open("results/baseline_11_partitions.pkl", "wb") as file:
    pickle.dump(baseline_partitions, file, protocol=pickle.HIGHEST_PROTOCOL)

## Experiment with all 11 subset partitions

In [None]:
# Run the full experiment for every stored partition:
#   1. estimate the transition probabilities (repeated, timed),
#   2. evaluate the policy for each candidate discount factor (timed),
#   3. collect everything in `experiment_results` and persist it to disk.
gamma_candidates = np.linspace(start=0.05, stop=0.95, num=2)
estimation_repetitions = 1
# Threshold passed to policy_evaluation. Note that 10e-6 == 1e-5; a machine
# epsilon based value (2 * np.finfo(float).eps) was considered as alternative.
epsilon = 10e-6
experiment_results = {}

for experiment_run in baseline_partitions.keys():
    print("\n---------------------------------------\n")
    print(f"Experiment run with {experiment_run} partition")

    partition = baseline_partitions[experiment_run]["partition"]
    partition_vertices = baseline_partitions[experiment_run]["partition_vertices"]
    markov_decision_process = MarkovDecisionProcess(dynamic_system, partition, partition_vertices=partition_vertices, target_state=target_state)

    print("Estimate transition probabilities")
    n = len(partition)
    # One row per repetition, one column per subset of the partition.
    total_time_estimation_results = np.zeros((estimation_repetitions, n))
    num_iters_per_state_results = np.zeros((estimation_repetitions, n))

    for i in range(estimation_repetitions):
        # perf_counter is a monotonic high-resolution clock, which makes it
        # the appropriate choice for measuring elapsed intervals.
        start_time_estimation = time.perf_counter()
        _, num_iters_per_state = markov_decision_process.estimate_probability_matrix_pi_method(c=c, tau=tau, max_sample_trials=1000)
        total_time_estimation = time.perf_counter() - start_time_estimation
        # The scalar run time is broadcast to every column of row i.
        total_time_estimation_results[i, :] = total_time_estimation
        num_iters_per_state_results[i, :] = num_iters_per_state

    print(f"Transition probability estimation took {round(np.mean(total_time_estimation_results), 2)}s on average with {round(np.var(total_time_estimation_results), 2)}s variance.")
    print(f"Number of iterates per state {np.mean(num_iters_per_state_results, axis=0)} on average with variance {np.var(num_iters_per_state_results, axis=0)}")

    policy_evaluation_results = {}
    experiment_results[experiment_run] = {
        "partition": partition,
        "partition_vertices": partition_vertices,
        "estimation_run_time": total_time_estimation_results,
        "num_iters_per_state": num_iters_per_state_results,
        "policy_evaluation": policy_evaluation_results,
    }

    for gamma in gamma_candidates:
        print(f"\nEvaluate policy with gamma={gamma}")
        markov_decision_process.gamma = gamma

        start_time_evaluation = time.perf_counter()
        V, num_iters, convergence_info = markov_decision_process.policy_evaluation(markov_decision_process.g, epsilon=epsilon)
        total_time_evaluation = time.perf_counter() - start_time_evaluation
        print(f"Policy evaluation took {total_time_evaluation}s")
        print(f"Value function: {V}")
        print(f"Number of iterations until convergence: {num_iters}")

        # Keyed by the gamma value itself; later lookups must use an equal float.
        policy_evaluation_results[gamma] = {
            "run_time": total_time_evaluation,
            "value_function": V,
            "num_iters": num_iters,
            "convergence_info": convergence_info,
        }

with open("results/policy_evaluation_11_partition_results.pkl", "wb") as file:
    pickle.dump(experiment_results, file, protocol=pickle.HIGHEST_PROTOCOL)

## Visualize experiment results

In [None]:
# Load the policy-evaluation results that the experiment cell wrote to disk.
results_path = "results/policy_evaluation_11_partition_results.pkl"
with open(results_path, "rb") as results_file:
    experiment_results = pickle.load(results_file)

In [None]:
# For every partition and every discount factor, draw one figure per
# convergence distance recorded during policy evaluation.
for experiment_run, run_results in experiment_results.items():
    print("\n---------------------------------------\n")
    print(f"Experiment run with {experiment_run} partition")

    for gamma, evaluation in run_results["policy_evaluation"].items():
        convergence_info = evaluation["convergence_info"]
        # Entry 0 of each distance series is skipped, so the x-axis starts at 1.
        iterations = list(range(1, evaluation["num_iters"]))

        figure_specs = (
            ("max_dist", "MAX-DIST", f"Maximal convergence distance for gamma = {round(gamma, 2)}"),
            ("avg_dist", "L2-DIST", f"L2 convergence distance for gamma={round(gamma, 2)}"),
        )
        for series_key, y_label, figure_title in figure_specs:
            ax = sns.lineplot(x=iterations, y=convergence_info[series_key][1:])
            ax.set(xlabel="Iteration", ylabel=y_label)
            plt.title(figure_title)
            plt.show()

In [None]:
# Build one summary row per partition: mean/variance of the estimation run
# time and of the per-row sum, mean and maximum of the iteration counts.
estimation_convergence_results = []
for experiment_run, run_results in experiment_results.items():
    run_times = run_results["estimation_run_time"]
    iters_per_state = run_results["num_iters_per_state"]

    # Reduce along axis=1 first (presumably the state axis, one row per
    # repetition — verify against the experiment cell), then aggregate.
    iters_sum = np.sum(iters_per_state, axis=1)
    iters_mean = np.mean(iters_per_state, axis=1)
    iters_max = np.max(iters_per_state, axis=1)

    estimation_convergence_results.append(
        {
            "partition_method": experiment_run,
            "estimation_run_time_avg": np.mean(run_times),
            "estimation_run_time_var": np.var(run_times),
            "sum_estimation_iters_avg": np.mean(iters_sum),
            "sum_estimation_iters_var": np.var(iters_sum),
            "avg_estimation_iters_avg": np.mean(iters_mean),
            "avg_estimation_iters_var": np.var(iters_mean),
            "max_estimation_iters_avg": np.mean(iters_max),
            "max_estimation_iters_var": np.var(iters_max),
        }
    )

estimation_convergence_results_df = pd.DataFrame(estimation_convergence_results)

In [None]:
# Cell output: show the per-partition estimation summary table.
estimation_convergence_results_df

In [None]:
# Export the average iteration statistics as a LaTeX table. The mapping pairs
# each exported dataframe column with the header printed in the table.
latex_columns = {
    "partition_method": "Partition",
    "sum_estimation_iters_avg": "Sum of Iters",
    "avg_estimation_iters_avg": "Average Iters",
    "max_estimation_iters_avg": "Max Iters",
}
relevant_cols = list(latex_columns.keys())
cols_naming = list(latex_columns.values())
latex_table_str = estimation_convergence_results_df.to_latex(
    index=False,
    columns=relevant_cols,
    header=cols_naming,
    float_format="%.2f",
    bold_rows=True,
)

with open("results/latex_table_11_partitions_export.txt", "w") as file:
    file.write(latex_table_str)

In [None]:
# Overlay the maximal convergence distance of all partitions for one fixed
# discount factor, restricted to the first iterations of policy evaluation.
fixed_gamma = 0.95  # used as a dict key, so it must match a stored gamma exactly
max_plotted_iteration = 50  # exclusive upper bound of the plotted window

for experiment_run in experiment_results.keys():
    convergence_info = experiment_results[experiment_run]["policy_evaluation"][fixed_gamma]["convergence_info"]
    # Skip entry 0 and keep at most the entries inside the plotted window.
    max_dist = convergence_info["max_dist"][1:max_plotted_iteration]
    # Derive the x-axis from the data that is actually available: a run that
    # recorded fewer entries than the window would otherwise make plt.plot
    # fail on mismatched x/y lengths.
    x_values = list(range(1, len(max_dist) + 1))

    plt.plot(x_values, max_dist, label=experiment_run)

plt.legend(loc="upper right")
plt.xlabel("Iterations")
plt.ylabel("Max-DIST")
plt.title(f"Maximal convergence distance for gamma = {fixed_gamma}")
plt.show()

In [None]:
# Overlay the "avg_dist" convergence distance (labelled L2-DIST) of all
# partitions for one fixed discount factor, restricted to the first
# iterations of policy evaluation.
fixed_gamma = 0.95  # used as a dict key, so it must match a stored gamma exactly
max_plotted_iteration = 50  # exclusive upper bound of the plotted window

for experiment_run in experiment_results.keys():
    convergence_info = experiment_results[experiment_run]["policy_evaluation"][fixed_gamma]["convergence_info"]
    # Skip entry 0 and keep at most the entries inside the plotted window.
    avg_dist = convergence_info["avg_dist"][1:max_plotted_iteration]
    # Derive the x-axis from the data that is actually available: a run that
    # recorded fewer entries than the window would otherwise make plt.plot
    # fail on mismatched x/y lengths.
    x_values = list(range(1, len(avg_dist) + 1))

    plt.plot(x_values, avg_dist, label=experiment_run)

plt.legend(loc="upper right")
plt.xlabel("Iterations")
plt.ylabel("L2-DIST")
plt.title(f"L2 convergence distance for gamma = {fixed_gamma}")
plt.show()

In [None]:
# Bar chart: iterations policy evaluation needed per partition at one fixed
# discount factor.
fixed_gamma = 0.95  # used as a dict key, so it must match a stored gamma exactly
num_iters_list = [
    {
        "partition": experiment_run,
        "num_iters": run_results["policy_evaluation"][fixed_gamma]["num_iters"],
    }
    for experiment_run, run_results in experiment_results.items()
]
num_iters_df = pd.DataFrame(num_iters_list)

# NOTE(review): seaborn documents `saturation` as a proportion of the original
# colour saturation; 8 may have been meant as 0.8 — confirm.
ax = sns.barplot(
    data=num_iters_df,
    x="partition",
    y="num_iters",
    hue="partition",
    saturation=8,
    dodge=False,
)
ax.set(xlabel="partition method", ylabel="Number of Iterations")
plt.title("Policy Evaluation Convergence")
plt.show()

In [None]:
# Line chart: iterations until convergence as a function of the discount
# factor, one line per partition.
num_iters_list = [
    {"partition": experiment_run, "gamma": gamma, "num_iters": evaluation["num_iters"]}
    for experiment_run, run_results in experiment_results.items()
    for gamma, evaluation in run_results["policy_evaluation"].items()
]
num_iters_df = pd.DataFrame(num_iters_list)

ax = sns.lineplot(data=num_iters_df, x="gamma", y="num_iters", hue="partition")
ax.set(xlabel="discount factor gamma", ylabel="Number of Iterations")
plt.title("Policy Evaluation Convergence")
plt.show()