In [16]:
from Clustering import *
import matplotlib.pyplot as plt

In [17]:
# Number of independent repetitions run for every (n, u) setting in the
# experiment loop; the plotting cell also divides the std by sqrt(num_repeats)
# when drawing error bars.
num_repeats = 10

# Experiment 1:
Varying n (the number of contexts) under each regime of TH, where TH = n (log n)^u

In [10]:
# Experiment 1: sweep the number of contexts n and the exponent u in
# TH = n (log n)^u, and record the clustering error rate right after the
# spectral initialisation and after the likelihood-improvement step.

# range of n, number of contexts
n_range = range(100, 200, 20)
# n_range = range(100, 300, 20)  # wider sweep
N = len(n_range)
# us = [0, 0.5, 1, 1.5, 2]  # finer sweep
us = [0, 1, 2]  # TH = n (log n)^u
# number of clusters (S) and actions (A)
S, A = 2, 3
# symmetric binary case: one S x S matrix per action
# (presumably indexed by action inside Synthetic -- confirm against Clustering)
eps = 0.2
latent_transitions = [np.array([[1 / 2 - eps, 1 / 2 + eps], [1 / 2 + eps, 1 / 2 - eps]]),
                      np.array([[1 / 2 + eps, 1 / 2 - eps], [1 / 2 - eps, 1 / 2 + eps]]),
                      np.array([[1 / 2, 1 / 2], [1 / 2, 1 / 2]])]

# Result tables, indexed [u index][n index]. They start as NaN so that an
# interrupted sweep leaves visibly missing entries instead of fake 1.0 errors.
results_shape = (len(us), len(n_range))
init_errors_mean1 = np.full(results_shape, np.nan)
init_errors_std1 = np.full(results_shape, np.nan)
final_errors_mean1 = np.full(results_shape, np.nan)
final_errors_std1 = np.full(results_shape, np.nan)

# main loop 1: over the number of contexts
for j, n in enumerate(n_range):
    # set H
    H = int(np.ceil(n / 2))
    # set environment (key is 'S', not 'S:' -- the stray colon meant S was
    # never passed under the intended name)
    env_config = {'n': n, 'H': H, 'S': S, 'A': A,
                  'ps': latent_transitions,
                  'qs': 'uniform'}
    env = Synthetic(env_config)
    # true clusters: map every context index x to the index s of its partition
    f = {}
    for s in range(env.S):
        cluster = env.partitions[s]
        for x in range(cluster.start, cluster.start + cluster.n):
            f[x] = s
    # main loop 2: over the exponent u
    for i, u in enumerate(us):
        print(f"#### n = {n}, TH = n (log n)^{u} ### \n")
        # set T: round the *ratio* up so that T * H >= n A (log nA)^u.
        # (Previously ceil was applied before dividing by H and int() then
        # truncated, which could undershoot the target by up to H - 1 samples.)
        T = int(np.ceil(n * A * np.log(n * A) ** u / H))
        init_errors, final_errors = [], []
        for _ in range(num_repeats):
            # obtain trajectories
            trajectories = generate_trajectories(T, env)

            # initial spectral clustering
            f_1 = init_spectral(env, trajectories)
            init_err_rate = error_rate(f, f_1, env.n, env.S)
            init_errors.append(init_err_rate)

            # likelihood improvement; keep the error rate of the last step
            f_final, errors = likelihood_improvement(env, trajectories, f_1, f, num_iter=None)
            final_errors.append(errors[-1])

        print("initial: ", init_errors)
        print("final: ", final_errors)
        # logging
        init_errors_mean1[i][j] = np.mean(init_errors)
        init_errors_std1[i][j] = np.std(init_errors)
        final_errors_mean1[i][j] = np.mean(final_errors)
        final_errors_std1[i][j] = np.std(final_errors)

#### n = 100, TH = n (log n)^0 ### 

initial:  [0.46, 0.47, 0.45, 0.48, 0.47, 0.49, 0.49, 0.48, 0.45, 0.41]
final:  [0.5, 0.44, 0.46, 0.5, 0.5, 0.5, 0.5, 0.43, 0.46, 0.45]
#### n = 100, TH = n (log n)^1 ### 

initial:  [0.49, 0.44, 0.46, 0.44, 0.48, 0.42, 0.42, 0.48, 0.42, 0.48]
final:  [0.42, 0.48, 0.47, 0.38, 0.44, 0.39, 0.33, 0.46, 0.48, 0.48]
#### n = 100, TH = n (log n)^2 ### 

initial:  [0.02, 0.06, 0.34, 0.34, 0.27, 0.37, 0.49, 0.28, 0.48, 0.47]
final:  [0.0, 0.0, 0.01, 0.0, 0.0, 0.01, 0.11, 0.01, 0.5, 0.46]
#### n = 120, TH = n (log n)^0 ### 



IndexError: list index out of range

In [None]:
# FINAL PLOTS
# One panel per exponent u, comparing the error rate after the spectral
# initialisation with the error rate after likelihood improvement.
# squeeze=False keeps the axes array 2-D, so indexing also works if len(us) == 1.
fig, axs = plt.subplots(len(us), sharex=True, squeeze=False)
axs = axs[:, 0]
fig.suptitle("Experiment 1: Varying n and u")
# 1.96 * std / sqrt(num_repeats): half-width of a normal-approximation 95% CI
ci_scale = 1.96 / np.sqrt(num_repeats)
for i, u in enumerate(us):
    # yerr must be the 1-D row for this u; passing the whole 2-D table is an
    # invalid shape for errorbar.
    axs[i].errorbar(list(n_range), init_errors_mean1[i],
                    yerr=ci_scale * init_errors_std1[i],
                    marker="o", label="spectral")
    axs[i].errorbar(list(n_range), final_errors_mean1[i],
                    yerr=ci_scale * final_errors_std1[i],
                    marker="o", label="spectral + improvement")
    # Axes exposes set_title / set_ylabel; `title` itself is a Text attribute.
    axs[i].set_title(f"TH = n (log n)^{u}")
    axs[i].legend()
    axs[i].set_ylabel("error rate")
# x axis is shared, so label only the bottom panel
axs[-1].set_xlabel("n")
fig.savefig("plot_exp1.pdf", dpi=500)

# persist the aggregated results next to the figure
np.savez_compressed("exp1", init_mean=init_errors_mean1, init_std=init_errors_std1, final_mean=final_errors_mean1, final_std=final_errors_std1)