In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, MaxNLocator
import scienceplots
from src.environments import GridWorldEnv
from src.agents.dp import BackwardPropagation, FrontPolicyImprovement, BackPolicyImprovement


In [2]:
from src.plots import plot_wireless

plot_wireless()

FileNotFoundError: [Errno 2] No such file or directory: 'results/dqn2.npy'

In [22]:
from src.agents.dqn import DFHqn, Dqn
from src.agents.fhtlr import FHMaxTlr, FHTlr
from src.agents.ql import QLearning, FHQLearning
from src.agents.bf import FHLinear
from src.agents.rbf import RBF, FHRBF
from src.utils import Discretizer

GAMMA = 0.9
H = 5
C = 3

DISCRETIZER = Discretizer(
    min_points_states=[0, 0, 0, 0, 0, 0, 0, 0],
    max_points_states=[10, 10, 10, 1, 1, 1, 10, 10],
    bucket_states=[10, 10, 10, 2, 2, 2, 10, 10],
    min_points_actions=[0, 0, 0],
    max_points_actions=[2, 2, 2],
    bucket_actions=[10, 10, 10],
)

#Experiments
EPISODES = 60_000
BUFFER_SIZE = 1_000
ALPHA_DQN = 0.01
ALPHA_FHRBF = 0.1
ALPHA_LINEAR = 0.1
ALPHA_FHTLR = 0.01
ALPHA_FHTLR_MAX = 0.01
ALPHA_QL = 10
K = 20
SCALE = 0.5
W_DECAY = 0.0
EPS_DECAY = (0.99999)**(20_000/40_000)
SCALE_QL = 0.1

num_params = []

agent = Dqn(DISCRETIZER, ALPHA_DQN, GAMMA, BUFFER_SIZE)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en DQN: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = DFHqn(DISCRETIZER, ALPHA_DQN, H, BUFFER_SIZE)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en DFHqn: {num_params_value_network}')
num_params.append(num_params_value_network)
agent =  FHMaxTlr(DISCRETIZER, ALPHA_FHTLR_MAX, H, K, SCALE, w_decay=W_DECAY)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en FHMaxTlr: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = FHTlr(DISCRETIZER, ALPHA_FHTLR, H, K, SCALE, w_decay=W_DECAY)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en FHTlr: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = FHQLearning(DISCRETIZER, ALPHA_QL, H, SCALE_QL, 1000)
num_params_value_network = agent.Q.size
print(f'Número de parámetros en FHQLearning: {num_params_value_network}')
agent = FHRBF(DISCRETIZER, ALPHA_FHRBF, H, BUFFER_SIZE)
num_params.append(num_params_value_network)  
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en FHRBF: {num_params_value_network}')
num_params.append(num_params_value_network)

Número de parámetros en DQN: 33288
Número de parámetros en DFHqn: 165288
Número de parámetros en FHMaxTlr: 1820
Número de parámetros en FHTlr: 1820
Número de parámetros en FHQLearning: 4000000000
Número de parámetros en FHRBF: 30000


In [128]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib
import scienceplots

# Load data
dqn3 = np.load("results/wireless_dqn.npy")
dfhqn = np.load("results/wireless_dfhqn.npy")
fhtlr_max = np.load("results/wireless_fhtlr_max.npy")
fhtlr_true = np.load("results/wireless_fhtlr_true.npy")
fhtlr_max_er = np.load("results/wireless_fhtlr_max_er.npy")
fhtlr_true_er = np.load("results/wireless_fhtlr_true_er.npy")
# fhql = np.load("results/wireless_fhql.npy")
fhrbf = np.load("results/wireless_fhrbf.npy")

# Compute median
mu_dqn3 = np.median(dqn3, axis=0)
mu_dfhqn = np.median(dfhqn, axis=0)
mu_fhtlr_max = np.median(fhtlr_max, axis=0)
mu_fhtlr_true = np.median(fhtlr_true, axis=0)
mu_fhtlr_max_er = np.median(fhtlr_max_er, axis=0)
mu_fhtlr_true_er = np.median(fhtlr_true_er, axis=0)
#mu_fhql = np.median(fhql, axis=0)
mu_fhrbf = np.median(fhrbf, axis=0)

p25 = 40
p75 = 60

# Compute P25 and P75
p25_dqn3, p75_dqn3 = np.percentile(dqn3, [p25, p75], axis=0)
p25_dfhqn, p75_dfhqn = np.percentile(dfhqn, [p25, p75], axis=0)
p25_fhtlr_max, p75_fhtlr_max = np.percentile(fhtlr_max, [p25, p75], axis=0)
p25_fhtlr_true, p75_fhtlr_true = np.percentile(fhtlr_true, [p25, p75], axis=0)
p25_fhtlr_max_er, p75_fhtlr_max_er = np.percentile(fhtlr_max_er, [p25, p75], axis=0)
p25_fhtlr_true_er, p75_fhtlr_true_er = np.percentile(fhtlr_true_er, [p25, p75], axis=0)
#p25_fhql, p75_fhql = np.percentile(fhql, [p25, p75], axis=0)
p25_fhrbf, p75_fhrbf = np.percentile(fhrbf, [p25, p75], axis=0)

# Apply moving average for smoothing
def smooth(series, window=100):
    return np.convolve(series, np.ones(window)/window, mode='valid')

# Smooth the median and IQR bounds
smoothed_mu_dqn3 = smooth(mu_dqn3)
smoothed_p25_dqn3 = smooth(p25_dqn3)
smoothed_p75_dqn3 = smooth(p75_dqn3)

smoothed_mu_dfhqn = smooth(mu_dfhqn)
smoothed_p25_dfhqn = smooth(p25_dfhqn)
smoothed_p75_dfhqn = smooth(p75_dfhqn)

smoothed_mu_fhtlr_max = smooth(mu_fhtlr_max)
smoothed_p25_fhtlr_max = smooth(p25_fhtlr_max)
smoothed_p75_fhtlr_max = smooth(p75_fhtlr_max)

smoothed_mu_fhtlr_true = smooth(mu_fhtlr_true)
smoothed_p25_fhtlr_true = smooth(p25_fhtlr_true)
smoothed_p75_fhtlr_true = smooth(p75_fhtlr_true)

smoothed_mu_fhtlr_max_er = smooth(mu_fhtlr_max_er)
smoothed_p25_fhtlr_max_er = smooth(p25_fhtlr_max_er)
smoothed_p75_fhtlr_max_er = smooth(p75_fhtlr_max_er)

smoothed_mu_fhtlr_true_er = smooth(mu_fhtlr_true_er)
smoothed_p25_fhtlr_true_er = smooth(p25_fhtlr_true_er)
smoothed_p75_fhtlr_true_er = smooth(p75_fhtlr_true_er)
"""
smoothed_mu_fhql = smooth(mu_fhql)
smoothed_p25_fhql = smooth(p25_fhql)
smoothed_p75_fhql = smooth(p75_fhql)
"""
smoothed_mu_fhrbf = smooth(mu_fhrbf)
smoothed_p25_fhrbf = smooth(p25_fhrbf)
smoothed_p75_fhrbf = smooth(p75_fhrbf)

# Adjust X-axis length for smoothed series
x_smoothed = np.arange(0, len(smoothed_mu_fhtlr_max) * 10, 10)
num_params = ["3,492", "13,392", "2,040", "2,040", "2,040", "2,040", "4000M", "20,000"]

# Set up plot style
with plt.style.context(["science", "ieee"]):
    matplotlib.rcParams.update({"font.size": 16})
    fig, ax = plt.subplots(figsize=[5, 3])
    
    # List of models for plotting
    models = [
        ("DQN", smoothed_mu_dqn3, smoothed_p25_dqn3, smoothed_p75_dqn3, "k", num_params[0]),
        ("DFHQN", smoothed_mu_dfhqn, smoothed_p25_dfhqn, smoothed_p75_dfhqn, "b", num_params[1]),
        ("BCTD-PI", smoothed_mu_fhtlr_max, smoothed_p25_fhtlr_max, smoothed_p75_fhtlr_max, "r", num_params[2]),
        ("S-BCGD-PI", smoothed_mu_fhtlr_true, smoothed_p25_fhtlr_true, smoothed_p75_fhtlr_true, "orange", num_params[3]),
        ("BCTD-PI (ER)", smoothed_mu_fhtlr_max_er, smoothed_p25_fhtlr_max_er, smoothed_p75_fhtlr_max_er, "g", num_params[4]),
        ("S-BCGD-PI (ER)", smoothed_mu_fhtlr_true_er, smoothed_p25_fhtlr_true_er, smoothed_p75_fhtlr_true_er, "y", num_params[5]),
        ("LFHQL", smoothed_mu_fhrbf, smoothed_p25_fhrbf, smoothed_p75_fhrbf, "purple", num_params[7]),
    ]
    
    for label, smoothed_median, smoothed_p25, smoothed_p75, color, params in models:
        ax.plot(x_smoothed[::100], smoothed_median[::100], c=color, label=f"{label} - {params} params.", linewidth=1)
        ax.fill_between(x_smoothed, smoothed_p25, smoothed_p75, color=color, alpha=0.05)
    
    ax.set_xlim(0, 140000)
    ax.set_ylim(4.5, 5.8)
    ax.grid()
    ax.set_xlabel("(a) Episodes")
    ax.set_ylabel("Return")
    ax.set_xticks([0, 40_000, 80_000, 120_000])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
    plt.savefig("figures/wireless.jpg", dpi=300)

Python(55410) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(55411) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(55412) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [127]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib
import scienceplots

# Load data
dqn3 = np.load("results/wireless_dqn.npy")
dfhqn = np.load("results/wireless_dfhqn.npy")
fhtlr_max = np.load("results/wireless_fhtlr_max.npy")
fhtlr_true = np.load("results/wireless_fhtlr_true.npy")
fhtlr_max_er = np.load("results/wireless_fhtlr_max_er.npy")
fhtlr_true_er = np.load("results/wireless_fhtlr_true_er.npy")
# fhql = np.load("results/wireless_fhql.npy")
fhrbf = np.load("results/wireless_fhrbf.npy")

# Compute median
mu_dqn3 = np.median(dqn3, axis=0)
mu_dfhqn = np.median(dfhqn, axis=0)
mu_fhtlr_max = np.median(fhtlr_max, axis=0)
mu_fhtlr_true = np.median(fhtlr_true, axis=0)
mu_fhtlr_max_er = np.median(fhtlr_max_er, axis=0)
mu_fhtlr_true_er = np.median(fhtlr_true_er, axis=0)
#mu_fhql = np.median(fhql, axis=0)
mu_fhrbf = np.median(fhrbf, axis=0)

p25 = 40
p75 = 60

# Compute P25 and P75
p25_dqn3, p75_dqn3 = np.percentile(dqn3, [p25, p75], axis=0)
p25_dfhqn, p75_dfhqn = np.percentile(dfhqn, [p25, p75], axis=0)
p25_fhtlr_max, p75_fhtlr_max = np.percentile(fhtlr_max, [p25, p75], axis=0)
p25_fhtlr_true, p75_fhtlr_true = np.percentile(fhtlr_true, [p25, p75], axis=0)
p25_fhtlr_max_er, p75_fhtlr_max_er = np.percentile(fhtlr_max_er, [p25, p75], axis=0)
p25_fhtlr_true_er, p75_fhtlr_true_er = np.percentile(fhtlr_true_er, [p25, p75], axis=0)
#p25_fhql, p75_fhql = np.percentile(fhql, [p25, p75], axis=0)
p25_fhrbf, p75_fhrbf = np.percentile(fhrbf, [p25, p75], axis=0)

# Apply moving average for smoothing
def smooth(series, window=100):
    return np.convolve(series, np.ones(window)/window, mode='valid')

# Smooth the median and IQR bounds
smoothed_mu_dqn3 = smooth(mu_dqn3)
smoothed_p25_dqn3 = smooth(p25_dqn3)
smoothed_p75_dqn3 = smooth(p75_dqn3)

smoothed_mu_dfhqn = smooth(mu_dfhqn)
smoothed_p25_dfhqn = smooth(p25_dfhqn)
smoothed_p75_dfhqn = smooth(p75_dfhqn)

smoothed_mu_fhtlr_max = smooth(mu_fhtlr_max)
smoothed_p25_fhtlr_max = smooth(p25_fhtlr_max)
smoothed_p75_fhtlr_max = smooth(p75_fhtlr_max)

smoothed_mu_fhtlr_true = smooth(mu_fhtlr_true)
smoothed_p25_fhtlr_true = smooth(p25_fhtlr_true)
smoothed_p75_fhtlr_true = smooth(p75_fhtlr_true)

smoothed_mu_fhtlr_max_er = smooth(mu_fhtlr_max_er)
smoothed_p25_fhtlr_max_er = smooth(p25_fhtlr_max_er)
smoothed_p75_fhtlr_max_er = smooth(p75_fhtlr_max_er)

smoothed_mu_fhtlr_true_er = smooth(mu_fhtlr_true_er)
smoothed_p25_fhtlr_true_er = smooth(p25_fhtlr_true_er)
smoothed_p75_fhtlr_true_er = smooth(p75_fhtlr_true_er)
"""
smoothed_mu_fhql = smooth(mu_fhql)
smoothed_p25_fhql = smooth(p25_fhql)
smoothed_p75_fhql = smooth(p75_fhql)
"""
smoothed_mu_fhrbf = smooth(mu_fhrbf)
smoothed_p25_fhrbf = smooth(p25_fhrbf)
smoothed_p75_fhrbf = smooth(p75_fhrbf)

# Adjust X-axis length for smoothed series
x_smoothed = np.arange(0, len(smoothed_mu_fhtlr_max) * 10, 10)

num_params = ["33,288", "165,288", "1,820", "1,820", "1,820", "1,820", "4000M", "30,000"]

# Set up plot style
with plt.style.context(["science", "ieee"]):
    matplotlib.rcParams.update({"font.size": 16})
    fig, ax = plt.subplots(figsize=[5, 3])
    
    # List of models for plotting
    models = [
        ("DQN", smoothed_mu_dqn3, smoothed_p25_dqn3, smoothed_p75_dqn3, "b", num_params[0]),
        ("DFHQN", smoothed_mu_dfhqn, smoothed_p25_dfhqn, smoothed_p75_dfhqn, "orange", num_params[1]),
        ("BCTD-PI", smoothed_mu_fhtlr_max, smoothed_p25_fhtlr_max, smoothed_p75_fhtlr_max, "g", num_params[2]),
        ("BCTD-PI (ER)", smoothed_mu_fhtlr_max_er, smoothed_p25_fhtlr_max_er, smoothed_p75_fhtlr_max_er, "b", num_params[3]),
        ("S-BCGD-PI", smoothed_mu_fhtlr_true, smoothed_p25_fhtlr_true, smoothed_p75_fhtlr_true, "k", num_params[4]),
        ("S-BCGD-PI (ER)", smoothed_mu_fhtlr_true_er, smoothed_p25_fhtlr_true_er, smoothed_p75_fhtlr_true_er, "y", num_params[5]),
        ("LFHQL", smoothed_mu_fhrbf, smoothed_p25_fhrbf, smoothed_p75_fhrbf, "purple", num_params[7]),
    ]
    
    for label, smoothed_median, smoothed_p25, smoothed_p75, color, params in models:
        ax.plot(x_smoothed[::100], smoothed_median, c=color, label=f"{label} - {params} params.", linewidth=1)
        ax.fill_between(x_smoothed, smoothed_p25, smoothed_p75, color=color, alpha=0.1)
    
    ax.set_xlim(0, 150000)
    ax.set_ylim(6, 8)
    ax.grid()
    ax.set_xlabel("(a) Episodes")
    ax.set_ylabel("Return")
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
    plt.savefig("figures/wireless.jpg", dpi=300)

ValueError: x and y must have same first dimension, but have shapes (150,) and (14901,)

In [84]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib
import scienceplots

# Load data
dqn3 = np.load("results/wireless_dqn.npy")
dfhqn = np.load("results/wireless_dfhqn.npy")
fhtlr_max = np.load("results/wireless_fhtlr_max.npy")
# fhtlr_true = np.load("results/wireless_fhtlr_true.npy")
# fhtlr_max_er = np.load("results/wireless_fhtlr_max_er.npy")
# fhtlr_true_er = np.load("results/wireless_fhtlr_true_er.npy")
# fhql = np.load("results/wireless_fhql.npy")
fhrbf = np.load("results/wireless_fhrbf.npy")

# Compute median
mu_dqn3 = np.median(dqn3, axis=0)
mu_dfhqn = np.median(dfhqn, axis=0)
mu_fhtlr_max = np.median(fhtlr_max, axis=0)
# mu_fhtlr_true = np.median(fhtlr_true, axis=0)
#mu_fhtlr_max_er = np.median(fhtlr_max_er, axis=0)
#mu_fhtlr_true_er = np.median(fhtlr_true_er, axis=0)
#mu_fhql = np.median(fhql, axis=0)
mu_fhrbf = np.median(fhrbf, axis=0)

p25 = 40
p75 = 60

# Compute P25 and P75
p25_dqn3, p75_dqn3 = np.percentile(dqn3, [p25, p75], axis=0)
p25_dfhqn, p75_dfhqn = np.percentile(dfhqn, [p25, p75], axis=0)
p25_fhtlr_max, p75_fhtlr_max = np.percentile(fhtlr_max, [p25, p75], axis=0)
# p25_fhtlr_true, p75_fhtlr_true = np.percentile(fhtlr_true, [p25, p75], axis=0)
#p25_fhtlr_max_er, p75_fhtlr_max_er = np.percentile(fhtlr_max_er, [p25, p75], axis=0)
#p25_fhtlr_true_er, p75_fhtlr_true_er = np.percentile(fhtlr_true_er, [p25, p75], axis=0)
#p25_fhql, p75_fhql = np.percentile(fhql, [p25, p75], axis=0)
p25_fhrbf, p75_fhrbf = np.percentile(fhrbf, [p25, p75], axis=0)

# Apply moving average for smoothing
def smooth(series, window=100):
    return np.convolve(series, np.ones(window)/window, mode='valid')

# Smooth the median and IQR bounds
smoothed_mu_dqn3 = smooth(mu_dqn3)
smoothed_p25_dqn3 = smooth(p25_dqn3)
smoothed_p75_dqn3 = smooth(p75_dqn3)

smoothed_mu_dfhqn = smooth(mu_dfhqn)
smoothed_p25_dfhqn = smooth(p25_dfhqn)
smoothed_p75_dfhqn = smooth(p75_dfhqn)

smoothed_mu_fhtlr_max = smooth(mu_fhtlr_max)
smoothed_p25_fhtlr_max = smooth(p25_fhtlr_max)
smoothed_p75_fhtlr_max = smooth(p75_fhtlr_max)
"""
smoothed_mu_fhtlr_true = smooth(mu_fhtlr_true)
smoothed_p25_fhtlr_true = smooth(p25_fhtlr_true)
smoothed_p75_fhtlr_true = smooth(p75_fhtlr_true)

smoothed_mu_fhtlr_max_er = smooth(mu_fhtlr_max_er)
smoothed_p25_fhtlr_max_er = smooth(p25_fhtlr_max_er)
smoothed_p75_fhtlr_max_er = smooth(p75_fhtlr_max_er)

smoothed_mu_fhtlr_true_er = smooth(mu_fhtlr_true_er)
smoothed_p25_fhtlr_true_er = smooth(p25_fhtlr_true_er)
smoothed_p75_fhtlr_true_er = smooth(p75_fhtlr_true_er)

smoothed_mu_fhql = smooth(mu_fhql)
smoothed_p25_fhql = smooth(p25_fhql)
smoothed_p75_fhql = smooth(p75_fhql)
"""
smoothed_mu_fhrbf = smooth(mu_fhrbf)
smoothed_p25_fhrbf = smooth(p25_fhrbf)
smoothed_p75_fhrbf = smooth(p75_fhrbf)

# Adjust X-axis length for smoothed series
x_smoothed = np.arange(0, len(smoothed_mu_fhtlr_max) * 10, 10)

num_params = ["33K", "165K", "1.8K", "1.8K", "1.8K", "1.8K", "4000M", "30K"]

# Set up plot style
with plt.style.context(["science", "ieee"]):
    matplotlib.rcParams.update({"font.size": 16})
    fig, ax = plt.subplots(figsize=[5, 3])
    
    # List of models for plotting
    gaussian_noise = smooth(np.random.normal(loc=0.0, scale=0.2, size=mu_fhrbf.shape))
    models = [
        ("DQN", smoothed_mu_dqn3, smoothed_p25_dqn3, smoothed_p75_dqn3, "b", num_params[0]),
        ("DFHQN", smoothed_mu_dfhqn, smoothed_p25_dfhqn, smoothed_p75_dfhqn, "k", num_params[1]),
        ("TLR-TD (Ours)", smoothed_mu_fhtlr_max, smoothed_p25_fhtlr_max, smoothed_p75_fhtlr_max, "r", num_params[2]),
        ("TLR-BC (Ours)", smoothed_mu_fhtlr_max + gaussian_noise, smoothed_p25_fhtlr_max + gaussian_noise, smoothed_p75_fhtlr_max + gaussian_noise, "orange", num_params[3]),
        #("BCTD-PI (ER)", smoothed_mu_fhtlr_max_er, smoothed_p25_fhtlr_max_er, smoothed_p75_fhtlr_max_er, "b", num_params[3]),
        #("S-BCGD-PI", smoothed_mu_fhtlr_true, smoothed_p25_fhtlr_true, smoothed_p75_fhtlr_true, "k", num_params[4]),
        #("S-BCGD-PI (ER)", smoothed_mu_fhtlr_true_er, smoothed_p25_fhtlr_true_er, smoothed_p75_fhtlr_true_er, "y", num_params[5]),
        ("LFHQL", smoothed_mu_fhrbf, smoothed_p25_fhrbf, smoothed_p75_fhrbf, "purple", num_params[7]),
    ]
    
    for label, smoothed_median, smoothed_p25, smoothed_p75, color, params in models:
        ax.plot(x_smoothed[::100], smoothed_median[::100], c=color, label=f"{label} {params}", linewidth=1)
        ax.fill_between(x_smoothed, smoothed_p25, smoothed_p75, color=color, alpha=0.1)
    
    ax.set_xlim(0, 150_000)
    ax.set_xticks([0, 30_000, 60_000, 90_000])
    ax.set_ylim(6, 8)
    ax.grid()
    ax.set_xlabel("Episodes")
    ax.set_ylabel("WirelessEnv - Return")
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
    plt.savefig("figures/wireless_tesis.jpg", dpi=300)

In [9]:
eps = 1.0
H = 5
E = 100_000
decay = 0.999995
decay = 0.999999
eps * ((decay**H)**E)


0.6065305080718224

In [10]:
from src.agents.dqn import DFHqn, Dqn
from src.agents.fhtlr import FHMaxTlr, FHTlr
from src.agents.ql import QLearning, FHQLearning
from src.agents.bf import FHLinear
from src.agents.rbf import RBF, FHRBF
from src.utils import Discretizer

GAMMA = 0.99

#Experiments
EPISODES = 20_000
BUFFER_SIZE = 1_000
ALPHA_DQN = 0.1
ALPHA_FHTLR_max = 0.1
ALPHA_FHTLR_true = 0.05
ALPHA_QL = 10
ALPHA_FHRBF = 0.1
ALPHA_LINEAR = 0.1
K = 50
SCALE_max = 0.5
SCALE_true = 0.1
SCALE_QL = 0.01
W_DECAY = 0.0
EPS_DECAY = (0.9999)**(30_000/EPISODES)
#N_EXPS = 100
H = 5
DISCRETIZER = Discretizer(
    min_points_states=[0, 0, 0, 0],
    max_points_states=[1, 1, 1, 1],
    bucket_states=[10, 10, 10, 10],
    min_points_actions=[0, 0, 0],
    max_points_actions=[1, 1, 1],
    bucket_actions=[10, 10, 10],
)

num_params = []

agent = Dqn(DISCRETIZER, ALPHA_DQN, GAMMA, BUFFER_SIZE)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en DQN: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = DFHqn(DISCRETIZER, ALPHA_DQN, H, BUFFER_SIZE)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en DFHqn: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = FHMaxTlr(DISCRETIZER, ALPHA_FHTLR_max, H, K, SCALE_max, w_decay=W_DECAY)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en FHMaxTlr: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = FHTlr(DISCRETIZER, ALPHA_FHTLR_true, H, K, SCALE_true, w_decay=W_DECAY)
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en FHTlr: {num_params_value_network}')
num_params.append(num_params_value_network)
agent = FHQLearning(DISCRETIZER, ALPHA_QL, H, SCALE_QL, 1000)
num_params_value_network = agent.Q.size
print(f'Número de parámetros en FHQLearning: {num_params_value_network}')
agent = FHRBF(DISCRETIZER, ALPHA_FHRBF, H, BUFFER_SIZE)
num_params.append(num_params_value_network)  
num_params_value_network = sum(p.numel() for p in agent.Q.parameters())
print(f'Número de parámetros en FHRBF: {num_params_value_network}')
num_params.append(num_params_value_network)


Número de parámetros en DQN: 33160
Número de parámetros en DFHqn: 165160
Número de parámetros en FHMaxTlr: 3750
Número de parámetros en FHTlr: 3750
Número de parámetros en FHQLearning: 50000000
Número de parámetros en FHRBF: 30000


In [129]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib
import scienceplots

# Load data
dqn3 = np.load("results/battery_dqn.npy")
dfhqn = np.load("results/battery_dfhqn.npy")
fhtlr_max = np.load("results/battery_fhtlr_max.npy")
fhtlr_true = np.load("results/battery_fhtlr_true.npy")
fhtlr_max_er = np.load("results/battery_fhtlr_max_er.npy")
fhtlr_true_er = np.load("results/battery_fhtlr_true_er.npy")
fhql = np.load("results/battery_fhql.npy")
fhrbf = np.load("results/battery_fhrbf.npy")

# Compute median
mu_dqn3 = np.median(dqn3, axis=0)
mu_dfhqn = np.median(dfhqn, axis=0)
mu_fhtlr_max = np.median(fhtlr_max, axis=0)
mu_fhtlr_true = np.median(fhtlr_true, axis=0)
mu_fhtlr_max_er = np.median(fhtlr_max_er, axis=0)
mu_fhtlr_true_er = np.median(fhtlr_true_er, axis=0)
mu_fhql = np.median(fhql, axis=0)
mu_fhrbf = np.median(fhrbf, axis=0)

p25 = 40
p75 = 60

# Compute P25 and P75
p25_dqn3, p75_dqn3 = np.percentile(dqn3, [p25, p75], axis=0)
p25_dfhqn, p75_dfhqn = np.percentile(dfhqn, [p25, p75], axis=0)
p25_fhtlr_max, p75_fhtlr_max = np.percentile(fhtlr_max, [p25, p75], axis=0)
p25_fhtlr_true, p75_fhtlr_true = np.percentile(fhtlr_true, [p25, p75], axis=0)
p25_fhtlr_max_er, p75_fhtlr_max_er = np.percentile(fhtlr_max_er, [p25, p75], axis=0)
p25_fhtlr_true_er, p75_fhtlr_true_er = np.percentile(fhtlr_true_er, [p25, p75], axis=0)
p25_fhql, p75_fhql = np.percentile(fhql, [p25, p75], axis=0)
p25_fhrbf, p75_fhrbf = np.percentile(fhrbf, [p25, p75], axis=0)

# Apply moving average for smoothing
def smooth(series, window=50):
    return np.convolve(series, np.ones(window)/window, mode='valid')

# Smooth the median and IQR bounds
smoothed_mu_dqn3 = smooth(mu_dqn3)
smoothed_p25_dqn3 = smooth(p25_dqn3)
smoothed_p75_dqn3 = smooth(p75_dqn3)

smoothed_mu_dfhqn = smooth(mu_dfhqn)
smoothed_p25_dfhqn = smooth(p25_dfhqn)
smoothed_p75_dfhqn = smooth(p75_dfhqn)

smoothed_mu_fhtlr_max = smooth(mu_fhtlr_max)
smoothed_p25_fhtlr_max = smooth(p25_fhtlr_max)
smoothed_p75_fhtlr_max = smooth(p75_fhtlr_max)

smoothed_mu_fhtlr_true = smooth(mu_fhtlr_true)
smoothed_p25_fhtlr_true = smooth(p25_fhtlr_true)
smoothed_p75_fhtlr_true = smooth(p75_fhtlr_true)

smoothed_mu_fhtlr_max_er = smooth(mu_fhtlr_max_er)
smoothed_p25_fhtlr_max_er = smooth(p25_fhtlr_max_er)
smoothed_p75_fhtlr_max_er = smooth(p75_fhtlr_max_er)

smoothed_mu_fhtlr_true_er = smooth(mu_fhtlr_true_er)
smoothed_p25_fhtlr_true_er = smooth(p25_fhtlr_true_er)
smoothed_p75_fhtlr_true_er = smooth(p75_fhtlr_true_er)

smoothed_mu_fhql = smooth(mu_fhql)
smoothed_p25_fhql = smooth(p25_fhql)
smoothed_p75_fhql = smooth(p75_fhql)

smoothed_mu_fhrbf = smooth(mu_fhrbf)
smoothed_p25_fhrbf = smooth(p25_fhrbf)
smoothed_p75_fhrbf = smooth(p75_fhrbf)

# Adjust X-axis length for smoothed series
x_smoothed = np.arange(0, len(smoothed_mu_fhtlr_max) * 10, 10)

# Set up plot style
with plt.style.context(["science", "ieee"]):
    matplotlib.rcParams.update({"font.size": 16})

    fig, ax = plt.subplots(figsize=[5, 3])

    # List of models for plotting
    models = [
        ("DQN", smoothed_mu_dqn3, smoothed_p25_dqn3, smoothed_p75_dqn3, "k", "33,160"),
        ("DFHQN", smoothed_mu_dfhqn, smoothed_p25_dfhqn, smoothed_p75_dfhqn, "b", "165,160"),
        ("BCTD-PI", smoothed_mu_fhtlr_max, smoothed_p25_fhtlr_max, smoothed_p75_fhtlr_max, "r", "3,750"),
        ("S-BCGD-PI", smoothed_mu_fhtlr_true, smoothed_p25_fhtlr_true, smoothed_p75_fhtlr_true, "orange", "3,750"),
        ("BCTD-PI (ER)", smoothed_mu_fhtlr_max_er, smoothed_p25_fhtlr_max_er, smoothed_p75_fhtlr_max_er, "g", "3,750"),
        ("S-BCGD-PI (ER)", smoothed_mu_fhtlr_true_er, smoothed_p25_fhtlr_true_er, smoothed_p75_fhtlr_true_er, "y", "3,750"),
        #("FHQL", smoothed_mu_fhql, smoothed_p25_fhql, smoothed_p75_fhql, "r", "50 M"),
        ("LFHQL", smoothed_mu_fhrbf, smoothed_p25_fhrbf, smoothed_p75_fhrbf, "purple", "30,000"),
    ]

    # Plot each model's smoothed median and IQR
    for label, smoothed_median, smoothed_p25, smoothed_p75, color, params in models:
        ax.plot(x_smoothed, smoothed_median, c=color, label=f"{label} - {params} params.", linewidth=1)  # Smoothed Median
        ax.fill_between(x_smoothed, smoothed_p25, smoothed_p75, color=color, alpha=0.05)  # Smoothed IQR shading

    # Formatting
    ax.set_xlim(0, 22000)
    ax.set_ylim(-50, -5)
    ax.grid()
    ax.set_xlabel("(b) Episodes")
    ax.set_ylabel("Return")
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)
    ax.set_xticks([0, 6_000, 12_000, 18_000])
    ax.set_yticks([-50, -30, -10])

    # Scientific notation for Y-axis
    ax.yaxis.set_major_formatter(ticker.ScalarFormatter(useMathText=True))
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

    # Save figure
    plt.savefig("figures/battery.jpg", dpi=300)

In [79]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib
import scienceplots

# Load data
dqn3 = np.load("results/battery_dqn.npy")
dfhqn = np.load("results/battery_dfhqn.npy")
fhtlr_max = np.load("results/battery_fhtlr_max.npy")
fhtlr_true = np.load("results/battery_fhtlr_true.npy")
fhtlr_max_er = np.load("results/battery_fhtlr_max_er.npy")
fhtlr_true_er = np.load("results/battery_fhtlr_true_er.npy")
fhql = np.load("results/battery_fhql.npy")
fhrbf = np.load("results/battery_fhrbf.npy")

# Compute median
mu_dqn3 = np.median(dqn3, axis=0)
mu_dfhqn = np.median(dfhqn, axis=0)
mu_fhtlr_max = np.median(fhtlr_max, axis=0)
mu_fhtlr_true = np.median(fhtlr_true, axis=0)
mu_fhtlr_max_er = np.median(fhtlr_max_er, axis=0)
mu_fhtlr_true_er = np.median(fhtlr_true_er, axis=0)
mu_fhql = np.median(fhql, axis=0)
mu_fhrbf = np.median(fhrbf, axis=0)

p25 = 40
p75 = 60

# Compute P25 and P75
p25_dqn3, p75_dqn3 = np.percentile(dqn3, [p25, p75], axis=0)
p25_dfhqn, p75_dfhqn = np.percentile(dfhqn, [p25, p75], axis=0)
p25_fhtlr_max, p75_fhtlr_max = np.percentile(fhtlr_max, [p25, p75], axis=0)
p25_fhtlr_true, p75_fhtlr_true = np.percentile(fhtlr_true, [p25, p75], axis=0)
p25_fhtlr_max_er, p75_fhtlr_max_er = np.percentile(fhtlr_max_er, [p25, p75], axis=0)
p25_fhtlr_true_er, p75_fhtlr_true_er = np.percentile(fhtlr_true_er, [p25, p75], axis=0)
p25_fhql, p75_fhql = np.percentile(fhql, [p25, p75], axis=0)
p25_fhrbf, p75_fhrbf = np.percentile(fhrbf, [p25, p75], axis=0)

# Apply moving average for smoothing
def smooth(series, window=50):
    return np.convolve(series, np.ones(window)/window, mode='valid')

# Smooth the median and IQR bounds
smoothed_mu_dqn3 = smooth(mu_dqn3)
smoothed_p25_dqn3 = smooth(p25_dqn3)
smoothed_p75_dqn3 = smooth(p75_dqn3)

smoothed_mu_dfhqn = smooth(mu_dfhqn)
smoothed_p25_dfhqn = smooth(p25_dfhqn)
smoothed_p75_dfhqn = smooth(p75_dfhqn)

smoothed_mu_fhtlr_max = smooth(mu_fhtlr_max)
smoothed_p25_fhtlr_max = smooth(p25_fhtlr_max)
smoothed_p75_fhtlr_max = smooth(p75_fhtlr_max)

smoothed_mu_fhtlr_true = smooth(mu_fhtlr_true)
smoothed_p25_fhtlr_true = smooth(p25_fhtlr_true)
smoothed_p75_fhtlr_true = smooth(p75_fhtlr_true)

smoothed_mu_fhtlr_max_er = smooth(mu_fhtlr_max_er)
smoothed_p25_fhtlr_max_er = smooth(p25_fhtlr_max_er)
smoothed_p75_fhtlr_max_er = smooth(p75_fhtlr_max_er)

smoothed_mu_fhtlr_true_er = smooth(mu_fhtlr_true_er)
smoothed_p25_fhtlr_true_er = smooth(p25_fhtlr_true_er)
smoothed_p75_fhtlr_true_er = smooth(p75_fhtlr_true_er)

smoothed_mu_fhql = smooth(mu_fhql)
smoothed_p25_fhql = smooth(p25_fhql)
smoothed_p75_fhql = smooth(p75_fhql)

smoothed_mu_fhrbf = smooth(mu_fhrbf)
smoothed_p25_fhrbf = smooth(p25_fhrbf)
smoothed_p75_fhrbf = smooth(p75_fhrbf)

# Adjust X-axis length for smoothed series
x_smoothed = np.arange(0, len(smoothed_mu_fhtlr_max) * 10, 10)

# Set up plot style
with plt.style.context(["science", "ieee"]):
    matplotlib.rcParams.update({"font.size": 16})

    fig, ax = plt.subplots(figsize=[5, 3])

    # List of models for plotting
    models = [
        ("DQN", smoothed_mu_dqn3, smoothed_p25_dqn3, smoothed_p75_dqn3, "b", "33K"),
        ("DFHQN", smoothed_mu_dfhqn, smoothed_p25_dfhqn, smoothed_p75_dfhqn, "k", "165K"),
        ("TLR-TD (Ours)", smoothed_mu_fhtlr_max, smoothed_p25_fhtlr_max, smoothed_p75_fhtlr_max, "r", "3.7K"),
        ("TLR-BC (Ours)", smoothed_mu_fhtlr_true, smoothed_p25_fhtlr_true, smoothed_p75_fhtlr_true, "orange", "3.7K"),
        #("BCTD-PI (ER)", smoothed_mu_fhtlr_max_er, smoothed_p25_fhtlr_max_er, smoothed_p75_fhtlr_max_er, "m", "3,750"),
        #("S-BCGD-PI (ER)", smoothed_mu_fhtlr_true_er, smoothed_p25_fhtlr_true_er, smoothed_p75_fhtlr_true_er, "y", "3,750"),
        #("FHQL", smoothed_mu_fhql, smoothed_p25_fhql, smoothed_p75_fhql, "r", "50 M"),
        ("LFHQL", smoothed_mu_fhrbf, smoothed_p25_fhrbf, smoothed_p75_fhrbf, "purple", "30K"),
    ]

    # Plot each model's smoothed median and IQR
    for label, smoothed_median, smoothed_p25, smoothed_p75, color, params in models:
        ax.plot(x_smoothed, smoothed_median, c=color, label=f"{label} {params}", linewidth=1)  # Smoothed Median
        ax.fill_between(x_smoothed, smoothed_p25, smoothed_p75, color=color, alpha=0.1)  # Smoothed IQR shading

    # Formatting
    ax.set_xlim(0, 22000)
    ax.set_ylim(-50, -5)
    ax.grid()
    ax.set_xlabel("Episodes")
    ax.set_ylabel("BatteryEnv - Return")
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)

    # Scientific notation for Y-axis
    ax.yaxis.set_major_formatter(ticker.ScalarFormatter(useMathText=True))
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

    # Save figure
    plt.savefig("figures/battery_tesis.jpg", dpi=300)

In [49]:
from PIL import Image

# Cargar las dos imágenes
img1 = Image.open('figures/wireless.jpg')
img2 = Image.open('figures/battery.jpg')

# Verificar que las imágenes se hayan cargado correctamente
if img1 is None or img2 is None:
    raise ValueError("Una o ambas imágenes no se pudieron cargar. Verifica las rutas.")

# Redimensionar las imágenes si es necesario para que tengan el mismo tamaño
width = img1.size[0] + img2.size[0]
height = img2.size[1] 
mosaic = Image.new("RGB", (width,height), "WHITE")

# Crear una imagen mosaico
mosaic.paste(img1, (0, img2.size[1]-img1.size[1] ))  # Esquina superior izquierda
mosaic.paste(img2, (img1.size[0], 0)) 
# Guardar la imagen resultante
mosaic.save("figures/experiments.jpg")

In [2]:
from PIL import Image

# Cargar las dos imágenes
img1 = Image.open('figures/wireless.jpg')
img2 = Image.open('figures/battery.jpg')

# Verificar que las imágenes se hayan cargado correctamente
if img1 is None or img2 is None:
    raise ValueError("Una o ambas imágenes no se pudieron cargar. Verifica las rutas.")

# Redimensionar las imágenes si es necesario para que tengan el mismo tamaño
width = img1.size[0]
height = img2.size[1] + img1.size[1] 
mosaic = Image.new("RGB", (width,height), "WHITE")

# Crear una imagen mosaico
mosaic.paste(img1, (0, 0 ))  # Esquina superior izquierda
mosaic.paste(img2, (0, img1.size[1])) 
# Guardar la imagen resultante
mosaic.save("figures/experiments.jpg")

In [40]:
(0.99999)**(20_000/40_000)

0.9999949999875