In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import ControlSystem
from QNetwork import *

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---- Run configuration ------------------------------------------------------
model_name = "qnetwork_11act_boltzmann"  # prefix of every checkpoint file written below
episodes = 1300
samples_per_target = 200   # simulation steps spent on each target
targets_per_episode = 5    # targets drawn per episode
batch_size = 16            # passed to agent.replay()
batch_per_episode = 100    # agent.replay() calls after each episode

action_size = 11
agent = QLearningAgentBoltzmann(
    action_size=action_size,
    n=100,
    gamma=0.95,
    temperature=0.03,
    temperature_decay=0.99998,
    temperature_min=0.003,
    learning_rate=0.00005 * batch_size,
    warmup_steps=3000,
    learning_rate_decay=0.99997,
    stored_episodes=15,
    samples_per_episode=targets_per_episode * samples_per_target,
)

# ---- Training loop ----------------------------------------------------------
best_reward = -1000000
for e in range(episodes):
    # A new PT1 system is constructed for every episode, which resets the environment.
    pt1_with_delay = ControlSystem.PT1(K=2, T=5, delta_t=0.1, delay=2)
    total_reward = 0
    state = torch.tensor([pt1_with_delay.y_prev], device=device)

    for target_idx in range(targets_per_episode):
        # Draw a new target uniformly from [0, 2).
        target = 2 * torch.rand((1,), device=device)

        for sample_idx in range(samples_per_target):
            action = agent.act(state, target)

            # Scale the agent's action by 1 / (action_size - 1) to obtain the control input.
            control_signal = action / (action_size - 1)

            next_state = output = pt1_with_delay.calculate(control_signal)

            # The reward is the negated absolute error between the new output and the target.
            reward = torch.abs(next_state - target).neg()
            total_reward += reward

            # Hand the transition to the agent before advancing the state.
            agent.remember(state, action, reward, target)
            state = next_state

    # After the episode: run the configured number of replay updates.
    for _ in range(batch_per_episode):
        agent.replay(batch_size)

    # Log progress and write an intermediate checkpoint on every 50th episode (starting at e == 0).
    if not e % 50:
        print(f"Episode {e + 1}/{episodes}, Total Reward: {total_reward}")
        agent.save(f"{model_name}_{e}.pth")

# Final checkpoint once all episodes are done.
agent.save(f"{model_name}.pth")
print("End")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import ControlSystem
from QNetwork import *

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---- Run configuration ------------------------------------------------------
model_name = "qnetwork_11act"  # prefix of every checkpoint file written below
episodes = 2000
samples_per_target = 200   # simulation steps spent on each target
targets_per_episode = 5    # targets drawn per episode
batch_size = 16            # passed to agent.replay()
batch_per_episode = 100    # agent.replay() calls after each episode

action_size = 11
agent = QLearningAgent(
    action_size=action_size,
    n=100,
    gamma=0.95,
    epsilon=0.3,
    epsilon_decay=0.99999,
    epsilon_min=0.08,
    learning_rate=0.00005 * batch_size,
    warmup_steps=3000,
    learning_rate_decay=0.99998,
    stored_episodes=15,
    samples_per_episode=targets_per_episode * samples_per_target,
)

# ---- Training loop ----------------------------------------------------------
best_reward = -1000000
for e in range(episodes):
    # A new PT1 system is constructed for every episode, which resets the environment.
    pt1_with_delay = ControlSystem.PT1(K=2, T=5, delta_t=0.1, delay=2)
    total_reward = 0
    state = torch.tensor([pt1_with_delay.y_prev], device=device)

    for target_idx in range(targets_per_episode):
        # Draw a new target uniformly from [0, 2).
        target = 2 * torch.rand((1,), device=device)

        for sample_idx in range(samples_per_target):
            action = agent.act(state, target)

            # Scale the agent's action by 1 / (action_size - 1) to obtain the control input.
            control_signal = action / (action_size - 1)

            next_state = output = pt1_with_delay.calculate(control_signal)

            # The reward is the negated absolute error between the new output and the target.
            reward = torch.abs(next_state - target).neg()
            total_reward += reward

            # Hand the transition to the agent before advancing the state.
            agent.remember(state, action, reward, target)
            state = next_state

    # After the episode: run the configured number of replay updates.
    for _ in range(batch_per_episode):
        agent.replay(batch_size)

    # Log progress and write an intermediate checkpoint on every 50th episode (starting at e == 0).
    if not e % 50:
        print(f"Episode {e + 1}/{episodes}, Total Reward: {total_reward}")
        agent.save(f"{model_name}_{e}.pth")

# Final checkpoint once all episodes are done.
agent.save(f"{model_name}.pth")
print("End")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import ControlSystem
from QNetwork import *
import time

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# ---- Run configuration ------------------------------------------------------
model_name = "qnetwork_soft"  # prefix of every checkpoint file written below

episodes = 1300
# NOTE: despite its name, this is the number of simulation steps run for EACH
# target (it bounds the innermost loop below), not for the whole episode.
samples_per_episode = 200
targets_per_episode = 4    # targets drawn per episode
batch_size = 16            # passed to agent.replay()
batch_per_episode = 100    # agent.replay() calls after each episode

agent = QLearningAgentSoft(
    action_search_batch=32,
    gamma=0.988,
    temperature=0.016,
    average_weight=0.5,
    learning_rate=0.00003 * batch_size,
    warmup_steps=1000,
    learning_rate_decay=0.99998,
)

# ---- Training loop ----------------------------------------------------------
for e in range(episodes):
    # A new PT1 system is constructed for every episode, which resets the environment.
    pt1_with_delay = ControlSystem.PT1(K=2, T=5, delta_t=0.1, delay=2)
    total_reward = 0
    state = torch.tensor([pt1_with_delay.y_prev], device=device)

    for target_idx in range(targets_per_episode):
        # Draw a new target from Python's RNG, uniform over [0, 2].
        target = torch.tensor(
            [random.uniform(0, 2)],
            device=device,
        )

        for sample_idx in range(samples_per_episode):
            # act() returns three values; only the first one is fed to the plant,
            # u and s are not used anywhere in this cell.
            control_signal, u, s = agent.act(state, target)
            next_state = pt1_with_delay.calculate(control_signal)

            # The reward is the negated absolute error between the new output and the target.
            reward = torch.abs(next_state - target).neg()
            total_reward += reward

            # Hand the outcome of this step to the agent before advancing the state.
            agent.remember(reward, next_state, target)
            state = next_state

    # After the episode: run the configured number of replay updates.
    for _ in range(batch_per_episode):
        agent.replay(batch_size)

    # Log progress and write an intermediate checkpoint on every 50th episode (starting at e == 0).
    if not e % 50:
        print(f"Episode {e + 1}/{episodes}, Total Reward: {total_reward}")
        agent.save(f"{model_name}_{e}.pth")

# Final checkpoint once all episodes are done.
agent.save(f"{model_name}.pth")
print("End")

In [None]:
from pathlib import Path
import matplotlib.pyplot as plt
import torch
import numpy as np
import ControlSystem
from QNetwork import *
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Checkpoint selection ---------------------------------------------------
# Every checkpoint matching "qnetwork_11act*.pth" in the working directory is
# loaded into the agent and simulated in closed loop against a fixed list of
# setpoints. The checkpoint with the lowest mean absolute tracking error,
# averaged over all setpoints, is printed at the end.

action_size = 11

# PT1 parameters (identical to the values used by the training cells).
K = 2
T = 5
delay = 2
delta_t = 0.1

simulation_time = 30
# Round before converting: int() truncates, so a quotient that lands just below
# an integer because of floating-point error would silently lose one step.
steps = int(round(simulation_time / delta_t))
time_values = torch.linspace(0.0, simulation_time, steps)
setpoints = [0.2, 0.5, 0.7, 1.2, 1.5, 1.9]
rows, cols = 3, 2

# Evaluation-only agent: exploration and learning rate are zeroed and replay()
# is never called in this cell. epsilon_min is set to 0.0 as well so that no
# exploration floor can re-introduce random actions while checkpoints are being
# compared (how QLearningAgent applies the floor is not visible from here).
agent = QLearningAgent(action_size=action_size, n=100, gamma=0.988,
                        epsilon=0.00, epsilon_decay=0.99999, epsilon_min=0.0,
                        #temperature=0.02, temperature_decay=0.99999, temperature_min=0.08,
                        learning_rate=0.0, warmup_steps=0, learning_rate_decay=0.0,
                        stored_episodes=1, samples_per_episode=steps)


best = ""
best_error = torch.tensor((10000.0,), device=device)

# NOTE(review): this pattern also matches the "qnetwork_11act_boltzmann*.pth"
# files written by the Boltzmann training cell; presumably both agents share a
# compatible checkpoint format - confirm.
# Sorting makes the evaluation order, and therefore tie-breaking, deterministic;
# Path.glob() itself yields entries in arbitrary order.
checkpoint_paths = sorted(Path("./").glob("qnetwork_11act*.pth"))
if not checkpoint_paths:
    print("No checkpoints matching qnetwork_11act*.pth were found")

for path in checkpoint_paths:
    overall_mean_error = 0.0
    agent.load(path)
    for setpoint_value in setpoints:
        # Fresh plant for every setpoint so the runs do not influence each other.
        pt1_with_delay = ControlSystem.PT1(K=K, T=T, delta_t=delta_t, delay=delay)
        # Wrap the initial output in a 1-element tensor on `device`, exactly as
        # the training cells do, so the agent sees the same kind of input.
        state = torch.tensor([pt1_with_delay.y_prev], device=device)
        setpoint = torch.tensor([setpoint_value], device=device)
        output_values = torch.zeros([steps], device=device)
        control_values = torch.zeros([steps], device=device)

        for step in range(steps):
            action = agent.act(state, setpoint)

            # Scale the agent's action by 1 / (action_size - 1) to obtain the control input.
            control_signal = action / (action_size - 1)

            output = pt1_with_delay.calculate(control_signal)
            next_state = output

            reward = -torch.abs(next_state - setpoint)
            # The transition is still recorded during evaluation; presumably
            # act() depends on the remembered history - confirm before removing.
            agent.remember(state, action, reward, setpoint)
            output_values[step] = output
            control_values[step] = control_signal

            state = next_state

        # Mean absolute tracking error of this run; each setpoint contributes equally.
        mean_error = torch.mean(torch.abs(output_values - setpoint))
        overall_mean_error += mean_error / len(setpoints)

    # Strict comparison: on a tie the checkpoint evaluated first is kept.
    if overall_mean_error < best_error:
        best = path
        best_error = overall_mean_error

print(best)