In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import gym
from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from ncps.datasets.torch import AtariCloningDataset
from ncps.torch import CfC, CfCCell
from ncps.wirings.wiringsRevised import WiringRevised
import torch
from torch import nn
from typing import Optional, Union
import ncps
from ncps.torch.lstm import LSTMCell

# 定義卷積塊
class ConvBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(4, 64, 5, padding=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, 5, padding=2, stride=2)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 128, 5, padding=2, stride=2)
        self.conv4 = nn.Conv2d(128, 256, 5, padding=2, stride=2)
        self.bn4 = nn.BatchNorm2d(256)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.bn4(self.conv4(x)))
        x = x.mean((-1, -2))  # 全局平均池化
        return x

# 定義卷積CfC模型
class ConvCfC(nn.Module):
    def __init__(self, n_actions, wiring):
        super().__init__()
        self.conv_block = ConvBlock()
        self.rnn = CfC(256, wiring, batch_first=True, proj_size=n_actions)

    def forward(self, x, hx=None):
        batch_size = x.size(0)
        seq_len = x.size(1)
        x = x.view(batch_size * seq_len, *x.shape[2:])
        x = self.conv_block(x)  # 應用卷積塊
        x = x.view(batch_size, seq_len, *x.shape[1:])
        x, hx = self.rnn(x, hx)  # hx是RNN的隱藏狀態
        return x, hx

class WiredCfCCell(nn.Module):
    def __init__(self, input_size, wiring, mode="default"):
        super(WiredCfCCell, self).__init__()
        self.input_size = input_size
        self.wiring = wiring
        self.mode = mode
        self.units = wiring.units

        # 定義其他需要的屬性和層
        self.linear = nn.Linear(input_size, self.units)
        self.output_linear = nn.Linear(self.units, wiring.output_dim if wiring.output_dim else self.units)
        
        # 使用wiring的adjacency_matrix初始化連接
        self.adjacency_matrix = wiring.adjacency_matrix
        self.sensory_adjacency_matrix = wiring.sensory_adjacency_matrix

    def forward(self, x, hx, ts=1.0):
        h = F.relu(self.linear(x) + torch.matmul(hx, self.adjacency_matrix))
        h = F.relu(h + torch.matmul(x, self.sensory_adjacency_matrix))
        if self.mode == "default":
            h = self.output_linear(h)
        return h, h

class CfC(nn.Module):
    def __init__(
        self,
        input_size: Union[int, ncps.wirings.Wiring],
        units,
        proj_size: Optional[int] = None,
        return_sequences: bool = True,
        batch_first: bool = True,
        mixed_memory: bool = False,
        mode: str = "default",
        activation: str = "lecun_tanh",
        backbone_units: Optional[int] = None,
        backbone_layers: Optional[int] = None,
        backbone_dropout: Optional[int] = None,
    ):
        super(CfC, self).__init__()
        self.input_size = input_size
        self.wiring_or_units = units
        self.proj_size = proj_size
        self.batch_first = batch_first
        self.return_sequences = return_sequences

        if isinstance(units, ncps.wirings.Wiring) or isinstance(units, ncps.wirings.wiringsRevised.WiringRevised):
            self.wired_mode = True
            if backbone_units is not None:
                raise ValueError(f"Cannot use backbone_units in wired mode")
            if backbone_layers is not None:
                raise ValueError(f"Cannot use backbone_layers in wired mode")
            if backbone_dropout is not None:
                raise ValueError(f"Cannot use backbone_dropout in wired mode")
            self.wiring = units
            self.state_size = self.wiring.units
            self.output_size = self.wiring.output_dim if self.wiring.output_dim is not None else self.state_size
            self.rnn_cell = WiredCfCCell(
                input_size,
                self.wiring_or_units,
                mode,
            )
        else:
            self.wired_false = True
            backbone_units = 128 if backbone_units is None else backbone_units
            backbone_layers = 1 if backbone_layers is None else backbone_layers
            backbone_dropout = 0.0 if backbone_dropout is None else backbone_dropout
            self.state_size = units
            self.output_size = self.state_size
            self.rnn_cell = CfCCell(
                input_size,
                self.wiring_or_units,
                mode,
                activation,
                backbone_units,
                backbone_layers,
                backbone_dropout,
            )
        self.use_mixed = mixed_memory
        if self.use_mixed:
            self.lstm = LSTMCell(input_size, self.state_size)

        if proj_size is None:
            self.fc = nn.Identity()
        else:
            self.fc = nn.Linear(self.output_size, self.proj_size)

    def forward(self, input, hx=None, timespans=None):
        device = input.device
        is_batched = input.dim() == 3
        batch_dim = 0 if self.batch_first else 1
        seq_dim = 1 if self.batch_first else 0
        if not is_batched:
            input = input.unsqueeze(batch_dim)
            if timespans is not None:
                timespans = timespans.unsqueeze(batch_dim)

        batch_size, seq_len = input.size(batch_dim), input.size(seq_dim)

        if hx is None:
            h_state = torch.zeros((batch_size, self.state_size), device=device)
            c_state = torch.zeros((batch_size, self.state_size), device=device) if self.use_mixed else None
        else:
            if self.use_mixed and isinstance(hx, torch.Tensor):
                raise RuntimeError(
                    "Running a CfC with mixed_memory=True, requires a tuple (h0,c0) to be passed as state (got torch.Tensor instead)"
                )
            h_state, c_state = hx if self.use_mixed else (hx, None)
            if is_batched:
                if h_state.dim() != 2:
                    msg = f"For batched 2-D input, hx and cx should also be 2-D but got ({h_state.dim()}-D) tensor"
                    raise RuntimeError(msg)
            else:
                if h_state.dim() != 1:
                    msg = f"For unbatched 1-D input, hx and cx should also be 1-D but got ({h_state.dim()}-D) tensor"
                    raise RuntimeError(msg)
                h_state = h_state.unsqueeze(0)
                c_state = c_state.unsqueeze(0) if c_state is not None else None

        output_sequence = []
        for t in range(seq_len):
            if self.batch_first:
                inputs = input[:, t]
                ts = 1.0 if timespans is None else timespans[:, t].squeeze()
            else:
                inputs = input[t]
                ts = 1.0 if timespans is None else timespans[t].squeeze()

            if self.use_mixed:
                h_state, c_state = self.lstm(inputs, (h_state, c_state))
            h_out, h_state = self.rnn_cell.forward(inputs, h_state, ts)
            if self.return_sequences:
                output_sequence.append(self.fc(h_out))

        if self.return_sequences:
            stack_dim = 1 if self.batch_first else 0
            readout = torch.stack(output_sequence, dim=stack_dim)
        else:
            readout = self.fc(h_out)
        hx = (h_state, c_state) if self.use_mixed else h_state

        if not is_batched:
            readout = readout.squeeze(batch_dim)
            hx = (h_state[0], c_state[0]) if self.use_mixed else h_state[0]

        return readout, hx

# 初始化環境
env = gym.make("ALE/Breakout-v5")
env = wrap_deepmind(env)

# 準備數據集
train_ds = AtariCloningDataset("breakout", split="train")
val_ds = AtariCloningDataset("breakout", split="val")
trainloader = DataLoader(train_ds, batch_size=32, num_workers=4, shuffle=True)
valloader = DataLoader(val_ds, batch_size=32, num_workers=4)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
  logger.warn(


In [None]:
# 訓練和評估函數
def train_one_epoch(model, criterion, optimizer, trainloader):
    running_loss = 0.0
    pbar = tqdm(total=len(trainloader))
    model.train()
    device = next(model.parameters()).device  # 獲取模型所在設備
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)  # 將數據移動到與模型相同的設備
        labels = labels.to(device)

        # 梯度清零
        optimizer.zero_grad()
        # 前向傳播 + 反向傳播 + 優化
        outputs, hx = model(inputs)
        labels = labels.view(-1, *labels.shape[2:])  # 展平
        outputs = outputs.reshape(-1, *outputs.shape[2:])  # 展平
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # 打印統計數據
        running_loss += loss.item()
        pbar.set_description(f"loss={running_loss / (i + 1):0.4g}")
        pbar.update(1)
    pbar.close()

def eval(model, valloader):
    losses, accs = [], []
    model.eval()
    device = next(model.parameters()).device  # 獲取模型所在設備
    with torch.no_grad():
        for inputs, labels in valloader:
            inputs = inputs.to(device)  # 將數據移動到與模型相同的設備
            labels = labels.to(device)

            outputs, _ = model(inputs)
            outputs = outputs.reshape(-1, *outputs.shape[2:])  # 展平
            labels = labels.view(-1, *labels.shape[2:])  # 展平
            loss = criterion(outputs, labels)
            acc = (outputs.argmax(-1) == labels).float().mean()
            losses.append(loss.item())
            accs.append(acc.item())
    return np.mean(losses), np.mean(accs)

# 設置設備、模型、損失函數和優化器
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
wiring = WiringRevised(units=256)  # 示例連接
model = ConvCfC(n_actions=env.action_space.n, wiring=wiring).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# 訓練循環
for epoch in range(20):  # 數據集上的多次循環
    train_one_epoch(model, criterion, optimizer, trainloader)
    val_loss, val_acc = eval(model, valloader)
    print(f"Epoch {epoch+1}, val_loss={val_loss:0.4g}, val_acc={100*val_acc:0.2f}%")

# 保存日誌到txt文件
with open('training_log.txt', 'w') as f:
    for epoch in range(20):
        train_one_epoch(model, criterion, optimizer, trainloader)
        val_loss, val_acc = eval(model, valloader)
        log_message = f"Epoch {epoch+1}, val_loss={val_loss:0.4g}, val_acc={100*val_acc:0.2f}%\n"
        print(log_message)
        f.write(log_message)

In [None]:
# 保存模型
torch.save(model.state_dict(), 'model_WiringRevised.pt')
print("Model saved")

In [7]:
def run_closed_loop(model, env, num_episodes=None):
    obs = env.reset()  # 重置环境并获取初始观察值
    if isinstance(obs, tuple):
        obs = obs[0]  # 如果 obs 是 tuple，取第一个元素
    device = next(model.parameters()).device  # 获取模型所在设备
    hx = None  # RNN 的隐藏状态
    returns = []
    total_reward = 0
    with torch.no_grad():
        while True:
            # 打印原始 obs 形状（调试用）
            obs = np.asarray(obs)
            if len(obs.shape) == 3:  # 确保 obs 是 3 维
                obs = np.transpose(obs, [2, 0, 1]).astype(np.float32) / 255.0  # 转置并标准化
            else:
                raise ValueError(f"Unexpected obs shape: {obs.shape}")
            obs = torch.from_numpy(obs).unsqueeze(0).unsqueeze(0).to(device)  # 转换为 Tensor 并移动到设备
            pred, hx = model(obs, hx)  # 模型前向传播，获取预测和新的隐藏状态
            action = pred.squeeze(0).squeeze(0).argmax().item()  # 选择预测动作
            
            result = env.step(action)  # 执行动作
            # 打印结果的回报值（调试用）
            if len(result) == 4:
                obs, r, done, info = result
            else:
                obs, r, done, info, _ = result
            
            if isinstance(obs, tuple):
                obs = obs[0]  # 如果 obs 是 tuple，取第一个元素
            total_reward += r  # 累计回报
            if done:
                obs = env.reset()  # 重置环境
                if isinstance(obs, tuple):
                    obs = obs[0]  # 如果 obs 是 tuple，取第一个元素
                hx = None  # 重置 RNN 的隐藏状态
                returns.append(total_reward)  # 保存总回报
                total_reward = 0
                if num_episodes is not None:
                    num_episodes = num_episodes - 1
                    if num_episodes == 0:
                        return returns  # 返回所有回报


In [11]:
# 确定要使用的设备，优先使用 GPU
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

# 初始化模型并移动到设备上
loaded_model = ConvCfC(n_actions=env.action_space.n, wiring=wiring).to(device)

# 加载预训练模型的状态字典
loaded_model.load_state_dict(torch.load('model_WiringRevised.pt'))
print("Model loaded successfully")  # 模型加载成功

# 确保模型处于评估模式
loaded_model.eval()

# 再次运行闭环测试
returns = run_closed_loop(loaded_model, env, num_episodes=10)
print(f"Mean return {np.mean(returns)} (n={len(returns)})")  # 打印平均回报


Model loaded successfully
Mean return 63.0 (n=10)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import gym
from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from ncps.datasets.torch import AtariCloningDataset
from ncps.torch import CfC, CfCCell
from ncps.wirings.wirings import Wiring
import torch
from torch import nn
from typing import Optional, Union
import ncps
from ncps.torch.lstm import LSTMCell 

# ConvBlock definition
class ConvBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(4, 64, 5, padding=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, 5, padding=2, stride=2)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 128, 5, padding=2, stride=2)
        self.conv4 = nn.Conv2d(128, 256, 5, padding=2, stride=2)
        self.bn4 = nn.BatchNorm2d(256)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.bn4(self.conv4(x)))
        x = x.mean((-1, -2))  # Global average pooling
        return x

class ConvCfC(nn.Module):
    def __init__(self, n_actions):
        super().__init__()
        self.conv_block = ConvBlock()
        self.rnn = CfC(256, 64, batch_first=True, proj_size=n_actions)

    def forward(self, x, hx=None):
        batch_size = x.size(0)
        seq_len = x.size(1)
        # Merge time and batch dimension into a single one (because the Conv layers require this)
        x = x.view(batch_size * seq_len, *x.shape[2:])
        x = self.conv_block(x)  # apply conv block to merged data
        # Separate time and batch dimension again
        x = x.view(batch_size, seq_len, *x.shape[1:])
        x, hx = self.rnn(x, hx)  # hx is the hidden state of the RNN
        return x, hx

# Initialize environment
env = gym.make("ALE/Breakout-v5")
env = wrap_deepmind(env)

# Prepare dataset
train_ds = AtariCloningDataset("breakout", split="train")
val_ds = AtariCloningDataset("breakout", split="val")
trainloader = DataLoader(train_ds, batch_size=32, num_workers=4, shuffle=True)
valloader = DataLoader(val_ds, batch_size=32, num_workers=4)

# Training and evaluation functions
def train_one_epoch(model, criterion, optimizer, trainloader):
    running_loss = 0.0
    pbar = tqdm(total=len(trainloader))
    model.train()
    device = next(model.parameters()).device  # get device the model is located on
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)  # move data to same device as the model
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs, hx = model(inputs)
        labels = labels.view(-1, *labels.shape[2:])  # flatten
        outputs = outputs.reshape(-1, *outputs.shape[2:])  # flatten
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        pbar.set_description(f"loss={running_loss / (i + 1):0.4g}")
        pbar.update(1)
    pbar.close()

def eval(model, valloader):
    losses, accs = [], []
    model.eval()
    device = next(model.parameters()).device  # get device the model is located on
    with torch.no_grad():
        for inputs, labels in valloader:
            inputs = inputs.to(device)  # move data to same device as the model
            labels = labels.to(device)

            outputs, _ = model(inputs)
            outputs = outputs.reshape(-1, *outputs.shape[2:])  # flatten
            labels = labels.view(-1, *labels.shape[2:])  # flatten
            loss = criterion(outputs, labels)
            acc = (outputs.argmax(-1) == labels).float().mean()
            losses.append(loss.item())
            accs.append(acc.item())
    return np.mean(losses), np.mean(accs)

# Visualize Atari game and play endlessly

def run_closed_loop(model, env, num_episodes=None):
    obs = env.reset()
    device = next(model.parameters()).device
    hx = None  # Hidden state of the RNN
    returns = []
    total_reward = 0
    with torch.no_grad():
        while True:
            # PyTorch require channel first images -> transpose data
            obs = np.transpose(obs, [2, 0, 1]).astype(np.float32) / 255.0
            # add batch and time dimension (with a single element in each)
            obs = torch.from_numpy(obs).unsqueeze(0).unsqueeze(0).to(device)
            pred, hx = model(obs, hx)
            # remove time and batch dimension -> then argmax
            action = pred.squeeze(0).squeeze(0).argmax().item()
            obs, r, done, _ = env.step(action)
            total_reward += r
            if done:
                obs = env.reset()
                hx = None  # Reset hidden state of the RNN
                returns.append(total_reward)
                total_reward = 0
                if num_episodes is not None:
                    # Count down the number of episodes
                    num_episodes = num_episodes - 1
                    if num_episodes == 0:
                        return returns




# Device setup
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
model = ConvCfC(n_actions=env.action_space.n).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# 打開文件以追加模式寫入日誌
log_file = open("training_log_wiring.txt", "a")

for epoch in range(20):  # loop over the dataset multiple times
    
    train_one_epoch(model, criterion, optimizer, trainloader)
    # Evaluate model on the validation set
    val_loss, val_acc = eval(model, valloader)
    
    log_message = f"Epoch {epoch+1}, val_loss={val_loss:0.4g}, val_acc={100*val_acc:0.2f}%\n"

    print(f"Epoch {epoch+1}, val_loss={val_loss:0.4g}, val_acc={100*val_acc:0.2f}%")
    # Apply model in closed-loop environment
    
    # 打印到控制台
    print(log_message)
    
    # 將日誌寫入文件
    log_file.write(log_message)

# 關閉文件
log_file.close()
    


In [1]:
def run_closed_loop(model, env, num_episodes=None):
    obs = env.reset()
    if isinstance(obs, tuple):
        obs = obs[0]
    device = next(model.parameters()).device
    hx = None  # Hidden state of the RNN
    returns = []
    total_reward = 0
    with torch.no_grad():
        while True:
            print(f"Original obs shape: {obs.shape}")  # 打印原始 obs 形狀
            obs = np.asarray(obs)
            if len(obs.shape) == 3:  # 確保 obs 是 3 維
                obs = np.transpose(obs, [2, 0, 1]).astype(np.float32) / 255.0
            else:
                raise ValueError(f"Unexpected obs shape: {obs.shape}")
            obs = torch.from_numpy(obs).unsqueeze(0).unsqueeze(0).to(device)
            pred, hx = model(obs, hx)
            action = pred.squeeze(0).squeeze(0).argmax().item()
            
            result = env.step(action)
            if len(result) == 4:
                obs, r, done, info = result
            else:
                obs, r, done, info, _ = result
            
            if isinstance(obs, tuple):
                obs = obs[0]
            total_reward += r
            if done:
                obs = env.reset()
                if isinstance(obs, tuple):
                    obs = obs[0]
                hx = None  # Reset hidden state of the RNN
                returns.append(total_reward)
                total_reward = 0
                if num_episodes is not None:
                    num_episodes = num_episodes - 1
                    if num_episodes == 0:
                        return returns


returns = run_closed_loop(model, env, num_episodes=20)
print(f"Mean return {np.mean(returns)} (n={len(returns)})")

# 保存模型
torch.save(model.state_dict(), 'cfc_model_wiring.pt')
print("Model saved as cfc_model.pt")


NameError: name 'model' is not defined

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import gym
from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from ncps.datasets.torch import AtariCloningDataset
from ncps.torch import CfC, CfCCell
from ncps.wirings.wirings import Wiring
import torch
from torch import nn
from typing import Optional, Union
import ncps
from ncps.torch.lstm import LSTMCell 

# ConvBlock definition
class ConvBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(4, 64, 5, padding=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, 5, padding=2, stride=2)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 128, 5, padding=2, stride=2)
        self.conv4 = nn.Conv2d(128, 256, 5, padding=2, stride=2)
        self.bn4 = nn.BatchNorm2d(256)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.bn4(self.conv4(x)))
        x = x.mean((-1, -2))  # Global average pooling
        return x

class ConvCfC(nn.Module):
    def __init__(self, n_actions):
        super().__init__()
        self.conv_block = ConvBlock()
        self.rnn = CfC(256, 64, batch_first=True, proj_size=n_actions)

    def forward(self, x, hx=None):
        batch_size = x.size(0)
        seq_len = x.size(1)
        # Merge time and batch dimension into a single one (because the Conv layers require this)
        x = x.view(batch_size * seq_len, *x.shape[2:])
        x = self.conv_block(x)  # apply conv block to merged data
        # Separate time and batch dimension again
        x = x.view(batch_size, seq_len, *x.shape[1:])
        x, hx = self.rnn(x, hx)  # hx is the hidden state of the RNN
        return x, hx
def run_closed_loop(model, env, num_episodes=None):
    obs = env.reset()
    if isinstance(obs, tuple):
        obs = obs[0]
    device = next(model.parameters()).device
    hx = None  # Hidden state of the RNN
    returns = []
    total_reward = 0
    with torch.no_grad():
        while True:
            #print(f"Original obs shape: {obs.shape}")  # 打印原始 obs 形狀
            obs = np.asarray(obs)
            if len(obs.shape) == 3:  # 確保 obs 是 3 維
                obs = np.transpose(obs, [2, 0, 1]).astype(np.float32) / 255.0
            else:
                raise ValueError(f"Unexpected obs shape: {obs.shape}")
            obs = torch.from_numpy(obs).unsqueeze(0).unsqueeze(0).to(device)
            pred, hx = model(obs, hx)
            action = pred.squeeze(0).squeeze(0).argmax().item()
            
            result = env.step(action)
            print(result[1])
            if len(result) == 4:
                obs, r, done, info = result
            else:
                obs, r, done, info, _ = result
            
            if isinstance(obs, tuple):
                obs = obs[0]
            total_reward += r
            if done:
                obs = env.reset()
                if isinstance(obs, tuple):
                    obs = obs[0]
                hx = None  # Reset hidden state of the RNN
                returns.append(total_reward)
                total_reward = 0
                if num_episodes is not None:
                    num_episodes = num_episodes - 1
                    if num_episodes == 0:
                        return returns


device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
loaded_model = ConvCfC(n_actions=env.action_space.n,).to(device)
loaded_model.load_state_dict(torch.load('cfc_model_wiring.pt'))
print("Model loaded from cfc_model.pt")

# 確保模型處於評估模式
loaded_model.eval()

# 再次運行閉環測試
returns = run_closed_loop(loaded_model, env, num_episodes=500)
print(f"Mean return {returns} (n={len(returns)})")
print(np.mean(returns))

Model loaded from cfc_model.pt


  if not isinstance(terminated, (bool, np.bool8)):


0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
