# Benchmark/evaluate trained agents

## run-20220812_151220-14657fa9

Random samples

In [45]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v12"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Train", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220812_151220-14657fa9/files/agent_1024000steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
n_actuators = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info[0]["initError"])
    finalErrors.append(info[0]["Error"])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

****************************** Episode: 1 ******************************
Initial shape from SolutionInputDP24
Target shape from SolutionInputDP26
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Reconnect failed - remote exit again
Wait and try to reconnect again - attempt 1
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 2
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
Initial shape from SolutionInputDP15
Target shape from SolutionInputDP20
Episode 0 reward: [0.95151471]
****************************** Episode: 2 ******************************
Initial shape from SolutionInputDP04
Target shape from SolutionInputDP12
Initial shape from SolutionInputDP21
Target shape from SolutionInputDP34
Episode 1 reward: [0.82111646]
****************************** Episode: 3 ********

Previous results

In [51]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220812_151220-14657fa9/files/agent_1024000steps.pt", map_location=device))

# Initialize variables
initErrors = []
finalErrors = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v12"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info[0]["initError"])
    finalErrors.append(info[0]["Error"])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

****************************** File: SolutionInputDP41.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Reconnect failed - remote exit again
Wait and try to reconnect again - attempt 1
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 2
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
Episode 99 reward: [0.91860027]
****************************** File: SolutionInputDP42.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Reconnect failed - remote exit again
Wait and try to reconnect again - attempt 1
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnecte

## run-20220822_015612-3cslv0ll

In [4]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v12"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Train", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220822_015612-3cslv0ll/files/agent_16382976steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
n_actuators = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  deprecation(
  deprecation(
  deprecation(


****************************** Episode: 1 ******************************


  logger.warn(
  logger.warn(
  logger.warn(


Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Initial shape from SolutionInputDP39
Target shape from SolutionInputDP16


  logger.deprecation(


Initial shape from SolutionInputDP35
Target shape from SolutionInputDP30
Episode 0 reward: [0.95935654]
****************************** Episode: 2 ******************************
Initial shape from SolutionInputDP28
Target shape from SolutionInputDP33
Initial shape from SolutionInputDP04
Target shape from SolutionInputDP12
Episode 1 reward: [0.98011491]
****************************** Episode: 3 ******************************
Initial shape from SolutionInputDP36
Target shape from SolutionInputDP14
Initial shape from SolutionInputDP13
Target shape from SolutionInputDP38
Episode 2 reward: [0.95133899]
****************************** Episode: 4 ******************************
Initial shape from SolutionInputDP35
Target shape from SolutionInputDP37
Initial shape from SolutionInputDP08
Target shape from SolutionInputDP09
Episode 3 reward: [0.98073755]
****************************** Episode: 5 ******************************
Initial shape from SolutionInputDP02
Target shape from SolutionInputDP05


Previous results

In [6]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220822_015612-3cslv0ll/files/agent_16382976steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v12"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

  deprecation(
  deprecation(
  deprecation(


****************************** File: SolutionInputDP41.inp ******************************


  logger.warn(
  logger.warn(
  logger.warn(


Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors


  logger.deprecation(


Episode 99 reward: [0.9478183]
****************************** File: SolutionInputDP42.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
Episode 99 reward: [0.88967005]
****************************** File: SolutionInputDP43.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
Episode 99 reward: [0.96434363]
****************************** File: SolutionInputDP44.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Episode 99 reward: [0.97140936]
****************************** File: SolutionInputDP45.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try

## run-20220823_195737-15u36mp0

In [1]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v12"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 8, "Train", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220823_195737-15u36mp0/files/agent_16382976steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
n_actuators = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  """Start MAPDL locally in gRPC mode.


LicenseServerConnectionError: 2022/08/24 10:06:38    DENIED              ansys                           22.1 (2021.1108)             1/0/0/0                 1/1/1/1   19968:FEAT_ANSYS:tlutz@DESKTOP-H0E6H7J:winx64              6892:192.168.0.204  
		Request name ansys does not exist in the licensing pool.
		Cannot connect to license server system.
		 The license server manager (lmgrd) has not been started yet,
		 the wrong port@host or license file is being used, or the
		 port or hostname in the license file has been changed.
		Feature:       ansys
		Server name:   198.82.162.15
		License path:  1055@ansys.software.vt.edu;
		FlexNet Licensing error:-15,10032

2022/08/24 10:06:38    DENIED              FEAT_ANSYS                      22.1 (2021.1108)             1/0/0/0                 1/1/1/1   19968:FEAT_ANSYS:tlutz@DESKTOP-H0E6H7J:winx64              6892:192.168.0.204  
		Failover feature 'Ansys Mechanical Enterprise' is not available.
		Request name ansys does not exist in the licensing pool.
		Cannot connect to license server system.
		 The license server manager (lmgrd) has not been started yet,
		 the wrong port@host or license file is being used, or the
		 port or hostname in the license file has been changed.
		Feature:       ansys
		Server name:   198.82.162.15
		License path:  1055@ansys.software.vt.edu;
		FlexNet Licensing error:-15,10032

Previous results

In [1]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v12"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 8, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 8).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220823_195737-15u36mp0/files/agent_16382976steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v12"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("File:", file1, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

  deprecation(
  deprecation(
  deprecation(
Exception ignored in: <function VectorEnv.__del__ at 0x000002333F676170>
Traceback (most recent call last):
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\vector_env.py", line 294, in __del__
    self.close()
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\vector_env.py", line 221, in close
    self.close_extras(**kwargs)
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in close_extras
    [env.close() for env in self.envs]
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in <listcomp>
    [env.close() for env in self.envs]
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Python\venv\pyANSYS\li

****************************** File: SolutionInputDP41.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Reconnect failed - remote exit again
Wait and try to reconnect again - attempt 1
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
Sucessfully reconnected to Ansys on attempt 2
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready


  logger.deprecation(


File: C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/SolutionInputDP41.inp reward: [0.92973315]
****************************** File: SolutionInputDP42.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
File: C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/SolutionInputDP42.inp reward: [0.9052981]
****************************** File: SolutionInputDP43.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:       

## run-20220823_110710-191kxpmg

In [None]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v12"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 12, "Train", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 12).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220823_110710-191kxpmg/files/agent_16382976steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
n_actuators = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  """Start MAPDL locally in gRPC mode.


LicenseServerConnectionError: 2022/08/24 10:06:38    DENIED              ansys                           22.1 (2021.1108)             1/0/0/0                 1/1/1/1   19968:FEAT_ANSYS:tlutz@DESKTOP-H0E6H7J:winx64              6892:192.168.0.204  
		Request name ansys does not exist in the licensing pool.
		Cannot connect to license server system.
		 The license server manager (lmgrd) has not been started yet,
		 the wrong port@host or license file is being used, or the
		 port or hostname in the license file has been changed.
		Feature:       ansys
		Server name:   198.82.162.15
		License path:  1055@ansys.software.vt.edu;
		FlexNet Licensing error:-15,10032

2022/08/24 10:06:38    DENIED              FEAT_ANSYS                      22.1 (2021.1108)             1/0/0/0                 1/1/1/1   19968:FEAT_ANSYS:tlutz@DESKTOP-H0E6H7J:winx64              6892:192.168.0.204  
		Failover feature 'Ansys Mechanical Enterprise' is not available.
		Request name ansys does not exist in the licensing pool.
		Cannot connect to license server system.
		 The license server manager (lmgrd) has not been started yet,
		 the wrong port@host or license file is being used, or the
		 port or hostname in the license file has been changed.
		Feature:       ansys
		Server name:   198.82.162.15
		License path:  1055@ansys.software.vt.edu;
		FlexNet Licensing error:-15,10032

Previous results

In [None]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v12"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 12).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220823_110710-191kxpmg/files/agent_16382976steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v12"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 12, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("File:", file1, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

  deprecation(
  deprecation(
  deprecation(
Exception ignored in: <function VectorEnv.__del__ at 0x00000236F1C020E0>
Traceback (most recent call last):
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\vector_env.py", line 294, in __del__
    self.close()
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\vector_env.py", line 221, in close
    self.close_extras(**kwargs)
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in close_extras
    [env.close() for env in self.envs]
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in <listcomp>
    [env.close() for env in self.envs]
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Python\venv\pyANSYS\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Python\venv\pyANSYS\li

****************************** File: SolutionInputDP41.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished


## run-20220906_113755-i6dt2th4

Surrogate training data

In [2]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v12"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Surrogate", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220906_113755-i6dt2th4/files/agent_16382976steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    maxForces.append(np.max(np.abs(envs[0].forces)))
    maxDevs.append(info["maxDev"][0])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

FileNotFoundError: [Errno 2] No such file or directory: './wandb/run-20220906_113755-i6dt2th4/files/agent_16382976steps.pt'

Previous results (benchmark!)

In [1]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v12"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 8, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20220906_113755-i6dt2th4/files/agent_16382976steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v12"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("File:", file1, "reward:", episodeReward)
    initErrors.append(info["initError"][0])
    finalErrors.append(info["Error"][0])
    maxDevs.append(info["maxDev"][0])
    maxForces.append(np.max(np.abs(info["Forces"][0])))
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

  logger.warn(


Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors


  deprecation(
  deprecation(
  deprecation(


****************************** File: SolutionInputDP41.inp ******************************


  logger.warn(
  logger.warn(
  logger.warn(


Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready


  logger.deprecation(


File: C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/SolutionInputDP41.inp reward: [0.95084065]
****************************** File: SolutionInputDP42.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
File: C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/SolutionInputDP42.inp reward: [0.91269352]
****************************** File: SolutionInputDP43.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Research
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 6 processors
File: C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/SolutionInputDP43.inp reward: [0.96105648]
******

In [None]:
print(np.mean(finalErrors[0:-1]))

## run 1rh99fva

Previous results (benchmark!)

In [1]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v12"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 8, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-202210xx-1rh99fva/files/agent_16383936steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v12"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Reward:", episodeReward)
    print("Final Error:", info[0]["Error"])
    initErrors.append(info[0]["initError"])
    finalErrors.append(info[0]["Error"])
    maxDevs.append(info[0]["maxDev"])
    maxForces.append(np.max(np.abs(info[0]["Forces"])))
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

KeyboardInterrupt: 

**********************************************************
Initial error (mean) = 0.369
Initial error (median) = 0.373
Initial error (stdev) = 0.193
Initial error (max) = 0.735
**********************************************************
Final error (mean) = 0.014
Final error (median) = 0.014
Final error (stdev) = 0.004
Final error (max) = 0.026
**********************************************************
Max Deviation (mean) = 0.038
Max Deviation (median) = 0.035
Max Deviation (stdev) = 0.010
Max Deviation (max) = 0.070
**********************************************************
Max Force (mean) = 192.650
Max Force (median) = 195.592
Max Force (stdev) = 62.441
Max Force (max) = 315.294
**********************************************************
Episode Rewards (mean) = 0.946
Episode Rewards (median) = 0.961
Episode Rewards (stdev) = 0.040


In [8]:
print(finalErrors)

[0.0298507100599101, 0.017254446530365584, 0.021615704528441883, 0.012942392856088397, 0.012800510961419985, 0.014857135124434255, 0.01998216653345925, 0.01642131162053982, 0.02601855027292011, 0.018140539986531385, 0.019549890548037456, 0.015642428588348228, 0.023637804940378884, 0.022819586410309598, 0.03136077142805789, 0.01513990791904582, 0.025504290414473225, 0.02225182094722002, 0.020857447868979428, 0.01606911742928942, 0.027204796655699696, 0.03848169485582721, 0.020343618576019556, 0.02110250492627685, 0.022595118497838622, 0.02257560536889411, 0.011467658426140413, 0.008720533973174092, 0.01513990791904582, 0.015944365237115524, 0.016638273395600433, 0.013802727491867976, 0.0728753895407129, 0.020047359780752224, 0.01832969143647896, 0.011862521635073658, 0.01712280019619034, 0.019145937629995308, 0.0186856890498836, 0.016603520799107464, 0.008433037214234615, 0.014971371137607305, 0.026981450051999062, 0.026491180321745075, 0.01841226591987247, 0.012875062665836644, 0.01520

Surrogate training data

In [26]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v12"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Surrogate", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-202210xx-1rh99fva/files/agent_16383936steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files

for i in range(100):
    # print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for i in range(100):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    # print('_'*30)
    # print("Episodic Reward:", episodeReward)
    # print("Final Error:", min(errors))
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  logger.warn(
  deprecation(
  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


**********************************************************
Initial error (mean) = 0.733
Initial error (median) = 0.668
Initial error (stdev) = 0.481
Initial error (max) = 2.161
**********************************************************
Final error (mean) = 0.008
Final error (median) = 0.008
Final error (stdev) = 0.001
Final error (max) = 0.011
**********************************************************
Max Deviation (mean) = 0.020
Max Deviation (median) = 0.020
Max Deviation (stdev) = 0.006
Max Deviation (max) = 0.034
**********************************************************
Max Force (mean) = 44.503
Max Force (median) = 0.277
Max Force (stdev) = 70.952
Max Force (max) = 414.716
**********************************************************
Episode Rewards (mean) = 95.397
Episode Rewards (median) = 95.410
Episode Rewards (stdev) = 0.442


**********************************************************
Initial error (mean) = 0.751
Initial error (median) = 0.729
Initial error (stdev) = 0.486
Initial error (max) = 2.120
**********************************************************
Final error (mean) = 0.021
Final error (median) = 0.020
Final error (stdev) = 0.011
Final error (max) = 0.073
**********************************************************
Max Deviation (mean) = 0.051
Max Deviation (median) = 0.047
Max Deviation (stdev) = 0.021
Max Deviation (max) = 0.151
**********************************************************
Max Force (mean) = 291.501
Max Force (median) = 281.630
Max Force (stdev) = 93.408
Max Force (max) = 494.694
**********************************************************
Episode Rewards (mean) = 0.950
Episode Rewards (median) = 0.970
Episode Rewards (stdev) = 0.059

## run 1rw7aiak

Previous results (benchmark!)

In [2]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v22"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 8, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230202_201959-1rw7aiak/files/agent_16383936steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v22"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Reward:", episodeReward)
    print("Final Error:", info["Error"])
    initErrors.append(info["initError"])
    finalErrors.append(info["Error"])
    maxDevs.append(info["maxDev"])
    maxForces.append(np.max(np.abs(info["Forces"])))
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

Exception ignored in: <function VectorEnv.__del__ at 0x0000027DE482A8C0>
Traceback (most recent call last):
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\vector_env.py", line 294, in __del__
    self.close()
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\vector_env.py", line 221, in close
    self.close_extras(**kwargs)
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in close_extras
    [env.close() for env in self.envs]
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in <listcomp>
    [env.close() for env in self.envs]
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\core.py", line 435, in close
    ret

Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
****************************** File: SolutionInputDP41.inp ******************************
Reward: [0.96401438]
Final Error: [0.01075947]
****************************** File: SolutionInputDP42.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Reward: [0.93329732]
Final Error: [0.01060037]
****************************** File: SolutionInputDP43.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Reward: [0.97416241]
Final Error: [0.0145414]
****************************** File: SolutionInputDP44.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Teac

**********************************************************
Initial error (mean) = 0.467

Initial error (median) = 0.482

Initial error (stdev) = 0.251

Initial error (max) = 0.923

**********************************************************
Final error (mean) = 0.013

Final error (median) = 0.011

Final error (stdev) = 0.005

Final error (max) = 0.028

**********************************************************
Max Deviation (mean) = 0.042

Max Deviation (median) = 0.041

Max Deviation (stdev) = 0.011

Max Deviation (max) = 0.073
**********************************************************
Max Force (mean) = 39.613

Max Force (median) = 0.296

Max Force (stdev) = 64.003

Max Force (max) = 355.364

**********************************************************
Episode Rewards (mean) = 0.963

Episode Rewards (median) = 0.973

Episode Rewards (stdev) = 0.027


### Multiple refinements for benchmarking

Test samples from DOE

In [6]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
    #env = gym.wrappers.RecordEpisodeStatistics(env)
    return env


def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[idx[-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd#.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v22"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
# envs = gym.vector.SyncVectorEnv(
#     [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
# )

envs = make_env(env_name, 0, 0, 10, file1, file2, False)

# Create agent
device = torch.device("cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230202_201959-1rw7aiak/files/agent_16383936steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []
bestForces = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    # env_name = "FuselageActuators-v22"
    # envs = gym.vector.SyncVectorEnv(
    #     [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    # )

    envs = make_env(env_name, 0, 0, 10, file1, file2, False)

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    # initErrors.append(envs.error_initial)
    episodeReward = 0
    minError=10
    
    for j in range(10):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
            bestForce = info["Forces"]
    print('_'*30)
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    bestForces.append(bestForce)

    envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Best Forces:", bestForces)


Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors


  deprecation(
  deprecation(


****************************** File: SolutionInputDP41.inp ******************************
Resetting the environment


  logger.warn(
  logger.warn(
  logger.warn(


Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Reconnect failed - remote exit again
Wait and try to reconnect again - attempt 1
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 2
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
[   0.           77.64316     -12.641788   -164.10995       0.
    0.           -0.35935456    0.            0.            0.
    0.38042971    0.            0.            4.231175    -38.10283
   31.882853    -47.72741    -128.57443   ]


  logger.deprecation(


Intermediate Error: 0.010759471368781668
[   0.           87.329285      0.82870483 -179.8241        0.
    0.           -0.5641745     0.            0.            0.
    0.6437365     0.            0.          -11.063778    -35.56475
   10.108953    -15.6756935  -125.70817   ]
Intermediate Error: 0.01002723042450995
[   0.           85.818436      8.455743   -186.74683       0.
    0.           -0.7507243     0.            0.            0.
    0.94481623    0.            0.          -10.627416    -26.231667
  -11.358032      3.6207485  -132.32411   ]
Intermediate Error: 0.010042940233643364
[   0.          82.090195    16.287361  -191.31284      0.
    0.          -0.9349557    0.           0.           0.
    1.2459487    0.           0.         -10.11258    -17.59326
  -33.853115    23.129026  -138.23785  ]
Intermediate Error: 0.010053380033033023
[   0.          78.1914      24.045918  -195.74991      0.
    0.          -1.1204238    0.           0.           0.
    1.5453703    0.

In [4]:
np.count_nonzero(bestForces, axis=1)

array([10, 11, 12, 11, 11, 11, 11, 11, 11, 12, 11, 13, 13, 11, 13, 13, 12,
       11, 12], dtype=int64)

Initial error (mean) = 0.467
Initial error (median) = 0.482
Initial error (stdev) = 0.251
Initial error (max) = 0.923
**********************************************************
Final error (mean) = 0.011
Final error (median) = 0.011
Final error (stdev) = 0.002
Final error (max) = 0.017
**********************************************************
Max Deviation (mean) = 0.039
Max Deviation (median) = 0.038
Max Deviation (stdev) = 0.006
Max Deviation (max) = 0.047
**********************************************************
Max Force (mean) = 41.837
Max Force (median) = 0.370
Max Force (stdev) = 68.257
Max Force (max) = 356.915

Surrogate eval

In [15]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions):
    env = gym.make(env_id, n_actuators=n_actions, mode="Surrogate", seed=seed, port=50056+idx)
    #env = gym.wrappers.RecordEpisodeStatistics(env)
    return env


def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[idx[-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd#.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
envs = make_env(env_name, 0 , 0, 10)


# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230202_201959-1rw7aiak/files/agent_16383936steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for j in range(100):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    print('_'*30)
    print("Initial Error", info["initError"])
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(


****************************** Episode: 1 ******************************
______________________________
Initial Error 1.409499695489975
Best Error: 0.022528682478405358
****************************** Episode: 2 ******************************
______________________________
Initial Error 0.6146374866299307
Best Error: 0.014846525888562013
****************************** Episode: 3 ******************************
______________________________
Initial Error 0.04961305486189449
Best Error: 0.012661854768269967
****************************** Episode: 4 ******************************
______________________________
Initial Error 1.810665534719634
Best Error: 0.01796157227331766
****************************** Episode: 5 ******************************
______________________________
Initial Error 0.6504018599821338
Best Error: 0.022893587444894132
****************************** Episode: 6 ******************************
______________________________
Initial Error 0.5765460732187481
Best Error: 0.0

Initial error (mean) = 0.915
Initial error (median) = 0.819
Initial error (stdev) = 0.599
Initial error (max) = 2.665
**********************************************************
Final error (mean) = 0.016
Final error (median) = 0.015
Final error (stdev) = 0.005
Final error (max) = 0.027
**********************************************************
Max Deviation (mean) = 0.055
Max Deviation (median) = 0.052
Max Deviation (stdev) = 0.018
Max Deviation (max) = 0.098
**********************************************************
Max Force (mean) = 424.104
Max Force (median) = 348.344
Max Force (stdev) = 236.795
Max Force (max) = 1220.125
**********************************************************
Episode Rewards (mean) = 0.187
Episode Rewards (median) = 0.298
Episode Rewards (stdev) = 0.490

In [3]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Surrogate", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230202_201959-1rw7aiak/files/agent_16383936steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    # print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for j in range(1):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    # print('_'*30)
    # print("Episodic Reward:", episodeReward)
    # print("Final Error:", min(errors))
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  logger.deprecation(


**********************************************************
Initial error (mean) = 0.936
Initial error (median) = 0.907
Initial error (stdev) = 0.622
Initial error (max) = 2.665
**********************************************************
Final error (mean) = 0.018
Final error (median) = 0.016
Final error (stdev) = 0.007
Final error (max) = 0.062
**********************************************************
Max Deviation (mean) = 0.056
Max Deviation (median) = 0.053
Max Deviation (stdev) = 0.020
Max Deviation (max) = 0.157
**********************************************************
Max Force (mean) = 61.372
Max Force (median) = 0.376
Max Force (stdev) = 98.788
Max Force (max) = 587.291
**********************************************************
Episode Rewards (mean) = 0.962
Episode Rewards (median) = 0.983
Episode Rewards (stdev) = 0.051


## run di4gu1ls (rpo on v22)

In [2]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        idx = torch.argsort(abs(x))
        lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action_mean, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v22"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 8, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230203_112925-di4gu1ls/files/agent_1023936steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v22"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Reward:", episodeReward)
    print("Final Error:", info["Error"])
    initErrors.append(info["initError"])
    finalErrors.append(info["Error"])
    maxDevs.append(info["maxDev"])
    maxForces.append(np.max(np.abs(info["Forces"])))
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors


  deprecation(
  deprecation(
  deprecation(


****************************** File: SolutionInputDP41.inp ******************************


  logger.warn(
  logger.warn(
  logger.warn(


Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors


  logger.deprecation(


Reward: [0.79062737]
Final Error: [0.06260109]
****************************** File: SolutionInputDP42.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Reward: [0.60651544]
Final Error: [0.06253247]
****************************** File: SolutionInputDP43.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Reward: [0.8572601]
Final Error: [0.08033405]
****************************** File: SolutionInputDP44.inp ******************************
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Reward: [0.74844222]
Final Error: [0.14498156]
****************************** File: SolutionInputDP45.inp ******************************


KeyboardInterrupt: 

Not very good

## run-20230203_132128-2rw3jtp8 (rpo on v22)

In [6]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs, n_actions):
        self.n_actions = n_actions
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-3*torch.ones(1, np.prod(envs.single_action_space.shape)))  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        action_mean = torch.tanh(self.fc3(x))
        # Use hardshrink to enforce max number of nonzero outputs
        # idx = torch.argsort(abs(x))
        # lambd = abs(x[0][idx[0][-(self.n_actions+1)]]).item()
        # action_mean = F.hardshrink(x, lambd=lambd) # sets outputs whose magnitudes are smaller than lambda to zero
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)



# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Surrogate", False) for i in range(1)]
    )

# Create agent
device = torch.device("cpu")
agent = Agent(envs, 10).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230203_132128-2rw3jtp8/files/agent_1023936steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Episode", i, "reward:", episodeReward, "error:", info["Error"])

    initErrors.append(info["initError"])
    finalErrors.append(info["Error"])
    maxForces.append(np.max(np.abs(info["Forces"])))
    maxDevs.append(info["maxDev"])
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Inital error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  deprecation(
  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


****************************** Episode: 1 ******************************
Episode 0 reward: [0.96858875] error: [0.04427414]
****************************** Episode: 2 ******************************
Episode 1 reward: [0.47729179] error: [0.02593315]
****************************** Episode: 3 ******************************
Episode 2 reward: [0.90736648] error: [0.06024902]
****************************** Episode: 4 ******************************
Episode 3 reward: [0.34802085] error: [0.02022634]
****************************** Episode: 5 ******************************
Episode 4 reward: [0.96735534] error: [0.03847433]
****************************** Episode: 6 ******************************
Episode 5 reward: [0.81543913] error: [0.04340074]
****************************** Episode: 7 ******************************
Episode 6 reward: [0.95621471] error: [0.08602923]
****************************** Episode: 8 ******************************
Episode 7 reward: [0.92885557] error: [0.03872339]
********

## run_20230205_144415_19zo595q

surrogate eval

In [None]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, mode, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode=mode, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

# ALGO LOGIC: initialize agent here:
class QNetwork(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 1)

    def forward(self, x, a):
        x = torch.cat([x, a], 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


class Actor(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod(), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, np.prod(env.single_action_space.shape))
        # action rescaling
        self.register_buffer(
            "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32)
        )
        self.register_buffer(
            "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32)
        )

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc_mu(x))
        return x * self.action_scale + self.action_bias


# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, "Surrogate", False) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Actor(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run_20230205_144415_19zo595q/files/agent_16383936steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    # print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for j in range(100):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    # print('_'*30)
    # print("Episodic Reward:", episodeReward)
    # print("Final Error:", min(errors))
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

  deprecation(
  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(


**********************************************************
Initial error (mean) = 0.913
Initial error (median) = 0.841
Initial error (stdev) = 0.619
Initial error (max) = 2.725
**********************************************************
Final error (mean) = 0.007
Final error (median) = 0.007
Final error (stdev) = 0.001
Final error (max) = 0.008
**********************************************************
Max Deviation (mean) = 0.022
Max Deviation (median) = 0.022
Max Deviation (stdev) = 0.005
Max Deviation (max) = 0.030
**********************************************************
Max Force (mean) = 35.055
Max Force (median) = 0.221
Max Force (stdev) = 59.562
Max Force (max) = 391.326
**********************************************************
Episode Rewards (mean) = 96.574
Episode Rewards (median) = 96.604
Episode Rewards (stdev) = 0.373


## run-20230207_092039-33bs2orw

In [None]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="Surrogate", seed=seed, port=50056+idx)
        # env = gym.wrappers.RecordEpisodeStatistics(env)
        return env
    return thunk

class Actor(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod(), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, np.prod(env.single_action_space.shape))
        # action rescaling
        self.register_buffer(
            "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32)
        )
        self.register_buffer(
            "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32)
        )

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc_mu(x))
        return x * self.action_scale + self.action_bias


# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
# envs = make_env(env_name, 0 , 0, 10)

envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Actor(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230207_092039-33bs2orw/files/agent_1023999steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for j in range(100):
        with torch.no_grad():
            action = agent.forward(obs)
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    print('_'*30)
    print("Initial Error", info["initError"])
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  deprecation(
  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(


****************************** Episode: 1 ******************************
______________________________
Initial Error [1.4094997]
Best Error: [0.04207955]
****************************** Episode: 2 ******************************
______________________________
Initial Error [0.04961305]
Best Error: [0.03306554]
****************************** Episode: 3 ******************************
______________________________
Initial Error [0.65040186]
Best Error: [0.05778247]
****************************** Episode: 4 ******************************
______________________________
Initial Error [0.03102299]
Best Error: [0.05137268]
****************************** Episode: 5 ******************************
______________________________
Initial Error [1.17857945]
Best Error: [0.06762843]
****************************** Episode: 6 ******************************
______________________________
Initial Error [0.23515676]
Best Error: [0.03223966]
****************************** Episode: 7 ***********************

## run-20230207_092039-33bs2orw

In [None]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="Surrogate", seed=seed, port=50056+idx)
        # env = gym.wrappers.RecordEpisodeStatistics(env)
        return env
    return thunk

class Actor(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod(), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, np.prod(env.single_action_space.shape))
        # action rescaling
        self.register_buffer(
            "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32)
        )
        self.register_buffer(
            "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32)
        )

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc_mu(x))
        return x * self.action_scale + self.action_bias


# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
# envs = make_env(env_name, 0 , 0, 10)

envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Actor(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230207_092039-33bs2orw/files/agent_1023999steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for j in range(100):
        with torch.no_grad():
            action = agent.forward(obs)
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    print('_'*30)
    print("Initial Error", info["initError"])
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  deprecation(
  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(


****************************** Episode: 1 ******************************
______________________________
Initial Error [1.4094997]
Best Error: [0.04207955]
****************************** Episode: 2 ******************************
______________________________
Initial Error [0.04961305]
Best Error: [0.03306554]
****************************** Episode: 3 ******************************
______________________________
Initial Error [0.65040186]
Best Error: [0.05778247]
****************************** Episode: 4 ******************************
______________________________
Initial Error [0.03102299]
Best Error: [0.05137268]
****************************** Episode: 5 ******************************
______________________________
Initial Error [1.17857945]
Best Error: [0.06762843]
****************************** Episode: 6 ******************************
______________________________
Initial Error [0.23515676]
Best Error: [0.03223966]
****************************** Episode: 7 ***********************

Initial error (mean) = 0.936
Initial error (median) = 0.907
Initial error (stdev) = 0.622
Initial error (max) = 2.665
**********************************************************
Final error (mean) = 0.062
Final error (median) = 0.050
Final error (stdev) = 0.043
Final error (max) = 0.284
**********************************************************
Max Deviation (mean) = 0.159
Max Deviation (median) = 0.135
Max Deviation (stdev) = 0.119
Max Deviation (max) = 0.920
**********************************************************
Max Force (mean) = 169.797
Max Force (median) = 44.289
Max Force (stdev) = 338.433
Max Force (max) = 8324.552
**********************************************************
Episode Rewards (mean) = -11.289
Episode Rewards (median) = -11.323
Episode Rewards (stdev) = 2.323

In [16]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="Surrogate", seed=seed, port=50056+idx)
        # env = gym.wrappers.RecordEpisodeStatistics(env)
        return env
    return thunk

class Actor(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod(), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, np.prod(env.single_action_space.shape))
        # action rescaling
        self.register_buffer(
            "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32)
        )
        self.register_buffer(
            "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32)
        )

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc_mu(x))
        return x * self.action_scale + self.action_bias


# Make the environment (test mode)
env_name = "FuselageActuators-v22"
run_name = "dummy"
# envs = make_env(env_name, 0 , 0, 10)

envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10) for i in range(1)]
    )

# Create agent
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = Actor(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230207_092039-33bs2orw/files/agent_1023999steps.pt", map_location=device))


# Load the trained policy

initErrors = []
finalErrors = []
maxForces = []
n_actuators = []
maxDevs = []
rewards = []
# Loop over all files
for i in range(100):
    print('*' * 30, f'Episode: {i+1}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    minError=10
    
    for j in range(100):
        with torch.no_grad():
            action = agent.forward(obs)
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        # print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
    print('_'*30)
    print("Initial Error", info["initError"])
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))
# print("**********************************************************")
# print("Number of actuators (mean) = %.3f" %np.mean(n_actuators))
# print("Number of actuators (median) = %.3f" %np.median(n_actuators))
# print("Number of actuators (stdev) = %.3f" %np.std(n_actuators))

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  deprecation(
  deprecation(
  deprecation(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.deprecation(


****************************** Episode: 1 ******************************
______________________________
Initial Error [1.4094997]
Best Error: [0.04207955]
****************************** Episode: 2 ******************************
______________________________
Initial Error [0.04961305]
Best Error: [0.03306554]
****************************** Episode: 3 ******************************
______________________________
Initial Error [0.65040186]
Best Error: [0.05778247]
****************************** Episode: 4 ******************************
______________________________
Initial Error [0.03102299]
Best Error: [0.05137268]
****************************** Episode: 5 ******************************
______________________________
Initial Error [1.17857945]
Best Error: [0.06762843]
****************************** Episode: 6 ******************************
______________________________
Initial Error [0.23515676]
Best Error: [0.03223966]
****************************** Episode: 7 ***********************

Initial error (mean) = 0.936
Initial error (median) = 0.907
Initial error (stdev) = 0.622
Initial error (max) = 2.665
**********************************************************
Final error (mean) = 0.062
Final error (median) = 0.050
Final error (stdev) = 0.043
Final error (max) = 0.284
**********************************************************
Max Deviation (mean) = 0.159
Max Deviation (median) = 0.135
Max Deviation (stdev) = 0.119
Max Deviation (max) = 0.920
**********************************************************
Max Force (mean) = 169.797
Max Force (median) = 44.289
Max Force (stdev) = 338.433
Max Force (max) = 8324.552
**********************************************************
Episode Rewards (mean) = -11.289
Episode Rewards (median) = -11.323
Episode Rewards (stdev) = 2.323

## run-20230217_131757-1fkv29v4 (anneal action variation)

In [4]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    def thunk():
        env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs):
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.single_action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.single_action_space.shape)), requires_grad=False)  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None, scaleStd=1):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        action_mean = torch.tanh(self.fc3(x))
        # Build action distribution
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)*scaleStd
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v22"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
envs = gym.vector.SyncVectorEnv(
    [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
)

# Create agent
device = torch.device("cpu")
agent = Agent(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230217_131757-1fkv29v4/files/agent_32767872steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    env_name = "FuselageActuators-v22"
    envs = gym.vector.SyncVectorEnv(
        [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    )

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    #initErrors.append(envs.error_initial)
    episodeReward = 0
    # done=False
    # while not done:
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
    obs, reward, done, info = envs.step(action.cpu().numpy())
    obs = torch.Tensor(obs).to(device)
    episodeReward += reward
    print("Reward:", episodeReward)
    print("Final Error:", info["Error"])
    initErrors.append(info["initError"])
    finalErrors.append(info["Error"])
    maxDevs.append(info["maxDev"])
    maxForces.append(np.max(np.abs(info["Forces"])))
    rewards.append(episodeReward)
    # n_actuators.append(np.count_nonzero(envs.forces))

envs.close()

print("**********************************************************")
print("Initial error (mean) = %.3f" %np.mean(initErrors))
print("Initial error (median) = %.3f" %np.median(initErrors))
print("Initial error (stdev) = %.3f" %np.std(initErrors))
print("Initial error (max) = %.3f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.3f" %np.mean(finalErrors))
print("Final error (median) = %.3f" %np.median(finalErrors))
print("Final error (stdev) = %.3f" %np.std(finalErrors))
print("Final error (max) = %.3f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.3f" %np.mean(maxDevs))
print("Max Deviation (median) = %.3f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.3f" %np.std(maxDevs))
print("Max Deviation (max) = %.3f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.3f" %np.mean(maxForces))
print("Max Force (median) = %.3f" %np.median(maxForces))
print("Max Force (stdev) = %.3f" %np.std(maxForces))
print("Max Force (max) = %.3f" %np.max(maxForces))
print("**********************************************************")
print("Episode Rewards (mean) = %.3f" %np.mean(rewards))
print("Episode Rewards (median) = %.3f" %np.median(rewards))
print("Episode Rewards (stdev) = %.3f" %np.std(rewards))

Exception ignored in: <function VectorEnv.__del__ at 0x000001BC755269E0>
Traceback (most recent call last):
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\vector_env.py", line 294, in __del__
    self.close()
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\vector_env.py", line 221, in close
    self.close_extras(**kwargs)
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in close_extras
    [env.close() for env in self.envs]
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in <listcomp>
    [env.close() for env in self.envs]
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\core.py", line 435, in close
    ret

****************************** File: SolutionInputDP41.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready


  logger.deprecation(


Reward: [0.96718062]
Final Error: [0.00981279]
****************************** File: SolutionInputDP42.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
Reward: [0.94158648]
Final Error: [0.00928306]
****************************** File: SolutionInputDP43.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished
Re

**********************************************************
Initial error (mean) = 0.467

Initial error (median) = 0.482

Initial error (stdev) = 0.251

Initial error (max) = 0.923
**********************************************************
Final error (mean) = 0.012

Final error (median) = 0.010

Final error (stdev) = 0.006

Final error (max) = 0.028
**********************************************************
Max Deviation (mean) = 0.037

Max Deviation (median) = 0.036

Max Deviation (stdev) = 0.013

Max Deviation (max) = 0.076
**********************************************************
Max Force (mean) = 42.936

Max Force (median) = 8.154

Max Force (stdev) = 56.856

Max Force (max) = 268.755
**********************************************************
Episode Rewards (mean) = 0.965

Episode Rewards (median) = 0.969

Episode Rewards (stdev) = 0.025

### Multiple refinements

In [7]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    env = gym.make(env_id, n_actuators=n_actions, mode="File", file1=file1, file2=file2, record=record, seed=seed, port=50056+idx)
    #env = gym.wrappers.RecordEpisodeStatistics(env)
    return env


def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs):
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.action_space.shape)), requires_grad=False)  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None, scaleStd=1):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        action_mean = torch.tanh(self.fc3(x))
        # Build action distribution
        action_logstd = self.actor_logstd#.expand_as(action_mean)
        action_std = torch.exp(action_logstd)*scaleStd
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v22"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
# envs = gym.vector.SyncVectorEnv(
#     [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
# )

envs = make_env(env_name, 0, 0, 10, file1, file2, False)

# Create agent
device = torch.device("cpu")
agent = Agent(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230217_131757-1fkv29v4/files/agent_32767872steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []
bestForces = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    # env_name = "FuselageActuators-v22"
    # envs = gym.vector.SyncVectorEnv(
    #     [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    # )

    envs = make_env(env_name, 0, 0, 10, file1, file2, False)

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    # initErrors.append(envs.error_initial)
    episodeReward = 0
    minError=10
    
    for j in range(10):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
            bestForce = info["Forces"]
    print('_'*30)
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    bestForces.append(bestForce)

    envs.close()

print("**********************************************************")
print("Initial error (mean) = %.4f" %np.mean(initErrors))
print("Initial error (median) = %.4f" %np.median(initErrors))
print("Initial error (stdev) = %.4f" %np.std(initErrors))
print("Initial error (max) = %.4f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.4f" %np.mean(finalErrors))
print("Final error (median) = %.4f" %np.median(finalErrors))
print("Final error (stdev) = %.4f" %np.std(finalErrors))
print("Final error (max) = %.4f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.4f" %np.mean(maxDevs))
print("Max Deviation (median) = %.4f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.4f" %np.std(maxDevs))
print("Max Deviation (max) = %.4f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.4f" %np.mean(maxForces))
print("Max Force (median) = %.4f" %np.median(maxForces))
print("Max Force (stdev) = %.4f" %np.std(maxForces))
print("Max Force (max) = %.4f" %np.max(maxForces))
print("**********************************************************")
print("Best Forces:", bestForces)


****************************** File: SolutionInputDP41.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Reconnect failed - remote exit again
Wait and try to reconnect again - attempt 1
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 2
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
Intermediate Error: 0.009812788319896537
Intermediate Error: 0.009566103180384084
Intermediate Error: 0.029143302538397236
Intermediate Error: 0.009623502792903617
Intermediate Error: 0.012324360217118436
Intermediate Error: 0.03788054748716914
Intermediate Error: 0.009551541400808746
Intermediate Error: 0.0130421704813252
Intermediate Error: 0.009717816194979204
Intermediate Error: 0.009790655700307033
______________________________
Best Error: 0.00955154

Exception ignored in: <function VectorEnv.__del__ at 0x000001BC755269E0>
Traceback (most recent call last):
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\vector_env.py", line 294, in __del__
    self.close()
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\vector_env.py", line 221, in close
    self.close_extras(**kwargs)
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in close_extras
    [env.close() for env in self.envs]
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\vector\sync_vector_env.py", line 234, in <listcomp>
    [env.close() for env in self.envs]
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\core.py", line 435, in close
    return self.env.close()
  File "c:\Users\TL\Documents\PythonVENV\venv3_10-pyAnsys\lib\site-packages\gym\core.py", line 435, in close
    ret

Intermediate Error: 0.011465180619962934
Intermediate Error: 0.010346405936671242
Intermediate Error: 0.13958319083182827
Intermediate Error: 0.09949363657676186
Intermediate Error: 0.05642071904489391
Intermediate Error: 0.009537327842655301
Intermediate Error: 0.06876661187287697
______________________________
Best Error: 0.009537327842655301
****************************** File: SolutionInputDP51.inp ******************************
Exit Ansys and try to reconnect
No active Ansys process found. Wait and try to reconnect
Product:             Ansys Mechanical Enterprise Academic Teaching
MAPDL Version:       22.1
ansys.mapdl Version: 0.61.2

Running on 4 processors
Sucessfully reconnected to Ansys on attempt 1
Try running again
Simulation setup complete
Applied forces
Solve finished
Results ready
Intermediate Error: 0.008309622019290782
Intermediate Error: 0.0082851463175274
Intermediate Error: 0.0075398874900382586
Intermediate Error: 0.007252644546075483
Intermediate Error: 0.006976943

### Multiple refinements (test)

In [1]:
import os
from os import path
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import gym
import torch
from torch import nn
import torch.nn.functional as F
from torch.distributions.normal import Normal

from AssemblyGym.envs import FuselageActuators

def make_env(env_id, seed, idx, n_actions, file1, file2, record):
    env = gym.make(env_id, n_actuators=n_actions, mode="Test", record=record, seed=seed)
    #env = gym.wrappers.RecordEpisodeStatistics(env)
    return env


def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer


class Agent(nn.Module):
    def __init__(self, envs):
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 64)),
            nn.Tanh(),
            layer_init(nn.Linear(64, 1), std=1.0),
        )
        # layers for self.actor_mean
        self.fc1 = layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 64))
        self.fc2 = layer_init(nn.Linear(64, 64))
        self.fc3 = layer_init(nn.Linear(64, np.prod(envs.action_space.shape)), std=0.01)
        
        self.actor_logstd = nn.Parameter(-5*torch.ones(1, np.prod(envs.action_space.shape)), requires_grad=False)  # initial action_std = exp(actor_logstd)

    def get_value(self, obs):
        return self.critic(obs)

    def get_action_and_value(self, obs, action=None, scaleStd=1):
        # Start with standard MLP
        x = torch.tanh(self.fc1(obs))
        x = torch.tanh(self.fc2(x))
        action_mean = torch.tanh(self.fc3(x))
        # Build action distribution
        action_logstd = self.actor_logstd#.expand_as(action_mean)
        action_std = torch.exp(action_logstd)*scaleStd
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        if action == "deterministic":
            action = action_mean
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(obs)

# Make the environment
env_name = "FuselageActuators-v22"
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
file1 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
# envs = gym.vector.SyncVectorEnv(
#     [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
# )

envs = make_env(env_name, 0, 0, 10, file1, file2, False)

# Create agent
device = torch.device("cpu")
agent = Agent(envs).to(device)
agent.load_state_dict(torch.load(
            "./wandb/run-20230217_131757-1fkv29v4/files/agent_32767872steps.pt", map_location=device))

# Initialze variables
initErrors = []
finalErrors = []
maxDevs = []
maxForces = []
rewards = []
bestForces = []

# Select files
file2 ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Benchmark/SolutionInputUndeformed.inp'
folder ='C:/Users/TL/Projects/Fuselage Actuator Reinforcement Learning/AssemblyGym/AssemblyGym/envs/FuselageActuators/AnsysFiles/Test/'
files = os.listdir(folder)

# Loop over all files
for f in files[:-1]:
    file1 = path.join(folder, f)
    dp = file1[-8:-4] # Design point

    # Make the environment
    # env_name = "FuselageActuators-v22"
    # envs = gym.vector.SyncVectorEnv(
    #     [make_env(env_name, 0 + i, i, 10, file1, file2, False) for i in range(1)]
    # )

    envs = make_env(env_name, 0, 0, 10, file1, file2, False)

    print('*' * 30, f'File: {f}', '*' * 30)
    # Perform test and track error
    obs = torch.Tensor(envs.reset()).to(device)
    # initErrors.append(envs.error_initial)
    episodeReward = 0
    minError=10
    
    for j in range(10):
        with torch.no_grad():
            action, logprob, _, value = agent.get_action_and_value(obs, action = "deterministic")
        obs, reward, done, info = envs.step(action.cpu().numpy())
        obs = torch.Tensor(obs).to(device)
        episodeReward += reward
        # print("Intermediate Reward:", reward)
        print("Intermediate Error:", info["Error"])
        if info["Error"]<minError:
            minError= info["Error"]
            maxDev = info["maxDev"]
            maxForce = np.max(np.abs(info["Forces"]))
            bestForce = info["Forces"]
    print('_'*30)
    print("Best Error:", minError)
    initErrors.append(info["initError"])
    finalErrors.append(minError)
    maxForces.append(maxForce)
    maxDevs.append(maxDev)
    rewards.append(episodeReward)
    bestForces.append(bestForce)

    envs.close()

print("**********************************************************")
print("Initial error (mean) = %.4f" %np.mean(initErrors))
print("Initial error (median) = %.4f" %np.median(initErrors))
print("Initial error (stdev) = %.4f" %np.std(initErrors))
print("Initial error (max) = %.4f" %np.max(initErrors))
print("**********************************************************")
print("Final error (mean) = %.4f" %np.mean(finalErrors))
print("Final error (median) = %.4f" %np.median(finalErrors))
print("Final error (stdev) = %.4f" %np.std(finalErrors))
print("Final error (max) = %.4f" %np.max(finalErrors))
print("**********************************************************")
print("Max Deviation (mean) = %.4f" %np.mean(maxDevs))
print("Max Deviation (median) = %.4f" %np.median(maxDevs))
print("Max Deviation (stdev) = %.4f" %np.std(maxDevs))
print("Max Deviation (max) = %.4f" %np.max(maxDevs))
print("**********************************************************")
print("Max Force (mean) = %.4f" %np.mean(maxForces))
print("Max Force (median) = %.4f" %np.median(maxForces))
print("Max Force (stdev) = %.4f" %np.std(maxForces))
print("Max Force (max) = %.4f" %np.max(maxForces))
print("**********************************************************")
print("Best Forces:", bestForces)


  import distutils.sysconfig as sysconfig


PyMAPDL is taking longer than expected to connect to an MAPDL session.
Checking if there are any available licenses...
PyMAPDL is taking longer than expected to connect to an MAPDL session.
Checking if there are any available licenses...


  and re.search("ansys\d\d\d", os.path.basename(os.path.normpath(exe_loc)))
  """Start MAPDL locally.


KeyboardInterrupt: 

Initial error (mean) = 0.467
Initial error (median) = 0.482
Initial error (stdev) = 0.251
Initial error (max) = 0.923
**********************************************************
Final error (mean) = 0.010
Final error (median) = 0.010
Final error (stdev) = 0.003
Final error (max) = 0.015
**********************************************************
Max Deviation (mean) = 0.034
Max Deviation (median) = 0.035
Max Deviation (stdev) = 0.009
Max Deviation (max) = 0.056
**********************************************************
Max Force (mean) = 214.811
Max Force (median) = 207.556
Max Force (stdev) = 89.130
Max Force (max) = 432.262
**********************************************************