In [1]:
# Notebook aims to develop Control and Correction of Manufacturing Systems using Deep Reinforcement Learning (CCMS-DRL)
#Deep Q Learning
#VRM Matlab Integration
#Control and Correction
#Markov Decision Process

import pandas as pd
import numpy as np
import tensorflow as tf
import os
import sys
from tensorflow.keras import backend as K
# Reset any Keras state left over from a previous run of this kernel.
K.clear_session()

# Restrict the process to GPU 0.
# NOTE(review): this is set after TensorFlow has been imported and after clear_session();
# confirm the variable is still honoured, or move it above the TensorFlow import.
os.environ["CUDA_VISIBLE_DEVICES"]="0"


In [2]:
# Initialising the basic dimensions and containers shared by the rest of the notebook
param_dim = 3
stage_dim = 2
dev_dim = 3
cop_dim = 8047
timesteps = 1

# Nominal deviations per axis: three independent all-zero arrays of shape (timesteps, cop_dim)
nominal_cop_x, nominal_cop_y, nominal_cop_z = (
    np.zeros((timesteps, cop_dim)) for _ in range(3)
)

# Column labels for the process parameters: pp_1 ... pp_<param_dim>
param_headers = ["pp_" + str(index + 1) for index in range(param_dim)]

In [3]:
# Loading the x / y / z deviation data sets that the PCA reduction is fitted on

_pca_source_files = (
    "./pca_data/pca_datacop_pca_drl_x_3_0.csv",
    "./pca_data/pca_datacop_pca_drl_y_3_0.csv",
    "./pca_data/pca_datacop_pca_drl_z_3_0.csv",
)

# Each file is read without a header row and its last column is dropped.
dataset_x, dataset_y, dataset_z = (
    pd.read_csv(csv_path, header=None).iloc[:, :-1] for csv_path in _pca_source_files
)


In [4]:
# Preprocessing: stack the x, y and z data sets column-wise and min-max scale every column
from sklearn.preprocessing import MinMaxScaler
# Column-wise concatenation (axis=1): one row per sample, x columns followed by y and z columns
dev_data=pd.concat([dataset_x, dataset_y,dataset_z], axis = 1)
print(dev_data.shape)
# scaler_t is fitted here and reused by get_cop_tensor() to scale new simulation results
scaler_t= MinMaxScaler()
dev_data_t = scaler_t.fit_transform(dev_data)

(700, 24141)


In [5]:
# PCA keeping enough components to explain 99.9% of the variance (var_limit=0.999)
var_limit=0.999
from sklearn.decomposition import PCA
# A float argument is passed as the first PCA parameter (n_components)
pca_t = PCA(var_limit)
#pca.fit(dev_data)
# pca_t is fitted on the scaled data and reused by get_cop_tensor() for new simulation results
dev_pc = pca_t.fit_transform(dev_data_t)
print(dev_pc.shape)
explained_var=pca_t.explained_variance_ratio_
print(explained_var)

(700, 8)
[0.61500754 0.19004906 0.08689917 0.05935777 0.02187135 0.01608941
 0.00658602 0.0032461 ]


In [6]:
# NOTE(review): the bare `dev_pc` expression below is not the last line of the cell,
# so it displays nothing; only the describe() table is shown.
dev_pc
# Summary statistics of the principal-component scores
dev_df = pd.DataFrame(dev_pc)
dev_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7
count,700.0,700.0,700.0,700.0,700.0,700.0,700.0,700.0
mean,9.744586e-16,-6.090366e-16,-2.33464e-16,-1.624098e-16,-2.030122e-17,2.385393e-16,1.268826e-16,-1.763669e-16
std,20.69757,11.50567,7.780131,6.430102,3.903165,3.34772,2.141858,1.503695
min,-23.53483,-32.72252,-21.1422,-12.96156,-9.469738,-8.971204,-6.882221,-5.10764
25%,-14.91381,-8.305674,-5.322579,-5.236408,-2.93579,-2.261339,-1.264315,-0.5607888
50%,-8.835909,3.367156,0.2220401,-0.1373003,-0.3723971,-0.2611092,-0.0513719,0.08929002
75%,7.84573,8.66123,5.052208,5.186665,2.182402,1.918606,0.8345282,0.5029736
max,81.14733,44.22742,21.27252,13.7623,11.03621,16.3016,21.07247,9.158121


In [7]:
# Second min-max scaling, applied to the principal-component scores.
# scaler_p is reused by get_cop_tensor() so that new states use the same scaling.
scaler_p=MinMaxScaler()
dev_pc_t = scaler_p.fit_transform(dev_pc)
# Display the scaled scores (the whole array is echoed as the cell output)
dev_pc_t 

array([[0.06083309, 0.14866408, 0.54730575, ..., 0.34938026, 0.3172509 ,
        0.26667299],
       [0.65156089, 0.3792449 , 0.65360388, ..., 0.50339834, 0.31380435,
        0.45429322],
       [0.89502996, 0.151917  , 0.81420031, ..., 0.3858864 , 0.24537683,
        0.69688963],
       ...,
       [0.0683665 , 0.44938039, 0.45766938, ..., 0.31671602, 0.22242038,
        0.42334898],
       [0.2458102 , 0.62016534, 0.76323687, ..., 0.22439522, 0.25979887,
        0.39387176],
       [0.08721407, 0.46662595, 0.4135524 , ..., 0.33743256, 0.22574263,
        0.43419451]])

In [8]:
# Summary statistics of the scaled principal-component scores
dev_pc_t_df = pd.DataFrame(dev_pc_t)
dev_pc_t_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7
count,700.0,700.0,700.0,700.0,700.0,700.0,700.0,700.0
mean,0.224822,0.425244,0.498464,0.485018,0.461804,0.354975,0.246192,0.358035
std,0.197718,0.149522,0.18343,0.240613,0.190343,0.132463,0.076619,0.105406
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.082354,0.317308,0.372975,0.289073,0.318637,0.265497,0.200965,0.318725
50%,0.140415,0.469002,0.503699,0.479881,0.443644,0.344643,0.244354,0.364294
75%,0.29977,0.537801,0.617578,0.679102,0.568232,0.43089,0.276045,0.393292
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [9]:
# Building the dataset for training, initial exploration
# Environment functions
# Transitions are handled as <state_t, action_t, reward_t, state_t+1>
time_steps=1  # same value as `timesteps` defined earlier; both spellings are used below
point_dim=8047  # same value as cop_dim
cop_state_dim=point_dim*3
std_val=0  # NOTE(review): not referenced anywhere else in the visible notebook
# Upper / lower limits used when sampling and clipping the process parameters
ucl=4
lcl=-4
# Prefix of the per-run parameter CSV; the run id and ".csv" are appended by the functions below
filename_base="./ddpg_data/state_t_"
# One limit per process parameter
ucl_vector=[ucl]*param_dim
lcl_vector=[lcl]*param_dim
# Prefixes of the per-run x / y / z result CSVs read back by get_cop_tensor()
file_name_cop=["./ddpg_data/cop_drl_x_","./ddpg_data/cop_drl_y_","./ddpg_data/cop_drl_z_"]

def get_initial_state(time_steps,run_id,initial_sample_size=1):
    """Draw random starting process parameters and simulate the resulting state.

    Each of the ``param_dim`` parameters is sampled uniformly between its entry
    in ``lcl_vector`` and ``ucl_vector``. The first sampled row is passed to
    ``run_system_model`` to obtain the reduced state vector.

    Args:
        time_steps: number of rows generated by ``run_system_model`` for the run.
        run_id: identifier appended to the CSV file names of this run.
        initial_sample_size: number of parameter rows to sample; only the first
            row is simulated.

    Returns:
        Tuple ``(cop_state, kcc_state)``: the state vector returned by
        ``run_system_model`` and the sampled parameter matrix of shape
        ``(initial_sample_size, param_dim)``.
    """
    # Size the sample matrix from param_dim rather than a literal 3, so it stays
    # consistent with the loop below.
    initial_samples = np.zeros((initial_sample_size, param_dim))

    for i in range(param_dim):
        initial_samples[:, i] = np.random.uniform(lcl_vector[i], ucl_vector[i], initial_sample_size)

    # Persist the full sample set; run_system_model rewrites the same file with
    # the rows it actually simulates.
    np.savetxt(filename_base + str(run_id) + ".csv", initial_samples, delimiter=",")

    # Use the time_steps argument (previously the global `timesteps` was read) and
    # reuse the state returned by the model instead of reading and transforming
    # the result CSVs a second time.
    cop_state = run_system_model(initial_samples[0, :], time_steps, run_id)

    kcc_state = initial_samples

    return cop_state, kcc_state

def run_simulations(run_id,type_flag="drl"):
    """Run the MATLAB simulation for one run id.

    A MATLAB engine is started, switched to the simulation directory, used to
    call ``halo_reinforcement_learning(run_id, type_flag)`` and then shut down.

    Args:
        run_id: identifier forwarded to the MATLAB function.
        type_flag: second argument forwarded to the MATLAB function.

    Raises:
        Whatever the MATLAB engine raises (e.g. ``MatlabExecutionError``); the
        engine is still shut down in that case.
    """
    # Imported lazily so the rest of the notebook loads without the MATLAB engine package.
    import matlab.engine

    print("Initiating Matlab Engine...")

    #Initiating CAE engine within AI environment
    eng = matlab.engine.start_matlab()

    try:
        #change to absolute path here
        #Changing to Cross Member Assembly
        eng.cd(r'C:\Users\SINHA_S\Desktop\cross_member_datagen\Demos\Fixture simulation\Multi station\locator_halo',nargout=0)

        print("Initiating CAE simulations for run ID: ",run_id)

        eng.halo_reinforcement_learning(run_id,type_flag,nargout=0)

        print("Simulation_Completed")
    finally:
        # A new engine is started on every call, so always stop it -- also when
        # the simulation fails -- instead of leaving the MATLAB process running.
        eng.quit()

#state, reward, done, info = env.step(action)
def run_system_model(action_kccs,timesteps,run_id):
    """Write the requested process parameters to disk, simulate, and return the new state.

    Args:
        action_kccs: indexable with one target value per process parameter.
        timesteps: number of rows written to the parameter file.
        run_id: identifier appended to the CSV file names of this run.

    Returns:
        The state vector produced by ``get_cop_tensor`` for this run.
    """
    # Per-parameter standard deviation of the jitter applied around the target values.
    std_vector_gen = [0.0, 0.0, 0.0]

    action_matrix = np.zeros((timesteps, param_dim))
    for kcc_idx in range(param_dim):
        jittered = np.random.normal(action_kccs[kcc_idx], std_vector_gen[kcc_idx], timesteps)
        # Keep every value inside the limits of its parameter.
        action_matrix[:, kcc_idx] = np.clip(jittered, lcl_vector[kcc_idx], ucl_vector[kcc_idx])

    np.savetxt(filename_base + str(run_id) + ".csv", action_matrix, delimiter=",")

    # Multi-physics based VRM model
    run_simulations(run_id)

    # Placeholder from the original notebook: an OSER model call is meant to go here.
    _, cop_state = get_cop_tensor(run_id)

    return cop_state

def get_cop_tensor(run_id):
    """Load the simulation results of a run and reduce them to the state vector.

    One CSV per prefix in ``file_name_cop`` is read (without header, last column
    dropped). The first three tables are concatenated column-wise and passed
    through ``scaler_t``, ``pca_t`` and ``scaler_p`` in that order.

    Args:
        run_id: identifier appended to each file prefix.

    Returns:
        Tuple ``(cop_tensor, state)``: the list of loaded tables and the
        flattened, transformed state array (which is also printed).
    """
    cop_tensor = [
        pd.read_csv(prefix + str(run_id) + ".csv", header=None).iloc[:, :-1]
        for prefix in file_name_cop
    ]

    cop_state = pd.concat([cop_tensor[0], cop_tensor[1], cop_tensor[2]], axis=1)

    # Same pipeline the training data went through: scale -> PCA -> scale.
    scaled_input = scaler_t.transform(cop_state)
    components = pca_t.transform(scaled_input)
    cop_state_scale_transform = scaler_p.transform(components).flatten()

    print("Cop State: ",cop_state_scale_transform)

    return cop_tensor, cop_state_scale_transform

def get_reward(cop_tensor,state_matrix,action_kccs):
    """Compute the reward of a control step as the negative weighted system loss.

    Args:
        cop_tensor: sequence whose first three entries are the x, y and z result
            tables (pandas objects, as built by ``get_cop_tensor``).
        state_matrix: current process parameters, either a single vector or a
            matrix with one row per sample; rows are averaged.
        action_kccs: process parameters requested by the action.

    Returns:
        Tuple ``(reward, kpi_loss, kcc_loss)`` where ``reward`` is
        ``-2 * ((1 - sys_rigid) * kpi_loss + sys_rigid * kcc_loss)``.
    """
    kcc_wts = np.array([0.3, 0.3, 0.3])
    sys_rigid = 0.5
    scaling_factor = 0.1

    # KCC loss: weighted mean absolute change between current and requested parameters.
    # atleast_2d keeps a 1-D parameter vector from being averaged down to a single
    # scalar by the axis=0 mean.
    current_kccs = np.mean(np.atleast_2d(state_matrix), axis=0)
    kcc_sse = np.absolute(current_kccs - action_kccs)
    kcc_loss = np.sum(kcc_sse * kcc_wts) / np.sum(kcc_wts)

    # KPI loss: mean absolute deviation from nominal, summed over the three axes.
    # Each axis is compared with its own nominal array (previously all three were
    # compared with nominal_cop_x).
    kpi_loss = (
        (np.absolute(cop_tensor[0] - nominal_cop_x)).values.mean()
        + (np.absolute(cop_tensor[1] - nominal_cop_y)).values.mean()
        + (np.absolute(cop_tensor[2] - nominal_cop_z)).values.mean()
    )
    kpi_loss = scaling_factor * kpi_loss

    # System loss blends both terms
    system_loss = 2 * ((1 - sys_rigid) * kpi_loss + sys_rigid * kcc_loss)

    # Reward as negative of system loss
    reward = -1 * system_loss

    return reward, kpi_loss, kcc_loss

def get_reward_signal(reward, loss_thres=-0.3,los_neg_thres=-2):
    """Map a raw reward onto the interval [0, 1].

    Args:
        reward: raw reward value.
        loss_thres: rewards strictly above this value map to 1.0.
        los_neg_thres: rewards strictly below this value map to 0.0.

    Returns:
        1.0 above ``loss_thres``, 0.0 below ``los_neg_thres``, otherwise the
        linear position of ``reward`` between the two thresholds.
    """
    # loss_thres can be annealed over time for continuous improvement
    if reward > loss_thres:
        return 1.0
    if reward < los_neg_thres:
        return 0.0

    # Linear interpolation between the two thresholds
    span = loss_thres - los_neg_thres
    return (reward - los_neg_thres) / span

In [10]:
#state, reward, done, info = env.step(action)

def envior_step(action_kccs,state_matrix,run_id,kpi_thres=0.3):
    """Apply an action to the simulated system and report the outcome.

    Follows the ``state, reward, done, info = env.step(action)`` convention.

    Args:
        action_kccs: process parameters to apply.
        state_matrix: process parameters before the action (used for the KCC loss).
        run_id: identifier of the simulation run.
        kpi_thres: the episode is flagged as done once the KPI loss drops below this.

    Returns:
        Tuple ``(cop_state, reward, done, info)``; ``reward`` is the raw reward,
        the scaled reward signal is only reported inside ``info``.
    """
    # Simulate the action; the state is then re-read together with the raw result tables.
    run_system_model(action_kccs, time_steps, run_id)
    cop_tensor, cop_state = get_cop_tensor(run_id)

    reward, kpi_loss, kcc_loss = get_reward(cop_tensor, state_matrix, action_kccs)
    scaled_signal = get_reward_signal(reward, loss_thres=-0.4, los_neg_thres=-10)

    info = {
        "Reward": reward,
        "Reward Signal": scaled_signal,
        "KPI Loss": kpi_loss,
        "KCC Loss": kcc_loss,
    }

    done = bool(kpi_loss < kpi_thres)

    return cop_state, reward, done, info

In [11]:
# Manufacturing Environment System Test
#Test Run: Success
#run_id=5
#cop_state,kcc_state=get_initial_state(time_steps,run_id)
#action_kccs=kcc_state[0,:]

#print(action_kccs)
#envior_step(action_kccs,kcc_state,run_id,kpi_thres=0.3)

In [12]:
# Reinforcement Learning Model
#num_states=param_dim*time_steps
# The state is the PCA-reduced representation, so its size is the number of retained components
pca_output_dim=dev_pc.shape[1]
num_states=pca_output_dim
print("State Vector of the System: ",num_states)

# NOTE(review): hard-coded to 3, which equals param_dim in this notebook; confirm they are meant to stay in sync
num_actions=3
#param_dim=1
print("Action vector of the System: ",num_actions)

State Vector of the System:  8
Action vector of the System:  3


In [13]:
#Ornstein-Uhlenbeck process for Exploration and Exploitation

class OUActionNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck process).

    Every call advances the process by one step of size ``dt`` and returns the
    new value; consecutive samples therefore depend on each other.
    """

    def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None):
        self.mean, self.std_dev = mean, std_deviation
        self.theta, self.dt = theta, dt
        self.x_initial = x_initial
        self.reset()

    def __call__(self):
        # Formula taken from https://www.wikipedia.org/wiki/Ornstein-Uhlenbeck_process.
        # Mean-reverting pull towards `mean` ...
        drift = self.theta * (self.mean - self.x_prev) * self.dt
        # ... plus a Gaussian perturbation scaled by sqrt(dt).
        diffusion = self.std_dev * np.sqrt(self.dt) * np.random.normal(size=self.mean.shape)
        # The new value is stored so that the next sample depends on this one.
        self.x_prev = self.x_prev + drift + diffusion
        return self.x_prev

    def reset(self):
        """Restart the process from ``x_initial``, or from zeros when none was given."""
        self.x_prev = np.zeros_like(self.mean) if self.x_initial is None else self.x_initial

In [14]:
#Experience Replay Buffer Class

class Buffer:
    """Ring buffer of (state, action, reward, next_state) records plus the DDPG update.

    ``learn`` relies on the module-level networks (``actor_model``,
    ``critic_model``, ``target_actor``, ``target_critic``), their optimizers and
    the discount factor ``gamma``.
    """

    def __init__(self, buffer_capacity=100000, batch_size=64):
        # Maximum number of stored experiences and number of records per training batch.
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size

        # Counts how many times record() has been called.
        self.buffer_counter = 0

        # One array per tuple element instead of a list of tuples.
        rows = self.buffer_capacity
        self.state_buffer = np.zeros((rows, num_states))
        self.action_buffer = np.zeros((rows, num_actions))
        self.reward_buffer = np.zeros((rows, 1))
        self.next_state_buffer = np.zeros((rows, num_states))

    def record(self, obs_tuple):
        """Store one (s, a, r, s') observation, overwriting the oldest slot once full."""
        slot = self.buffer_counter % self.buffer_capacity

        storages = (
            self.state_buffer,
            self.action_buffer,
            self.reward_buffer,
            self.next_state_buffer,
        )
        for position, storage in enumerate(storages):
            storage[slot] = obs_tuple[position]

        self.buffer_counter += 1

    def learn(self):
        """Sample a batch and apply one critic update followed by one actor update."""
        # Only slots that have been written so far are eligible.
        filled = min(self.buffer_counter, self.buffer_capacity)
        picks = np.random.choice(filled, self.batch_size)

        state_batch = tf.convert_to_tensor(self.state_buffer[picks])
        action_batch = tf.convert_to_tensor(self.action_buffer[picks])
        reward_batch = tf.cast(tf.convert_to_tensor(self.reward_buffer[picks]), dtype=tf.float64)
        next_state_batch = tf.convert_to_tensor(self.next_state_buffer[picks])

        # Critic: regress Q(s, a) onto r + gamma * Q_target(s', actor_target(s')).
        with tf.GradientTape() as critic_tape:
            target_actions = target_actor(next_state_batch)
            td_target = reward_batch + gamma * target_critic([next_state_batch, target_actions])
            q_estimate = critic_model([state_batch, action_batch])
            critic_loss = tf.math.reduce_mean(tf.math.square(td_target - q_estimate))

        critic_vars = critic_model.trainable_variables
        critic_optimizer.apply_gradients(
            zip(critic_tape.gradient(critic_loss, critic_vars), critic_vars)
        )

        # Actor: maximise the critic's value of the actor's own actions,
        # hence the negated mean as loss.
        with tf.GradientTape() as actor_tape:
            proposed_actions = actor_model(state_batch)
            actor_loss = -tf.math.reduce_mean(critic_model([state_batch, proposed_actions]))

        actor_vars = actor_model.trainable_variables
        actor_optimizer.apply_gradients(
            zip(actor_tape.gradient(actor_loss, actor_vars), actor_vars)
        )


# This update target parameters slowly
# Based on rate `tau`, which is much less than one.
def update_target(tau):
    """Move both target networks a fraction ``tau`` towards their online networks.

    Every target weight becomes ``tau * online + (1 - tau) * target``; the
    critic pair is updated first, then the actor pair.
    """
    network_pairs = (
        (critic_model, target_critic),
        (actor_model, target_actor),
    )
    for online_net, target_net in network_pairs:
        blended = [
            online_var * tau + target_var * (1 - tau)
            for online_var, target_var in zip(online_net.weights, target_net.weights)
        ]
        target_net.set_weights(blended)

In [15]:
#Define Actor and Critic neural networks
from tensorflow.keras import layers
# Action bounds taken from the control limits; used by get_actor() to scale the
# network output and by policy() to clip the noisy action
upper_bound=ucl
lower_bound=lcl

# Build all Keras layers in double precision
tf.keras.backend.set_floatx('float64')
def get_actor():
    """Build the actor network mapping a state vector to a bounded action vector.

    Returns:
        A Keras model taking ``num_states`` inputs and returning ``num_actions``
        values in ``[-upper_bound, upper_bound]``.
    """
    # Small uniform initialisation (+/- 3e-4) of the output layer keeps the
    # initial tanh outputs close to zero.
    last_init = tf.random_uniform_initializer(minval=-0.0003, maxval=0.0003)

    # `shape` must be a tuple: `(num_states)` is just an int, `(num_states,)` is a 1-tuple.
    inputs = layers.Input(shape=(num_states,))
    out = layers.Dense(512, activation="relu")(inputs)
    out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
    out = layers.BatchNormalization()(out)
    outputs = layers.Dense(num_actions, activation="tanh", kernel_initializer=last_init)(out)

    # tanh yields [-1, 1]; scaling by upper_bound gives a range symmetric around zero.
    # NOTE(review): this matches lower_bound only while lower_bound == -upper_bound.
    outputs = outputs * upper_bound
    model = tf.keras.Model(inputs, outputs)
    return model


def get_critic():
    """Build the critic network estimating the value of a (state, action) pair.

    Returns:
        A Keras model taking ``[state, action]`` and returning a single value
        per sample.
    """
    # Output layer weights are initialised uniformly between -3e-3 and 3e-3.
    last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)

    # State branch.
    # `shape` must be a tuple: `(num_states)` is just an int, `(num_states,)` is a 1-tuple.
    state_input = layers.Input(shape=(num_states,))
    state_out = layers.Dense(16, activation="relu")(state_input)
    state_out = layers.BatchNormalization()(state_out)
    state_out = layers.Dense(32, activation="relu")(state_out)
    state_out = layers.BatchNormalization()(state_out)

    # Action branch
    action_input = layers.Input(shape=(num_actions,))
    action_out = layers.Dense(32, activation="relu")(action_input)
    action_out = layers.BatchNormalization()(action_out)

    # Both branches pass through their own layers before being concatenated
    concat = layers.Concatenate()([state_out, action_out])

    out = layers.Dense(512, activation="relu")(concat)
    out = layers.BatchNormalization()(out)
    out = layers.Dense(512, activation="relu")(out)
    out = layers.BatchNormalization()(out)
    outputs = layers.Dense(1,kernel_initializer=last_init)(out)

    # Outputs a single value for the given state-action pair
    model = tf.keras.Model([state_input, action_input], outputs)

    return model

In [16]:
# Define the policy as the actor output plus exploration noise
# To be updated to noise dependent on the Bayesian model uncertainty

def policy(state, noise_object):
    """Choose an action for ``state``: actor output plus exploration noise, clipped to bounds.

    Args:
        state: batched state tensor fed to ``actor_model``.
        noise_object: callable returning the noise to add to the action.

    Returns:
        A one-element list holding the clipped action array.
    """
    actor_output = tf.squeeze(actor_model(state))
    noise = noise_object()

    print("Sampled Action: ",actor_output)
    # Adding noise to action
    noisy_action = actor_output.numpy() + noise
    print("Sampled Action after noise: ",noisy_action," Noise: ",noise)

    # We make sure action is within bounds
    return [np.squeeze(np.clip(noisy_action, lower_bound, upper_bound))]

In [17]:
# Defining training hyper-parameters and building the networks
#std_dev = np.array([0.3,0.3,0.3]).astype(np.float64)
# Standard deviation of the exploration noise, identical for every action dimension
std_dev=0.2
ou_noise = OUActionNoise(mean=np.zeros(num_actions), std_deviation=std_dev * np.ones(num_actions))

# Online networks, updated by Buffer.learn()
actor_model = get_actor()
critic_model = get_critic()

# Target networks, moved towards the online networks by update_target()
target_actor = get_actor()
target_critic = get_critic()

# Making the weights equal initially
target_actor.set_weights(actor_model.get_weights())
target_critic.set_weights(critic_model.get_weights())

# Learning rate for actor-critic models
critic_lr = 0.002
actor_lr = 0.001

critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# Number of episodes run by the training loop
total_episodes = 200

# Discount factor for future rewards
gamma = 0.3
# Used to update target networks
tau = 0.1

# Replay buffer with capacity 1000 and batch size 32
buffer = Buffer(1000, 32)

In [18]:
#Learning from VRM system
import tensorflow as tf
# NOTE(review): clear_session() runs here after the actor/critic networks were already
# built in the previous cell -- confirm resetting Keras state at this point is intended.
tf.keras.backend.clear_session()
# Same double-precision setting that was applied before the networks were defined
tf.keras.backend.set_floatx('float64')

%matplotlib inline
from matplotlib import pyplot as plt
from IPython.display import clear_output


plt.ion() ## Note this correction
fig=plt.figure()
x=[]
y=[]

# To store reward history of each episode
ep_reward_list = []
avg_ep_reward_list=[]
ep_run_length=[]

# To store average reward history of last few episodes
avg_reward_list = []
avg_episode_reward_list= []

# run_id numbers every simulation run across all episodes (it selects the CSV files),
# episode_run_id counts finished episodes.
run_id=0
episode_run_id=0


print("Object Shape Error Correction using Deep Reinforcement Learning ...")

for ep in range(total_episodes):

    # Getting initial state of the manufacturing system (takes the place of env.reset())
    print("Run ID for inital step: ", run_id)
    prev_state_cop,prev_state_kcc=get_initial_state(time_steps,run_id)
    print(prev_state_kcc)
    run_id=run_id+1
    episodic_reward = 0
    within_episode_run_id=0

    # NOTE(review): the episode only ends when the environment reports `done`;
    # there is no cap on the number of simulation runs per episode.
    while True:

        tf_prev_state = tf.expand_dims(tf.convert_to_tensor(prev_state_cop), 0)

        action = policy(tf_prev_state, ou_noise)

        print("Action: ",action)
        # policy() returns a one-element list; the action vector is its first entry.
        action_input=action[0]

        print(prev_state_kcc,action)
        # Receive state and reward from environment.
        state, reward, done, info=envior_step(action_input,prev_state_kcc,run_id,kpi_thres=0.3)

        print("Previous State: ", prev_state_kcc,"New State: ",action_input)

        print(info)
        # `reward` is the raw reward; the scaled reward signal is reported inside `info`.
        print("Reward: ", reward)

        buffer.record((prev_state_cop, action_input, reward, state))
        episodic_reward += reward

        buffer.learn()
        update_target(tau)

        print("Overall Run ID: ",run_id)
        print("Episode Run ID: ",episode_run_id)
        print("Within Episode Run ID: ",within_episode_run_id)

        #increase Run_ID
        run_id=run_id+1
        within_episode_run_id=within_episode_run_id+1

        # End this episode when `done` is True
        if done:
            break

        prev_state_cop = state

        # Keep the KCC state as a (1, n) matrix, the same layout get_initial_state()
        # returns, so the row-mean in the reward computation stays per-parameter.
        prev_state_kcc=np.atleast_2d(action_input)

        print("Previous KCC State: ",prev_state_kcc)

    #Appending Within List
    ep_reward_list.append(episodic_reward)
    avg_ep_reward_list.append(episodic_reward/within_episode_run_id)
    # NOTE(review): one more than the step count printed below -- presumably the
    # initial-state simulation is counted as a run; confirm.
    ep_run_length.append(within_episode_run_id+1)

    print("Episodic Reward: ",episodic_reward)
    print("Average Episodic Reward: ",episodic_reward/within_episode_run_id," No of Runs in Episode: ",within_episode_run_id)

    #increase Episode Run_ID
    episode_run_id=episode_run_id+1

    # Mean of last 40 episodes
    avg_reward = np.mean(ep_reward_list[-40:])
    avg_reward_episode = np.mean(avg_ep_reward_list[-40:])
    print("Episode * {} * Avg Reward is ==> {} Episodic AVg Reward ==> {}".format(ep, avg_reward,avg_reward_episode))
    avg_reward_list.append(avg_reward)
    avg_episode_reward_list.append(avg_reward_episode)

    # Live scatter of episodic reward against episode number
    x.append(episode_run_id)
    y.append(episodic_reward)
    plt.scatter(x,y)
    plt.show()
    plt.pause(0.05) #Note this correction

# Plotting graph
# Episodes versus Avg. Rewards
plt.plot(avg_reward_list)
plt.xlabel("Episode")
plt.ylabel("Avg. Epsiodic Reward")
plt.show()

Object Shape Error Correction using Deep Reinforcement Learning ...
Run ID for inital step:  0
Initiating Matlab Engine...
Initiating CAE simulations for run ID:  0
Simulation_Completed
Cop State:  [0.19713347 0.55804178 0.46510014 0.50982585 0.42249233 0.3412608
 0.25371952 0.37814959]
Cop State:  [0.19713347 0.55804178 0.46510014 0.50982585 0.42249233 0.3412608
 0.25371952 0.37814959]
[[ 2.22311814  2.58771803 -0.0608289 ]]
Sampled Action:  tf.Tensor([0.00054947 0.00135151 0.00145899], shape=(3,), dtype=float64)
Sampled Action after noise:  [-0.02279184 -0.01149278 -0.02362485]  Noise:  [-0.02334131 -0.01284429 -0.02508384]
Action:  [array([-0.02279184, -0.01149278, -0.02362485])]
[[ 2.22311814  2.58771803 -0.0608289 ]] [array([-0.02279184, -0.01149278, -0.02362485])]
Initiating Matlab Engine...
Initiating CAE simulations for run ID:  1
Simulation_Completed
Cop State:  [0.13691857 0.53078395 0.50896165 0.52264643 0.48465126 0.41016647
 0.18583143 0.36007133]
Cop State:  [0.13691857 0

MatlabExecutionError: 
  File C:\Program Files\MATLAB\R2019b\toolbox\matlab\iofun\csvwrite.m, line 47, in csvwrite

  File C:\Users\SINHA_S\Desktop\cross_member_datagen\Demos\Fixture simulation\Multi station\locator_halo\halo_reinforcement_learning.m, line 243, in halo_reinforcement_learning
Cannot open file C:\Users\SINHA_S\Desktop\cross_member_datagen\Demos\Fixture simulation\Multi station\locator_halo\ddpg_data\cop_drl_x_3.csv.


<Figure size 432x288 with 0 Axes>