In [55]:
import random
import numpy as np
import pandas as pd
import simpy
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
import math


import logging
# Route every log record to a file that is overwritten on each run.
logging.basicConfig(
    filename='manufacturing_RC_env9.log',
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s',
)

# ---------------------------------------------------------------------------
# Simulation parameters (durations are expressed in simulation time units)
# ---------------------------------------------------------------------------
NUM_MACHINES = 6                # number of parallel machines
BUFFER_CAPACITY = 10            # maximum number of parts held in the buffer
MEAN_PROCESSING_TIME = 83.70    # mean of the exponential processing time
MEAN_STARTUP_TIME = 30          # mean of the exponential startup time
MTBF = 3600                     # mean (processing) time between failures
MTTR = 30                       # mean time to repair
MEAN_PART_ARRIVAL_TIME = 20     # mean of the exponential inter-arrival time

# ---------------------------------------------------------------------------
# Per-state power draw (kW), as individual constants
# NOTE(review): these scalars are not referenced by the code in this notebook
# and differ from POWER_CONSUMPTION below (idle 9.3 vs 9, standby/failed
# 0.1 vs 0) -- confirm which set of values is the intended one.
# ---------------------------------------------------------------------------
POWER_STANDBY = 0.1
POWER_IDLE = 9.3
POWER_STARTUP = 10
POWER_BUSY = 15
POWER_FAILED = 0.1

# Per-state power draw (kW) looked up by machine state name.
POWER_CONSUMPTION = dict(
    busy=15,
    idle=9,
    startup=10,
    standby=0,
    failed=0,
)

print("Imports and constants are defined correctly.")

Imports and constants are defined correctly.


In [53]:
class ManufacturingEnv(gym.Env):
    """Gymnasium environment wrapping a SimPy model of parallel machines fed by one buffer.

    Observation (``float32`` vector of length ``NUM_MACHINES + 2``):
        ``[parts_in_buffer, state_code(machine 0), ..., state_code(machine N-1), total_energy]``
        where state codes are idle=0, busy=1, startup=2, standby=3, failed=4.

    Action (``Discrete(NUM_MACHINES + 1)``):
        the number of machines the controller should switch on during this step
        (see :meth:`actions_controller`).

    Each call to :meth:`step` advances the SimPy clock by one time unit.
    """

    # Simulation time at which an episode is reported as done.
    MAX_SIM_TIME = 10000

    def __init__(self):
        super().__init__()

        self.action_space = gym.spaces.Discrete(NUM_MACHINES + 1)

        # Size and dtype are derived from NUM_MACHINES and kept in sync with
        # get_state(), so that observation_space.contains(obs) holds.
        obs_size = NUM_MACHINES + 2
        self.observation_space = gym.spaces.Box(
            low=np.zeros(obs_size, dtype=np.float32),
            high=np.full(obs_size, np.inf, dtype=np.float32),
            dtype=np.float32,
        )

        # No SimPy processes are registered here; reset() must be called
        # before step() for the simulation to do anything.
        # NOTE(review): machines start in 'standby' here but in 'idle' after
        # reset(); both initial states are preserved as found -- confirm intent.
        self._init_simulation_state('standby')

    def _init_simulation_state(self, initial_machine_state):
        """Create a fresh SimPy environment, buffer, counters and machine records."""
        self.env = simpy.Environment()
        self.buffer = simpy.Store(self.env, capacity=BUFFER_CAPACITY)
        self.max_parts = 0
        self.produced_parts = 0
        self.Capacity = 0
        self.data = []
        self.CC = 0
        self.machines_energy = [0]
        self.energy = 0
        self.AON_produced = 0
        self.produced = 0

        self.machines = [{
            'id': i,
            'state': initial_machine_state,
            'total_energy': 0,
            'time_in_states': {'busy': 0, 'idle': 0, 'startup': 0, 'standby': 0, 'failed': 0},
            'cumprocessingtime': 0, 'state_energy_consumption': 0,
            'failuretime': random.expovariate(1.0 / MTBF)
        } for i in range(NUM_MACHINES)]

    def collect_data(self, machine):
        """Append one record describing the machine's current state to ``self.data``.

        The record holds the simulation time, the machine id and state, the
        duration stored in ``time_in_states`` for that state, the machine's
        current ``state_energy_consumption`` and power * duration for the state.
        Nothing is recorded if the state has no entry in ``time_in_states``.
        """
        current_state = machine['state']
        if current_state not in machine['time_in_states']:
            return

        duration = machine['time_in_states'][current_state]
        self.data.append({
            'time': self.env.now,
            'machine_id': machine['id'],
            'state': current_state,
            'time_in_preocess': duration,
            'state_energy': machine['state_energy_consumption'],
            'total_state_energy': POWER_CONSUMPTION[current_state] * duration,
        })

    def process(self, machine):
        """SimPy process driving one machine through its state machine.

        The machine's ``state`` is also changed from outside by
        :meth:`actions_controller`; this generator reacts to whatever state it
        finds at the top of each loop iteration.
        """
        yield self.env.timeout(1)

        while True:
            logging.info('-------------------- Process just started --------------------------')
            # Double quotes inside the f-string: reusing the outer quote
            # character is a SyntaxError before Python 3.12.
            logging.info(f'env time: {self.env.now}, machine id {machine["id"]} is {machine["state"]}')
            # Drawn on every pass, even when the machine is not busy, so the
            # random stream is consumed exactly as before.
            processing_time = random.expovariate(1.0 / MEAN_PROCESSING_TIME)

            if machine['state'] == 'idle':
                machine['time_in_states']['idle'] = 0.99
                machine['state_energy_consumption'] = POWER_CONSUMPTION['idle']
                self.collect_data(machine)

                yield self.env.timeout(0.99)

            elif machine['state'] == 'standby':
                machine['time_in_states']['standby'] = 0.99
                machine['state_energy_consumption'] = POWER_CONSUMPTION['standby']
                self.collect_data(machine)

                yield self.env.timeout(0.99)

            elif machine['state'] == 'startup':
                startup_time = random.expovariate(1.0 / MEAN_STARTUP_TIME)
                machine['time_in_states']['startup'] = startup_time
                machine['state_energy_consumption'] = POWER_CONSUMPTION['startup']
                self.collect_data(machine)

                yield self.env.timeout(startup_time)
                machine['state'] = 'busy'

            elif machine['state'] == 'busy':
                if machine['cumprocessingtime'] + processing_time <= machine['failuretime']:
                    # The part can be finished before the machine fails.
                    machine['cumprocessingtime'] = processing_time + machine['cumprocessingtime']
                    machine['time_in_states']['busy'] = processing_time
                    machine['state_energy_consumption'] = POWER_CONSUMPTION['busy']

                    # Blocks until a part is available in the buffer.
                    yield self.buffer.get()

                    self.produced_parts = self.max_parts - len(self.buffer.items)
                    self.produced = self.produced + 1

                    self.collect_data(machine)

                    yield self.env.timeout(processing_time)
                    machine['state'] = 'idle'

                else:
                    # The machine fails part-way through this job.
                    failure_time_remaining = machine['failuretime'] - machine['cumprocessingtime']
                    machine['time_in_states']['busy'] = failure_time_remaining
                    machine['state_energy_consumption'] = POWER_CONSUMPTION['busy']

                    yield self.buffer.get()

                    # NOTE(review): 'failuretime' is not re-sampled after a
                    # failure, so every later failure occurs after the same
                    # amount of processing time -- confirm this is intended.
                    machine['cumprocessingtime'] = 0
                    self.collect_data(machine)

                    yield self.env.timeout(failure_time_remaining)
                    machine['state'] = 'failed'
                    # Fix: bill the repair period at the 'failed' power level
                    # instead of leaving the busy power draw in place.
                    machine['state_energy_consumption'] = POWER_CONSUMPTION['failed']

                    repair_time = random.expovariate(1.0 / MTTR)
                    machine['time_in_states']['failed'] = repair_time
                    yield self.env.timeout(repair_time)
                    self.collect_data(machine)
                    machine['state'] = 'idle'

    def part_arrival(self):
        """SimPy process that puts parts into the buffer at exponential intervals."""
        yield self.env.timeout(1)

        while True:
            if len(self.buffer.items) < BUFFER_CAPACITY:
                yield self.env.timeout(np.random.exponential(MEAN_PART_ARRIVAL_TIME))
                logging.info(f'Part just arrived, env time: {self.env.now}')
                yield self.buffer.put(f"{self.env.now}")
                self.produced_parts = 0
                self.Capacity = self.Capacity + 1
                self.max_parts = len(self.buffer.items)
                self.AON_produced = self.AON_produced + 1
            else:
                # Buffer full: poll again after one time unit.
                self.max_parts = len(self.buffer.items)
                yield self.env.timeout(1)

    def get_available_machines(self):
        """Return how many machines are currently 'idle' or 'standby'."""
        return sum(1 for machine in self.machines if machine['state'] in ('idle', 'standby'))

    def adjust_action_space(self):
        """Shrink ``self.action_space`` to the number of idle/standby machines (+1 for "do nothing").

        NOTE(review): the action passed to :meth:`step` is not validated
        against this reduced space; it only shows up in the log output.
        """
        available_machines = self.get_available_machines()

        self.action_space = gym.spaces.Discrete(available_machines + 1)
        logging.info(f'permited actions: {self.action_space}, env time: {self.env.now}')

    def step(self, action):
        """Apply ``action``, advance the simulation by one time unit and return the transition.

        Returns:
            ``(observation, reward, done, truncated, info)`` where ``done``
            becomes True once the simulation clock reaches ``MAX_SIM_TIME``,
            ``truncated`` is always False and ``info`` is an empty dict.
        """
        # The controller only acts once the SimPy processes have started (t >= 1).
        if self.env.now >= 1:
            logging.info(f'------------------- Control just activated ---------------------------')
            logging.info(f'permited action {self.action_space} and action: {action}, env time: {self.env.now}, buffer:{len(self.buffer.items)}')

            self.adjust_action_space()
            self.actions_controller(action)

        # Advance the simulation by a fixed time step of one time unit.
        self.env.run(until=self.env.now + 1)

        logging.info(f'-------------------------------------------------- Environment: {self.env.now}--------------------------------------------------------')

        logging.info(f'--------------------- Observations -------------------------')
        observation = self.get_state()

        # The reward uses the full-precision total computed by get_state()
        # rather than the float32 copy stored in the observation.
        self.machines_energy.append(self.energy)
        current_machines_energy = self.machines_energy[-1]
        previous_machines_energy = self.machines_energy[-2]
        total_reward = self.calculate_reward(current_machines_energy, previous_machines_energy)

        logging.info(f'state info before control: {observation}, env time: {self.env.now}')
        logging.info(f'----------------------------------------------')

        done = self.env.now >= self.MAX_SIM_TIME
        truncated = False

        return observation, total_reward, done, truncated, {}

    def actions_controller(self, action):
        """Switch machines on or off according to ``action``.

        ``action`` is the number of machines to activate. Walking the machines
        in id order: while activations remain, an idle machine becomes busy and
        a standby machine enters startup; once none remain, idle machines are
        sent to standby. Busy, startup and failed machines are left untouched.
        """
        # int() gives a private copy: an in-place ``-=`` on a 0-d ndarray would
        # otherwise modify the caller's action object.
        active_machines = int(action)
        self.CC = 0

        for machine in self.machines:
            logging.info(f'Machine under investigation: {machine["id"]} and the state {machine["state"]}, env time: {self.env.now}, capacity: {self.Capacity}')

            if machine['state'] == 'idle' and active_machines > 0:
                self.CC = 1
                machine['state'] = 'busy'
                active_machines -= 1
                self.Capacity -= 1
                logging.info(f"Machine id {machine['id']} mode changed from idle to {machine['state']}, actions left: {active_machines}, env time: {self.env.now}")

            elif machine['state'] == 'idle' and active_machines == 0:
                self.CC = 1
                machine['state'] = 'standby'
                logging.info(f"Machine id {machine['id']} mode changed from idle to {machine['state']}, actions left: {active_machines}, env time: {self.env.now}")

            elif machine['state'] == 'standby' and active_machines > 0:
                self.CC = 1
                machine['state'] = 'startup'
                active_machines -= 1
                self.Capacity -= 1
                logging.info(f"Machine id {machine['id']} mode changed from standby to {machine['state']}, actions left: {active_machines}, env time: {self.env.now}")

    def get_state(self):
        """Return the current observation and refresh ``self.energy``.

        ``self.energy`` is set to the sum of every machine's ``total_energy``.
        The returned array is ``float32`` to match ``observation_space``.
        """
        parts_in_buffer = len(self.buffer.items)

        self.energy = 0
        for machine in self.machines:
            self.energy += machine['total_energy']

        logging.info(f"total energy consumption {self.energy}, env time: {self.env.now}")

        state_mapping = {'idle': 0, 'busy': 1, 'startup': 2, 'standby': 3, 'failed': 4}
        machine_states = [state_mapping[machine['state']] for machine in self.machines]
        observation = np.array([parts_in_buffer] + machine_states + [self.energy], dtype=np.float32)

        return observation

    def consumption_calculation(self):
        """SimPy process that periodically adds each machine's current power draw to its ``total_energy``."""
        yield self.env.timeout(1)

        while True:
            if self.env.now > 1:
                logging.info(f"---------------Energy consumption calculation just started---------------")

                for machine in self.machines:
                    machine['total_energy'] = machine['total_energy'] + machine['state_energy_consumption']

                    logging.info(f"energy consumption of machine id {machine['id']} is {machine['total_energy']}, env time: {self.env.now}")

            yield self.env.timeout(0.99)

    def calculate_reward(self, current_machines_energy, previous_machines_energy):
        """Return the step reward from the change in total energy.

        The productivity term is computed and logged, but carries a weight of
        zero in the returned total.
        """
        logging.info(f"produced parts {self.produced}, max_parts {self.AON_produced}")
        productivity_reward = np.exp((self.produced) / (self.AON_produced + 0.000000001)) / math.e
        consumption_reward = np.exp(-0.01 * (current_machines_energy - (previous_machines_energy + 0.000000001)))
        total_reward = 0 * productivity_reward + consumption_reward
        logging.info(f"consumption reward is {consumption_reward},productivity reward is {productivity_reward}, total reward is {total_reward}, env time: {self.env.now}")

        return total_reward

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(initial_observation, info)``.

        Args:
            seed: optional seed applied to Gymnasium's RNG, ``numpy.random``
                and ``random``.
            options: accepted for Gymnasium API compatibility; unused.
        """
        super().reset(seed=seed)

        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)

        # adjust_action_space() may have shrunk the space in the previous episode.
        self.action_space = gym.spaces.Discrete(NUM_MACHINES + 1)

        # Reinitialize all environment variables and the machines.
        self._init_simulation_state('idle')

        # Restart the processes.
        for machine in self.machines:
            self.env.process(self.process(machine))
        self.env.process(self.part_arrival())
        self.env.process(self.consumption_calculation())

        initial_state = self.get_state()
        info = {}

        return initial_state, info

    @staticmethod
    def LogHandel():
        """Close and detach every handler on the root logger, releasing the log file."""
        for handler in logging.root.handlers[:]:
            handler.close()
            logging.root.removeHandler(handler)


In [54]:
# Create the environment (wrapped in DummyVecEnv for stable_baselines3 compatibility)
MEnv = ManufacturingEnv()
ENV = DummyVecEnv([lambda: MEnv])

# Create the DQN model
dqn_model = DQN("MlpPolicy", ENV,
                verbose=1, learning_rate=0.001, buffer_size=10000, learning_starts=1000,
                batch_size=32, gamma=0.99, target_update_interval=500, exploration_fraction=0.1,
                exploration_final_eps=0.01, train_freq=4, gradient_steps=1)

# Train the model
dqn_model.learn(total_timesteps=10000)

# Save the model
dqn_model.save("dqn_manufacturing_env")

# Load the trained model (optional, for testing later)
dqn_model = DQN.load("dqn_manufacturing_env")

# Test the trained model on the raw (non-vectorised) environment.
# reset() returns (observation, info), so unpack it.
obs, info = MEnv.reset()

done = False
truncated = False
while not (done or truncated):
    # Use the trained model to choose the action for the current observation.
    action, _states = dqn_model.predict(obs, deterministic=True)
    logging.info(f"Predicted actions {action}, env {MEnv.env.now}")

    # Step the environment exactly once per iteration. Stepping the
    # DummyVecEnv as well would advance the same simulation a second time,
    # fail on the scalar action, and auto-reset the env (clearing MEnv.data).
    obs, reward, done, truncated, info = MEnv.step(int(action))
    logging.info(f"Observations {obs}, reward {reward}, env {MEnv.env.now}")

# Data collection
Data = pd.DataFrame(MEnv.data)
Data.to_excel("Data.xlsx", sheet_name="Data", index=False)

# Log handler
MEnv.LogHandel()

Using cpu device
