In [4]:
### REINFORCEMENT LEARNING I ###
### TRAIN, SAVE, EVALUATE MODEL ###

import gym
import stable_baselines3 as sb
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.evaluation import evaluate_policy
import pickle

# Number of environment steps to train for (an integer step count, not a float)
total_timesteps = 2_000_000

# Load diagnostics/prognostics model from disk and initiate environment

print('Diagnostics-based RL')
# NOTE: pickle can execute arbitrary code while loading; only load model files
# from a trusted source. The context manager closes the file handle afterwards.
with open('diagnostics/model_pn1_mn0', 'rb') as model_file:
    diag_model = pickle.load(model_file)
env = gym.make('Production-v0', diag_model = diag_model, spare_part = False, process_noise = 0.1)
# Dedicated evaluation environment: evaluating on the training env object would
# reset it in the middle of a training rollout every `eval_freq` steps.
eval_env = gym.make('Production-v0', diag_model = diag_model, spare_part = False, process_noise = 0.1)
# Callback that periodically evaluates the agent and saves the best model so far
best_callback = EvalCallback(eval_env, best_model_save_path='./callback/IFAC',
                        log_path='./callback/IFAC', eval_freq=1000,
                        deterministic=True, render=False)
model = sb.PPO('MlpPolicy', env, tensorboard_log="./tensorboard/IFAC/")
model.learn(total_timesteps=total_timesteps, tb_log_name='PPO', callback = best_callback)

Diagnostics-based RL
Eval num_timesteps=1000, episode_reward=-2951.34 +/- 26.97
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=2000, episode_reward=-2957.67 +/- 20.31
Episode length: 100.00 +/- 0.00
Eval num_timesteps=3000, episode_reward=-875.91 +/- 58.36
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=4000, episode_reward=-905.45 +/- 40.62
Episode length: 100.00 +/- 0.00
Eval num_timesteps=5000, episode_reward=-1141.70 +/- 20.07
Episode length: 100.00 +/- 0.00
Eval num_timesteps=6000, episode_reward=-1125.10 +/- 34.72
Episode length: 100.00 +/- 0.00
Eval num_timesteps=7000, episode_reward=-872.19 +/- 34.93
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=8000, episode_reward=-856.76 +/- 26.17
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=9000, episode_reward=-996.51 +/- 16.59
Episode length: 100.00 +/- 0.00
Eval num_timesteps=10000, episode_reward=-953.97 +/- 22.23
Episode length: 10

<stable_baselines3.ppo.ppo.PPO at 0x20a0dfe9d60>

In [5]:
### REINFORCEMENT LEARNING II ###
### LOAD MODEL ###
import gym
import stable_baselines3 as sb
from stable_baselines3 import DQN
from stable_baselines3 import A2C
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
import pickle

# Load the pickled diagnostics model.
# NOTE: pickle can execute arbitrary code while loading; only load model files
# from a trusted source. The context manager closes the file handle afterwards.
with open('diagnostics/model_pn1_mn0', 'rb') as model_file:
    diag_model = pickle.load(model_file)
env = gym.make('Production-v0', diag_model = diag_model, spare_part = False, process_noise = 0.1)
# Restore the best model saved by the evaluation callback and attach the environment
model = PPO.load('./callback/IFAC/best_model', env = env)

# Evaluate the agent over 10 episodes and print what evaluate_policy returns
print(evaluate_policy(model, model.get_env(), n_eval_episodes=10))

(215.10500000000002, 148.42888453734332)


In [35]:
### REINFORCEMENT LEARNING III ###
### TRY AND EVALUATE MY MODEL ###
import pandas as pd
from stable_baselines3 import PPO
import gym
import pickle



# Load the pickled diagnostics model.
# NOTE: pickle can execute arbitrary code while loading; only load model files
# from a trusted source. The context manager closes the file handle afterwards.
with open('diagnostics/model_pn1_mn0', 'rb') as model_file:
    diag_model = pickle.load(model_file)
env = gym.make('Production-v0', diag_model = diag_model, spare_part = False, process_noise = 0.1)
model = PPO.load('./callback/IFAC/best_model', env = env)
# Set iterations
iterations = 1
# Running totals over all episodes. They are kept in a plain dict because
# chained assignment (`result_df.iloc[0]['RM'] = ...`) only updates the frame
# when pandas happens to return a view, and it can truncate float rewards when
# writing into integer columns.
result_columns = ['RM', 'PM', 'Inventory', 'Reward', 'Upper']
totals = {column: 0 for column in result_columns}
for i in range(iterations):
    # Initialize episode
    store = []
    obs = env.reset()
    done = False
    store.append([0, obs[0], env.breakdown, obs[2], 0, done, obs[1]])
    # Compute one episode
    while not done:
        # Get best action for state
        action, _state = model.predict(obs, deterministic=True)
        # Compute next state
        obs, reward, done, info = env.step(action)
        # Store results of this episode
        store.append([action, obs[0], env.breakdown, obs[2], reward, done, obs[1]])
    eps_df = pd.DataFrame(store, columns=['action', 'health_rul', 'breakdown', 'inventory', 'reward', 'done', 'next_order'])
    # Reactive maintenance interventions: number of steps whose breakdown flag is True
    totals['RM'] += eps_df['breakdown'].eq(True).sum()
    # Preventive maintenance interventions: number of steps where action 5 was taken
    totals['PM'] += (eps_df['action'] == 5).sum()
    # Inventory summed over all steps of the episode
    totals['Inventory'] += eps_df['inventory'].sum()
    # Reward summed over all steps of the episode
    totals['Reward'] += eps_df['reward'].sum()
    # Reward with no costs and fulfillment of all orders (the last row's order is excluded)
    totals['Upper'] += eps_df['next_order'].iloc[:-1].sum() * env.order_r

# Single-row summary frame holding the totals over all episodes
result_df = pd.DataFrame([totals], columns=result_columns)

print("The average number of reactive maintenance interventions per episode is: ", result_df.loc[0, 'RM']/iterations)
print("The average number of preventive maintenance interventions per episode is: ", result_df.loc[0, 'PM']/iterations)
print("The average sum of inventory per episode is: ", result_df.loc[0, 'Inventory']/iterations)
print("The average reward per episode is: ", result_df.loc[0, 'Reward']/iterations)
print("The average upper bound per episode is: ", result_df.loc[0, 'Upper']/iterations)


The average number of reactive maintenance interventions per episode is:  0.0
The average number of preventive maintenance interventions per episode is:  3.0
The average sum of inventory per episode is:  79.0
The average reward per episode is:  271.0
The average upper bound per episode is:  406.0


In [36]:
# Write the per-step episode log currently held in `eps_df` (as left in the
# kernel by the previously executed evaluation cell) to an Excel workbook.
episode_log_path = "visuals/IFAC/episode1.xlsx"
eps_df.to_excel(episode_log_path)

In [22]:
### REINFORCEMENT LEARNING IIIa ###
### EVALUATE REACTIVE MODEL ###

import pandas as pd
import numpy as np
import gym

env = gym.make('Production-v0', spare_part = False)
# Set iterations
iterations = 100

# Running totals over all episodes. They are kept in a plain dict because
# chained assignment (`result_df.iloc[0]['RM'] = ...`) only updates the frame
# when pandas happens to return a view, and it can truncate float rewards when
# writing into integer columns.
result_columns = ['RM', 'PM', 'Inventory', 'Reward', 'Upper']
totals = {column: 0 for column in result_columns}

for i in range(iterations):
    # Initialize episode
    store = []
    obs = env.reset()
    done = False
    store.append([0, obs[0], env.breakdown, obs[2], 0, done, obs[1]])
    # Compute one episode
    while not done:
        # Reactive policy: the action is simply the rounded second observation
        # entry (logged below as 'next_order'); action 5 is never chosen on purpose
        action = round(obs[1])
        # Compute next state
        obs, reward, done, info = env.step(action)
        # Store results of this episode
        store.append([action, obs[0], env.breakdown, obs[2], reward, done, obs[1]])
    eps_df = pd.DataFrame(store, columns=['action', 'health', 'breakdown', 'inventory', 'reward', 'done', 'next_order'])
    # Reactive maintenance interventions: number of steps whose breakdown flag is True
    totals['RM'] += eps_df['breakdown'].eq(True).sum()
    # Preventive maintenance interventions: number of steps where action 5 was taken
    totals['PM'] += (eps_df['action'] == 5).sum()
    # Cut df after last breakdown (keeps only the first row when no breakdown occurred).
    # NOTE: eps_df_trim is not used anywhere else in this cell; no mean time
    # between failures is actually computed from it here.
    eps_df_trim = eps_df.iloc[:(np.where(eps_df['breakdown'].eq(True), eps_df.index, 0).max()+1)]
    # Inventory summed over all steps of the episode
    totals['Inventory'] += eps_df['inventory'].sum()
    # Reward summed over all steps of the episode
    totals['Reward'] += eps_df['reward'].sum()
    # Reward with no costs and fulfillment of all orders (the last row's order is excluded)
    totals['Upper'] += eps_df['next_order'].iloc[:-1].sum() * env.order_r

# Single-row summary frame holding the totals over all episodes
result_df = pd.DataFrame([totals], columns=result_columns)

print("The average number of reactive maintenance interventions per episode is: ", result_df.loc[0, 'RM']/iterations)
print("The average number of preventive maintenance interventions per episode is: ", result_df.loc[0, 'PM']/iterations)
print("The average sum of inventory per episode is: ", result_df.loc[0, 'Inventory']/iterations)
print("The average reward per episode is: ", result_df.loc[0, 'Reward']/iterations)
print("The average upper bound per episode is: ", result_df.loc[0, 'Upper']/iterations)

The average number of reactive maintenance interventions per episode is:  2.0
The average number of preventive maintenance interventions per episode is:  0.0
The average sum of inventory per episode is:  73.75
The average reward per episode is:  -768.58
The average upper bound per episode is:  400.42


In [27]:
### REINFORCEMENT LEARNING IIIb ###
### EVALUATE TIME-BASED PREVENTIVE MODEL ###

import pandas as pd
import numpy as np
import gym

env = gym.make('Production-v0', spare_part = False)
# Maintenance intervals to evaluate; use e.g. range(0, 7) to sweep several values
kay = range (31,32)
# Set iterations
iterations = 100

result_columns = ['RM', 'PM', 'Inventory', 'Reward', 'Upper']

for k in kay:
    mtbf = k
    # Running totals over all episodes for this interval. They are kept in a
    # plain dict because chained assignment (`result_df.iloc[0]['RM'] = ...`)
    # only updates the frame when pandas happens to return a view, and it can
    # truncate float rewards when writing into integer columns.
    totals = {column: 0 for column in result_columns}

    for i in range(iterations):
        # Initialize episode
        store = []
        obs = env.reset()
        done = False
        store.append([0, obs[0], env.breakdown, obs[2], 0, done, obs[1]])
        # Compute one episode
        while not done:
            # At period of mtbf: maintain
            if env.scheduled_maintenance_counter == mtbf:
                action = 5
            # Else: action = order
            else:
                action = round(obs[1])
            # Compute next state
            obs, reward, done, info = env.step(action)
            # Store results of this episode
            store.append([action, obs[0], env.breakdown, obs[2], reward, done, obs[1]])
        eps_df = pd.DataFrame(store, columns=['action', 'health', 'breakdown', 'inventory', 'reward', 'done', 'next_order'])
        # Reactive maintenance interventions: number of steps whose breakdown flag is True
        totals['RM'] += eps_df['breakdown'].eq(True).sum()
        # Preventive maintenance interventions: number of steps where action 5 was taken
        totals['PM'] += (eps_df['action'] == 5).sum()
        # Cut df after last breakdown (keeps only the first row when no breakdown occurred).
        # NOTE: eps_df_trim is not used anywhere else in this cell; no mean time
        # between failures is actually computed from it here.
        eps_df_trim = eps_df.iloc[:(np.where(eps_df['breakdown'].eq(True), eps_df.index, 0).max()+1)]
        # Inventory summed over all steps of the episode
        totals['Inventory'] += eps_df['inventory'].sum()
        # Reward summed over all steps of the episode
        totals['Reward'] += eps_df['reward'].sum()
        # Reward with no costs and fulfillment of all orders (the last row's order is excluded)
        totals['Upper'] += eps_df['next_order'].iloc[:-1].sum() * env.order_r

    # Single-row summary frame holding the totals over all episodes of this interval
    result_df = pd.DataFrame([totals], columns=result_columns)

    print("\n", "Maintenance Interval: ", mtbf)
    print("The average number of reactive maintenance interventions per episode is: ", result_df.loc[0, 'RM']/iterations)
    print("The average number of preventive maintenance interventions per episode is: ", result_df.loc[0, 'PM']/iterations)
    print("The average sum of inventory per episode is: ", result_df.loc[0, 'Inventory']/iterations)
    print("The average reward per episode is: ", result_df.loc[0, 'Reward']/iterations)
    print("The average upper bound per episode is: ", result_df.loc[0, 'Upper']/iterations)


 Maintenance Interval:  31
The average number of reactive maintenance interventions per episode is:  0.02
The average number of preventive maintenance interventions per episode is:  2.98
The average sum of inventory per episode is:  73.49
The average reward per episode is:  161.86
The average upper bound per episode is:  402.65
