In [48]:
### DATA-DRIVEN PROGNOSTICS I ###
### GENERATE DATA FOR DATA-DRIVEN MODEL ###

import copy
import os
import random
import warnings

import pandas as pd
from prog_models.models import BatteryCircuit

# Ignore warnings when machine exceeds its end of life
warnings.filterwarnings("ignore")


def produce_model(machine, states, action):
    """Simulate `machine` from a given state under a constant load.

    The model's initial state (``machine.parameters['x0']``) is overwritten with
    ``states``. The model is then simulated with ``machine.simulate_to(100, ...)``
    using a timestep of ``dt = 2`` while ``action`` is applied as the constant
    input ``'i'``. Note that 100 is the argument handed to ``simulate_to``, not a
    number of individual steps (presumably a simulation time in seconds -- verify
    against the prog_models documentation).

    Parameters:
        machine   machine model from the prog_models package (e.g. BatteryCircuit)
        states    current (health) state of the model, used as the initial state
        action    loading of the machine (input 'i') during the simulated period

    Returns:
        Tuple ``(health, states, t, v)``:
            health   last 'EOD' event-state value, rounded to two decimals
            states   last simulated state (pass it to the next call to continue)
            t        output 't' at the last saved point
            v        output 'v' at the last saved point
    """
    # Define load of battery: constant, independent of time and state
    def future_loading(t, x=None):
        return {'i': action}

    # Set current state of machine
    machine.parameters['x0'] = states

    options = {
        'save_freq': 100,  # Frequency at which results are saved
        'dt': 2  # Timestep
    }
    # Use a distinct name for the simulated states so the `states` argument is not shadowed
    (_, _, sim_states, outputs, event_states) = machine.simulate_to(100, future_loading, **options)
    health = event_states[-1]['EOD']
    return (round(health, 2), sim_states[-1], outputs[-1]['t'], outputs[-1]['v'])
def reset_states(machine):
    """Return a fresh copy of the machine's default initial state.

    Example for Battery: {'tb': 18.95, 'qb': 7856.3254, 'qcp': 0, 'qcs': 0}.

    A deep copy is returned instead of the object stored in
    ``machine.default_parameters['x0']``: callers assign the result to
    ``machine.parameters['x0']``, so handing out the shared default itself would
    let any in-place change leak into the defaults used by every later reset.

    Parameters:
        machine   object exposing ``default_parameters['x0']``

    Returns:
        Independent copy of the default initial state.
    """
    return copy.deepcopy(machine.default_parameters['x0'])

# --- Configuration of the data generation -------------------------------------
SEED = 42                      # fixed seed so the generated dataset is reproducible
N_SAMPLES = int(1e4)           # number of simulated periods (= rows of the dataset)
ACTIONS = (0, 1, 2, 3, 4)      # load intensities the random policy can choose from
PROGRESS_EVERY = 10000         # print progress every this many iterations
OUTPUT_PATH = 'diagnostics/data'

# Dedicated generator: reproducible without touching the global `random` state
rng = random.Random(SEED)

battery = BatteryCircuit()
states = reset_states(battery)
reset_counter = 0
rows = []
for i in range(N_SAMPLES):
    # If asset failed last period (or on the very first period), reset all historical values
    if reset_counter == 0:
        t = v = t_1 = v_1 = t_2 = v_2 = t_3 = v_3 = 0
    # Shift history by one time period (right-hand side is evaluated first, so old values are used)
    t_3, v_3, t_2, v_2, t_1, v_1 = t_2, v_2, t_1, v_1, t, v

    # Increment reset_counter (number of periods since the last reset)
    reset_counter += 1
    # Compute new health, states, t, and v using last battery state and a random new action
    health, states, t, v = produce_model(machine=battery, states=states, action=rng.choice(ACTIONS))

    if health <= 0:
        # Reset battery states to initialize battery for next produce_model call
        states = reset_states(battery)
        # Initialize reset_counter so the history is cleared at the start of the next period
        reset_counter = 0
        # Sometimes produce_model returns weird or negative values as the end of life is exceeded
        # Here, we just simply set it to zero to not confuse a later learner
        health = 0

    # One row per period: current outputs, three lagged output pairs, and the health label
    rows.append([t, v, t_1, v_1, t_2, v_2, t_3, v_3, health])

    # Print progress periodically
    if (i + 1) % PROGRESS_EVERY == 0:
        print("Iteration", i + 1)

# Transform two-dim list to dataframe
dataset = pd.DataFrame(rows, columns=['t', 'v', 't_1', 'v_1', 't_2', 'v_2', 't_3', 'v_3', 'health'])
# Save it as pickle; create the target directory first so a fresh checkout does not fail here
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
dataset.to_pickle(OUTPUT_PATH)

Iteration 10000


In [1]:
### DATA-DRIVEN PROGNOSTICS II ###
### FIT AND TEST MODEL ###
from sklearn import tree, linear_model, kernel_ridge, svm, neighbors, gaussian_process, ensemble, neural_network
import pandas as pd
from sklearn.model_selection import cross_val_score
import pickle

# Load the dataset written by the data-generation cell
dataset = pd.read_pickle('diagnostics/data')
# Features: current outputs plus three lagged (t, v) pairs; target: health
X = dataset[['t', 'v', 't_1', 'v_1', 't_2', 'v_2', 't_3', 'v_3']]
y = dataset['health']
# Alternative learners that can be added to the list below for comparison:
#learner = [linear_model.LinearRegression(), linear_model.Ridge(), linear_model.Lasso(), linear_model.BayesianRidge(), tree.DecisionTreeRegressor(), # Fast
#        kernel_ridge.KernelRidge(), svm.SVR(), neighbors.KNeighborsRegressor(), gaussian_process.GaussianProcessRegressor(), # Slow
#        ensemble.RandomForestRegressor(), neural_network.MLPRegressor()] # Slow
learner = [ensemble.RandomForestRegressor()]
# 5-fold cross-validation score for every candidate
for reg in learner:
    print(reg, ":", cross_val_score(reg, X, y, cv=5)) # default scoring R2
# Fit the first learner on the full dataset; this is the model stored for the RL environment
model = learner[0].fit(X, y)
# Context manager guarantees the file is flushed and closed once the model is written
with open('diagnostics/model', 'wb') as model_file:
    pickle.dump(model, model_file)

RandomForestRegressor() : [0.92967429 0.94783845 0.94303161 0.94675743 0.93744603]


In [7]:
### REINFORCEMENT LEARNING I ###
### TRAIN, SAVE, EVALUATE MODEL ###

import gym
import stable_baselines3 as sb
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.evaluation import evaluate_policy
import pickle

# Load diagnostics model from disk.
# NOTE: unpickling can execute arbitrary code -- only load model files written by this notebook.
with open('diagnostics/model', 'rb') as model_file:
    diag_model = pickle.load(model_file)

# Initiate training environment
# NOTE(review): 'Production-v0' must already be registered with gym; the registration
# is not part of the cells shown here -- confirm it is imported before this cell runs.
env = gym.make('Production-v0', diag_model=diag_model)
# Separate environment for the periodic evaluation: if the callback shared `env` with the
# learner, every evaluation would reset and step the environment in the middle of training.
eval_env = gym.make('Production-v0', diag_model=diag_model)
# Callback for best model (evaluated every 1000 steps, best checkpoint stored in ./callback/)
best_callback = EvalCallback(eval_env, best_model_save_path='./callback/',
                             log_path='./callback/', eval_freq=1000,
                             deterministic=True, render=False)

model = sb.DQN('MlpPolicy', env, tensorboard_log="./tensorboard/", gamma=0.99, learning_rate=0.01)
# total_timesteps is an integer count of environment steps, so cast the float literal
model.learn(total_timesteps=int(2e6), tb_log_name="DQN_DIAG_model", callback=best_callback)
model.save("DQN_DIAG_model")

# Evaluate the agent
evaluate_policy(model, model.get_env(), n_eval_episodes=10)

Eval num_timesteps=1000, episode_reward=-2003.80 +/- 252.23
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=2000, episode_reward=-1990.40 +/- 225.91
Episode length: 100.00 +/- 0.00
New best mean reward!


(-1915.1, 208.74695207355722)

In [8]:
### REINFORCEMENT LEARNING II ###
### LOAD MODEL ###
import gym
import stable_baselines3 as sb
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Settings of this cell
ENV_ID = 'Production-v0'
BEST_MODEL_PATH = './callback/best_model'
N_EVAL_EPISODES = 10

# Fresh environment for the stored agent.
# NOTE(review): no diag_model is passed here, unlike in the training cell -- presumably
# the environment falls back to a default; confirm this is intended.
env = gym.make(ENV_ID)

# Best Model (checkpoint written to ./callback/ during training)
model = DQN.load(BEST_MODEL_PATH, env=env)
# Last Model
# NOTE(review): the training cell shown above saves "DQN_DIAG_model"; confirm 'DQN_1_model' exists.
#model = DQN.load('DQN_1_model', env = env)

# Evaluate the agent on the environment attached to the loaded model
wrapped_env = model.get_env()
evaluate_policy(model, wrapped_env, n_eval_episodes=N_EVAL_EPISODES)

(483.5, 19.50512753098528)

In [9]:
### REINFORCEMENT LEARNING III ###
### TRY MODEL ###
import pandas as pd

STORE_COLUMNS = ['action', 'health', 'inventory', 'sp_inventory', 'reward', 'done', 'next_order']


def _as_row(action, obs, reward, done):
    """Build one log row in STORE_COLUMNS order from a step's action, observation, reward and done flag."""
    return [action, obs[0], obs[2], obs[3], reward, done, obs[1]]


# Roll out a single episode with the deterministic policy, logging every step.
# Uses `env` and `model` from the previous cell.
store = []
obs = env.reset()
done = False
# Initial row: observation after reset, with action and reward set to 0
store.append(_as_row(0, obs, 0, done))
while not done:
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    store.append(_as_row(action, obs, reward, done))

store_df = pd.DataFrame(store, columns=STORE_COLUMNS)
print("Cumulative reward is: ", sum(store_df['reward']))

Cumulative reward is:  498.0
