In [2]:
### DATA-DRIVEN PROGNOSTICS I ###
### GENERATE DATA FOR DATA-DRIVEN MODEL ###

import random
from prog_models.models import BatteryCircuit
import pandas as pd
import warnings

warnings.filterwarnings("ignore")

def produce_model(machine, states, action): # TODO: Allow seed
    """Run one simulation segment of `machine` under a constant load.

    Parameters
    ----------
    machine
        Model object exposing a ``parameters`` mapping and a ``simulate_to``
        method (a ``BatteryCircuit`` in this notebook).
    states
        Initial state for this segment. It is written to
        ``machine.parameters['x0']``, so the machine is modified as a side effect.
    action
        Value returned as the load ``'i'`` for every time the simulator asks for it.

    Returns
    -------
    tuple
        ``(rul, last_state, t, v)``: the final ``'EOD'`` event-state value
        rounded to two decimals, the last saved state, and the ``'t'`` and
        ``'v'`` entries of the last saved output.
        NOTE(review): the value reported as ``rul`` is the 'EOD' event state as
        returned by the simulator -- confirm its units/meaning against the
        prog_models documentation.
    """

    def constant_load(t, x=None):
        # The load does not depend on time or state: always the chosen action.
        return {'i': action}

    # Start the simulation from the caller-supplied state.
    machine.parameters['x0'] = states

    # First argument is 100 and dt is 2; presumably 100 is a time horizon rather
    # than a step count -- TODO confirm. save_freq=100 controls how often
    # results are saved.
    _times, _inputs, state_history, output_history, event_history = machine.simulate_to(
        100, constant_load, save_freq=100, dt=2
    )

    final_output = output_history[-1]
    remaining = round(event_history[-1]['EOD'], 2)
    return (remaining, state_history[-1], final_output['t'], final_output['v'])
def reset_states(machine):
    """Return a fresh copy of the machine's default initial state.

    Parameters
    ----------
    machine
        Object exposing ``default_parameters['x0']``, e.g.
        ``{'tb': 18.95, 'qb': 7856.3254, 'qcp': 0, 'qcs': 0}`` for the battery.

    Returns
    -------
    A deep copy of ``machine.default_parameters['x0']``. A copy is returned
    (instead of the stored object itself) because callers install the result
    as the machine's current state; handing out the shared default would let
    any in-place modification of that state silently change the defaults used
    by every later reset.
    """
    import copy  # local import keeps this definition self-contained

    return copy.deepcopy(machine.default_parameters['x0'])

# --- Build the (t, v) -> rul training set by repeatedly simulating the battery ---

N_SAMPLES = int(1e3)            # number of simulated segments (rows in the dataset)
PROGRESS_EVERY = 100            # print progress every this many iterations (must be <= N_SAMPLES to be visible)
LOAD_LEVELS = (0, 1, 2, 3, 4)   # candidate values for the load 'i' passed to produce_model
SEED = 42                       # fixed seed so the generated dataset is reproducible

# Dedicated generator: reproducible draws without touching the global `random` state.
rng = random.Random(SEED)

battery = BatteryCircuit()
states = reset_states(battery)
records = []
for i in range(N_SAMPLES):
    # Continue from the previous segment's final state under a randomly chosen load.
    rul, states, t, v = produce_model(machine=battery, states=states, action=rng.choice(LOAD_LEVELS))
    if rul <= 0:
        # Battery exhausted: clamp the label at 0 and restart from the initial state.
        states = reset_states(battery)
        rul = 0
    records.append([t, v, rul])
    if (i + 1) % PROGRESS_EVERY == 0:
        print("Iteration", i + 1)

# Build the frame once from the collected rows and persist it for later cells/runs.
dataset = pd.DataFrame(records, columns=['t', 'v', 'rul'])
dataset.to_pickle('condition_dataset')

In [7]:
### DATA-DRIVEN PROGNOSTICS II ###
### FIT AND TEST MODEL ###
from sklearn import svm
from sklearn.model_selection import cross_val_score

# Inputs are the two observed signals 't' and 'v'; the regression target is 'rul'.
# NOTE: the regressor is fit on the complete dataset. No train/test split and no
# cross-validation (e.g. cross_val_score(clf, X, y, cv=5)) is run in this cell.
X = dataset.loc[:, ['t', 'v']]
y = dataset.loc[:, 'rul']

# fit() hands back the fitted estimator, so construction and fitting are chained.
clf = svm.SVR(verbose=1).fit(X, y)

[LibSVM]

In [3]:
### REINFORCEMENT LEARNING I ###
### TRAIN, SAVE, EVALUATE MODEL ###

import gym
import stable_baselines3 as sb
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.evaluation import evaluate_policy

# Training environment; the fitted regressor `clf` is handed to it as `prog_model`.
env = gym.make('Production-v0', prog_model = clf)

# Separate environment instance for the periodic evaluations. Sharing the
# training env with EvalCallback would reset/step it in the middle of training
# episodes and disturb the transitions the agent is learning from.
eval_env = gym.make('Production-v0', prog_model = clf)

# Callback for best model: evaluates every 1000 steps and stores the best
# policy seen so far (plus evaluation logs) under ./callback/.
best_callback = EvalCallback(eval_env, best_model_save_path='./callback/',
                             log_path='./callback/', eval_freq=1000,
                             deterministic=True, render=False)

model = sb.DQN('MlpPolicy', env, tensorboard_log="./tensorboard/", gamma = 0.99, learning_rate=0.01)
# total_timesteps is a step count, so pass an integer rather than the float 5e5.
model.learn(total_timesteps=int(5e5), tb_log_name="DQN_C", callback = best_callback)
# Save the final (last-iteration) model; the best one is written by the callback.
model.save("DQN_1_model")

# Evaluate the agent: the (mean reward, std reward) tuple is the cell's output.
evaluate_policy(model, model.get_env(), n_eval_episodes=10)

Eval num_timesteps=1000, episode_reward=-3166.13 +/- 598.67
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=2000, episode_reward=-3138.55 +/- 133.58
Episode length: 100.00 +/- 0.00
New best mean reward!


(-2965.0363563, 170.4154840047198)

In [4]:
### REINFORCEMENT LEARNING II ###
### LOAD MODEL ###
import gym
import stable_baselines3 as sb
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Recreate the environment the stored policy will act in.
# NOTE(review): unlike the training cell, no `prog_model` is passed here, so the
# environment falls back to whatever its default is -- confirm this is intended.
env = gym.make('Production-v0')

# Restore the best checkpoint written by the evaluation callback.
# (To use the last saved model instead, load 'DQN_1_model'.)
model = DQN.load(path='./callback/best_model', env=env)

# Evaluate the agent over 10 episodes; the (mean, std) reward tuple is the cell output.
evaluate_policy(model=model, env=model.get_env(), n_eval_episodes=10)

(492.0, 20.605824419323774)

In [4]:
### REINFORCEMENT LEARNING III ###
### TRY MODEL ###
import pandas as pd

# Play one episode with the deterministic policy and log every step.
# Each logged row is laid out as
#   [action, rul, inventory, sp_inventory, reward, done, next_order]
# where rul / next_order / inventory / sp_inventory are read from
# obs[0] / obs[1] / obs[2] / obs[3] respectively.
obs = env.reset()
done = False

# First row describes the state right after reset: action and reward are placeholders (0).
store = [[0, obs[0], obs[2], obs[3], 0, done, obs[1]]]

while True:
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    store.append([action, obs[0], obs[2], obs[3], reward, done, obs[1]])
    if done:
        break

store_df = pd.DataFrame(
    store,
    columns=['action', 'rul', 'inventory', 'sp_inventory', 'reward', 'done', 'next_order'],
)
print("Cumulative reward is: ", sum(store_df['reward']))

Cumulative reward is:  -2619.452949977145
