# Time-resolved fitting with RL

Run 189245: 0.6V to 0.1V

In [10]:
import sys
import os
import time
import numpy as np
np.random.seed(42)

import json

import matplotlib as mpl
from matplotlib import pyplot as plt
import matplotlib.lines as mlines
#%matplotlib notebook
#%matplotlib inline
%matplotlib notebook

import warnings
warnings.filterwarnings('ignore', module='numpy')
warnings.filterwarnings('ignore')

import importlib
# Location of the local checkout of the time-resolved-nr repository and its
# `src` folder; the latter is put on sys.path so that modules living there
# can be imported by later cells.
git_dir = os.path.join(os.path.expanduser('~'), 'git', 'time-resolved-nr')
src_dir = os.path.join(git_dir, 'src')
# Guard against stacking duplicate entries when this cell is re-run.
if src_dir not in sys.path:
    sys.path.append(src_dir)


In [43]:
import rl_model
importlib.reload(rl_model)
from gymnasium.utils.env_checker import check_env

# Input files, all located in the repository's `data` folder: experiment
# descriptions for the initial and final states, and the time-resolved data.
data_dir = os.path.join(git_dir, 'data')
initial_state_expt_file = os.path.join(data_dir, '189237-expt.json')
final_state_expt_file = os.path.join(data_dir, '189246-expt.json')
data_file = os.path.join(data_dir, 'r189245-time-resolved.json')
#ec_cp1 = np.loadtxt(os.path.join(project_dir, 'ec-data', 'Expt6_Summary_CP1.txt'), delimiter=',', skiprows=1).T

# Read the JSON payload, then pull out the time stamps and per-time data.
with open(data_file) as fd:
    m = json.load(fd)

timestamps = np.asarray(m['times'])
_data = m['data']
print("Number of times: %s" % len(_data))

REVERSE = False

# Build the custom environment. The final state file is deliberately not
# supplied here (pass final_state_expt_file to use it).
env_kwargs = dict(
    initial_state_file=initial_state_expt_file,
    final_state_file=None,  # final_state_expt_file,
    data=_data,
    reverse=REVERSE,
)
env = rl_model.SLDEnv(**env_kwargs)

# Gymnasium's check_env validates the environment; per the original note it
# returns nothing when the environment is verified as ok.
check_env(env)

# Put the environment in its starting state and show it.
env.reset()
env.render()


Number of times: 25
<class 'numpy.ndarray'>
0


In [44]:
terminated = False

# Flip to True to probe the environment with a randomly sampled action
# instead of the environment's own `normalized_parameters`.
SAMPLE_RANDOM_ACTION = False

# Only one step is taken here; use `while not terminated:` (optionally with
# a time.sleep(1) per step) to walk through a whole episode instead.
for i in range(1):
    action = (
        env.action_space.sample()
        if SAMPLE_RANDOM_ACTION
        else env.normalized_parameters
    )

    # Apply the action and collect what the environment reports back.
    step_result = env.step(action)
    new_state, reward, terminated, truncated, info = step_result
    print("reward: %s" % reward)

    # Show the outcome of the step, then plot with error bars enabled.
    env.render(action=action, reward=reward)
    env.plot(errors=True)

reward: -3.326158959435721
[-0.27628032  0.30918931  0.46417551]


<IPython.core.display.Javascript object>

In [45]:
%%time

from stable_baselines3 import PPO, SAC
from stable_baselines3.common.callbacks import CheckpointCallback

# Callback that writes a checkpoint every 1000 calls; it is only used when
# it is passed to model.learn() (see the commented-out long training run).
checkpoint_callback = CheckpointCallback(
    save_freq=1000,
    #save_path="./logs-forward-no-final-state/",
    save_path="./logs-test/",
    name_prefix="rl_model-forward",
    save_replay_buffer=False,
    save_vecnormalize=True,
)

# Set to True to train a new agent; leave False to load the saved one.
TRAIN_MODEL = False

if TRAIN_MODEL:
    # Only build a fresh SAC agent when it is actually going to be trained;
    # previously one was always constructed and then thrown away by the load.
    model = SAC('MlpPolicy', env, use_sde=False, verbose=0)
    model.learn(2000)
    #model.learn(200000, callback=checkpoint_callback)
    #model.save('UIUC-06-to-01-SAC-forward-no-final-state')
else:
    model = SAC.load('UIUC-06-to-01-SAC-forward-no-final-state')

CPU times: user 25.3 ms, sys: 14.6 ms, total: 39.9 ms
Wall time: 35.6 ms


# Compute uncertainties

In [46]:
# Span between the environment's upper and lower bound arrays; used to map
# the leading action components from [-1, 1] onto [low, high].
deltas = env.high_array - env.low_array

n_pars = len(env.parameters)
samples = []

# 100 episodes with non-deterministic predictions; the scatter between
# episodes is used below as the uncertainty estimate.
for _episode in range(100):
    obs, info = env.reset()
    actions = []
    for _step in range(len(env.data)):
        action, _state = model.predict(obs, deterministic=False)
        actions.append(action)
        obs, reward, terminated, truncated, info = env.step(action)

    actions = np.asarray(actions)
    scaled = (1 + actions[:, :n_pars]) * deltas / 2.0
    values = env.low_array + scaled
    samples.append(values)

# Reverse the axis order so that the episode index ends up last.
samples = np.transpose(np.asarray(samples))
print(samples.shape)

# Standard deviation across episodes (last axis).
errs = samples.std(axis=2)
print(errs.shape)

(3, 25, 100)
(3, 25)


In [47]:
# Evaluate the agent: one episode with deterministic predictions, keeping
# the action and chi2 at every time step.

n_times = len(env.data)
print("Number of times: %s" % n_times)

episode_reward = 0
actions, chi2 = [], []

obs, info = env.reset()
for i in range(n_times):
    action, _state = model.predict(obs, deterministic=True)
    actions.append(action)
    # NOTE(review): env.chi2 is read *before* env.step(action), so it holds
    # whatever the environment had at that point -- confirm this is the
    # value meant to be paired with time step i.
    chi2.append(env.chi2)

    next_obs, reward, terminated, truncated, info = env.step(action)
    episode_reward += reward

    # The observation printed is the one the action was predicted from.
    report = (timestamps[i], obs, reward, terminated, episode_reward)
    print("Time %s %s: %g [%s] %g" % report)
    obs = next_obs

actions = np.asarray(actions)

Number of times: 25
Time 0 [0.]: -3.19848 [False] -3.19848
Time 60 [0.04166667]: -3.25606 [False] -6.45454
Time 120 [0.08333334]: -3.05237 [False] -9.5069
Time 180 [0.125]: -2.58836 [False] -12.0953
Time 240 [0.16666667]: -2.174 [False] -14.2693
Time 300 [0.20833333]: -3.20168 [False] -17.4709
Time 360 [0.25]: -3.10147 [False] -20.5724
Time 420 [0.29166666]: -1.9259 [False] -22.4983
Time 480 [0.33333334]: -2.11349 [False] -24.6118
Time 540 [0.375]: -2.15923 [False] -26.771
Time 600 [0.41666666]: -1.86792 [False] -28.6389
Time 660 [0.45833334]: -2.22696 [False] -30.8659
Time 720 [0.5]: -2.5659 [False] -33.4318
Time 780 [0.5416667]: -2.74258 [False] -36.1744
Time 840 [0.5833333]: -2.28227 [False] -38.4567
Time 900 [0.625]: -2.73018 [False] -41.1868
Time 960 [0.6666667]: -2.75502 [False] -43.9419
Time 1020 [0.7083333]: -2.61435 [False] -46.5562
Time 1080 [0.75]: -3.58081 [False] -50.137
Time 1140 [0.7916667]: -2.82667 [False] -52.9637
Time 1200 [0.8333333]: -2.73044 [False] -55.6941
Time 

In [50]:
# Map the leading action components from [-1, 1] onto [low, high].
deltas = env.high_array - env.low_array
values = env.low_array + (1+actions[:,:len(env.parameters)]) * deltas / 2.0

# One row per parameter, one column per time step.
pars = values.T
# Kept for backward compatibility; the plots below index `errs` directly.
errors = errs.T

# Bayesian fit trend
with open(os.path.join(git_dir, 'data', '189245-bayes-forward.json')) as fd:
    bayes_fits = json.load(fd)

# The star marking the starting parameter values is drawn 50 time units
# outside the measured range, on the side where the episode starts.
if REVERSE:
    t_initial = timestamps[-1]+50
    t_final = -50
    _times = np.flip(timestamps)
else:
    t_final = timestamps[-1]+50
    t_initial = -50
    _times = timestamps

n_times = len(env.data)

# One panel per parameter plus a final panel for chi2. The row count is
# derived from the data instead of being hard-coded.
n_pars = pars.shape[0]
fig, axs = plt.subplots(n_pars + 1, 1, dpi=100, figsize=(9,10), sharex=True,
                        squeeze=False)
axs = axs[:, 0]
fig.subplots_adjust(left=0.15, right=.95, top=0.98, bottom=0.1)

print(env.par_labels)
for i in range(n_pars):
    # Draw on the axes created above rather than re-requesting them through
    # plt.subplot, so the shared x-axis set up by plt.subplots is used.
    ax = axs[i]

    ax.errorbar(_times, pars[i], yerr=errs[i], label=env.par_labels[i])

    # Keys in the Bayesian results are the lower-cased labels with spaces
    # replaced by underscores; uncertainties are stored under '<key>_err'.
    label = env.par_labels[i].lower().replace(" ", "_")
    if label in bayes_fits:
        ax.errorbar(_times, bayes_fits[label], yerr=bayes_fits[label+'_err'],
                    label='Bayesian fit')

    ax.plot([t_initial,], [env.parameters[i]], linestyle='', marker='*', markersize=10)

    ax.set_ylabel(env.par_labels[i])
    ax.legend()

# The last panel compares chi2 from the RL evaluation with the Bayesian fit.
ax = axs[-1]
ax.plot(_times, chi2, label = 'RL')
ax.plot(_times, bayes_fits['chi2'], label = 'Bayesian fit')
ax.legend()
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r"$\chi^2$")
ax.set_xlabel("time [seconds]");

<IPython.core.display.Javascript object>

['solvent interface', 'PVF thickness', 'PVF rho']


Text(0.5, 0, 'time [seconds]')

In [51]:
# Evaluate the agent: replay a deterministic episode and overlay the
# environment's plot after every step, each one offset by a power of ten.
episode_reward = 0

n_times = len(env.data)
print("Number of times: %s" % n_times)

obs, info = env.reset()

# plt.subplots returns a (figure, axes) pair; unpack it so that `fig` really
# is the Figure rather than a tuple.
fig, ax = plt.subplots(dpi=100, figsize=(6, 15))
# NOTE(review): this loop takes n_times-1 steps and labels the first one
# with _times[1], whereas the evaluation cell pairs the first step with
# timestamps[0] -- confirm the intended step/label alignment.
# `_times` comes from the parameter-trend plotting cell above.
for i in range(1, n_times, 1):
    action, _ = model.predict(obs, deterministic=True)

    obs, reward, terminated, truncated, info = env.step(action)

    # newfig=False is passed so env.plot does not open a figure of its own
    # (presumably it then draws on the current one -- confirm in rl_model).
    env.plot(scale=10.**i, newfig=False, errors=True, label=_times[i])


Number of times: 25


<IPython.core.display.Javascript object>

# Training progression

In [76]:
# Directory containing the saved training checkpoints to evaluate.
log_dir = "./logs-forward-no-final-state/"
# Names of all entries in that directory. os.listdir returns them in
# arbitrary order; the results are sorted by iteration before plotting.
_dir_list = os.listdir(log_dir)

def evaluate(model, environment=None):
    """Run one deterministic episode and return the total reward.

    Parameters
    ----------
    model
        Object exposing ``predict(obs, deterministic=True)`` that returns
        an ``(action, state)`` pair.
    environment
        Environment to run the episode in. It must provide ``data``,
        ``reset()`` returning ``(obs, info)`` and ``step(action)`` returning
        ``(obs, reward, terminated, truncated, info)``. Defaults to the
        notebook-level ``env``.

    Returns
    -------
    Sum of the rewards over ``len(environment.data)`` steps. The
    ``terminated`` / ``truncated`` flags are not consulted: exactly one
    step is taken per data entry.
    """
    if environment is None:
        environment = env

    episode_reward = 0
    obs, info = environment.reset()

    for _ in range(len(environment.data)):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = environment.step(action)
        episode_reward += reward
    return episode_reward

# Checkpoint files are expected to be named "rl_model-forward_<N>_steps.zip",
# where <N> is the training iteration. Any other directory entry (for
# example a hidden folder) is skipped instead of crashing the sweep.
_ckpt_prefix = "rl_model-forward_"
_ckpt_suffix = "_steps.zip"

itr_list = []
reward_list = []

for file in _dir_list:
    if not (file.startswith(_ckpt_prefix) and file.endswith(_ckpt_suffix)):
        continue
    step_str = file[len(_ckpt_prefix):-len(_ckpt_suffix)]
    if not step_str.isdecimal():
        continue

    # Use dedicated names so the notebook-level `model` and `reward` used by
    # the earlier cells are not overwritten by an arbitrary checkpoint.
    checkpoint_model = SAC.load(os.path.join(log_dir, file))
    itr_list.append(int(step_str))
    reward_list.append(evaluate(checkpoint_model))

# os.listdir order is arbitrary, so sort by iteration before plotting.
itr_list = np.asarray(itr_list)
reward_list = np.asarray(reward_list)
idx = itr_list.argsort()

fig = plt.figure(dpi=100)
plt.plot(itr_list[idx], reward_list[idx])
plt.xlabel('iteration')
# NOTE(review): the plotted quantity is the summed episode reward returned
# by evaluate(); confirm that the chi2 label is the intended one.
# Raw string: "\s" and "\c" are not valid escapes in a normal string literal.
plt.ylabel(r'$\sum \chi^2_i$')
plt.show()
        

<IPython.core.display.Javascript object>