From 1b86b141b1033b1e3cf74e49084158012e7c0c9d Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Fri, 9 Aug 2024 00:00:27 +0200 Subject: [PATCH 01/15] state2state_rl implementation --- src/qutip_qoc/_rl.py | 144 ++++++++++++++++++++++++++++------- src/qutip_qoc/pulse_optim.py | 2 +- tests/test_result.py | 34 +++++---- 3 files changed, 137 insertions(+), 43 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index bb12900..94989c0 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -3,6 +3,7 @@ """ import qutip as qt from qutip import Qobj, QobjEvo +from qutip_qoc import Result import numpy as np @@ -11,6 +12,7 @@ from stable_baselines3 import PPO from stable_baselines3.common.env_checker import check_env +import time class _RL(gym.Env): # TODO: this should be similar to your GymQubitEnv(gym.Env) implementation """ @@ -19,50 +21,83 @@ class _RL(gym.Env): # TODO: this should be similar to your GymQubitEnv(gym.Env) def __init__( self, - objective, + objectives, + control_parameters, time_interval, time_options, - control_parameters, alg_kwargs, - guess_params, - **integrator_kwargs, + optimizer_kwargs, + minimizer_kwargs, + integrator_kwargs, + qtrl_optimizers, ): - super().__init__() # TODO: super init your gym environment here + super(_RL,self).__init__() # TODO:(ok) super init your gym environment here # ------------------------------- copied from _GOAT class ------------------------------- # TODO: you dont have to use (or keep them) if you don't need the following attributes # this is just an inspiration how to extract information from the input - - self._Hd = objective.H[0] - self._Hc_lst = objective.H[1:] + self._Hd_lst, self._Hc_lst = [], [] + if not isinstance(objectives, list): + objectives = [objectives] + for objective in objectives: + # extract drift and control Hamiltonians from the objective + self._Hd_lst.append(objective.H[0]) + self._Hc_lst.append([H[0] if isinstance(H, list) else H for H in objective.H[1:]]) self._control_parameters = control_parameters - self._guess_params = guess_params - self._H = self._prepare_generator() + # extract bounds for _control_parameters + bounds = [] + for key in control_parameters.keys(): + bounds.append(control_parameters[key].get("bounds")) + self.lbound = [b[0][0] for b in bounds] + self.ubound = [b[0][1] for b in bounds] + + #self._H = self._prepare_generator() self._initial = objective.initial self._target = objective.target + self.state = None + + self._result = Result( + objectives = objectives, + time_interval = time_interval, + start_local_time = time.localtime(), # initial optimization time + n_iters = 0, # Number of iterations(episodes) until convergence + iter_seconds = [], # list containing the time taken for each iteration(episode) of the optimization + var_time = False, # Whether the optimization was performed with variable time + ) + + #for the reward + self._step_penalty = 1 - self._evo_time = time_interval.evo_time + # To check if it exceeds the maximum number of steps in an episode + self.current_step = 0 + + #self._evo_time = time_interval.evo_time + self._fid_err_targ = alg_kwargs["fid_err_targ"] # inferred attributes self._norm_fac = 1 / self._target.norm() + # control function + self.pulse = self._pulse + + # contains the action of the last completed or truncated episode + self.action = None + # integrator options self._integrator_kwargs = integrator_kwargs - self._rtol = self._integrator_kwargs.get("rtol", 1e-5) self._atol = self._integrator_kwargs.get("atol", 1e-5) # choose solver and 
fidelity type according to problem - if self._Hd.issuper: + if self._Hd_lst[0].issuper: self._fid_type = alg_kwargs.get("fid_type", "TRACEDIFF") - self._solver = qt.MESolver(H=self._H, options=self._integrator_kwargs) - + #self._solver = qt.MESolver(H=self._H, options=self._integrator_kwargs) else: self._fid_type = alg_kwargs.get("fid_type", "PSU") - self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) + #self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) self.infidelity = self._infid # TODO: should be used to calculate the reward @@ -70,18 +105,25 @@ def __init__( # TODO: set up your gym environment as you did correctly in post10 self.max_episode_time = time_interval.evo_time # maximum time for an episode self.max_steps = time_interval.n_tslots # maximum number of steps in an episode - self.step_duration = time_interval.tslots[-1] / time_interval.n_tslots # step duration for mesvole() - ... - + self.step_duration = time_interval.tslots[-1] / time_interval.n_tslots # step duration for mesvole + self.max_episodes = alg_kwargs["max_iter"] # maximum number of episodes for training + self.total_timesteps = self.max_episodes * self.max_steps # for learn() of gym + # Define action and observation spaces (Gym) + self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym + self.observation_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32) # Observation space, |v> have 2 real and 2 imaginary numbers -> 4 # ---------------------------------------------------------------------------------------- - def _infid(self, params): + def _pulse(self, t, p): + return p + + def _infid(self, params=None): """ Calculate infidelity to be minimized """ + ''' X = self._solver.run( - self._initial, [0.0, self._evo_time], args={"p": params} + self.state, [0.0, self.step_duration], args={"p": params} ).final_state if self._fid_type == "TRACEDIFF": @@ -96,17 +138,65 @@ def _infid(self, params): infid = 1 - np.abs(g) elif self._fid_type == "SU": # f_SU (incl global phase) infid = 1 - np.real(g) - + ''' + infid = 1 - qt.fidelity(self.state, self._target) return infid # TODO: don't hesitate to add the required methods for your rl environment def step(self, action): - ... 
+ action = action[0] + alpha = ((action + 1) / 2 * (self.ubound[0] - self.lbound[0])) + self.lbound[0] # for example, scale action from [-1, 1] to [9, 13] + args = {"alpha" : alpha} + + H = [self._Hd_lst[0], [self._Hc_lst[0][0], lambda t, args: self.pulse(t, args["alpha"])]] + step_result = qt.mesolve(H, self.state, [0, self.step_duration], args = args) + self.state = step_result.states[-1] + + infidelity = self.infidelity() + self._result.infidelity = infidelity + reward = (1 - infidelity) - self._step_penalty + + terminated = infidelity <= self._fid_err_targ # the episode ended reaching the goal + truncated = self.current_step >= self.max_steps # if the episode ended without reaching the goal + + if terminated or truncated: + time_diff = time.mktime(time.localtime()) - time.mktime(self._result.start_local_time) + self._result.iter_seconds.append(time_diff) + self.current_step = 0 # Reset the step counter + self.action = alpha + + observation = self._get_obs() + return observation, reward, bool(terminated), bool(truncated), {} + + def _get_obs(self): + rho = self.state.full().flatten() # to have state vector as NumPy array and flatten into one dimensional array.[a+i*b c+i*d] + obs = np.concatenate((np.real(rho), np.imag(rho))) + return obs.astype(np.float32) # Gymnasium expects the observation to be of type float32 + + def reset(self, seed=None): + self.state = self._initial + return self._get_obs(), {} + + def result(self): + # TODO: return qoc.Result object with the optimized pulse amplitudes + self._result.message = "Optimization finished!" + self._result.end_local_time = time.localtime() + self._result.n_iters = len(self._result.iter_seconds) + self._result.optimized_params = np.array(self.action) + self._result._optimized_controls = np.array(self.action) #TODO: is equal to optimized_params ? + self._result._final_states = (self._result._final_states if self._result._final_states is not None else []) + [self.state] + self._result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.start_local_time) # Convert to a string + self._result.end_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.end_local_time) # Convert to a string + self._result._guess_controls = [] + return self._result def train(self): - ... + # Check if the environment follows Gym API + check_env(self, warn=True) - def result(self): - # TODO: return qoc.Result object with the optimized pulse amplitudes - ... 
\ No newline at end of file + # Create the model + model = PPO('MlpPolicy', self, verbose=1) # verbose = 1 to show training in terminal + + # Train the model + model.learn(total_timesteps = self.total_timesteps) \ No newline at end of file diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index 661c577..573a51d 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -152,7 +152,7 @@ def optimize_pulses( # extract guess and bounds for the control pulses x0, bounds = [], [] for key in control_parameters.keys(): - x0.append(control_parameters[key].get("guess")) + x0.append(control_parameters[key].get("guess")) # TODO: for now only consider bounds bounds.append(control_parameters[key].get("bounds")) try: # GRAPE, CRAB format lbound = [b[0][0] for b in bounds] diff --git a/tests/test_result.py b/tests/test_result.py index 084aa74..a7460d7 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -157,7 +157,7 @@ def sin_z_jax(t, r, **kwargs): # you can use this routine to test your implementation # state to state transfer -init = qt.basis(2, 0) +initial = qt.basis(2, 0) target = qt.basis(2, 1) H_c = [qt.sigmax(), qt.sigmay(), qt.sigmaz()] # control Hamiltonians @@ -169,34 +169,38 @@ def sin_z_jax(t, r, **kwargs): state2state_rl = Case( objectives=[Objective(initial, H, target)], - control_parameters={"bounds": [-13, 13]}, # TODO: for now only consider bounds + #control_parameters={"bounds": [-13, 13]}, # TODO: for now only consider bounds + control_parameters = { + "p": {"bounds": [(-13, 13)],} + }, tlist=np.linspace(0, 10, 100), # TODO: derive single step duration and max evo time / max num steps from this algorithm_kwargs={ "fid_err_targ": 0.01, "alg": "RL", - "max_iter": 100, - } + "max_iter": 700, + }, + optimizer_kwargs={} ) # TODO: no big difference for unitary evolution -initial = qt.qeye(2) # Identity -target = qt.gates.hadamard_transform() +#initial = qt.qeye(2) # Identity +#target = qt.gates.hadamard_transform() -unitary_rl = state2state_rl._replace( - objectives=[Objective(initial, H, target)], -) +#unitary_rl = state2state_rl._replace( +# objectives=[Objective(initial, H, target)], +#) @pytest.fixture( params=[ - pytest.param(state2state_grape, id="State to state (GRAPE)"), - pytest.param(state2state_crab, id="State to state (CRAB)"), - pytest.param(state2state_param_crab, id="State to state (param. CRAB)"), - pytest.param(state2state_goat, id="State to state (GOAT)"), - pytest.param(state2state_jax, id="State to state (JAX)"), + #pytest.param(state2state_grape, id="State to state (GRAPE)"), + #pytest.param(state2state_crab, id="State to state (CRAB)"), + #pytest.param(state2state_param_crab, id="State to state (param. CRAB)"), + #pytest.param(state2state_goat, id="State to state (GOAT)"), + #pytest.param(state2state_jax, id="State to state (JAX)"), pytest.param(state2state_rl, id="State to state (RL)"), - pytest.param(unitary_rl, id="Unitary (RL)"), + #pytest.param(unitary_rl, id="Unitary (RL)"), ] ) def tst(request): From 2c6c3025f63a35a6315437499b268b32c12e0d28 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Sat, 10 Aug 2024 21:36:20 +0200 Subject: [PATCH 02/15] Using QobjEvo with Hc, Hd and controls. Dimension action space variable based on how many controls there are. 
using mesolve with QobjEvo object _H --- src/qutip_qoc/_rl.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 94989c0..315e074 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -45,6 +45,13 @@ def __init__( self._Hd_lst.append(objective.H[0]) self._Hc_lst.append([H[0] if isinstance(H, list) else H for H in objective.H[1:]]) + # create the QobjEvo with Hd, Hc and controls(args) + self.args = {f"alpha{i+1}": (1) for i in range(len(self._Hc_lst[0]))} # set the control parameters to 1 fo all the Hc + self._H_lst = [self._Hd_lst[0]] + for i, Hc in enumerate(self._Hc_lst[0]): + self._H_lst.append([Hc, lambda t, args: self.pulse(t, self.args, i+1)]) + self._H = qt.QobjEvo(self._H_lst, self.args) + self._control_parameters = control_parameters # extract bounds for _control_parameters bounds = [] @@ -80,9 +87,6 @@ def __init__( # inferred attributes self._norm_fac = 1 / self._target.norm() - # control function - self.pulse = self._pulse - # contains the action of the last completed or truncated episode self.action = None @@ -110,12 +114,15 @@ def __init__( self.total_timesteps = self.max_episodes * self.max_steps # for learn() of gym # Define action and observation spaces (Gym) - self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym + #self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym + self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) self.observation_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32) # Observation space, |v> have 2 real and 2 imaginary numbers -> 4 # ---------------------------------------------------------------------------------------- - def _pulse(self, t, p): - return p + #def _pulse(self, t, p): + # return p + def pulse(self, t, args, idx): + return 1*args[f"alpha{idx}"] def _infid(self, params=None): """ @@ -145,12 +152,16 @@ def _infid(self, params=None): # TODO: don't hesitate to add the required methods for your rl environment def step(self, action): - action = action[0] - alpha = ((action + 1) / 2 * (self.ubound[0] - self.lbound[0])) + self.lbound[0] # for example, scale action from [-1, 1] to [9, 13] - args = {"alpha" : alpha} + alphas = [((action[i] + 1) / 2 * (self.ubound[0] - self.lbound[0])) + self.lbound[0] for i in range(len(action))] #TODO: use ubound[i] lbound[i] + + for i, value in enumerate(alphas): + self.args[f"alpha{i+1}"] = value + self._H = qt.QobjEvo(self._H_lst, self.args) + + #H = [self._Hd_lst[0], [self._Hc_lst[0][0], lambda t, args: self.pulse(t, args["alpha"])]] + #step_result = qt.mesolve(H, self.state, [0, self.step_duration], args = args) + step_result = qt.mesolve(self._H, self.state, [0, self.step_duration]) - H = [self._Hd_lst[0], [self._Hc_lst[0][0], lambda t, args: self.pulse(t, args["alpha"])]] - step_result = qt.mesolve(H, self.state, [0, self.step_duration], args = args) self.state = step_result.states[-1] infidelity = self.infidelity() @@ -164,7 +175,7 @@ def step(self, action): time_diff = time.mktime(time.localtime()) - time.mktime(self._result.start_local_time) self._result.iter_seconds.append(time_diff) self.current_step = 0 # Reset the step counter - self.action = alpha + self.action = alphas observation = self._get_obs() return observation, reward, bool(terminated), 
bool(truncated), {} From 973095976abd7cc7840ae5e61f97be4a1d1f09f5 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Sun, 11 Aug 2024 21:42:37 +0200 Subject: [PATCH 03/15] added _solver. _infidelity() more general --- src/qutip_qoc/_rl.py | 54 +++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 315e074..832c44c 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -46,7 +46,7 @@ def __init__( self._Hc_lst.append([H[0] if isinstance(H, list) else H for H in objective.H[1:]]) # create the QobjEvo with Hd, Hc and controls(args) - self.args = {f"alpha{i+1}": (1) for i in range(len(self._Hc_lst[0]))} # set the control parameters to 1 fo all the Hc + self.args = {f"alpha{i+1}": (1) for i in range(len(self._Hc_lst[0]))} # set the control parameters to 1 for all the Hc self._H_lst = [self._Hd_lst[0]] for i, Hc in enumerate(self._Hc_lst[0]): self._H_lst.append([Hc, lambda t, args: self.pulse(t, self.args, i+1)]) @@ -61,6 +61,7 @@ def __init__( self.ubound = [b[0][1] for b in bounds] #self._H = self._prepare_generator() + self._alg_kwargs = alg_kwargs self._initial = objective.initial self._target = objective.target @@ -87,24 +88,14 @@ def __init__( # inferred attributes self._norm_fac = 1 / self._target.norm() - # contains the action of the last completed or truncated episode - self.action = None + self.temp_actions = [] # temporary list to save episode actions + self.actions = [] # list of actions(lists) of the last episode # integrator options self._integrator_kwargs = integrator_kwargs self._rtol = self._integrator_kwargs.get("rtol", 1e-5) self._atol = self._integrator_kwargs.get("atol", 1e-5) - # choose solver and fidelity type according to problem - if self._Hd_lst[0].issuper: - self._fid_type = alg_kwargs.get("fid_type", "TRACEDIFF") - #self._solver = qt.MESolver(H=self._H, options=self._integrator_kwargs) - else: - self._fid_type = alg_kwargs.get("fid_type", "PSU") - #self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) - - self.infidelity = self._infid # TODO: should be used to calculate the reward - # ---------------------------------------------------------------------------------------- # TODO: set up your gym environment as you did correctly in post10 self.max_episode_time = time_interval.evo_time # maximum time for an episode @@ -118,7 +109,18 @@ def __init__( self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) self.observation_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32) # Observation space, |v> have 2 real and 2 imaginary numbers -> 4 # ---------------------------------------------------------------------------------------- - + + def update_solver(self): + # choose solver and fidelity type according to problem + if self._Hd_lst[0].issuper: + self._fid_type = self._alg_kwargs.get("fid_type", "TRACEDIFF") + self._solver = qt.MESolver(H=self._H, options=self._integrator_kwargs) + else: + self._fid_type = self._alg_kwargs.get("fid_type", "PSU") + self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) + + self.infidelity = self._infid # TODO: should be used to calculate the reward + #def _pulse(self, t, p): # return p def pulse(self, t, args, idx): @@ -128,10 +130,10 @@ def _infid(self, params=None): """ Calculate infidelity to be minimized """ - ''' X = self._solver.run( self.state, [0.0, self.step_duration], args={"p": params} ).final_state + self.state = X if self._fid_type == 
"TRACEDIFF": diff = X - self._target @@ -145,8 +147,7 @@ def _infid(self, params=None): infid = 1 - np.abs(g) elif self._fid_type == "SU": # f_SU (incl global phase) infid = 1 - np.real(g) - ''' - infid = 1 - qt.fidelity(self.state, self._target) + #infid = 1 - qt.fidelity(self.state, self._target) return infid # TODO: don't hesitate to add the required methods for your rl environment @@ -158,13 +159,11 @@ def step(self, action): self.args[f"alpha{i+1}"] = value self._H = qt.QobjEvo(self._H_lst, self.args) - #H = [self._Hd_lst[0], [self._Hc_lst[0][0], lambda t, args: self.pulse(t, args["alpha"])]] - #step_result = qt.mesolve(H, self.state, [0, self.step_duration], args = args) - step_result = qt.mesolve(self._H, self.state, [0, self.step_duration]) - - self.state = step_result.states[-1] - + self.update_solver() # _H has changed infidelity = self.infidelity() + + self.current_step += 1 + self.temp_actions.append(alphas) self._result.infidelity = infidelity reward = (1 - infidelity) - self._step_penalty @@ -175,7 +174,7 @@ def step(self, action): time_diff = time.mktime(time.localtime()) - time.mktime(self._result.start_local_time) self._result.iter_seconds.append(time_diff) self.current_step = 0 # Reset the step counter - self.action = alphas + self.actions = self.temp_actions.copy() observation = self._get_obs() return observation, reward, bool(terminated), bool(truncated), {} @@ -186,6 +185,7 @@ def _get_obs(self): return obs.astype(np.float32) # Gymnasium expects the observation to be of type float32 def reset(self, seed=None): + self.temp_actions = [] self.state = self._initial return self._get_obs(), {} @@ -194,12 +194,14 @@ def result(self): self._result.message = "Optimization finished!" self._result.end_local_time = time.localtime() self._result.n_iters = len(self._result.iter_seconds) - self._result.optimized_params = np.array(self.action) - self._result._optimized_controls = np.array(self.action) #TODO: is equal to optimized_params ? 
+ self._result.optimized_params = self.actions.copy() + self._result._optimized_controls = self.actions.copy() self._result._final_states = (self._result._final_states if self._result._final_states is not None else []) + [self.state] self._result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.start_local_time) # Convert to a string self._result.end_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.end_local_time) # Convert to a string self._result._guess_controls = [] + self._result._optimized_H = [self._H] + self._result.guess_params = [] return self._result def train(self): From 2667b6403f6ed2134d44fc845aa359a556060f0d Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Wed, 14 Aug 2024 10:05:40 +0200 Subject: [PATCH 04/15] unitary evolution implementation --- src/qutip_qoc/_rl.py | 15 ++++++++++----- tests/test_result.py | 16 ++++++++-------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 832c44c..8961c2e 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -63,9 +63,10 @@ def __init__( #self._H = self._prepare_generator() self._alg_kwargs = alg_kwargs - self._initial = objective.initial - self._target = objective.target + self._initial = objectives[0].initial + self._target = objectives[0].target self.state = None + self.dim = self._initial.shape[0] self._result = Result( objectives = objectives, @@ -105,9 +106,13 @@ def __init__( self.total_timesteps = self.max_episodes * self.max_steps # for learn() of gym # Define action and observation spaces (Gym) - #self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym - self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) - self.observation_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32) # Observation space, |v> have 2 real and 2 imaginary numbers -> 4 + #self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) + if self._initial.isket: + obs_shape = (2 * self.dim,) + else: # for unitary operators + obs_shape = (2 * self.dim * self.dim,) + self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym + self.observation_space = spaces.Box(low=-1, high=1, shape=obs_shape, dtype=np.float32) # Observation space # ---------------------------------------------------------------------------------------- def update_solver(self): diff --git a/tests/test_result.py b/tests/test_result.py index a7460d7..10fb8bf 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -182,14 +182,14 @@ def sin_z_jax(t, r, **kwargs): optimizer_kwargs={} ) -# TODO: no big difference for unitary evolution +# no big difference for unitary evolution -#initial = qt.qeye(2) # Identity -#target = qt.gates.hadamard_transform() +initial = qt.qeye(2) # Identity +target = qt.gates.hadamard_transform() -#unitary_rl = state2state_rl._replace( -# objectives=[Objective(initial, H, target)], -#) +unitary_rl = state2state_rl._replace( + objectives=[Objective(initial, H, target)], +) @pytest.fixture( @@ -199,8 +199,8 @@ def sin_z_jax(t, r, **kwargs): #pytest.param(state2state_param_crab, id="State to state (param. 
CRAB)"), #pytest.param(state2state_goat, id="State to state (GOAT)"), #pytest.param(state2state_jax, id="State to state (JAX)"), - pytest.param(state2state_rl, id="State to state (RL)"), - #pytest.param(unitary_rl, id="Unitary (RL)"), + #pytest.param(state2state_rl, id="State to state (RL)"), + pytest.param(unitary_rl, id="Unitary (RL)"), ] ) def tst(request): From 19436092cbe89004cd1c8424018c827bc7576059 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Sun, 18 Aug 2024 19:51:28 +0200 Subject: [PATCH 05/15] docstring added --- src/qutip_qoc/_rl.py | 73 ++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 8961c2e..8434fe6 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -1,5 +1,7 @@ """ -This module contains ... +This module contains functions that implement quantum optimal control +using reinforcement learning (RL) techniques, allowing for the optimization +of control pulse sequences in quantum systems. """ import qutip as qt from qutip import Qobj, QobjEvo @@ -14,9 +16,13 @@ import time -class _RL(gym.Env): # TODO: this should be similar to your GymQubitEnv(gym.Env) implementation +class _RL(gym.Env): """ - Class for storing a control problem and ... + Class for storing a control problem and implementing quantum optimal + control using reinforcement learning. This class defines a custom + Gym environment that models the dynamics of quantum systems + under various control pulses, and uses RL algorithms to optimize the + parameters of these pulses. """ def __init__( @@ -31,12 +37,14 @@ def __init__( integrator_kwargs, qtrl_optimizers, ): - super(_RL,self).__init__() # TODO:(ok) super init your gym environment here + """ + Initialize the reinforcement learning environment for quantum + optimal control. Sets up the system Hamiltonian, control parameters, + and defines the observation and action spaces for the RL agent. 
+ """ - # ------------------------------- copied from _GOAT class ------------------------------- + super(_RL,self).__init__() - # TODO: you dont have to use (or keep them) if you don't need the following attributes - # this is just an inspiration how to extract information from the input self._Hd_lst, self._Hc_lst = [], [] if not isinstance(objectives, list): objectives = [objectives] @@ -60,7 +68,6 @@ def __init__( self.lbound = [b[0][0] for b in bounds] self.ubound = [b[0][1] for b in bounds] - #self._H = self._prepare_generator() self._alg_kwargs = alg_kwargs self._initial = objectives[0].initial @@ -83,7 +90,6 @@ def __init__( # To check if it exceeds the maximum number of steps in an episode self.current_step = 0 - #self._evo_time = time_interval.evo_time self._fid_err_targ = alg_kwargs["fid_err_targ"] # inferred attributes @@ -97,8 +103,6 @@ def __init__( self._rtol = self._integrator_kwargs.get("rtol", 1e-5) self._atol = self._integrator_kwargs.get("atol", 1e-5) - # ---------------------------------------------------------------------------------------- - # TODO: set up your gym environment as you did correctly in post10 self.max_episode_time = time_interval.evo_time # maximum time for an episode self.max_steps = time_interval.n_tslots # maximum number of steps in an episode self.step_duration = time_interval.tslots[-1] / time_interval.n_tslots # step duration for mesvole @@ -106,17 +110,19 @@ def __init__( self.total_timesteps = self.max_episodes * self.max_steps # for learn() of gym # Define action and observation spaces (Gym) - #self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) if self._initial.isket: obs_shape = (2 * self.dim,) else: # for unitary operators obs_shape = (2 * self.dim * self.dim,) self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym self.observation_space = spaces.Box(low=-1, high=1, shape=obs_shape, dtype=np.float32) # Observation space - # ---------------------------------------------------------------------------------------- def update_solver(self): - # choose solver and fidelity type according to problem + """ + Update the solver and fidelity type based on the problem setup. + Chooses the appropriate solver (Schrödinger or master equation) and + prepares for infidelity calculation. + """ if self._Hd_lst[0].issuper: self._fid_type = self._alg_kwargs.get("fid_type", "TRACEDIFF") self._solver = qt.MESolver(H=self._H, options=self._integrator_kwargs) @@ -124,16 +130,17 @@ def update_solver(self): self._fid_type = self._alg_kwargs.get("fid_type", "PSU") self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) - self.infidelity = self._infid # TODO: should be used to calculate the reward + self.infidelity = self._infid - #def _pulse(self, t, p): - # return p def pulse(self, t, args, idx): + """ + Returns the control pulse value at time t for a given index. + """ return 1*args[f"alpha{idx}"] def _infid(self, params=None): """ - Calculate infidelity to be minimized + The agent performs a step, then calculate infidelity to be minimized of the current state against the target state. 
""" X = self._solver.run( self.state, [0.0, self.step_duration], args={"p": params} @@ -152,13 +159,14 @@ def _infid(self, params=None): infid = 1 - np.abs(g) elif self._fid_type == "SU": # f_SU (incl global phase) infid = 1 - np.real(g) - #infid = 1 - qt.fidelity(self.state, self._target) return infid - # TODO: don't hesitate to add the required methods for your rl environment - def step(self, action): - alphas = [((action[i] + 1) / 2 * (self.ubound[0] - self.lbound[0])) + self.lbound[0] for i in range(len(action))] #TODO: use ubound[i] lbound[i] + """ + Perform a single time step in the environment, applying the scaled action (control pulse) + chosen by the RL agent. Updates the system's state and computes the reward. + """ + alphas = [((action[i] + 1) / 2 * (self.ubound[0] - self.lbound[0])) + self.lbound[0] for i in range(len(action))] for i, value in enumerate(alphas): self.args[f"alpha{i+1}"] = value @@ -185,17 +193,27 @@ def step(self, action): return observation, reward, bool(terminated), bool(truncated), {} def _get_obs(self): - rho = self.state.full().flatten() # to have state vector as NumPy array and flatten into one dimensional array.[a+i*b c+i*d] + """ + Get the current state observation for the RL agent. Converts the system's + quantum state or matrix into a real-valued NumPy array suitable for RL algorithms. + """ + rho = self.state.full().flatten() obs = np.concatenate((np.real(rho), np.imag(rho))) return obs.astype(np.float32) # Gymnasium expects the observation to be of type float32 def reset(self, seed=None): + """ + Reset the environment to the initial state, preparing for a new episode. + """ self.temp_actions = [] self.state = self._initial return self._get_obs(), {} def result(self): - # TODO: return qoc.Result object with the optimized pulse amplitudes + """ + Retrieve the results of the optimization process, including the optimized + pulse sequences, final states, and performance metrics. + """ self._result.message = "Optimization finished!" self._result.end_local_time = time.localtime() self._result.n_iters = len(self._result.iter_seconds) @@ -210,11 +228,14 @@ def result(self): return self._result def train(self): + """ + Train the RL agent on the defined quantum control problem using the specified + reinforcement learning algorithm. Checks environment compatibility with Gym API. 
+ """ # Check if the environment follows Gym API check_env(self, warn=True) # Create the model - model = PPO('MlpPolicy', self, verbose=1) # verbose = 1 to show training in terminal - + model = PPO('MlpPolicy', self, verbose=1) # verbose = 1 to display training progress and statistics in the terminal # Train the model model.learn(total_timesteps = self.total_timesteps) \ No newline at end of file From 13ddba0253afc2b76807d55ca65991f2b0d50d15 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Tue, 20 Aug 2024 23:11:54 +0200 Subject: [PATCH 06/15] added callback to stop training --- src/qutip_qoc/_rl.py | 94 ++++++++++++++++++++++++++++++++++++++------ tests/test_result.py | 20 +++++----- 2 files changed, 93 insertions(+), 21 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 8434fe6..943bcd3 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -13,6 +13,7 @@ from gymnasium import spaces from stable_baselines3 import PPO from stable_baselines3.common.env_checker import check_env +from stable_baselines3.common.callbacks import BaseCallback import time @@ -90,6 +91,10 @@ def __init__( # To check if it exceeds the maximum number of steps in an episode self.current_step = 0 + self.terminated = False + self.truncated = False + self.episode_info = [] # to contain some information from the latest episode + self._fid_err_targ = alg_kwargs["fid_err_targ"] # inferred attributes @@ -108,6 +113,7 @@ def __init__( self.step_duration = time_interval.tslots[-1] / time_interval.n_tslots # step duration for mesvole self.max_episodes = alg_kwargs["max_iter"] # maximum number of episodes for training self.total_timesteps = self.max_episodes * self.max_steps # for learn() of gym + self.current_episode = 0 # To keep track of the current episode # Define action and observation spaces (Gym) if self._initial.isket: @@ -137,6 +143,19 @@ def pulse(self, t, args, idx): Returns the control pulse value at time t for a given index. """ return 1*args[f"alpha{idx}"] + + def save_episode_info(self): + """ + Save the information of the last episode before resetting the environment. + """ + episode_data = { + "episode": self.current_episode, + "final_infidelity": self._result.infidelity, + "terminated": self.terminated, + "truncated": self.truncated, + "steps_used": self.current_step + } + self.episode_info.append(episode_data) def _infid(self, params=None): """ @@ -180,17 +199,11 @@ def step(self, action): self._result.infidelity = infidelity reward = (1 - infidelity) - self._step_penalty - terminated = infidelity <= self._fid_err_targ # the episode ended reaching the goal - truncated = self.current_step >= self.max_steps # if the episode ended without reaching the goal - - if terminated or truncated: - time_diff = time.mktime(time.localtime()) - time.mktime(self._result.start_local_time) - self._result.iter_seconds.append(time_diff) - self.current_step = 0 # Reset the step counter - self.actions = self.temp_actions.copy() + self.terminated = infidelity <= self._fid_err_targ # the episode ended reaching the goal + self.truncated = self.current_step >= self.max_steps # if the episode ended without reaching the goal observation = self._get_obs() - return observation, reward, bool(terminated), bool(truncated), {} + return observation, reward, bool(self.terminated), bool(self.truncated), {} def _get_obs(self): """ @@ -205,6 +218,15 @@ def reset(self, seed=None): """ Reset the environment to the initial state, preparing for a new episode. 
""" + self.save_episode_info() + + time_diff = time.mktime(time.localtime()) - time.mktime(self._result.start_local_time) + self._result.iter_seconds.append(time_diff) + self.current_step = 0 # Reset the step counter + self.current_episode += 1 # Increment episode counter + self.actions = self.temp_actions.copy() + self.terminated = False + self.truncated = False self.temp_actions = [] self.state = self._initial return self._get_obs(), {} @@ -214,7 +236,6 @@ def result(self): Retrieve the results of the optimization process, including the optimized pulse sequences, final states, and performance metrics. """ - self._result.message = "Optimization finished!" self._result.end_local_time = time.localtime() self._result.n_iters = len(self._result.iter_seconds) self._result.optimized_params = self.actions.copy() @@ -237,5 +258,56 @@ def train(self): # Create the model model = PPO('MlpPolicy', self, verbose=1) # verbose = 1 to display training progress and statistics in the terminal + + stop_callback = EarlyStopTraining(verbose=1) + # Train the model - model.learn(total_timesteps = self.total_timesteps) \ No newline at end of file + model.learn(total_timesteps = self.total_timesteps, callback=stop_callback) + +class EarlyStopTraining(BaseCallback): + """ + A callback to stop training based on specific conditions (steps, infidelity, max iterations) + """ + def __init__(self, verbose: int = 0): + super(EarlyStopTraining, self).__init__(verbose) + self.stop_train = False + + def _on_step(self) -> bool: + """ + This method is required by the BaseCallback class. We use it only to stop the training. + """ + # Check if we need to stop training + if self.stop_train: + return False # Stop training + return True # Continue training + + def _on_rollout_start(self) -> None: + """ + This method is called before the rollout starts (before collecting new samples). + Checks: + - If all of the last 100 episodes have infidelity below the target and use the same number of steps, stop training. + - Stop training if the maximum number of episodes is reached. + """ + env = self.training_env.envs[0].unwrapped + max_episodes = env.max_episodes + fid_err_targ = env._fid_err_targ + + if len(env.episode_info) >= 100: + last_100_episodes = env.episode_info[-100:] + + min_steps = min(info['steps_used'] for info in last_100_episodes) + steps_condition = all(ep['steps_used'] == min_steps for ep in last_100_episodes) + infid_condition = all(ep['final_infidelity'] <= fid_err_targ for ep in last_100_episodes) + + if steps_condition and infid_condition: + env._result.message = "Training finished. No episode in the last 100 used fewer steps and infidelity was below target infid." + #print(f"Stopping training as no episode in the last 100 used fewer steps and infidelity was below target infid.") + self.stop_train = True # Stop training + #print([ep['steps_used'] for ep in last_100_episodes]) + #print([ep['final_infidelity'] for ep in last_100_episodes]) + + # Check max episodes condition + if env.current_episode >= max_episodes: + env._result.message = f"Reached {max_episodes} episodes, stopping training." 
+ #print(f"Reached {max_episodes} episodes, stopping training.") + self.stop_train = True # Stop training diff --git a/tests/test_result.py b/tests/test_result.py index 10fb8bf..f2ff583 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -158,7 +158,7 @@ def sin_z_jax(t, r, **kwargs): # state to state transfer initial = qt.basis(2, 0) -target = qt.basis(2, 1) +target = (qt.basis(2, 0) + qt.basis(2, 1)).unit() # |+⟩ H_c = [qt.sigmax(), qt.sigmay(), qt.sigmaz()] # control Hamiltonians @@ -173,23 +173,23 @@ def sin_z_jax(t, r, **kwargs): control_parameters = { "p": {"bounds": [(-13, 13)],} }, - tlist=np.linspace(0, 10, 100), # TODO: derive single step duration and max evo time / max num steps from this + tlist=np.linspace(0, 10, 100), algorithm_kwargs={ "fid_err_targ": 0.01, "alg": "RL", - "max_iter": 700, + "max_iter": 70000, }, optimizer_kwargs={} ) # no big difference for unitary evolution -initial = qt.qeye(2) # Identity -target = qt.gates.hadamard_transform() +#initial = qt.qeye(2) # Identity +#target = qt.gates.hadamard_transform() -unitary_rl = state2state_rl._replace( - objectives=[Objective(initial, H, target)], -) +#unitary_rl = state2state_rl._replace( +# objectives=[Objective(initial, H, target)], +#) @pytest.fixture( @@ -199,8 +199,8 @@ def sin_z_jax(t, r, **kwargs): #pytest.param(state2state_param_crab, id="State to state (param. CRAB)"), #pytest.param(state2state_goat, id="State to state (GOAT)"), #pytest.param(state2state_jax, id="State to state (JAX)"), - #pytest.param(state2state_rl, id="State to state (RL)"), - pytest.param(unitary_rl, id="Unitary (RL)"), + pytest.param(state2state_rl, id="State to state (RL)"), + #pytest.param(unitary_rl, id="Unitary (RL)"), ] ) def tst(request): From fadb42e52f9008e2f9846ced4686f77e29c44d04 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Thu, 22 Aug 2024 17:56:25 +0200 Subject: [PATCH 07/15] update docstring --- src/qutip_qoc/pulse_optim.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index 573a51d..efc5d84 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -25,7 +25,7 @@ def optimize_pulses( integrator_kwargs=None, ): """ - Run GOAT, JOPT, GRAPE or CRAB optimization. + Run GOAT, JOPT, GRAPE, CRAB or RL optimization. Parameters ---------- @@ -37,6 +37,7 @@ def optimize_pulses( Dictionary of options for the control pulse optimization. The keys of this dict must be a unique string identifier for each control Hamiltonian / function. For the GOAT and JOPT algorithms, the dict may optionally also contain the key "__time__". + For RL you don't need to specify the guess. For each control function it must specify: control_id : dict @@ -71,14 +72,15 @@ def optimize_pulses( - alg : str Algorithm to use for the optimization. - Supported are: "GRAPE", "CRAB", "GOAT", "JOPT". + Supported are: "GRAPE", "CRAB", "GOAT", "JOPT" and "RL". - fid_err_targ : float, optional Fidelity error target for the optimization. - max_iter : int, optional Maximum number of iterations to perform. - Referes to local minimizer steps. + Referes to local minimizer steps or in the context of + `alg: "RL"` to the max. number of episodes. Global steps default to 0 (no global optimization). Can be overridden by specifying in minimizer_kwargs. 
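The docstring change above documents the new "RL" option for optimize_pulses. For orientation, here is a minimal sketch of such a call, modelled on the state-to-state RL case in tests/test_result.py from this series; the drift term H_d, the composition of H, the import paths, and the specific parameter values are illustrative assumptions rather than lines taken from the diff:

    import numpy as np
    import qutip as qt
    from qutip_qoc import Objective, optimize_pulses

    # State-to-state transfer |0> -> |1> with three control Hamiltonians,
    # mirroring the RL test case in this patch series.
    initial = qt.basis(2, 0)
    target = qt.basis(2, 1)
    H_d = qt.sigmaz()                              # assumed drift term
    H_c = [qt.sigmax(), qt.sigmay(), qt.sigmaz()]  # control Hamiltonians
    H = [H_d] + H_c                                # H[0] = drift, H[1:] = controls

    result = optimize_pulses(
        objectives=[Objective(initial, H, target)],
        control_parameters={"p": {"bounds": [(-13, 13)]}},  # RL only uses the bounds, no guess
        tlist=np.linspace(0, 10, 100),   # sets step duration and max. steps per episode
        algorithm_kwargs={
            "alg": "RL",
            "fid_err_targ": 0.01,
            "max_iter": 300,             # with "RL": maximum number of training episodes
        },
    )
    print(result.infidelity, result.optimized_params)

With alg="RL", max_iter is read as the maximum number of training episodes, and tlist determines both the per-step evolution time and the maximum number of steps per episode, as implemented in _rl.py above.
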
From 0c6840581c9dde925efcc283015129f946d8dd66 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Thu, 22 Aug 2024 18:00:09 +0200 Subject: [PATCH 08/15] update docstring --- src/qutip_qoc/pulse_optim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index efc5d84..662fb67 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -1,7 +1,7 @@ """ This module is the entry point for the optimization of control pulses. It provides the function `optimize_pulses` which prepares and runs the -GOAT, JOPT, GRAPE or CRAB optimization. +GOAT, JOPT, GRAPE, CRAB or RL optimization. """ import numpy as np From e66da0ffd63d65fb9fdd4b0d56071c47440c0d7c Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Sat, 24 Aug 2024 17:32:49 +0200 Subject: [PATCH 09/15] Added shorter_pulses to algorithm_kwargs and changed callback to stop train early, removed update_solver(), use of var_time = True supported, other minor fixes --- src/qutip_qoc/_rl.py | 77 ++++++++++++++++-------------------- src/qutip_qoc/pulse_optim.py | 12 ++++-- tests/test_result.py | 28 ++++++------- 3 files changed, 55 insertions(+), 62 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 943bcd3..ce4ae74 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -47,8 +47,6 @@ def __init__( super(_RL,self).__init__() self._Hd_lst, self._Hc_lst = [], [] - if not isinstance(objectives, list): - objectives = [objectives] for objective in objectives: # extract drift and control Hamiltonians from the objective self._Hd_lst.append(objective.H[0]) @@ -61,8 +59,7 @@ def __init__( self._H_lst.append([Hc, lambda t, args: self.pulse(t, self.args, i+1)]) self._H = qt.QobjEvo(self._H_lst, self.args) - self._control_parameters = control_parameters - # extract bounds for _control_parameters + # extract bounds for control_parameters bounds = [] for key in control_parameters.keys(): bounds.append(control_parameters[key].get("bounds")) @@ -70,6 +67,7 @@ def __init__( self.ubound = [b[0][1] for b in bounds] self._alg_kwargs = alg_kwargs + self.shorter_pulses = self._alg_kwargs.get("shorter_pulses", False) # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps self._initial = objectives[0].initial self._target = objectives[0].target @@ -82,7 +80,7 @@ def __init__( start_local_time = time.localtime(), # initial optimization time n_iters = 0, # Number of iterations(episodes) until convergence iter_seconds = [], # list containing the time taken for each iteration(episode) of the optimization - var_time = False, # Whether the optimization was performed with variable time + var_time = True, # Whether the optimization was performed with variable time ) #for the reward @@ -123,12 +121,7 @@ def __init__( self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym self.observation_space = spaces.Box(low=-1, high=1, shape=obs_shape, dtype=np.float32) # Observation space - def update_solver(self): - """ - Update the solver and fidelity type based on the problem setup. - Chooses the appropriate solver (Schrödinger or master equation) and - prepares for infidelity calculation. 
- """ + # create the solver if self._Hd_lst[0].issuper: self._fid_type = self._alg_kwargs.get("fid_type", "TRACEDIFF") self._solver = qt.MESolver(H=self._H, options=self._integrator_kwargs) @@ -136,8 +129,6 @@ def update_solver(self): self._fid_type = self._alg_kwargs.get("fid_type", "PSU") self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) - self.infidelity = self._infid - def pulse(self, t, args, idx): """ Returns the control pulse value at time t for a given index. @@ -153,7 +144,8 @@ def save_episode_info(self): "final_infidelity": self._result.infidelity, "terminated": self.terminated, "truncated": self.truncated, - "steps_used": self.current_step + "steps_used": self.current_step, + "elapsed_time": time.mktime(time.localtime()) } self.episode_info.append(episode_data) @@ -189,10 +181,8 @@ def step(self, action): for i, value in enumerate(alphas): self.args[f"alpha{i+1}"] = value - self._H = qt.QobjEvo(self._H_lst, self.args) - self.update_solver() # _H has changed - infidelity = self.infidelity() + infidelity = self._infid() self.current_step += 1 self.temp_actions.append(alphas) @@ -220,7 +210,7 @@ def reset(self, seed=None): """ self.save_episode_info() - time_diff = time.mktime(time.localtime()) - time.mktime(self._result.start_local_time) + time_diff = self.episode_info[-1]["elapsed_time"] - (self.episode_info[-2]["elapsed_time"] if len(self.episode_info) > 1 else time.mktime(self._result.start_local_time)) self._result.iter_seconds.append(time_diff) self.current_step = 0 # Reset the step counter self.current_episode += 1 # Increment episode counter @@ -238,7 +228,7 @@ def result(self): """ self._result.end_local_time = time.localtime() self._result.n_iters = len(self._result.iter_seconds) - self._result.optimized_params = self.actions.copy() + self._result.optimized_params = self.actions.copy() + [self._result.total_seconds] # If var_time is True, the last parameter is the evolution time self._result._optimized_controls = self.actions.copy() self._result._final_states = (self._result._final_states if self._result._final_states is not None else []) + [self.state] self._result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.start_local_time) # Convert to a string @@ -274,11 +264,21 @@ def __init__(self, verbose: int = 0): def _on_step(self) -> bool: """ - This method is required by the BaseCallback class. We use it only to stop the training. + This method is required by the BaseCallback class. We use it to stop the training. + - Stop training if the maximum number of episodes is reached. + - Stop training if it finds an episode with infidelity <= than target infidelity """ + env = self.training_env.envs[0].unwrapped + # Check if we need to stop training if self.stop_train: return False # Stop training + elif env.current_episode >= env.max_episodes: + env._result.message = f"Reached {env.max_episodes} episodes, stopping training." + return False # Stop training + elif (env._result.infidelity <= env._fid_err_targ) and not(env.shorter_pulses): + env._result.message = f"Stop training because an episode with infidelity <= target infidelity was found" + return False # Stop training return True # Continue training def _on_rollout_start(self) -> None: @@ -286,28 +286,19 @@ def _on_rollout_start(self) -> None: This method is called before the rollout starts (before collecting new samples). Checks: - If all of the last 100 episodes have infidelity below the target and use the same number of steps, stop training. 
- - Stop training if the maximum number of episodes is reached. """ + #could be moved to on_step + env = self.training_env.envs[0].unwrapped - max_episodes = env.max_episodes - fid_err_targ = env._fid_err_targ - - if len(env.episode_info) >= 100: - last_100_episodes = env.episode_info[-100:] - - min_steps = min(info['steps_used'] for info in last_100_episodes) - steps_condition = all(ep['steps_used'] == min_steps for ep in last_100_episodes) - infid_condition = all(ep['final_infidelity'] <= fid_err_targ for ep in last_100_episodes) - - if steps_condition and infid_condition: - env._result.message = "Training finished. No episode in the last 100 used fewer steps and infidelity was below target infid." - #print(f"Stopping training as no episode in the last 100 used fewer steps and infidelity was below target infid.") - self.stop_train = True # Stop training - #print([ep['steps_used'] for ep in last_100_episodes]) - #print([ep['final_infidelity'] for ep in last_100_episodes]) - - # Check max episodes condition - if env.current_episode >= max_episodes: - env._result.message = f"Reached {max_episodes} episodes, stopping training." - #print(f"Reached {max_episodes} episodes, stopping training.") - self.stop_train = True # Stop training + #Only if specified in alg_kwargs, the algorithm will search for shorter pulses, resulting in episodes with fewer steps. + if env.shorter_pulses: + if len(env.episode_info) >= 100: + last_100_episodes = env.episode_info[-100:] + + min_steps = min(info['steps_used'] for info in last_100_episodes) + steps_condition = all(ep['steps_used'] == min_steps for ep in last_100_episodes) + infid_condition = all(ep['final_infidelity'] <= env._fid_err_targ for ep in last_100_episodes) + + if steps_condition and infid_condition: + env._result.message = "Training finished. No episode in the last 100 used fewer steps and infidelity was below target infid." + self.stop_train = True # Stop training \ No newline at end of file diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index 662fb67..6964b9a 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -37,7 +37,6 @@ def optimize_pulses( Dictionary of options for the control pulse optimization. The keys of this dict must be a unique string identifier for each control Hamiltonian / function. For the GOAT and JOPT algorithms, the dict may optionally also contain the key "__time__". - For RL you don't need to specify the guess. For each control function it must specify: control_id : dict @@ -46,6 +45,7 @@ def optimize_pulses( where ``n`` is the number of independent variables. - bounds : sequence, optional + For RL you don't need to specify the guess. Sequence of ``(min, max)`` pairs for each element in `guess`. None is used to specify no bound. @@ -84,6 +84,11 @@ def optimize_pulses( Global steps default to 0 (no global optimization). Can be overridden by specifying in minimizer_kwargs. + - shorter_pulses : bool, optional + If set to True, allows the algorithm to search for shorter control + pulses that can achieve the desired fidelity target using fewer steps. + By default, it is set to False, only attempting to reach the target infidelity. + Algorithm specific keywords for GRAPE,CRAB can be found in :func:`qutip_qtrl.pulseoptim.optimize_pulse`. 
@@ -154,7 +159,7 @@ def optimize_pulses( # extract guess and bounds for the control pulses x0, bounds = [], [] for key in control_parameters.keys(): - x0.append(control_parameters[key].get("guess")) # TODO: for now only consider bounds + x0.append(control_parameters[key].get("guess")) bounds.append(control_parameters[key].get("bounds")) try: # GRAPE, CRAB format lbound = [b[0][0] for b in bounds] @@ -351,8 +356,7 @@ def optimize_pulses( qtrl_optimizers.append(qtrl_optimizer) - # TODO: we can deal with proper handling later - if alg == "RL": + elif alg == "RL": rl_env = _RL( objectives, control_parameters, diff --git a/tests/test_result.py b/tests/test_result.py index f2ff583..1ffa3df 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -153,8 +153,6 @@ def sin_z_jax(t, r, **kwargs): ) # ----------------------- RL -------------------- -# TODO: this is the input for optimiz_pulses() function -# you can use this routine to test your implementation # state to state transfer initial = qt.basis(2, 0) @@ -169,7 +167,6 @@ def sin_z_jax(t, r, **kwargs): state2state_rl = Case( objectives=[Objective(initial, H, target)], - #control_parameters={"bounds": [-13, 13]}, # TODO: for now only consider bounds control_parameters = { "p": {"bounds": [(-13, 13)],} }, @@ -177,30 +174,31 @@ def sin_z_jax(t, r, **kwargs): algorithm_kwargs={ "fid_err_targ": 0.01, "alg": "RL", - "max_iter": 70000, + "max_iter": 300, + "shorter_pulses": True, }, optimizer_kwargs={} ) # no big difference for unitary evolution -#initial = qt.qeye(2) # Identity -#target = qt.gates.hadamard_transform() +initial = qt.qeye(2) # Identity +target = qt.gates.hadamard_transform() -#unitary_rl = state2state_rl._replace( -# objectives=[Objective(initial, H, target)], -#) +unitary_rl = state2state_rl._replace( + objectives=[Objective(initial, H, target)], +) @pytest.fixture( params=[ - #pytest.param(state2state_grape, id="State to state (GRAPE)"), - #pytest.param(state2state_crab, id="State to state (CRAB)"), - #pytest.param(state2state_param_crab, id="State to state (param. CRAB)"), - #pytest.param(state2state_goat, id="State to state (GOAT)"), - #pytest.param(state2state_jax, id="State to state (JAX)"), + pytest.param(state2state_grape, id="State to state (GRAPE)"), + pytest.param(state2state_crab, id="State to state (CRAB)"), + pytest.param(state2state_param_crab, id="State to state (param. 
CRAB)"), + pytest.param(state2state_goat, id="State to state (GOAT)"), + pytest.param(state2state_jax, id="State to state (JAX)"), pytest.param(state2state_rl, id="State to state (RL)"), - #pytest.param(unitary_rl, id="Unitary (RL)"), + pytest.param(unitary_rl, id="Unitary (RL)"), ] ) def tst(request): From 6cee77c3823e63ad8039710de6f1f9d1f66d2c03 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Sat, 24 Aug 2024 19:07:37 +0200 Subject: [PATCH 10/15] Fixed final state saving --- src/qutip_qoc/_rl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index ce4ae74..ede6d5e 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -218,6 +218,7 @@ def reset(self, seed=None): self.terminated = False self.truncated = False self.temp_actions = [] + self._result._final_states = [self.state] self.state = self._initial return self._get_obs(), {} @@ -230,7 +231,6 @@ def result(self): self._result.n_iters = len(self._result.iter_seconds) self._result.optimized_params = self.actions.copy() + [self._result.total_seconds] # If var_time is True, the last parameter is the evolution time self._result._optimized_controls = self.actions.copy() - self._result._final_states = (self._result._final_states if self._result._final_states is not None else []) + [self.state] self._result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.start_local_time) # Convert to a string self._result.end_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.end_local_time) # Convert to a string self._result._guess_controls = [] From 318081f1d34ac17e484622cce117290756a57f9b Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Mon, 26 Aug 2024 10:28:31 +0200 Subject: [PATCH 11/15] edit configuration files --- .pre-commit-config.yaml | 2 ++ requirements.txt | 2 ++ setup.cfg | 2 ++ 3 files changed, 6 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0bcf23a..0c027ee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -92,3 +92,5 @@ repos: - jaxlib - diffrax - pytest + - gymnasium + - stable-baselines3 diff --git a/requirements.txt b/requirements.txt index a08565e..9687463 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,5 @@ qutip>=5.0.1 qutip-qtrl qutip-jax pre-commit +gymnasium>=0.29.1 +stable-baselines3>=2.3.2 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 3903195..7872023 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,6 +35,8 @@ install_requires = qutip-qtrl qutip-jax numpy>=1.16.6,<2.0 + gymnasium>=0.29.1 + stable-baselines3>=2.3.2 setup_requires = cython>=1.0 packaging From 4485f9d42685364962ac4b436d4810200e2e1a26 Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Sat, 14 Sep 2024 20:17:11 +0200 Subject: [PATCH 12/15] Docstring fixes, use of __time__ parameter instead of shorter_pulses, added underscore for internal variables, args is now passed as a parameter to _infid(), changes in callback function --- src/qutip_qoc/_rl.py | 126 +++++++++++++++++------------------ src/qutip_qoc/pulse_optim.py | 10 +-- tests/test_result.py | 9 ++- 3 files changed, 69 insertions(+), 76 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index ede6d5e..66b8474 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -52,27 +52,34 @@ def __init__( self._Hd_lst.append(objective.H[0]) self._Hc_lst.append([H[0] if isinstance(H, list) else H for H in objective.H[1:]]) - # create the QobjEvo with Hd, Hc and controls(args) - self.args = {f"alpha{i+1}": (1) for i 
in range(len(self._Hc_lst[0]))} # set the control parameters to 1 for all the Hc + def create_pulse_func(idx): + """ + Create a control pulse lambda function for a given index. + """ + return lambda t, args: self._pulse(t, args, idx+1) + + # create the QobjEvo with Hd, Hc and controls(args) self._H_lst = [self._Hd_lst[0]] + dummy_args = {f'alpha{i+1}': 1.0 for i in range(len(self._Hc_lst[0]))} for i, Hc in enumerate(self._Hc_lst[0]): - self._H_lst.append([Hc, lambda t, args: self.pulse(t, self.args, i+1)]) - self._H = qt.QobjEvo(self._H_lst, self.args) + self._H_lst.append([Hc, create_pulse_func(i)]) + self._H = qt.QobjEvo(self._H_lst, args=dummy_args) + + self.shorter_pulses = False if time_options == {} else True # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps # extract bounds for control_parameters bounds = [] for key in control_parameters.keys(): bounds.append(control_parameters[key].get("bounds")) - self.lbound = [b[0][0] for b in bounds] - self.ubound = [b[0][1] for b in bounds] + self._lbound = [b[0][0] for b in bounds] + self._ubound = [b[0][1] for b in bounds] self._alg_kwargs = alg_kwargs - self.shorter_pulses = self._alg_kwargs.get("shorter_pulses", False) # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps self._initial = objectives[0].initial self._target = objectives[0].target - self.state = None - self.dim = self._initial.shape[0] + self._state = None + self._dim = self._initial.shape[0] self._result = Result( objectives = objectives, @@ -87,19 +94,19 @@ def __init__( self._step_penalty = 1 # To check if it exceeds the maximum number of steps in an episode - self.current_step = 0 + self._current_step = 0 self.terminated = False self.truncated = False - self.episode_info = [] # to contain some information from the latest episode + self._episode_info = [] # to contain some information from the latest episode self._fid_err_targ = alg_kwargs["fid_err_targ"] # inferred attributes self._norm_fac = 1 / self._target.norm() - self.temp_actions = [] # temporary list to save episode actions - self.actions = [] # list of actions(lists) of the last episode + self._temp_actions = [] # temporary list to save episode actions + self._actions = [] # list of actions(lists) of the last episode # integrator options self._integrator_kwargs = integrator_kwargs @@ -108,16 +115,16 @@ def __init__( self.max_episode_time = time_interval.evo_time # maximum time for an episode self.max_steps = time_interval.n_tslots # maximum number of steps in an episode - self.step_duration = time_interval.tslots[-1] / time_interval.n_tslots # step duration for mesvole + self._step_duration = time_interval.tslots[-1] / time_interval.n_tslots # step duration for mesvole self.max_episodes = alg_kwargs["max_iter"] # maximum number of episodes for training - self.total_timesteps = self.max_episodes * self.max_steps # for learn() of gym + self._total_timesteps = self.max_episodes * self.max_steps # for learn() of gym self.current_episode = 0 # To keep track of the current episode # Define action and observation spaces (Gym) if self._initial.isket: - obs_shape = (2 * self.dim,) + obs_shape = (2 * self._dim,) else: # for unitary operators - obs_shape = (2 * self.dim * self.dim,) + obs_shape = (2 * self._dim * self._dim,) self.action_space = spaces.Box(low=-1, high=1, shape=(len(self._Hc_lst[0]),), dtype=np.float32) # Continuous action space from -1 to +1, as suggested from gym self.observation_space = spaces.Box(low=-1, 
high=1, shape=obs_shape, dtype=np.float32) # Observation space @@ -129,13 +136,14 @@ def __init__( self._fid_type = self._alg_kwargs.get("fid_type", "PSU") self._solver = qt.SESolver(H=self._H, options=self._integrator_kwargs) - def pulse(self, t, args, idx): + def _pulse(self, t, args, idx): """ Returns the control pulse value at time t for a given index. """ - return 1*args[f"alpha{idx}"] + alpha = args[f"alpha{idx}"] + return alpha - def save_episode_info(self): + def _save_episode_info(self): """ Save the information of the last episode before resetting the environment. """ @@ -144,19 +152,19 @@ def save_episode_info(self): "final_infidelity": self._result.infidelity, "terminated": self.terminated, "truncated": self.truncated, - "steps_used": self.current_step, + "steps_used": self._current_step, "elapsed_time": time.mktime(time.localtime()) } - self.episode_info.append(episode_data) + self._episode_info.append(episode_data) - def _infid(self, params=None): + def _infid(self, args): """ The agent performs a step, then calculate infidelity to be minimized of the current state against the target state. """ X = self._solver.run( - self.state, [0.0, self.step_duration], args={"p": params} + self._state, [0.0, self._step_duration], args=args ).final_state - self.state = X + self._state = X if self._fid_type == "TRACEDIFF": diff = X - self._target @@ -177,20 +185,18 @@ def step(self, action): Perform a single time step in the environment, applying the scaled action (control pulse) chosen by the RL agent. Updates the system's state and computes the reward. """ - alphas = [((action[i] + 1) / 2 * (self.ubound[0] - self.lbound[0])) + self.lbound[0] for i in range(len(action))] - - for i, value in enumerate(alphas): - self.args[f"alpha{i+1}"] = value + alphas = [((action[i] + 1) / 2 * (self._ubound[0] - self._lbound[0])) + self._lbound[0] for i in range(len(action))] - infidelity = self._infid() + args = {f"alpha{i+1}": value for i, value in enumerate(alphas)} + _infidelity = self._infid(args) - self.current_step += 1 - self.temp_actions.append(alphas) - self._result.infidelity = infidelity - reward = (1 - infidelity) - self._step_penalty + self._current_step += 1 + self._temp_actions.append(alphas) + self._result.infidelity = _infidelity + reward = (1 - _infidelity) - self._step_penalty - self.terminated = infidelity <= self._fid_err_targ # the episode ended reaching the goal - self.truncated = self.current_step >= self.max_steps # if the episode ended without reaching the goal + self.terminated = _infidelity <= self._fid_err_targ # the episode ended reaching the goal + self.truncated = self._current_step >= self.max_steps # if the episode ended without reaching the goal observation = self._get_obs() return observation, reward, bool(self.terminated), bool(self.truncated), {} @@ -200,7 +206,7 @@ def _get_obs(self): Get the current state observation for the RL agent. Converts the system's quantum state or matrix into a real-valued NumPy array suitable for RL algorithms. """ - rho = self.state.full().flatten() + rho = self._state.full().flatten() obs = np.concatenate((np.real(rho), np.imag(rho))) return obs.astype(np.float32) # Gymnasium expects the observation to be of type float32 @@ -208,18 +214,18 @@ def reset(self, seed=None): """ Reset the environment to the initial state, preparing for a new episode. 
""" - self.save_episode_info() + self._save_episode_info() - time_diff = self.episode_info[-1]["elapsed_time"] - (self.episode_info[-2]["elapsed_time"] if len(self.episode_info) > 1 else time.mktime(self._result.start_local_time)) + time_diff = self._episode_info[-1]["elapsed_time"] - (self._episode_info[-2]["elapsed_time"] if len(self._episode_info) > 1 else time.mktime(self._result.start_local_time)) self._result.iter_seconds.append(time_diff) - self.current_step = 0 # Reset the step counter + self._current_step = 0 # Reset the step counter self.current_episode += 1 # Increment episode counter - self.actions = self.temp_actions.copy() + self._actions = self._temp_actions.copy() self.terminated = False self.truncated = False - self.temp_actions = [] - self._result._final_states = [self.state] - self.state = self._initial + self._temp_actions = [] + self._result._final_states = [self._state] + self._state = self._initial return self._get_obs(), {} def result(self): @@ -229,8 +235,8 @@ def result(self): """ self._result.end_local_time = time.localtime() self._result.n_iters = len(self._result.iter_seconds) - self._result.optimized_params = self.actions.copy() + [self._result.total_seconds] # If var_time is True, the last parameter is the evolution time - self._result._optimized_controls = self.actions.copy() + self._result.optimized_params = self._actions.copy() + [self._result.total_seconds] # If var_time is True, the last parameter is the evolution time + self._result._optimized_controls = self._actions.copy() self._result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.start_local_time) # Convert to a string self._result.end_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.end_local_time) # Convert to a string self._result._guess_controls = [] @@ -252,7 +258,7 @@ def train(self): stop_callback = EarlyStopTraining(verbose=1) # Train the model - model.learn(total_timesteps = self.total_timesteps, callback=stop_callback) + model.learn(total_timesteps = self._total_timesteps, callback=stop_callback) class EarlyStopTraining(BaseCallback): """ @@ -267,33 +273,20 @@ def _on_step(self) -> bool: This method is required by the BaseCallback class. We use it to stop the training. - Stop training if the maximum number of episodes is reached. - Stop training if it finds an episode with infidelity <= than target infidelity + - If all of the last 100 episodes have infidelity below the target and use the same number of steps, stop training. """ env = self.training_env.envs[0].unwrapped # Check if we need to stop training - if self.stop_train: - return False # Stop training - elif env.current_episode >= env.max_episodes: + if env.current_episode >= env.max_episodes: env._result.message = f"Reached {env.max_episodes} episodes, stopping training." return False # Stop training elif (env._result.infidelity <= env._fid_err_targ) and not(env.shorter_pulses): env._result.message = f"Stop training because an episode with infidelity <= target infidelity was found" return False # Stop training - return True # Continue training - - def _on_rollout_start(self) -> None: - """ - This method is called before the rollout starts (before collecting new samples). - Checks: - - If all of the last 100 episodes have infidelity below the target and use the same number of steps, stop training. - """ - #could be moved to on_step - - env = self.training_env.envs[0].unwrapped - #Only if specified in alg_kwargs, the algorithm will search for shorter pulses, resulting in episodes with fewer steps. 
- if env.shorter_pulses: - if len(env.episode_info) >= 100: - last_100_episodes = env.episode_info[-100:] + elif env.shorter_pulses: + if len(env._episode_info) >= 100: + last_100_episodes = env._episode_info[-100:] min_steps = min(info['steps_used'] for info in last_100_episodes) steps_condition = all(ep['steps_used'] == min_steps for ep in last_100_episodes) @@ -301,4 +294,5 @@ def _on_rollout_start(self) -> None: if steps_condition and infid_condition: env._result.message = "Training finished. No episode in the last 100 used fewer steps and infidelity was below target infid." - self.stop_train = True # Stop training \ No newline at end of file + return False # Stop training + return True # Continue training \ No newline at end of file diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index 6964b9a..8a1f4dc 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -41,16 +41,17 @@ def optimize_pulses( control_id : dict - guess: ndarray, shape (n,) + For RL you don't need to specify the guess. Initial guess. Array of real elements of size (n,), where ``n`` is the number of independent variables. - bounds : sequence, optional - For RL you don't need to specify the guess. Sequence of ``(min, max)`` pairs for each element in `guess`. None is used to specify no bound. __time__ : dict, optional - Only supported by GOAT and JOPT. + Only supported by GOAT, JOPT and RL. + For RL the values of guess and bounds are not relevant. If given the pulse duration is treated as optimization parameter. It must specify both: @@ -84,11 +85,6 @@ def optimize_pulses( Global steps default to 0 (no global optimization). Can be overridden by specifying in minimizer_kwargs. - - shorter_pulses : bool, optional - If set to True, allows the algorithm to search for shorter control - pulses that can achieve the desired fidelity target using fewer steps. - By default, it is set to False, only attempting to reach the target infidelity. - Algorithm specific keywords for GRAPE,CRAB can be found in :func:`qutip_qtrl.pulseoptim.optimize_pulse`. 
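For reference, the RL-related options documented above can be exercised with a minimal state-to-state call along the lines of the `state2state_rl` test case that follows. This is only a sketch, not the definitive API: it assumes the top-level `optimize_pulses` and `Objective` imports exposed by `qutip_qoc`, and the `__time__` entry carries dummy guess/bounds values because the RL path does not use them.

    import numpy as np
    import qutip as qt
    from qutip_qoc import Objective, optimize_pulses

    # qubit state-to-state transfer: |0> -> |+>
    initial = qt.basis(2, 0)
    target = (qt.basis(2, 0) + qt.basis(2, 1)).unit()
    H_d = 0.5 * (0.1 * qt.sigmaz() + 1.0 * qt.sigmax())   # drift Hamiltonian
    H = [H_d, qt.sigmax(), qt.sigmay(), qt.sigmaz()]       # drift + control Hamiltonians

    result = optimize_pulses(
        objectives=[Objective(initial, H, target)],
        control_parameters={
            "p": {"bounds": [(-13, 13)]},
            # dummy values: RL ignores the __time__ guess and bounds
            "__time__": {"guess": np.array([0.0]), "bounds": [(0.0, 0.0)]},
        },
        tlist=np.linspace(0, 10, 100),
        algorithm_kwargs={"alg": "RL", "fid_err_targ": 0.01, "max_iter": 300},
    )
    print(result.message, result.infidelity)

Note that a later commit in this series replaces the RL use of `__time__` with the `shorter_pulses` entry in `algorithm_kwargs`; the rest of the call stays the same.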
diff --git a/tests/test_result.py b/tests/test_result.py index 1ffa3df..b74a211 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -168,14 +168,17 @@ def sin_z_jax(t, r, **kwargs): state2state_rl = Case( objectives=[Objective(initial, H, target)], control_parameters = { - "p": {"bounds": [(-13, 13)],} + "p": {"bounds": [(-13, 13)]}, + "__time__": { + "guess": np.array([0.0]), #dummy value + "bounds": [(0.0, 0.0)] #dummy value + } }, tlist=np.linspace(0, 10, 100), algorithm_kwargs={ "fid_err_targ": 0.01, "alg": "RL", - "max_iter": 300, - "shorter_pulses": True, + "max_iter": 20000, }, optimizer_kwargs={} ) From 3889a63d688e11a67ce69b382a1049725e79972f Mon Sep 17 00:00:00 2001 From: LegionAtol Date: Mon, 30 Sep 2024 17:17:00 +0200 Subject: [PATCH 13/15] added _backup_result to keep track of the result even if the algorithm continues to search for solutions with shorter pulses --- src/qutip_qoc/_rl.py | 64 ++++++++++++++++++++++++++++++++++---------- tests/test_result.py | 12 +++++++++ 2 files changed, 62 insertions(+), 14 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 66b8474..3bdb1fc 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -88,7 +88,19 @@ def create_pulse_func(idx): n_iters = 0, # Number of iterations(episodes) until convergence iter_seconds = [], # list containing the time taken for each iteration(episode) of the optimization var_time = True, # Whether the optimization was performed with variable time + guess_params=[] ) + + self._backup_result = Result( # used as a backup in case the algorithm with shorter_pulses does not find an episode with infid bool: """ @@ -279,12 +308,18 @@ def _on_step(self) -> bool: # Check if we need to stop training if env.current_episode >= env.max_episodes: - env._result.message = f"Reached {env.max_episodes} episodes, stopping training." + if env._use_backup_result == True: + env._backup_result.message = f"Reached {env.max_episodes} episodes, stopping training. Return the last founded episode with infid < target_infid" + else: + env._result.message = f"Reached {env.max_episodes} episodes, stopping training." return False # Stop training elif (env._result.infidelity <= env._fid_err_targ) and not(env.shorter_pulses): env._result.message = f"Stop training because an episode with infidelity <= target infidelity was found" return False # Stop training elif env.shorter_pulses: + if(env._result.infidelity <= env._fid_err_targ): # if it finds an episode with infidelity lower than target infidelity, I'll save it in the meantime + env._use_backup_result = True + env._save_result() if len(env._episode_info) >= 100: last_100_episodes = env._episode_info[-100:] @@ -293,6 +328,7 @@ def _on_step(self) -> bool: infid_condition = all(ep['final_infidelity'] <= env._fid_err_targ for ep in last_100_episodes) if steps_condition and infid_condition: + env._use_backup_result = False env._result.message = "Training finished. No episode in the last 100 used fewer steps and infidelity was below target infid." 
return False # Stop training return True # Continue training \ No newline at end of file diff --git a/tests/test_result.py b/tests/test_result.py index b74a211..9bb5496 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -190,6 +190,18 @@ def sin_z_jax(t, r, **kwargs): unitary_rl = state2state_rl._replace( objectives=[Objective(initial, H, target)], + control_parameters = { + "p": {"bounds": [(-13, 13)]}, + "__time__": { + "guess": np.array([0.0]), #dummy value + "bounds": [(0.0, 0.0)] #dummy value + } + }, + algorithm_kwargs={ + "fid_err_targ": 0.01, + "alg": "RL", + "max_iter": 300, + }, ) From 8fe527ba5affa1a544546fb526cb91560b7ed418 Mon Sep 17 00:00:00 2001 From: flowerthrower Date: Wed, 2 Oct 2024 11:25:51 +0200 Subject: [PATCH 14/15] back to 'shorter_pulses' --- src/qutip_qoc/_rl.py | 2 +- src/qutip_qoc/pulse_optim.py | 3 +-- tests/test_result.py | 10 ++-------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index 3bdb1fc..cdcaa01 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -65,7 +65,7 @@ def create_pulse_func(idx): self._H_lst.append([Hc, create_pulse_func(i)]) self._H = qt.QobjEvo(self._H_lst, args=dummy_args) - self.shorter_pulses = False if time_options == {} else True # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps + self.shorter_pulses = alg_kwargs.get("shorter_pulses", False) # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps # extract bounds for control_parameters bounds = [] diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index 8a1f4dc..d320799 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -50,8 +50,7 @@ def optimize_pulses( `guess`. None is used to specify no bound. __time__ : dict, optional - Only supported by GOAT, JOPT and RL. - For RL the values of guess and bounds are not relevant. + Only supported by GOAT, JOPT (for RL use `algorithm_kwargs: 'shorter_pulses'`). If given the pulse duration is treated as optimization parameter. 
It must specify both: diff --git a/tests/test_result.py b/tests/test_result.py index 9bb5496..7b9cda3 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -169,16 +169,13 @@ def sin_z_jax(t, r, **kwargs): objectives=[Objective(initial, H, target)], control_parameters = { "p": {"bounds": [(-13, 13)]}, - "__time__": { - "guess": np.array([0.0]), #dummy value - "bounds": [(0.0, 0.0)] #dummy value - } }, tlist=np.linspace(0, 10, 100), algorithm_kwargs={ "fid_err_targ": 0.01, "alg": "RL", "max_iter": 20000, + "shorter_pulses": True, }, optimizer_kwargs={} ) @@ -192,15 +189,12 @@ def sin_z_jax(t, r, **kwargs): objectives=[Objective(initial, H, target)], control_parameters = { "p": {"bounds": [(-13, 13)]}, - "__time__": { - "guess": np.array([0.0]), #dummy value - "bounds": [(0.0, 0.0)] #dummy value - } }, algorithm_kwargs={ "fid_err_targ": 0.01, "alg": "RL", "max_iter": 300, + "shorter_pulses": True, }, ) From 604b12e2f827d82f8823cc4c141cd0e247ea27d1 Mon Sep 17 00:00:00 2001 From: flowerthrower Date: Wed, 2 Oct 2024 11:37:49 +0200 Subject: [PATCH 15/15] pre-commit formating --- requirements.txt | 2 +- src/qutip_qoc/_rl.py | 227 +++++++++++++++++++++-------------- src/qutip_qoc/pulse_optim.py | 2 +- tests/test_result.py | 18 +-- 4 files changed, 150 insertions(+), 99 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9687463..50da21d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ qutip-qtrl qutip-jax pre-commit gymnasium>=0.29.1 -stable-baselines3>=2.3.2 \ No newline at end of file +stable-baselines3>=2.3.2 diff --git a/src/qutip_qoc/_rl.py b/src/qutip_qoc/_rl.py index cdcaa01..9a75a55 100644 --- a/src/qutip_qoc/_rl.py +++ b/src/qutip_qoc/_rl.py @@ -1,10 +1,10 @@ """ -This module contains functions that implement quantum optimal control -using reinforcement learning (RL) techniques, allowing for the optimization +This module contains functions that implement quantum optimal control +using reinforcement learning (RL) techniques, allowing for the optimization of control pulse sequences in quantum systems. """ import qutip as qt -from qutip import Qobj, QobjEvo +from qutip import Qobj from qutip_qoc import Result import numpy as np @@ -17,12 +17,13 @@ import time + class _RL(gym.Env): """ - Class for storing a control problem and implementing quantum optimal - control using reinforcement learning. This class defines a custom - Gym environment that models the dynamics of quantum systems - under various control pulses, and uses RL algorithms to optimize the + Class for storing a control problem and implementing quantum optimal + control using reinforcement learning. This class defines a custom + Gym environment that models the dynamics of quantum systems + under various control pulses, and uses RL algorithms to optimize the parameters of these pulses. """ @@ -39,33 +40,37 @@ def __init__( qtrl_optimizers, ): """ - Initialize the reinforcement learning environment for quantum - optimal control. Sets up the system Hamiltonian, control parameters, + Initialize the reinforcement learning environment for quantum + optimal control. Sets up the system Hamiltonian, control parameters, and defines the observation and action spaces for the RL agent. 
""" - super(_RL,self).__init__() - + super(_RL, self).__init__() + self._Hd_lst, self._Hc_lst = [], [] for objective in objectives: # extract drift and control Hamiltonians from the objective self._Hd_lst.append(objective.H[0]) - self._Hc_lst.append([H[0] if isinstance(H, list) else H for H in objective.H[1:]]) + self._Hc_lst.append( + [H[0] if isinstance(H, list) else H for H in objective.H[1:]] + ) def create_pulse_func(idx): """ Create a control pulse lambda function for a given index. """ - return lambda t, args: self._pulse(t, args, idx+1) - - # create the QobjEvo with Hd, Hc and controls(args) + return lambda t, args: self._pulse(t, args, idx + 1) + + # create the QobjEvo with Hd, Hc and controls(args) self._H_lst = [self._Hd_lst[0]] - dummy_args = {f'alpha{i+1}': 1.0 for i in range(len(self._Hc_lst[0]))} + dummy_args = {f"alpha{i+1}": 1.0 for i in range(len(self._Hc_lst[0]))} for i, Hc in enumerate(self._Hc_lst[0]): self._H_lst.append([Hc, create_pulse_func(i)]) self._H = qt.QobjEvo(self._H_lst, args=dummy_args) - self.shorter_pulses = alg_kwargs.get("shorter_pulses", False) # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps + self.shorter_pulses = alg_kwargs.get( + "shorter_pulses", False + ) # lengthen the training to look for pulses of shorter duration, therefore episodes with fewer steps # extract bounds for control_parameters bounds = [] @@ -82,27 +87,29 @@ def create_pulse_func(idx): self._dim = self._initial.shape[0] self._result = Result( - objectives = objectives, - time_interval = time_interval, - start_local_time = time.localtime(), # initial optimization time - n_iters = 0, # Number of iterations(episodes) until convergence - iter_seconds = [], # list containing the time taken for each iteration(episode) of the optimization - var_time = True, # Whether the optimization was performed with variable time - guess_params=[] + objectives=objectives, + time_interval=time_interval, + start_local_time=time.localtime(), # initial optimization time + n_iters=0, # Number of iterations(episodes) until convergence + iter_seconds=[], # list containing the time taken for each iteration(episode) of the optimization + var_time=True, # Whether the optimization was performed with variable time + guess_params=[], + ) + + self._backup_result = Result( # used as a backup in case the algorithm with shorter_pulses does not find an episode with infid= self.max_steps # if the episode ended without reaching the goal + self.terminated = ( + _infidelity <= self._fid_err_targ + ) # the episode ended reaching the goal + self.truncated = ( + self._current_step >= self.max_steps + ) # if the episode ended without reaching the goal observation = self._get_obs() return observation, reward, bool(self.terminated), bool(self.truncated), {} def _get_obs(self): """ - Get the current state observation for the RL agent. Converts the system's + Get the current state observation for the RL agent. Converts the system's quantum state or matrix into a real-valued NumPy array suitable for RL algorithms. """ rho = self._state.full().flatten() obs = np.concatenate((np.real(rho), np.imag(rho))) - return obs.astype(np.float32) # Gymnasium expects the observation to be of type float32 - + return obs.astype( + np.float32 + ) # Gymnasium expects the observation to be of type float32 + def reset(self, seed=None): """ Reset the environment to the initial state, preparing for a new episode. 
""" self._save_episode_info() - time_diff = self._episode_info[-1]["elapsed_time"] - (self._episode_info[-2]["elapsed_time"] if len(self._episode_info) > 1 else time.mktime(self._result.start_local_time)) + time_diff = self._episode_info[-1]["elapsed_time"] - ( + self._episode_info[-2]["elapsed_time"] + if len(self._episode_info) > 1 + else time.mktime(self._result.start_local_time) + ) self._result.iter_seconds.append(time_diff) - self._current_step = 0 # Reset the step counter - self.current_episode += 1 # Increment episode counter + self._current_step = 0 # Reset the step counter + self.current_episode += 1 # Increment episode counter self._actions = self._temp_actions.copy() self.terminated = False self.truncated = False @@ -239,61 +268,74 @@ def reset(self, seed=None): self._result._final_states = [self._state] self._state = self._initial return self._get_obs(), {} - + def _save_result(self): """ - Save the results of the optimization process, including the optimized + Save the results of the optimization process, including the optimized pulse sequences, final states, and performance metrics. """ result_obj = self._backup_result if self._use_backup_result else self._result - if(self._use_backup_result): + if self._use_backup_result: self._backup_result.iter_seconds = self._result.iter_seconds.copy() self._backup_result._final_states = self._result._final_states.copy() self._backup_result.infidelity = self._result.infidelity result_obj.end_local_time = time.localtime() - result_obj.n_iters = len(self._result.iter_seconds) - result_obj.optimized_params = self._actions.copy() + [self._result.total_seconds] # If var_time is True, the last parameter is the evolution time + result_obj.n_iters = len(self._result.iter_seconds) + result_obj.optimized_params = self._actions.copy() + [ + self._result.total_seconds + ] # If var_time is True, the last parameter is the evolution time result_obj._optimized_controls = self._actions.copy() result_obj._guess_controls = [] result_obj._optimized_H = [self._H] - def result(self): """ Final conversions and return of optimization results - """ + """ if self._use_backup_result: - self._backup_result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._backup_result.start_local_time) # Convert to a string - self._backup_result.end_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._backup_result.end_local_time) # Convert to a string + self._backup_result.start_local_time = time.strftime( + "%Y-%m-%d %H:%M:%S", self._backup_result.start_local_time + ) # Convert to a string + self._backup_result.end_local_time = time.strftime( + "%Y-%m-%d %H:%M:%S", self._backup_result.end_local_time + ) # Convert to a string return self._backup_result else: self._save_result() - self._result.start_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.start_local_time) # Convert to a string - self._result.end_local_time = time.strftime("%Y-%m-%d %H:%M:%S", self._result.end_local_time) # Convert to a string + self._result.start_local_time = time.strftime( + "%Y-%m-%d %H:%M:%S", self._result.start_local_time + ) # Convert to a string + self._result.end_local_time = time.strftime( + "%Y-%m-%d %H:%M:%S", self._result.end_local_time + ) # Convert to a string return self._result def train(self): """ - Train the RL agent on the defined quantum control problem using the specified + Train the RL agent on the defined quantum control problem using the specified reinforcement learning algorithm. Checks environment compatibility with Gym API. 
""" # Check if the environment follows Gym API check_env(self, warn=True) # Create the model - model = PPO('MlpPolicy', self, verbose=1) # verbose = 1 to display training progress and statistics in the terminal - + model = PPO( + "MlpPolicy", self, verbose=1 + ) # verbose = 1 to display training progress and statistics in the terminal + stop_callback = EarlyStopTraining(verbose=1) - + # Train the model - model.learn(total_timesteps = self._total_timesteps, callback=stop_callback) + model.learn(total_timesteps=self._total_timesteps, callback=stop_callback) + class EarlyStopTraining(BaseCallback): """ A callback to stop training based on specific conditions (steps, infidelity, max iterations) """ + def __init__(self, verbose: int = 0): super(EarlyStopTraining, self).__init__(verbose) @@ -304,31 +346,40 @@ def _on_step(self) -> bool: - Stop training if it finds an episode with infidelity <= than target infidelity - If all of the last 100 episodes have infidelity below the target and use the same number of steps, stop training. """ - env = self.training_env.envs[0].unwrapped + env = self.training_env.get_attr("unwrapped")[0] # Check if we need to stop training if env.current_episode >= env.max_episodes: - if env._use_backup_result == True: + if env._use_backup_result is True: env._backup_result.message = f"Reached {env.max_episodes} episodes, stopping training. Return the last founded episode with infid < target_infid" else: - env._result.message = f"Reached {env.max_episodes} episodes, stopping training." - return False # Stop training - elif (env._result.infidelity <= env._fid_err_targ) and not(env.shorter_pulses): - env._result.message = f"Stop training because an episode with infidelity <= target infidelity was found" + env._result.message = ( + f"Reached {env.max_episodes} episodes, stopping training." + ) + return False # Stop training + elif (env._result.infidelity <= env._fid_err_targ) and not (env.shorter_pulses): + env._result.message = "Stop training because an episode with infidelity <= target infidelity was found" return False # Stop training elif env.shorter_pulses: - if(env._result.infidelity <= env._fid_err_targ): # if it finds an episode with infidelity lower than target infidelity, I'll save it in the meantime + if ( + env._result.infidelity <= env._fid_err_targ + ): # if it finds an episode with infidelity lower than target infidelity, I'll save it in the meantime env._use_backup_result = True env._save_result() if len(env._episode_info) >= 100: last_100_episodes = env._episode_info[-100:] - min_steps = min(info['steps_used'] for info in last_100_episodes) - steps_condition = all(ep['steps_used'] == min_steps for ep in last_100_episodes) - infid_condition = all(ep['final_infidelity'] <= env._fid_err_targ for ep in last_100_episodes) + min_steps = min(info["steps_used"] for info in last_100_episodes) + steps_condition = all( + ep["steps_used"] == min_steps for ep in last_100_episodes + ) + infid_condition = all( + ep["final_infidelity"] <= env._fid_err_targ + for ep in last_100_episodes + ) if steps_condition and infid_condition: env._use_backup_result = False env._result.message = "Training finished. No episode in the last 100 used fewer steps and infidelity was below target infid." 
return False # Stop training - return True # Continue training \ No newline at end of file + return True # Continue training diff --git a/src/qutip_qoc/pulse_optim.py b/src/qutip_qoc/pulse_optim.py index d320799..9e8ecc3 100644 --- a/src/qutip_qoc/pulse_optim.py +++ b/src/qutip_qoc/pulse_optim.py @@ -79,7 +79,7 @@ def optimize_pulses( - max_iter : int, optional Maximum number of iterations to perform. - Referes to local minimizer steps or in the context of + Referes to local minimizer steps or in the context of `alg: "RL"` to the max. number of episodes. Global steps default to 0 (no global optimization). Can be overridden by specifying in minimizer_kwargs. diff --git a/tests/test_result.py b/tests/test_result.py index 7b9cda3..7146b04 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -156,18 +156,18 @@ def sin_z_jax(t, r, **kwargs): # state to state transfer initial = qt.basis(2, 0) -target = (qt.basis(2, 0) + qt.basis(2, 1)).unit() # |+⟩ +target = (qt.basis(2, 0) + qt.basis(2, 1)).unit() # |+⟩ -H_c = [qt.sigmax(), qt.sigmay(), qt.sigmaz()] # control Hamiltonians +H_c = [qt.sigmax(), qt.sigmay(), qt.sigmaz()] # control Hamiltonians w, d, y = 0.1, 1.0, 0.1 -H_d = 1 / 2 * (w * qt.sigmaz() + d * qt.sigmax()) # drift Hamiltonian +H_d = 1 / 2 * (w * qt.sigmaz() + d * qt.sigmax()) # drift Hamiltonian -H = [H_d] + H_c # total Hamiltonian +H = [H_d] + H_c # total Hamiltonian state2state_rl = Case( objectives=[Objective(initial, H, target)], - control_parameters = { + control_parameters={ "p": {"bounds": [(-13, 13)]}, }, tlist=np.linspace(0, 10, 100), @@ -177,17 +177,17 @@ def sin_z_jax(t, r, **kwargs): "max_iter": 20000, "shorter_pulses": True, }, - optimizer_kwargs={} + optimizer_kwargs={}, ) # no big difference for unitary evolution -initial = qt.qeye(2) # Identity -target = qt.gates.hadamard_transform() +initial = qt.qeye(2) # Identity +target = qt.gates.hadamard_transform() unitary_rl = state2state_rl._replace( objectives=[Objective(initial, H, target)], - control_parameters = { + control_parameters={ "p": {"bounds": [(-13, 13)]}, }, algorithm_kwargs={