In [1]:
from roboqueue import StationaryTarget, TargetList, Telescope, Conditions, TelescopeSchedulingEnv
from spacerocks.time import Time

import numpy as np
from tabulate import tabulate
import gymnasium as gym
from stable_baselines3 import PPO

In [2]:
# Observing setup shared by the cells below: the sky conditions and the
# telescope (identified by observatory code 'W84', observing in the 'r' filter).
# NOTE(review): the units of `seeing` and `moon` are not visible in this
# notebook — confirm against the roboqueue `Conditions` documentation.
conditions = Conditions(moon=7, seeing=1)
telescope = Telescope(filter='r', obscode='W84')

In [3]:
# Sky coordinates of the stars to schedule, as (RA, Dec) pairs in degrees.
star_positions_deg = [
    (24.42813204, -57.23666007),
    (68.98000195, 16.50976164),
    (78.63446353, -8.20163919),
    (79.17206517, 45.99902927),
    (88.79287161, 7.40703634),
    (95.98787763, -52.69571799),
    (101.28854105, -16.71314306),
    (114.82724194, 5.22750767),
    (186.64975585, -63.09905586),
    (210.95601898, -60.3729784),
    (219.92041034, -60.83514707),
]

# Convert the whole table to radians in one call, then return to a list of
# (ra, dec) tuples so downstream cells can keep indexing each star as a pair.
star_positions = [tuple(pair) for pair in np.radians(star_positions_deg)]

In [15]:
# Build the target list: one StationaryTarget per (ra, dec) pair in
# `star_positions` (already in radians), named "star-0", "star-1", ...
# Every star is given the same magnitude (1) and required SNR (3,000,000).
targets = TargetList()

for idx, (ra_rad, dec_rad) in enumerate(star_positions):
    targets.add_target(
        StationaryTarget(
            name=f"star-{idx}",
            ra=ra_rad,
            dec=dec_rad,
            mag=1,
            required_snr=3_000_000,
        )
    )


In [16]:
# Sample the current time once and keep its epoch value for the cells below.
# NOTE(review): this makes the notebook's outputs depend on when it is run.
now = Time.now()
epoch = now.epoch

In [17]:
# Evaluate every target at `epoch` for this telescope and these conditions.
# at() returns six parallel sequences, one entry per target.
ra, dec, alt, az, exptime, is_done = targets.at(
    epoch=epoch,
    telescope=telescope,
    conditions=conditions,
)

# Print one table row per target.
column_names = ['RA', 'DEC', 'ALT', 'AZ', 'EXPTIME', 'IS_DONE']
table_rows = zip(ra, dec, alt, az, exptime, is_done)
print(tabulate(table_rows, headers=column_names, tablefmt='pretty'))

# Stack the six sequences and transpose, so each row is a target and each
# column is one of the quantities above.
sky_state = np.transpose(np.array([ra, dec, alt, az, exptime, is_done]))
print(f'targets: {targets}')


+---------------------+---------------------+----------------------+--------------------+-------------------+---------+
|         RA          |         DEC         |         ALT          |         AZ         |      EXPTIME      | IS_DONE |
+---------------------+---------------------+----------------------+--------------------+-------------------+---------+
| 0.42635133420991916 | -0.9989681710662681 | 0.043972075568720265 | 3.4699839268711115 | 41189.85757682151 |  False  |
|  1.203928152059609  | 0.28814969933745876 | -0.2740825479532326  | 4.891880319043666  |   10000000000.0   |  False  |
|  1.372430294137903  | -0.1431456079261008 |  0.0826028439546877  | 4.594627245900428  | 6783.17365479463  |  False  |
|  1.381813212820024  | 0.8028345134827437  | -0.3535251277651851  | 5.449704461401602  |   10000000000.0   |  False  |
| 1.5497279618950985  | 0.12927717194787017 | 0.09816331591034762  | 4.9203331196347815 | 4480.781958636829 |  False  |
| 1.6753045066449115  | -0.9197137806279

In [18]:
# Reward weights handed to the environment; tune these to change the reward.
reward_config = {
    'exptime': 1,
    'n_observed_targets': 1,
}

# Episode start and length.
# NOTE(review): 4 / 24 is four hours expressed as a fraction of a day, not a
# number of seconds. This assumes max_duration uses the same day-based units
# as the environment epochs printed by the rollout cell — TODO confirm against
# TelescopeSchedulingEnv.
start_epoch = Time.now().epoch
max_duration = 4 / 24

# Positional order: targets, telescope, conditions, start_epoch, max_duration,
# reward_config. The environment is not reset here.
env = TelescopeSchedulingEnv(
    targets,
    telescope,
    conditions,
    start_epoch,
    max_duration,
    reward_config,
)


In [8]:
# PPO agent with the built-in MLP policy, attached to the scheduling
# environment; verbose=1 makes it report its setup and training progress.
model = PPO(policy="MlpPolicy", env=env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [9]:
# Train the agent on the environment, then save it under the name "roboqueue".
# The environment logs an "[INFO] Skipping target ..." line for many steps, so
# this cell produces a long output.
total_timesteps = 25_000
checkpoint_name = "roboqueue"

model.learn(total_timesteps=total_timesteps)
model.save(checkpoint_name)

[INFO] Skipping target 1
[INFO] Skipping target 3
[INFO] Skipping target 1
[INFO] Skipping target 4
[INFO] Skipping target 9
[INFO] Skipping target 3
[INFO] Skipping target 3
[INFO] Skipping target 7
[INFO] Skipping target 4
[INFO] Skipping target 5
[INFO] Skipping target 10
[INFO] Skipping target 8
[INFO] Skipping target 9
[INFO] Skipping target 0
[INFO] Skipping target 10
[INFO] Skipping target 0
[INFO] Skipping target 0
[INFO] Skipping target 3
[INFO] Skipping target 3
[INFO] Skipping target 8
[INFO] Skipping target 8
[INFO] Skipping target 10
[INFO] Skipping target 8
[INFO] Skipping target 0
[INFO] Skipping target 3
[INFO] Skipping target 5
[INFO] Skipping target 8
[INFO] Skipping target 9
[INFO] Skipping target 0
[INFO] Skipping target 10
[INFO] Skipping target 2
[INFO] Skipping target 6
[INFO] Skipping target 4
[INFO] Skipping target 6
[INFO] Skipping target 4
[INFO] Skipping target 9
[INFO] Skipping target 4
[INFO] Skipping target 0
[INFO] Skipping target 0
[INFO] Skipping targe

In [13]:
# Reset the environment before the evaluation rollout in the next cell.
# The value returned by reset() is echoed as this cell's output.
reset_result = env.reset()
reset_result

array([[ 4.26351334e-01, -9.98968171e-01,  9.82796509e-01,
         2.65606710e+00,  4.36249175e+02,  0.00000000e+00],
       [ 1.20392815e+00,  2.88149699e-01,  9.90304160e-02,
         1.17327391e+00,  4.39200922e+03,  0.00000000e+00],
       [ 1.37243029e+00, -1.43145608e-01,  1.79981382e-01,
         1.63321178e+00,  1.45632104e+03,  0.00000000e+00],
       [ 1.38181321e+00,  8.02834513e-01, -2.93938365e-01,
         8.05062561e-01,  1.00000000e+10,  0.00000000e+00],
       [ 1.54972796e+00,  1.29277172e-01, -1.08750268e-01,
         1.48398022e+00,  1.00000000e+10,  0.00000000e+00],
       [ 1.67530451e+00, -9.19713781e-01,  3.10333953e-01,
         2.46451827e+00,  7.81078609e+02,  0.00000000e+00],
       [ 1.76781854e+00, -2.91699375e-01, -7.70529024e-02,
         1.95804909e+00,  1.00000000e+10,  0.00000000e+00],
       [ 2.00411344e+00,  9.12372205e-02, -4.81764585e-01,
         1.75501910e+00,  1.00000000e+10,  0.00000000e+00],
       [ 3.25765279e+00, -1.10128628e+00,  6.108

In [19]:
# Roll out the trained policy until the environment reports the episode is
# done, printing the chosen action and the environment epoch after each step.
# The environment must have been reset before this cell is run.
done = False
while not done:
    # Ask the policy for an action given the environment's current sky state.
    action, _states = model.predict(env.sky_state)
    # step() is unpacked as four values: observation, reward, done flag, info.
    # `reward` is left holding the last step's value for the cell below.
    observation, reward, done, info = env.step(action)
    print(action, env.epoch)

# Elapsed time of the rollout, measured from the epoch the environment was
# created with. The earlier `epoch` variable is a separate Time.now() sample
# from another cell, so subtracting it would add the wall-clock gap between
# those two cells to the reported duration.
print(env.epoch - start_epoch)

9 2460386.68156754
6 2460386.6883293567
[INFO] Skipping target 6
6 2460386.689023801
7 2460386.6961193536
[INFO] Skipping target 7
7 2460386.696813798
[INFO] Skipping target 2
2 2460386.6975082424
[INFO] Skipping target 7
7 2460386.698202687
8 2460386.7032545633
[INFO] Skipping target 6
6 2460386.7039490077
[INFO] Skipping target 8
8 2460386.704643452
[INFO] Skipping target 1
1 2460386.7053378965
[INFO] Skipping target 1
1 2460386.706032341
[INFO] Skipping target 6
6 2460386.7067267853
[INFO] Skipping target 8
8 2460386.7074212297
[INFO] Skipping target 4
4 2460386.708115674
5 2460386.7147810096
10 2460386.719974618
[INFO] Skipping target 0
0 2460386.7206690623
[INFO] Skipping target 3
3 2460386.7213635067
[INFO] Skipping target 5
5 2460386.722057951
[INFO] Skipping target 2
2 2460386.7227523956
[INFO] Skipping target 4
4 2460386.72344684
[INFO] Skipping target 0
0 2460386.7241412844
[INFO] Skipping target 3
3 2460386.7248357288
11 2460386.724847303
[INFO] Skipping target 3
3 2460386.7

In [12]:
# Display `reward` as left behind by the rollout loop: it is reassigned on
# every env.step() call, so this is the value from the last step executed,
# not a total accumulated over the episode.
reward

-6330.058501507574

In [None]:
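# Setup note: if the `shimmy` package is missing from the kernel's environment, install it once with: %pip install shimmy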