In [1]:
# Sanity check for a machine with several Python environments: show the module
# search path of this kernel and the PettingZoo version it resolves.
import sys
print(sys.path)
import pettingzoo
print(f"PettingZoo version: {pettingzoo.__version__}")

['C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\python310.zip', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\DLLs', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\lib', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310', '', 'C:\\Users\\zhech\\AppData\\Roaming\\Python\\Python310\\site-packages', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages', 'C:\\Program Files\\Basilisk\\bsk_rl\\src', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\win32', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\win32\\lib', 'C:\\Users\\zhech\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\Pythonwin']
PettingZoo version: 1.24.3


#### Test gym library for RL.
Set up gym with the Basilisk environment and all imports. Test the configuration and library setup. Perform an initial test from the official docs.

In [2]:
import gymnasium as gym
import numpy as np
from bsk_rl import act, data, obs, scene, sats
from bsk_rl.sim import dyn, fsw

from Basilisk.architecture import bskLogging
# Set Basilisk's default log level to BSK_WARNING (presumably to keep
# lower-severity simulator messages out of the cell output -- TODO confirm).
bskLogging.setDefaultLogLevel(bskLogging.BSK_WARNING)

Gymnasium API: https://github.com/Farama-Foundation/Gymnasium  
BSK-RL API: https://avslab.github.io/bsk_rl/examples/simple_environment.html

In this case the satellite is the agent. We use a custom gym environment, here the Basilisk RL environment. The test implementation from the documentation:

In [3]:
class MyScanningSatellite(sats.AccessSatellite):
    """Scanning satellite specification used for the documentation test.

    Observation layout: the storage-level fraction and the battery-charge
    fraction (from ``obs.SatProperties``), followed by the ``obs.Eclipse``
    observation.

    Action layout: index 0 scans for 60 seconds, index 1 charges for
    600 seconds.
    """

    # What the agent observes at every step.
    observation_spec = [
        obs.SatProperties(
            {"prop": "storage_level_fraction"},
            {"prop": "battery_charge_fraction"},
        ),
        obs.Eclipse(),
    ]

    # Discrete actions, selected by their position in this list.
    action_spec = [
        act.Scan(duration=60.0),  # one minute of scanning
        act.Charge(duration=600.0),  # ten minutes of charging
    ]

    # Dynamics and flight-software model classes used for this satellite.
    dyn_type = dyn.ContinuousImagingDynModel
    fsw_type = fsw.ContinuousImagingFSWModel

In [4]:
# Display the default satellite arguments and their values for this satellite
# class (the bare expression is rendered as the cell output).
MyScanningSatellite.default_sat_args()

{'hs_min': 0.0,
 'maxCounterValue': 4,
 'thrMinFireTime': 0.02,
 'desatAttitude': 'sun',
 'controlAxes_B': [1, 0, 0, 0, 1, 0, 0, 0, 1],
 'thrForceSign': 1,
 'K': 7.0,
 'Ki': -1,
 'P': 35.0,
 'imageAttErrorRequirement': 0.01,
 'imageRateErrorRequirement': None,
 'inst_pHat_B': [0, 0, 1],
 'utc_init': 'this value will be set by the world model',
 'batteryStorageCapacity': 288000.0,
 'storedCharge_Init': <function bsk_rl.sim.dyn.BasicDynamicsModel.<lambda>()>,
 'disturbance_vector': None,
 'dragCoeff': 2.2,
 'imageTargetMaximumRange': -1,
 'instrumentBaudRate': 8000000.0,
 'instrumentPowerDraw': -30.0,
 'basePowerDraw': 0.0,
 'wheelSpeeds': <function bsk_rl.sim.dyn.BasicDynamicsModel.<lambda>()>,
 'maxWheelSpeed': inf,
 'u_max': 0.2,
 'rwBasePower': 0.4,
 'rwMechToElecEfficiency': 0.0,
 'rwElecToMechEfficiency': 0.5,
 'panelArea': 1.0,
 'panelEfficiency': 0.2,
 'nHat_B': array([ 0,  0, -1]),
 'mass': 330,
 'width': 1.38,
 'depth': 1.04,
 'height': 1.58,
 'sigma_init': <function bsk_rl.sim

In [5]:
# Satellite arguments passed to the satellite constructor below.
sat_args = {
    # Parameters held constant
    "imageAttErrorRequirement": 0.05,
    "dataStorageCapacity": 1e10,
    "instrumentBaudRate": 1e7,
    "storedCharge_Init": 50000.0,
    # Callable value: randomizes the initial storage level on every reset
    "storageInit": lambda: np.random.uniform(0.25, 0.75) * 1e10,
}

# Instantiate the satellite from the spec class and the arguments above
sat = MyScanningSatellite(name="EO1", sat_args=sat_args)

Create the environment with a scenario and a rewarder. Rewarder reference: https://avslab.github.io/bsk_rl/api_reference/data/index.html

In [6]:
# Create the "SatelliteTasking-v1" environment for `sat`, with a
# UniformNadirScanning scenario and a ScanningTimeReward rewarder.
env = gym.make(
    "SatelliteTasking-v1",
    satellite=sat,
    scenario=scene.UniformNadirScanning(),
    rewarder=data.ScanningTimeReward(),
    time_limit=5700.0,  # approximately 1 orbit
    log_level="INFO",  # INFO-level log lines appear in the cell output
)

[90;3m2025-01-19 09:51:25,469 [0m[mgym                            [0m[mINFO       [0m[mCalling env.reset() to get observation space[0m
[90;3m2025-01-19 09:51:25,470 [0m[mgym                            [0m[mINFO       [0m[mResetting environment with seed=2974421340[0m
[90;3m2025-01-19 09:51:25,588 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<0.00> [0m[36mEO1: [0m[mFinding opportunity windows from 0.00 to 5700.00 seconds[0m
[90;3m2025-01-19 09:51:25,602 [0m[mgym                            [0m[mINFO       [0m[33m<0.00> [0m[mEnvironment reset[0m


In [12]:
# Reset the environment with a fixed seed and keep the first observation.
observation, info = env.reset(seed=1)

[90;3m2025-01-18 18:08:51,887 [0m[mgym                            [0m[mINFO       [0m[mResetting environment with seed=1[0m
[90;3m2025-01-18 18:08:52,115 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<0.00> [0m[36mEO1: [0m[mFinding opportunity windows from 0.00 to 5700.00 seconds[0m
[90;3m2025-01-18 18:08:52,137 [0m[mgym                            [0m[mINFO       [0m[33m<0.00> [0m[mEnvironment reset[0m
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


In [13]:
# Take action 0 (the first entry of action_spec, i.e. Scan) three times and
# compare observation[0], the data level, before and after.
# NOTE: relies on `observation` produced by the preceding env.reset() cell.
print("Initial data level:", observation[0], "(randomized by sat_args)")
for _ in range(3):
    observation, reward, terminated, truncated, info = env.step(action=0)
print("  Final data level:", observation[0])

[90;3m2025-01-18 18:09:17,925 [0m[mgym                            [0m[mINFO       [0m[33m<0.00> [0m[93;1m=== STARTING STEP ===[0m
[90;3m2025-01-18 18:09:17,926 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<0.00> [0m[36mEO1: [0m[maction_nadir_scan tasked for 60.0 seconds[0m
[90;3m2025-01-18 18:09:17,927 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<0.00> [0m[36mEO1: [0m[msetting timed terminal event at 60.0[0m
[90;3m2025-01-18 18:09:17,936 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<60.00> [0m[36mEO1: [0m[mtimed termination at 60.0 for action_nadir_scan[0m
[90;3m2025-01-18 18:09:17,937 [0m[mdata.base                      [0m[mINFO       [0m[33m<60.00> [0m[mData reward: {}[0m
[90;3m2025-01-18 18:09:17,939 [0m[mcomm.communication             [0m[mINFO       [0m[33m<60.00> [0m[mOptimizing data communication between all pairs of satellites[0m
[90;3m2025-01-18 18:09:17,940 [0

Initial data level: 0.7341307878 (randomized by sat_args)
  Final data level: 0.8241307878


In [14]:
# Keep taking action 1 (the second entry of action_spec, i.e. Charge) until the
# episode ends, printing the charge level, the simulation time and the eclipse
# observation after every step.
#
# The loop stops on `terminated` as well as `truncated`: once either flag is
# set the episode is over and the Gymnasium API requires env.reset() before
# stepping again, so testing `truncated` alone could keep stepping a finished
# episode.
# NOTE: the initial `terminated` / `truncated` values come from the last
# env.step() call in the previous cell.
while not (terminated or truncated):
    observation, reward, terminated, truncated, info = env.step(action=1)
    print(f"Charge level: {observation[1]:.3f} ({env.unwrapped.simulator.sim_time:.1f} seconds)\n\tEclipse: start: {observation[2]:.1f} end: {observation[3]:.1f}")

[90;3m2025-01-18 18:11:18,918 [0m[mgym                            [0m[mINFO       [0m[33m<180.00> [0m[93;1m=== STARTING STEP ===[0m
[90;3m2025-01-18 18:11:18,920 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<180.00> [0m[36mEO1: [0m[maction_charge tasked for 600.0 seconds[0m
[90;3m2025-01-18 18:11:18,920 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<180.00> [0m[36mEO1: [0m[msetting timed terminal event at 780.0[0m
[90;3m2025-01-18 18:11:19,010 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<780.00> [0m[36mEO1: [0m[mtimed termination at 780.0 for action_charge[0m
[90;3m2025-01-18 18:11:19,011 [0m[mdata.base                      [0m[mINFO       [0m[33m<780.00> [0m[mData reward: {}[0m
[90;3m2025-01-18 18:11:19,012 [0m[mcomm.communication             [0m[mINFO       [0m[33m<780.00> [0m[mOptimizing data communication between all pairs of satellites[0m
[90;3m2025-01-18 18:11:19,013

Charge level: 0.339 (780.0 seconds)
	Eclipse: start: 5340.0 end: 1800.0
Charge level: 0.337 (1380.0 seconds)
	Eclipse: start: 4740.0 end: 1200.0
Charge level: 0.334 (1980.0 seconds)
	Eclipse: start: 4140.0 end: 600.0


[90;3m2025-01-18 18:11:19,215 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<2580.00> [0m[36mEO1: [0m[mtimed termination at 2580.0 for action_charge[0m
[90;3m2025-01-18 18:11:19,215 [0m[mdata.base                      [0m[mINFO       [0m[33m<2580.00> [0m[mData reward: {}[0m
[90;3m2025-01-18 18:11:19,215 [0m[mcomm.communication             [0m[mINFO       [0m[33m<2580.00> [0m[mOptimizing data communication between all pairs of satellites[0m
[90;3m2025-01-18 18:11:19,215 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<2580.00> [0m[36mEO1: [0m[mSatellite EO1 requires retasking[0m
[90;3m2025-01-18 18:11:19,238 [0m[mgym                            [0m[mINFO       [0m[33m<2580.00> [0m[mStep reward: 0.0[0m
[90;3m2025-01-18 18:11:19,240 [0m[mgym                            [0m[mINFO       [0m[33m<2580.00> [0m[93;1m=== STARTING STEP ===[0m
[90;3m2025-01-18 18:11:19,242 [0m[36msats.satellite.EO1        

Charge level: 0.354 (2580.0 seconds)
	Eclipse: start: 3540.0 end: 5670.0
Charge level: 0.942 (3180.0 seconds)
	Eclipse: start: 2940.0 end: 5070.0


[90;3m2025-01-18 18:11:19,416 [0m[mdata.base                      [0m[mINFO       [0m[33m<3780.00> [0m[mData reward: {}[0m
[90;3m2025-01-18 18:11:19,418 [0m[mcomm.communication             [0m[mINFO       [0m[33m<3780.00> [0m[mOptimizing data communication between all pairs of satellites[0m
[90;3m2025-01-18 18:11:19,419 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<3780.00> [0m[36mEO1: [0m[mSatellite EO1 requires retasking[0m
[90;3m2025-01-18 18:11:19,421 [0m[mgym                            [0m[mINFO       [0m[33m<3780.00> [0m[mStep reward: 0.0[0m
[90;3m2025-01-18 18:11:19,423 [0m[mgym                            [0m[mINFO       [0m[33m<3780.00> [0m[93;1m=== STARTING STEP ===[0m
[90;3m2025-01-18 18:11:19,425 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<3780.00> [0m[36mEO1: [0m[maction_charge tasked for 600.0 seconds[0m
[90;3m2025-01-18 18:11:19,426 [0m[36msats.satellite.EO1             [

Charge level: 1.000 (3780.0 seconds)
	Eclipse: start: 2340.0 end: 4470.0
Charge level: 1.000 (4380.0 seconds)
	Eclipse: start: 1740.0 end: 3870.0
Charge level: 1.000 (4980.0 seconds)
	Eclipse: start: 1140.0 end: 3270.0


[90;3m2025-01-18 18:11:19,675 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<5580.00> [0m[36mEO1: [0m[mtimed termination at 5580.0 for action_charge[0m
[90;3m2025-01-18 18:11:19,675 [0m[mdata.base                      [0m[mINFO       [0m[33m<5580.00> [0m[mData reward: {}[0m
[90;3m2025-01-18 18:11:19,685 [0m[mcomm.communication             [0m[mINFO       [0m[33m<5580.00> [0m[mOptimizing data communication between all pairs of satellites[0m
[90;3m2025-01-18 18:11:19,687 [0m[36msats.satellite.EO1             [0m[mINFO       [0m[33m<5580.00> [0m[36mEO1: [0m[mSatellite EO1 requires retasking[0m
[90;3m2025-01-18 18:11:19,687 [0m[mgym                            [0m[mINFO       [0m[33m<5580.00> [0m[mStep reward: 0.0[0m
[90;3m2025-01-18 18:11:19,687 [0m[mgym                            [0m[mINFO       [0m[33m<5580.00> [0m[93;1m=== STARTING STEP ===[0m
[90;3m2025-01-18 18:11:19,687 [0m[36msats.satellite.EO1        

Charge level: 1.000 (5580.0 seconds)
	Eclipse: start: 540.0 end: 2670.0
Charge level: 1.000 (5700.0 seconds)
	Eclipse: start: 420.0 end: 2550.0
