# 1. Test Random Environment with OpenAI Gym

In [171]:
from gym import Env
from gym.spaces import *
import numpy as np
import random

In [172]:
"""
State of satellite
P: power of satellite
S1: memory in sensor 1
S2: memory in sensor 2
S3: memory in sensor 3
E: electronics of satellite
O: orbital motion of satellite
T: time of the day-> ionosphere behavior
State Space:
S = (P, S1, S2, S3, E, O, T)
P = {100, 99, 98, …, 3, 2, 1, 0}
S1 = {100, 99, 98, …, 3, 2, 1, 0}
S2 = {100, 99, 98, …, 3, 2, 1, 0}
S3 = {100, 99, 98, …, 3, 2, 1, 0}
E = {0, 1} not working / working
O = {0, 1} not in range / in range
T = {0, 1, 2, 3, 4} night, dawn, morning, afternoon, dusk
Action Space:
A = {transmit, not transmit} = {1, 0}
Transition Function:
If a1 = transmit = 1, then P(state=s) transitions to a lower value P(state=s’), where s’ < s
Ex: P=100 -> a1 (transmit) -> P=90
Reward: (in progress).
"""

class SatelliteEnv(Env):
    """Toy satellite-transmission environment.

    The state is a flat 7-element float array laid out as
    ``(P, S1, S2, S3, E, O, T)``: power, three sensor-memory levels,
    electronics flag, in-range flag and time-of-day index.

    Actions: ``0`` = do not transmit, ``1`` = transmit.

    NOTE(review): ``observation_space`` is declared as a ``Tuple`` of a
    ``Box`` and a ``MultiDiscrete``, while ``step``/``reset`` return a flat
    array. The returned shape is kept as-is so existing callers keep working.
    """

    # Number of steps in one episode (one step per minute of transmission).
    TRANSMISSION_TIME = 60
    # Power drained by a single transmit action.
    TRANSMIT_POWER_COST = 5
    # Power level at or above which a step is rewarded.
    MIN_SAFE_POWER = 15

    def __init__(self):
        # A = {transmit, not transmit} = {1, 0}
        self.action_space = Discrete(2)
        # S = (P, S1, S2, S3, E, O, T)
        # P, S1..S3 lie in [0, 100]; E and O are binary; T takes the five
        # values 0..4 (night, dawn, morning, afternoon, dusk).
        self.observation_space = Tuple(
            (Box(0, 100, shape=(4,)), MultiDiscrete([2, 2, 5]))
        )
        # Set start state
        self.state = self._initial_state()
        # Remaining steps of the transmission window
        self.transmission_time = self.TRANSMISSION_TIME

    @staticmethod
    def _initial_state():
        """Build a fresh start state: random levels in 90..100, flags set to 1."""
        levels = [random.randint(90, 100) for _ in range(4)]
        # E, O and T all start at 1.
        return np.array(levels + [1, 1, 1], dtype=float)

    def step(self, action):
        """Advance the environment by one time step.

        Args:
            action: 1 to transmit, 0 to stay idle.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the
            (mutated in place) state array, ``reward`` is +1 while power is
            at least ``MIN_SAFE_POWER`` and -1 otherwise, ``done`` is True
            once the transmission window is used up, and ``info`` is an
            empty dict.
        """
        # Transmitting drains power; clamp at 0 so P stays inside the
        # documented 0..100 range instead of going negative.
        if action == 1:
            self.state[0] = max(self.state[0] - self.TRANSMIT_POWER_COST, 0)

        # One step of the transmission window has elapsed
        self.transmission_time -= 1

        # Reward keeping enough power in reserve
        reward = 1 if self.state[0] >= self.MIN_SAFE_POWER else -1

        # The episode ends when the transmission window is exhausted
        done = self.transmission_time <= 0

        # Placeholder for diagnostic info
        info = {}

        return self.state, reward, done, info

    def render(self):
        """Visualisation hook; not implemented yet."""
        pass

    def reset(self):
        """Start a new episode and return the fresh state array."""
        self.state = self._initial_state()
        self.transmission_time = self.TRANSMISSION_TIME
        return self.state
    

In [173]:
# Instantiate the custom satellite environment defined above.
env = SatelliteEnv()

In [174]:
# Draw one random sample from the declared observation space to inspect its
# structure (a tuple of a 4-element Box array and a 3-element MultiDiscrete array).
env.observation_space.sample()

(array([71.79943 , 91.31828 , 63.450966, 93.567566], dtype=float32),
 array([1, 0, 3], dtype=int64))

In [175]:
# Roll out a uniformly random policy for a few episodes and report the
# total reward collected in each one.
episodes = 10
for episode in range(1, episodes + 1):
    # Every episode starts from a freshly reset environment.
    state = env.reset()
    done, score = False, 0

    while not done:
        # env.render()
        # Pick an action at random and accumulate the reward it yields.
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:-4
Episode:2 Score:20
Episode:3 Score:8
Episode:4 Score:6
Episode:5 Score:-4
Episode:6 Score:-6
Episode:7 Score:14
Episode:8 Score:4
Episode:9 Score:8
Episode:10 Score:2
