# Satellite Environment with OpenAI Gym

In [232]:
from gym import Env
from gym.spaces import *
import numpy as np
import random

In [233]:
"""
State of satellite
P: power of satellite
S1: memory in sensor 1
S2: memory in sensor 2
S3: memory in sensor 3
E: electronics of satellite
O: orbital motion of satellite
T: time of the day-> ionosphere behavior

State Space:
S = (P, S1, S2, S3, E, O, T)
P = {100, 99, 98, …, 3, 2, 1, 0}
S1 = {100, 99, 98, …, 3, 2, 1, 0}
S2 = {100, 99, 98, …, 3, 2, 1, 0}
S3 = {100, 99, 98, …, 3, 2, 1, 0}
E = {0, 1} not working / working
O = {0, 1} not in range / in range
T = {0, 1, 2, 3, 4} night, dawn, morning, afternoon, dusk

Action Space:
A = {transmit, not transmit} = {1, 0}

Step Function:
*If a1= transmit = 1, then:
    P(state=s) transitions to a lower value P(state=s’) ---> power discharges
    Ex: P=100 -> a1 (transmit) -> P=95
    S1(state=s) transitions to a lower value S1(state=s’) ---> free memory in sensor 1 decreases (data transferred)
    Ex: S1=100 -> a1 (transmit) -> S1=96.5

*If a1= not transmit = 0, then:
    P(state=s) transitions to a higher value P(state=s’) ---> power charges
    Ex: P=70 -> a1 (not transmit) -> P=73
    S1(state=s) remains the same S1(state=s’) ---> free memory in sensor 1 constant (no data transferred)
    Ex: S1=70 -> a1 (not transmit) -> S1=70

Reward:
*If a1=1 (transmit), then:    ---> For each transmission we get reward
    reward +=5 
*If P(state=s’)>=30, then:    ---> Rewards for maintaining power more than 30%
    reward +=2 
*If we transmit all the data within the time satellite is within range then:
    reward += 5*(remaining time for satellite in range)  --->More rewards for quicker transmission

Termination:
An action can occur every minute for an hour (when the satellite is within range) or until the sensor memory is full i.e. terminate if:
*S1(state=s) = 0 --->Transmission complete
*O(state=s) = 0  --->Satellite out of range

"""

class SatelliteEnv(Env):
    def __init__(self):
        # A = {transmit, not transmit} = {1, 0}
        self.action_space = Discrete(2)
        # S = (P, S1, S2, S3, E, O, T)
        #Note currently only using P,S1,O
        self.observation_space = Tuple((Box(0, 100, shape=(4,)),MultiDiscrete([ 2, 2, 5 ])))
        # Set start states
        self.state = np.zeros(7)
        for i in range(7): 
            if i<4:
                self.state[i] = 100
            elif i<6:
                self.state[i] = 1
            else:
                self.state[i] = 2
        
        #Transmission Time 60 mins
        self.transmission_time = 60
        
    def step(self, action):
        #A = {transmit, not transmit} = {1, 0}
        #Update States

        if self.state[0]>=5: #no action 1 if P < 5 
            if action==1:
                self.state[0] += -5   #discharging
                self.state[1] += -3.5 #update free memory
        if action==0:
            self.state[0] += +3 #charging
            
        #Check States within bounds
        for i in range(2):
            if self.state[i]>100:
                self.state[i] = 100  
            elif self.state[i]<0:
                self.state[i] = 0
             
        # Reduce transmission time by 1 minute
        self.transmission_time -= 1
        
        # Calculate reward
        reward = 0
        if self.state[0]>=30:
            reward += 2
        else: 
            reward += 0
        if action==1:
            reward += 5
        if self.state[1]==0:
            reward += (60 - self.transmission_time)*5
        
        # Check if transmission time is done
        if self.transmission_time <= 0:
            self.state[5] = 0
    
        if 0 in self.state[[1,5]]:
            done = True
        else:
            done = False
        
        # Set placeholder for info
        info = {}
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        # Reset states
        self.state = np.zeros(7)
        for i in range(7): 
            if i<4:
                self.state[i] = 100 #random.randint(90,100)
            elif i<6:
                self.state[i] = 1
            else:
                self.state[i] = 2
        # Reset time
        self.transmission_time = 60 
        return self.state
    

In [234]:
env = SatelliteEnv()

In [235]:
env.observation_space.sample()

(array([26.054426 ,  5.2657714, 93.55094  , 61.757233 ], dtype=float32),
 array([0, 1, 1], dtype=int64))

In [242]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    a=[]
    
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        a.append(action)
        score+=reward
    out_states = ','.join(map('{:6}'.format, n_state[[0,1,5]]))
    print('Episode:{0:3}   Score:{1:5,.1f}   Transmissions:{2:4}   Total Time:{3:4}   Final States(P,S,O):{4:5}'.format(episode,score,a.count(1),np.size(a),out_states))

Episode:  1   Score:513.0   Transmissions:  29   Total Time:  54   Final States(P,S,O):  11.0,   0.0,   1.0
Episode:  2   Score:521.0   Transmissions:  29   Total Time:  54   Final States(P,S,O):  26.0,   0.0,   1.0
Episode:  3   Score:521.0   Transmissions:  29   Total Time:  54   Final States(P,S,O):  30.0,   0.0,   1.0
Episode:  4   Score:551.0   Transmissions:  29   Total Time:  58   Final States(P,S,O):  42.0,   0.0,   1.0
Episode:  5   Score:260.0   Transmissions:  28   Total Time:  60   Final States(P,S,O):  52.0,   2.0,   0.0
Episode:  6   Score:551.0   Transmissions:  29   Total Time:  58   Final States(P,S,O):  38.0,   0.0,   1.0
Episode:  7   Score:439.0   Transmissions:  29   Total Time:  46   Final States(P,S,O):   6.0,   0.0,   1.0
Episode:  8   Score:565.0   Transmissions:  29   Total Time:  60   Final States(P,S,O):  48.0,   0.0,   0.0
Episode:  9   Score:542.0   Transmissions:  29   Total Time:  57   Final States(P,S,O):  39.0,   0.0,   1.0
Episode: 10   Score:526.0   