In [1]:
#   ____                            _____     _____                   __          __        _        _                 
#  / __ \                     /\   |_   _|   / ____|                  \ \        / /       | |      | |                
# | |  | |_ __   ___ _ __    /  \    | |    | |  __ _   _ _ __ ___     \ \  /\  / /__  _ __| | _____| |__   ___  _ __  
# | |  | | '_ \ / _ \ '_ \  / /\ \   | |    | | |_ | | | | '_ ` _ \     \ \/  \/ / _ \| '__| |/ / __| '_ \ / _ \| '_ \ 
# | |__| | |_) |  __/ | | |/ ____ \ _| |_   | |__| | |_| | | | | | |     \  /\  / (_) | |  |   <\__ \ | | | (_) | |_) |
#  \____/| .__/ \___|_| |_/_/    \_\_____|   \_____|\__, |_| |_| |_|      \/  \/ \___/|_|  |_|\_\___/_| |_|\___/| .__/ 
#        | |                                         __/ |                                                      | |    
#        |_|                                        |___/                                                       |_|    

In [2]:
# Shell out to pip to install the Gym package used by the cells below.
!pip install gym
# Disabled: alternative install of a CPU-only PyTorch 1.5.0 wheel from the PyTorch wheel index.
#!pip install torch==1.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html



In [3]:
import json
from itertools import count

import gym
from gym import spaces

import torch
from torch import Tensor

import random
from random import randint

from typing import Dict, List, Optional, Callable, Any, Tuple, Union, Type

In [4]:
class TxWirelessModel:
  """Toy transmitter (Tx) model: one randomly drawn frequency per time-step.

  Attributes:
    hor: number of time-steps covered by the model (its horizon).
    tx_freq: 1-D integer tensor of length ``hor``; entry ``t`` holds the Tx
      frequency index, drawn from ``[0, n_freqs)``, used at time-step ``t``.
  """

  def __init__(self, n_freqs: int, horizon: int):
    """Draw the Tx frequency schedule.

    Args:
      n_freqs: number of available frequencies (indices 0 .. n_freqs - 1).
      horizon: number of time-steps to draw a Tx frequency for.
    """
    self.hor = horizon  # model horizon
    # Uses torch's global RNG, so call torch.manual_seed() beforehand for reproducibility.
    self.tx_freq: Tensor = torch.randint(n_freqs, size=(horizon, ))  # random Tx frequencies w.r.t. time-step

  def rx_reward(self, t: int, freq: int) -> float:
    """Reward for a receiver tuned to ``freq`` at time-step ``t``.

    Args:
      t: time-step index.
      freq: frequency index chosen by the receiver.

    Returns:
      1.0 if ``freq`` equals the Tx frequency at time-step ``t``, otherwise 0.0.
      Any ``t`` outside ``[0, hor)`` yields 0.0.
    """
    # Guard both ends: without the lower bound a negative t would silently wrap
    # around (tensor negative indexing) and score against the wrong time-step.
    if not 0 <= t < self.hor:
      return 0.0
    return float(self.tx_freq[t] == freq)  # float(True) = 1.0 if Tx and Rx frequencies are the same

In [5]:
class TxWirelessGym(gym.Env):
  """Gym environment where the agent (Rx) tries to pick the transmitter's (Tx) frequency.

  The observation is the current time-step ``t`` in ``{0, ..., horizon}`` and
  the action is a frequency index in ``{0, ..., n_freqs - 1}``. An action earns
  reward 1.0 when it equals the Tx frequency of the current time-step, else 0.0.
  The episode is terminal once ``t`` reaches ``horizon``; stepping a terminal
  environment yields reward 0.0, keeps the observation at ``horizon`` and keeps
  reporting ``done=True``.
  """

  def __init__(self, n_freqs: int, horizon: int):
    """Create the spaces and draw the Tx frequency schedule.

    Args:
      n_freqs: number of available frequencies (size of the action space).
      horizon: number of time-steps in one episode.
    """
    super(TxWirelessGym, self).__init__()  # initialize gym.Env base class
    self.action_space: spaces.Discrete = spaces.Discrete(n_freqs)  # action space {0, 1, ..., n_freqs - 1}
    self.observation_space: spaces.Discrete = spaces.Discrete(horizon + 1)  # observation space {0, 1, ..., horizon}
    self.hor = horizon  # gym horizon to know when we are DONE
    # Drawn once here (torch global RNG); reset() does not redraw, so every episode replays the same schedule.
    self.tx_freq = torch.randint(n_freqs, size=(horizon,))  # random Tx frequencies w.r.t. time-step
    self.t = 0  # initial time-step / observation

  def _reward(self, freq: int) -> float:
    """Reward that choosing ``freq`` would earn at the current time-step."""
    if self.t < self.hor:  # non-terminal observation, horizon not reached
      return float(freq == self.tx_freq[self.t])  # float(True) = 1.0 if Tx and Rx frequencies are the same
    return 0.0  # gym horizon reached

  def step(self, action: int):  # step function to interact with the gym
    """Apply ``action`` at the current time-step and advance by one step.

    Args:
      action: frequency index chosen by the agent.

    Returns:
      Tuple ``(observation, reward, done, info)`` where ``observation`` is the
      new time-step, ``reward`` is 1.0 on a frequency match and 0.0 otherwise,
      ``done`` tells whether the horizon has been reached, and ``info`` is a
      debug dictionary.
    """
    r = self._reward(action)
    # Clamp at the horizon so the observation never leaves observation_space,
    # even if the caller keeps stepping after the episode has ended.
    self.t = min(self.t + 1, self.hor)  # increment our time-step / observation
    o = self.t  # observation that will return
    # '>=' (not '=='): a terminal environment must keep reporting done=True.
    done = (self.t >= self.hor)  # is terminal gym state reached
    return o, r, done, {'a': 1}  # gyms always returns <obs, reward, if terminal obs reached, debug/info dictionary>

  def reset(self):  # reset our gym for a new episode
    """Rewind to time-step 0 and return it as the initial observation."""
    self.t = 0  # initial time-step
    return self.t  # initial observation

  def render(self, mode='human'):  # gym visual rendering (e.g. text, image, plot, 3D frame, etc.)
    """Print, for the current time-step, the rewarded frequency and the reward of every action.

    Args:
      mode: accepted for gym API compatibility; not used, output is always text.
    """
    a_r = [self._reward(freq) for freq in range(self.action_space.n)]  # try every freqs
    # print all action-reward for this time-step
    print(
      't={o:2}, tx_freq:{tx_freq}, tx_rewards:{action_rewards}'.format(
        o=self.t, tx_freq=[i for i in range(len(a_r)) if a_r[i] == 1.0],
        action_rewards=a_r
      )
    )

In [6]:
SEED = 1234  # one seed shared by every random number generator used in this cell
random.seed(SEED)  # python random number generator seed
torch.manual_seed(SEED)  # pytorch random number generator seed (drives the Tx frequency schedule)

n_freqs = 10  # number of frequencies
horizon = 15  # horizon of our gym (episodes)
N_EPISODES = 3  # number of episodes to run (episode indices 0, 1, 2)

gym_env = TxWirelessGym(n_freqs, horizon)  # our gym environment
# action_space.sample() draws from the space's own RNG, which the two seeds
# above do not touch; seed it as well so the sampled actions are reproducible.
gym_env.action_space.seed(SEED)

for episode_i in range(N_EPISODES):  # training loop
  print('starting episode {episode_i}...'.format(episode_i=episode_i))
  o = gym_env.reset()  # reset gym every episode, return initial observation
  done = False
  while not done:  # run until the terminal gym observation is reached (out of horizon)
    gym_env.render()  # show gym rendering
    a = gym_env.action_space.sample()  # random action from action-space
    o, r, done, _ = gym_env.step(a)  # interact with the gym

  # Render the terminal observation, then deliberately step once more past the
  # end of the episode (the environment returns reward 0.0 there).
  gym_env.render()  # show gym rendering
  gym_env.step(gym_env.action_space.sample())  # interact with the gym with random action

gym_env.close()  # close gym

starting episode 0...
t= 0, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 1, tx_freq:[1], tx_rewards:[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 2, tx_freq:[6], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
t= 3, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 4, tx_freq:[6], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
t= 5, tx_freq:[4], tx_rewards:[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 6, tx_freq:[2], tx_rewards:[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 7, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 8, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 9, tx_freq:[9], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
t=10, tx_freq:[3], tx_rewards:[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=11, tx_freq:[1], tx_rewards:[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=12, 