In [1]:
#   ____                            _____     _____                   __          __        _        _                 
#  / __ \                     /\   |_   _|   / ____|                  \ \        / /       | |      | |                
# | |  | |_ __   ___ _ __    /  \    | |    | |  __ _   _ _ __ ___     \ \  /\  / /__  _ __| | _____| |__   ___  _ __  
# | |  | | '_ \ / _ \ '_ \  / /\ \   | |    | | |_ | | | | '_ ` _ \     \ \/  \/ / _ \| '__| |/ / __| '_ \ / _ \| '_ \ 
# | |__| | |_) |  __/ | | |/ ____ \ _| |_   | |__| | |_| | | | | | |     \  /\  / (_) | |  |   <\__ \ | | | (_) | |_) |
#  \____/| .__/ \___|_| |_/_/    \_\_____|   \_____|\__, |_| |_| |_|      \/  \/ \___/|_|  |_|\_\___/_| |_|\___/| .__/ 
#        | |                                         __/ |                                                      | |    
#        |_|                                        |___/                                                       |_|    

Setting up the environment with everything we need

In [2]:
!pip install gym
#!pip install torch==1.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html



We import the Python packages that we need.

In [3]:
import random
from random import randint

import torch
from torch import Tensor
from typing import Dict, List

First, let's build a very simple model as a plain Python script.

In [4]:
# Fix both random number generators so the table below is reproducible.
random.seed(1234)  # Python's `random` module
torch.manual_seed(1234)  # PyTorch

n_freqs = 10  # e.g. tx/rx frequencies in our spectrum
horizon = 15  # i.e. number of time-steps per episode

# Action-reward space: for every observation (the time-step) a 1d tensor with
# one entry per frequency, holding 1.0 at a single randomly drawn frequency.
rewards: Dict[int, Tensor] = {}
for step in range(horizon):
  hot_index = randint(0, n_freqs - 1)  # one draw per time-step, in order
  rewards[step] = torch.zeros(n_freqs)
  rewards[step][hot_index] = 1.0

# Terminal observation: no frequency yields a reward.
rewards[horizon] = torch.zeros(n_freqs)

# One line per observation: the rewarded frequency and the full reward vector.
row_template = 't={obs:2}, tx_freq={tx_freq}, tx_rewards={action_rewards}'
for obs, reward_vec in rewards.items():
  values = reward_vec.tolist()
  rewarded = [idx for idx, val in enumerate(values) if val == 1.0]
  print(row_template.format(obs=obs, tx_freq=rewarded, action_rewards=values))

t= 0, tx_freq=[7], tx_rewards=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
t= 1, tx_freq=[1], tx_rewards=[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 2, tx_freq=[0], tx_rewards=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 3, tx_freq=[1], tx_rewards=[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 4, tx_freq=[9], tx_rewards=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
t= 5, tx_freq=[0], tx_rewards=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 6, tx_freq=[1], tx_rewards=[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 7, tx_freq=[1], tx_rewards=[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 8, tx_freq=[5], tx_rewards=[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 9, tx_freq=[3], tx_rewards=[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=10, tx_freq=[0], tx_rewards=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=11, tx_freq=[0], tx_rewards=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=12, tx_freq=[0], tx_reward

In [5]:
# Re-seed both generators so this cell prints the same values on every run,
# regardless of how many random draws were made before it.
random.seed(1234)  # Python's `random` module generator
torch.manual_seed(1234)  # PyTorch generator (the one torch.randint below draws from)

class TxWirelessModel:
  """Toy transmitter model holding one random Tx frequency per time-step."""

  def __init__(self, n_freqs: int, horizon: int):
    """Draw the Tx frequency used at every time-step of an episode.

    Args:
      n_freqs: number of frequencies; Tx frequencies are drawn from [0, n_freqs).
      horizon: number of time-steps in an episode.
    """
    self.hor = horizon  # model horizon
    # 1d integer tensor of length `horizon`: the Tx frequency for each time-step
    self.tx_freq: Tensor = torch.randint(n_freqs, size=(horizon, ))

  def rx_reward(self, t: int, freq: int) -> float:
    """Return the reward for listening on `freq` at time-step `t`.

    Args:
      t: time-step (the observation); must be non-negative.
      freq: Rx frequency to evaluate.

    Returns:
      1.0 if `freq` equals the Tx frequency at `t`, otherwise 0.0. Always 0.0
      once the horizon is reached (t >= horizon).

    Raises:
      IndexError: if `t` is negative.
    """
    # A negative index would silently wrap around and read the schedule from
    # its end, so reject it explicitly instead of returning a bogus reward.
    if t < 0:
      raise IndexError('time-step must be non-negative, got {}'.format(t))
    if t >= self.hor:  # horizon reached, always return 0
      return 0.0
    return float(self.tx_freq[t] == freq)  # True if Tx and Rx frequencies are the same

n_freqs = 10  # size of the frequency set
horizon = 15  # number of time-steps before the terminal observation

model = TxWirelessModel(n_freqs, horizon)  # instantiate the model object

# One line per time-step (terminal observation included): the frequencies that
# yield a reward, followed by the reward obtained for every frequency.
row_template = 't={obs:2}, tx_freq:{tx_freq}, tx_rewards:{action_rewards}'
for t in range(horizon + 1):
  a_r: List[float] = []
  for freq in range(n_freqs):  # query the model for every frequency
    a_r.append(model.rx_reward(t, freq))
  rewarded = [idx for idx, reward in enumerate(a_r) if reward == 1.0]
  print(row_template.format(obs=t, tx_freq=rewarded, action_rewards=a_r))

t= 0, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 1, tx_freq:[1], tx_rewards:[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 2, tx_freq:[6], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
t= 3, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 4, tx_freq:[6], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
t= 5, tx_freq:[4], tx_rewards:[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 6, tx_freq:[2], tx_rewards:[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t= 7, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 8, tx_freq:[5], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
t= 9, tx_freq:[9], tx_rewards:[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
t=10, tx_freq:[3], tx_rewards:[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=11, tx_freq:[1], tx_rewards:[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
t=12, tx_freq:[4], tx_reward