In [None]:
#   ____                            _____     _____                   __          __        _        _                 
#  / __ \                     /\   |_   _|   / ____|                  \ \        / /       | |      | |                
# | |  | |_ __   ___ _ __    /  \    | |    | |  __ _   _ _ __ ___     \ \  /\  / /__  _ __| | _____| |__   ___  _ __  
# | |  | | '_ \ / _ \ '_ \  / /\ \   | |    | | |_ | | | | '_ ` _ \     \ \/  \/ / _ \| '__| |/ / __| '_ \ / _ \| '_ \ 
# | |__| | |_) |  __/ | | |/ ____ \ _| |_   | |__| | |_| | | | | | |     \  /\  / (_) | |  |   <\__ \ | | | (_) | |_) |
#  \____/| .__/ \___|_| |_/_/    \_\_____|   \_____|\__, |_| |_| |_|      \/  \/ \___/|_|  |_|\_\___/_| |_|\___/| .__/ 
#        | |                                         __/ |                                                      | |    
#        |_|                                        |___/                                                       |_|    

# 15-minute break




In [None]:
# Install the gym package through a shell call to pip (no version is pinned here).
!pip install gym
# Disabled alternative kept for reference: CPU-only torch 1.5.0 wheel from the PyTorch wheel index.
#!pip install torch==1.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html



In [None]:
import json
from itertools import count

import gym
from gym import spaces

import torch
from torch import Tensor

import random
from random import randint

from typing import Dict, List, Optional, Callable, Any, Tuple, Union, Type

import pathlib
import collections

![alt text](https://www.taitradioacademy.com/wp-content/uploads/2014/10/Image-25-800x450.png)

In [None]:
class TDMAGym(gym.Env):
  """Toy TDMA environment: guess the frequency and the role of every sub-frame.

  When the environment is constructed, a fixed schedule is drawn for each
  time-step of the horizon: one frequency plus an up-link or down-link role
  for every sub-frame (half of the sub-frames get each role). At each step the
  agent proposes a frequency and one role per sub-frame. If the frequency
  matches the schedule, the reward is the number of sub-frames whose role
  matches; otherwise the reward is 0.

  The schedule is drawn with Python's global ``random`` module, so call
  ``random.seed(...)`` before constructing the environment to make it
  reproducible.
  """

  NO_ACTION = 0  # no action on sub-frame
  UPLINK_ACTION = 1  # use sub-frame as up-link role (user [Tx] --> base-station [Rx])
  DOWNLINK_ACTION = 2  # use sub-frame as down-link role (base-station [Tx] --> user [Rx])

  def __init__(self, n_freqs: int, n_subframes: int, horizon: int):
    """Build the action/observation spaces and draw the per-time-step schedule.

    Args:
      n_freqs: number of frequencies the agent can choose from.
      n_subframes: number of sub-frames in a time-step frame; must be even
        because half of them are up-link and half are down-link.
      horizon: number of time-steps in one episode.

    Raises:
      ValueError: if ``n_subframes`` is odd.
    """
    super().__init__()  # initialize gym.Env base class
    if n_subframes % 2 != 0:
      raise ValueError('n_subframes must be an even number')

    # action-space dict includes frequency and sub-frames action
    self.action_space = spaces.Dict(
      {
        'freq': spaces.Discrete(n_freqs),  # frequency used
        'subframes': spaces.MultiDiscrete([3] * n_subframes)  # one of the 3 role actions per sub-frame
      }
    )
    # observation-space dict includes the time-step, last action and its sub-frames successes
    self.observation_space = spaces.Dict(
      {
        'time_step': spaces.Discrete(horizon + 1),  # include terminal state
        'last_a': self.action_space,  # last action done by the agent
        'last_success': spaces.MultiDiscrete([2] * n_subframes)  # what sub-frames received an acknowledgement
      }
    )
    self.reward_range = 0.0, float(n_subframes)  # at most one reward unit per sub-frame

    # time-step -> (frequency, per-sub-frame role tensor); the terminal state has no entry
    self.freq_sf: Dict[int, Tuple[int, Tensor]] = dict()
    for t in range(horizon):
      # NOTE: the order of the two random draws is kept so that a seeded schedule stays identical
      freq = randint(0, n_freqs - 1)  # choose random frequency to do Tx
      ul_subframes = random.sample(range(n_subframes), k=n_subframes // 2)  # sub-frames used as up-link
      # every sub-frame that is not up-link is down-link, so start from down-link everywhere
      sf = torch.full((n_subframes,), self.DOWNLINK_ACTION, dtype=torch.int)
      sf[ul_subframes] = self.UPLINK_ACTION
      self.freq_sf[t] = freq, sf

    self.hor = horizon  # gym horizon to know when we are DONE
    self.t = 0  # initial time-step / observation

  def step(self, action: Dict) -> Tuple[  # step function to interact with the gym
    Dict, float, bool, Dict[str, Any]
  ]:
    """Apply ``action`` at the current time-step and advance to the next one.

    Args:
      action: dict with keys ``'freq'`` and ``'subframes'`` that belongs to
        ``self.action_space``.

    Returns:
      ``(observation, reward, done, info)``. The observation holds the new
      time-step, the action just taken and a 0/1 success flag per sub-frame.
      ``done`` is True as soon as the new time-step equals the horizon.
      Calling ``step`` again after that returns an empty observation, zero
      reward and ``done=True``.
    """
    if self.t >= self.hor:  # episode already over: nothing left to interact with
      return collections.OrderedDict(), 0.0, True, {}

    assert self.action_space.contains(action), 'action is outside of the action-space'

    freq, sf = self.freq_sf[self.t]  # scheduled frequency and sub-frame roles for this time-step
    a_freq, a_sf = action['freq'], action['subframes']  # frequency and roles proposed by the agent

    if freq == a_freq:
      # right frequency: a sub-frame succeeds when its proposed role equals the scheduled role
      sf_success = [int(gym_sf == act_sf) for gym_sf, act_sf in zip(sf.tolist(), a_sf)]
    else:
      sf_success = [0] * len(sf)  # wrong frequency, no sub-frames successes

    self.t += 1  # move to the next time-step / observation
    o = collections.OrderedDict(
      time_step=self.t, last_a=action, last_success=sf_success
    )
    r = float(sum(sf_success))  # one reward unit per successful sub-frame
    done = self.t >= self.hor  # the terminal time-step has just been reached
    return o, r, done, {}  # gyms always return <obs, reward, if terminal obs reached, debug/info dictionary>

  def reset(self) -> Dict:  # reset our gym for a new episode
    """Rewind to time-step 0 and return the initial observation.

    The initial observation only carries ``time_step`` because no action has
    been taken yet. The schedule drawn in ``__init__`` is not redrawn.
    """
    self.t = 0  # initial time-step
    obs = collections.OrderedDict(time_step=self.t)  # initial observation
    return obs

  def render(self, mode='human'):  # gym visual rendering (e.g. text, image, plot, 3D frame, etc.)
    """Print the scheduled frequency and sub-frame roles of the current time-step.

    ``mode`` is accepted for gym API compatibility and is not used.
    """
    if self.t >= self.hor:  # terminal gym state reached and out of horizon
      print('rendering={terminal state reached, out of horizon}')
      return

    freq, sf = self.freq_sf[self.t]
    # show current time-step, frequency and sub-frames role
    print(
      'rendering={{time_step={time_step}, freq={freq}, sf={sf}}}'
        .format(
        time_step=self.t, freq=freq,
        sf=str(sf.tolist()).replace(' ', '')
      )
    )

In [None]:
# Seed the RNGs first: TDMAGym draws its schedule from Python's `random` module at construction.
# NOTE(review): action_space.sample() presumably uses gym's own RNG, which is not seeded here - confirm.
random.seed(1234)  # python random number generator seed
torch.manual_seed(1234)  # pytorch random number generator seed

n_freqs = 4  # number of frequencies
n_subframes = 4  # number of sub-frames within a time-step frame
horizon = 15  # number of time-steps in one episode
n_episodes = 3  # episodes 0, 1 and 2 are played

gym_env = TDMAGym(n_freqs, n_subframes, horizon)  # our gym environment

for episode_i in range(n_episodes):  # "training" loop with a purely random agent
  print('starting episode {episode_i}...'.format(episode_i=episode_i))
  o = gym_env.reset()  # every episode starts from the initial observation
  done = False
  while not done:  # keep interacting until the gym reports the end of the episode
    gym_env.render()  # show gym rendering
    a = gym_env.action_space.sample()  # random action from action-space
    o, r, done, _ = gym_env.step(a)  # interact with the gym with the random action

gym_env.close()  # close gym

starting episode 0...
rendering={time_step=0, freq=3, sf=[1,2,2,1]}
rendering={time_step=1, freq=0, sf=[1,2,1,2]}
rendering={time_step=2, freq=0, sf=[1,1,2,2]}
rendering={time_step=3, freq=1, sf=[1,2,2,1]}
rendering={time_step=4, freq=0, sf=[2,2,1,1]}
rendering={time_step=5, freq=3, sf=[1,2,2,1]}
rendering={time_step=6, freq=0, sf=[2,1,1,2]}
rendering={time_step=7, freq=0, sf=[1,2,1,2]}
rendering={time_step=8, freq=3, sf=[1,1,2,2]}
rendering={time_step=9, freq=3, sf=[1,2,1,2]}
rendering={time_step=10, freq=0, sf=[1,1,2,2]}
rendering={time_step=11, freq=0, sf=[2,1,1,2]}
rendering={time_step=12, freq=2, sf=[1,2,1,2]}
rendering={time_step=13, freq=1, sf=[2,2,1,1]}
rendering={time_step=14, freq=0, sf=[1,2,2,1]}
rendering={terminal state reached, out of horizon}
starting episode 1...
rendering={time_step=0, freq=3, sf=[1,2,2,1]}
rendering={time_step=1, freq=0, sf=[1,2,1,2]}
rendering={time_step=2, freq=0, sf=[1,1,2,2]}
rendering={time_step=3, freq=1, sf=[1,2,2,1]}
rendering={time_step=4, fr