### Installation

In [None]:
import os
!git clone --branch=main https://github.com/muhd-umer/rl-wireless.git rl-wireless
assert os.path.exists('./rl-wireless'), "No rl-wireless folder found."
%cd ./rl-wireless

!pip install -r requirements.txt

### Necessary Imports

In [1]:
import warnings
import numpy as np
import gymnasium as gym
from network import MassiveMIMOEnv
import ray
from ray import air, tune
from ray.tune.registry import get_trainable_cls

# disable warnings
warnings.filterwarnings("ignore")

### Registering the Environment

In [2]:
# Set the parameters
global N, M, K, Ns, asd_degs, min_P, max_P, num_P, num_episodes, dtype, seed
N = 7
M = 32
K = 10
Ns = 10
asd_degs = [
    30,
]
min_P = -20
max_P = 23
num_P = 10
dtype = np.float32
seed = 0

# Register and create the environment
gym.register(id="MassiveMIMO-v0", entry_point=MassiveMIMOEnv)

env = gym.make(
    "MassiveMIMO-v0",
    N=N,
    M=M,
    K=K,
    Ns=Ns,
    min_P=min_P,
    max_P=max_P,
    num_P=num_P,
    dtype=dtype,
)

In [3]:
from ray.tune.registry import register_env

# register the predefined scenario with RLlib
register_env("MassiveMIMO-v0", lambda cfg: env)

In [4]:
# init ray with available CPUs (and GPUs)
ray.init(
    num_cpus=4,
    num_gpus=1,
    include_dashboard=False,
    ignore_reinit_error=True,
    log_to_driver=False,
)

2023-05-08 01:34:00,146	INFO worker.py:1625 -- Started a local Ray instance.


0,1
Python version:,3.9.16
Ray version:,2.4.0


### Training PPO Agent

In [None]:
config = (
    get_trainable_cls("PPO")  # RLlib algorithm to use
    .get_default_config()
    .environment("MassiveMIMO-v0")
    .framework("torch")
    .resources(
        num_gpus=0.5,
        num_gpus_per_worker=0.0,
    )
    .rollouts(
        num_rollout_workers=1,
        num_envs_per_worker=1,
        create_env_on_local_worker=True,
    )
    .training(lr=tune.grid_search([0.005, 0.0001]))
)

stop = {
    "timesteps_total": 100000,
}

In [None]:
results = tune.Tuner(
    "PPO",
    param_space=config.to_dict(),
    run_config=air.RunConfig(stop=stop, local_dir="./results"),
).fit()

### Training DQN Agent

In [None]:
config = (
    get_trainable_cls("DQN")  # RLlib algorithm to use
    .get_default_config()
    .environment("MassiveMIMO-v0")
    .framework("torch")
    .resources(
        num_gpus=1,
        num_gpus_per_worker=0.0,
    )
    .training(lr=0.001)
    .evaluation(
        evaluation_interval=5,
    )
)

stop = {
    "timesteps_total": 80000,
}

In [None]:
results = tune.Tuner(
    "DQN",
    param_space=config.to_dict(),
    run_config=air.RunConfig(stop=stop, local_dir="./results"),
).fit()

### Training R2D2 Agent

In [None]:
config = (
    get_trainable_cls("R2D2")  # RLlib algorithm to use
    .get_default_config()
    .environment("MassiveMIMO-v0")
    .framework("torch")
    .resources(
        num_gpus=1,
        num_gpus_per_worker=0.0,
    )
)

stop = {
    "timesteps_total": 80000,
}

In [None]:
config.model["use_attention"] = True

results = tune.Tuner(
    "R2D2",
    param_space=config.to_dict(),
    run_config=air.RunConfig(stop=stop, local_dir="./results"),
).fit()

### Computing Actions

In [5]:
from ray.rllib.algorithms.algorithm import Algorithm

algo = Algorithm.from_checkpoint("results/DQN/DQN_MassiveMIMO/checkpoint")

In [7]:
episode_reward = 0
terminated = truncated = False

obs, info = env.reset()

while not terminated and not truncated:
    action = algo.compute_single_action(obs)
    obs, reward, terminated, truncated, info = env.step(action)
    episode_reward += reward

print(f"Episode Reward: {episode_reward} bits/s/Hz")

Episode Reward: 20.178660950852812 bits/s/Hz
