### Installation

In [None]:
import os

# Clone the project into ./rl-wireless. No explicit destination is given to
# git: a "." target asks git to unpack into the current directory (and fails
# outright when that directory is not empty), which does not produce the
# ./rl-wireless folder that the check below expects.
# The basename guard keeps a re-run of this cell (after the %cd below has
# already moved into the checkout) from cloning a second, nested copy.
if os.path.basename(os.getcwd()) != 'rl-wireless':
    if not os.path.exists('./rl-wireless'):
        !git clone --branch=main https://github.com/muhd-umer/rl-wireless.git
    assert os.path.exists('./rl-wireless'), "No rl-wireless folder found."
    %cd ./rl-wireless

# %pip installs into the environment of the running kernel. -q keeps the
# installer output short; a %%capture cell magic is only valid as the very
# first line of a cell, so it cannot be used at this position.
%pip install -q -r requirements.txt

### Necessary Imports

In [None]:
import os
import numpy as np
import gymnasium as gym
from gymnasium.wrappers.record_episode_statistics import RecordEpisodeStatistics
import time

# Export the results location through the environment (relative to the current
# working directory). Presumably picked up by Ray Tune — confirm for the
# installed Ray version.
os.environ.update({'TUNE_RESULT_DIR': './results'})

### Registering the Environment

In [None]:
# Scenario parameters. These are plain module-level names, so no `global`
# declaration is required for later cells to see them.
N, M, K, Ns = 7, 32, 10, 10
asd_degs = [30]

# Lower bound, upper bound and number of levels of the P setting.
# NOTE(review): units/meaning are defined by network.MassiveMIMOEnv — confirm there.
min_P, max_P, num_P = -20, 23, 10

# Numeric type handed to the environment, and the random seed for the notebook.
dtype = np.float32
seed = 0

# Make the custom environment known to Gymnasium under the id "MassiveMIMO-v0".
# The entry point string names MassiveMIMOEnv inside the `network` module.
gym.register("MassiveMIMO-v0", entry_point="network:MassiveMIMOEnv")

# Build one instance, forwarding the parameters set earlier in this cell as
# keyword arguments through gym.make.
env = gym.make(
    id="MassiveMIMO-v0",
    **{
        "N": N,
        "M": M,
        "K": K,
        "Ns": Ns,
        "min_P": min_P,
        "max_P": max_P,
        "num_P": num_P,
        "dtype": dtype,
    },
)

In [None]:
from ray.tune.registry import register_env


def _shared_env_creator(config):
    """Env creator for RLlib: ignores ``config`` and returns the existing ``env``."""
    # NOTE(review): every call hands back the same pre-built instance instead of
    # constructing a fresh environment — confirm this is intended if more than
    # one environment copy is ever requested.
    return env


# register the predefined scenario with RLlib
register_env("MassiveMIMO-v0", _shared_env_creator)

### RL with Ray RLlib

In [None]:
import ray
from ray import tune

# Options for starting Ray from this notebook; adjust the resource counts to
# match the machine the notebook runs on.
ray_init_options = dict(
    num_cpus=4,  # change to your available number of CPUs
    num_gpus=1,  # change to your available number of GPUs
    include_dashboard=False,
    ignore_reinit_error=True,
    log_to_driver=False,
)
ray.init(**ray_init_options)

In [None]:
from ray.rllib.algorithms.ppo import PPOConfig

# PPO on the registered environment. The `seed` defined in the parameter cell
# was previously never used; it is now handed to RLlib so that repeated runs
# start from the same random state.
config = (
    PPOConfig()
    .environment("MassiveMIMO-v0")
    .debugging(seed=seed)
)

# train the agent
results = tune.run(
    "PPO",
    config=config,
    # stop once this many environment timesteps have been collected
    stop={"timesteps_total": 100000},
    # write a checkpoint every 10 training iterations and once more at the end
    checkpoint_at_end=True,
    checkpoint_freq=10,
    checkpoint_score_attr="episode_reward_mean",
    verbose=1,
)

In [None]:
# The %tensorboard magic is provided by the TensorBoard notebook extension,
# which has to be loaded before the magic can be used.
%load_ext tensorboard
%tensorboard --logdir results --port 6006