# DNN-based TSCH scheduling

This notebook loads a dataset of inputs and outputs produced by a scheduling algorithm, trains a DNN model on it, and evaluates the model's performance.

## 0. Import PyTorch and set up device-agnostic code

In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
import h5py
import numpy as np
import os

torch.__version__

'2.2.1'

In [2]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [3]:
# Show GPU details by running the `nvidia-smi` shell command.
# The command is only executed when `device` was set to "cuda"; on CPU this cell does nothing.
if device=="cuda":
  !nvidia-smi

## 1. Get data

In [4]:
#from google.colab import drive
#drive.mount('/content/drive')

In [5]:
# Define the TASADataset class

class TASADataset(Dataset):
    """Map-style dataset backed by an HDF5 file.

    The file is opened read-only and must contain two HDF5 datasets named
    'inputs' and 'outputs'. Sample ``idx`` is the pair
    ``(inputs[idx], outputs[idx])``, each converted to a float32 tensor.

    The file handle stays open for the lifetime of the object so that samples
    can be read on demand. Call :meth:`close` (or use the instance as a context
    manager) to release it once the dataset is no longer needed.

    Args:
        file_path: path of the HDF5 file to read.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.hdf5_file = h5py.File(self.file_path, 'r')
        try:
            self.inputs = self.hdf5_file['inputs']
            self.outputs = self.hdf5_file['outputs']
        except Exception:
            # do not leak the open handle when an expected dataset is missing
            self.close()
            raise

    def __len__(self):
        """Return the number of samples (length of the 'inputs' dataset)."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return sample ``idx`` as an ``(input, output)`` pair of float32 tensors."""
        input_data = torch.tensor(self.inputs[idx], dtype=torch.float32)
        output_data = torch.tensor(self.outputs[idx], dtype=torch.float32)

        return input_data, output_data

    def close(self):
        """Close the underlying HDF5 file; calling it more than once is harmless."""
        handle = getattr(self, 'hdf5_file', None)
        if handle is not None:
            handle.close()
            self.hdf5_file = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress exceptions raised inside the with-block

    def __del__(self):
        # best-effort cleanup; errors during garbage collection / shutdown are ignored
        try:
            self.close()
        except Exception:
            pass

In [6]:
 # Define the file path to the HDF5 database
hdf5_file_path = 'database.h5'

# Wrap the HDF5 database in a Dataset
tasa_dataset = TASADataset(hdf5_file_path)

# Fraction of samples used for training; the remainder is held out for testing
train_size = 0.8
test_size = 1 - train_size

# Randomly partition the sample indices (fixed seed so the split is repeatable)
train_indices, test_indices = train_test_split(
    range(len(tasa_dataset)),
    test_size=test_size,
    random_state=42,
)

# Index-based views over the single underlying dataset
train_dataset = Subset(tasa_dataset, train_indices)
test_dataset = Subset(tasa_dataset, test_indices)

# Batching configuration
batch_size = 32
print(f"Creating DataLoader's with batch size {batch_size}")
shuffle = True

# Training batches are reshuffled every epoch; test batches keep a fixed order
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Peek at one training batch, moved to the target device, to show the tensor shapes
inputs, outputs = next(iter(train_dataloader))
inputs = inputs.to(device)
outputs = outputs.to(device)
print(inputs.shape)
print(outputs.shape)

print("\nDataLoaders created!!!!!!!")


Creating DataLoader's with batch size 32
torch.Size([32, 12])
torch.Size([32, 420])

DataLoaders created!!!!!!!


In [7]:
# Record the input and output vector lengths, read from one training batch
inputs, outputs = next(iter(train_dataloader))
input_len, output_len = inputs.shape[1], outputs.shape[1]

input_len, output_len

(12, 420)

## 2. DNN Model

### 2.1 Create model

In [8]:
class DNN_Scheduler(nn.Module):
    """
    model architecture for a DNN-based TSCH Scheduler
    using simple MLP architecture with non-linear activations

    The network is a stack of Linear layers with a ReLU between each pair of
    consecutive Linear layers; no activation is applied after the last layer.
    """
    def __init__(self,
                 input_features: int,
                 output_features: int,
                 hidden_units: int = 8,
                 hidden_sizes=(400, 512, 512, 450)) -> None:
        """initializes all required hyperparameters

        Args:
            input_features (int): number of input features to the model
            output_features (int): number of output features of the model
            hidden_units (int): accepted for backward compatibility only; it does
                not affect the architecture (layer widths come from hidden_sizes)
            hidden_sizes (sequence of int): width of each hidden layer, in order;
                the default reproduces the original fixed 400-512-512-450 stack
        """
        super().__init__()
        widths = [input_features, *hidden_sizes, output_features]

        # Layers are created in order, so parameter initialisation (and the
        # state_dict keys linear_layer_stack.0, .2, ...) match the original
        # hand-written Sequential when the default hidden_sizes is used.
        layers = []
        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            if position > 0:
                layers.append(nn.ReLU())
            layers.append(nn.Linear(in_features=fan_in, out_features=fan_out))

        self.linear_layer_stack = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        """Apply the layer stack to x and return the raw (un-activated) outputs."""
        return self.linear_layer_stack(x)

# Seed the RNG so the initial weights are the same on every run
torch.manual_seed(42)

# input_len: number of links to schedule
# output_len: number of cells in the schedule
# NOTE(review): the original comment described the schedule as "4 channel offsets,
# 5 timeslots" (20 cells), but the recorded output_len is 420 - confirm the dimensions.
model_0 = DNN_Scheduler(
    input_features=input_len,
    hidden_units=50,
    output_features=output_len,
)
model_0 = model_0.to(device)
model_0

DNN_Scheduler(
  (linear_layer_stack): Sequential(
    (0): Linear(in_features=12, out_features=800, bias=True)
    (1): ReLU()
    (2): Linear(in_features=800, out_features=1024, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1024, out_features=1024, bias=True)
    (5): ReLU()
    (6): Linear(in_features=1024, out_features=800, bias=True)
    (7): ReLU()
    (8): Linear(in_features=800, out_features=420, bias=True)
  )
)

In [9]:
# Smoke test: push a single training sample through the model

# pull one batch from the training loader
input_batch, output_batch = next(iter(train_dataloader))

# keep only the first sample of that batch
input = input_batch[0]
output = output_batch[0]

# run the model in eval mode without gradient tracking
model_0.eval()
with torch.inference_mode():
    pred = model_0(input.to(device))

# show the raw prediction, its clamped-and-rounded version, and the target
print(f"Output logits:\n{pred}\n")
print(f"Rounded outputs:\n{torch.clamp_min(pred, min=0).round()}\n")
print(f"Actual output:\n{output}\n")

Output logits:
tensor([ 0.0715,  0.0286,  0.1109,  0.0179, -0.0207, -0.0523,  0.0233, -0.0607,
        -0.0076, -0.0325, -0.0674, -0.0293,  0.0601,  0.0052, -0.0437, -0.0190,
         0.0637, -0.0220, -0.0080, -0.0098,  0.0640,  0.0261,  0.0161,  0.0004,
         0.0585, -0.0417,  0.0114,  0.0503, -0.0140, -0.0415,  0.0403, -0.0372,
         0.0143, -0.0334, -0.0509, -0.0121, -0.0327,  0.0046,  0.1009, -0.0676,
        -0.0100, -0.0238, -0.0999, -0.0515,  0.0792,  0.0748,  0.0309, -0.0238,
         0.0194, -0.1153,  0.0211, -0.0752,  0.0834, -0.0677, -0.0557,  0.0029,
        -0.0460, -0.0626, -0.0454,  0.0190, -0.0239, -0.0394,  0.0149, -0.0420,
        -0.0032, -0.0817,  0.0109, -0.0286, -0.0299,  0.0944, -0.1000,  0.0276,
         0.0070, -0.0020, -0.0388,  0.0502,  0.0288,  0.0119,  0.0349,  0.0336,
         0.0267, -0.0364,  0.0010, -0.0452, -0.0749,  0.0109,  0.0218, -0.0080,
         0.0389, -0.0272, -0.0994,  0.0002, -0.0299,  0.0504,  0.0473, -0.0140,
        -0.0455,  0.1103,

### 2.2 Create a loss function and optimizer

In [10]:
# Objective: mean squared error between model predictions and targets
# (commented-out alternative: nn.L1Loss())
loss_fn = nn.MSELoss()

# Adam over all model parameters
# (commented-out alternative: torch.optim.SGD with lr = 0.01)
optimizer = torch.optim.Adam(params=model_0.parameters(), lr=1e-3)

### 2.3 Get model info

In [11]:
try:
    import torchinfo
except:
    !pip install torchinfo
    import torchinfo

from torchinfo import summary

summary(model_0, input_size=[input_len]) # do a test pass through of an example input size

Layer (type:depth-idx)                   Output Shape              Param #
DNN_Scheduler                            [420]                     --
├─Sequential: 1-1                        [420]                     --
│    └─Linear: 2-1                       [800]                     10,400
│    └─ReLU: 2-2                         [800]                     --
│    └─Linear: 2-3                       [1024]                    820,224
│    └─ReLU: 2-4                         [1024]                    --
│    └─Linear: 2-5                       [1024]                    1,049,600
│    └─ReLU: 2-6                         [1024]                    --
│    └─Linear: 2-7                       [800]                     820,000
│    └─ReLU: 2-8                         [800]                     --
│    └─Linear: 2-9                       [420]                     336,420
Total params: 3,036,644
Trainable params: 3,036,644
Non-trainable params: 0
Total mult-adds (Units.GIGABYTES): 2.72
Input size (M

### 2.4 Create train and test loops

In [12]:
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
    """Train `model` for one pass over `dataloader`.

    Relies on two module-level names being defined before the call: `device`
    (every batch is moved to it) and `tqdm` (used for the progress bar).

    Accuracy is the fraction of output elements whose prediction, clamped
    below at 0 and rounded, equals the target exactly.

    Args:
        model: network to optimise; switched to train mode.
        dataloader: iterable of (X, y) batches that also supports len().
        loss_fn: callable mapping (prediction, target) to a scalar loss.
        optimizer: optimizer holding the model's parameters.

    Returns:
        (train_loss, train_acc): loss and accuracy averaged over the batches.

    Raises:
        ZeroDivisionError: if `dataloader` contains no batches.
    """
    # put model in train mode
    model.train()

    # running sums of the per-batch loss and accuracy
    train_loss, train_acc = 0, 0

    # progress bar over the batches
    dataloader_iter = tqdm(dataloader, desc="Training", total=len(dataloader))

    for X, y in dataloader_iter:
        # send data to target device
        X, y = X.to(device), y.to(device)

        # 1. forward pass
        y_pred = model(X)

        # 2. calculate and accumulate loss
        loss = loss_fn(y_pred, y)
        batch_loss = loss.item()
        train_loss += batch_loss

        # 3. optimizer zero grad
        optimizer.zero_grad()

        # 4. loss backward
        loss.backward()

        # 5. optimizer step
        optimizer.step()

        # batch accuracy, computed once and without building an autograd graph
        with torch.no_grad():
            y_pred_round = torch.round(torch.clamp_min(y_pred, min=0))
            batch_acc = ((y_pred_round == y).sum(dim=1)/y_pred_round.shape[1]).mean().item()
        train_acc += batch_acc

        # show the latest batch metrics on the progress bar
        dataloader_iter.set_postfix({'loss': batch_loss, 'accuracy': batch_acc})

    # adjust metrics to get average loss and accuracy per batch
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

In [13]:
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module):
    # put model in eval mode
    model.eval()

    # setup test loss and test accuracy values
    test_loss, test_acc = 0, 0

    # turn on inference context manager
    with torch.inference_mode():
        # loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            # send data to target device
            X, y = X.to(device), y.to(device)

            # 1. forward pass
            test_pred = model(X)

            # 2. calculate and accumulate loss
            loss = loss_fn(test_pred, y)
            test_loss += loss.item()

            # calculate and accumulate accuracy
            test_pred_round = torch.round(torch.clamp_min(test_pred, min=0))
            test_acc += ((test_pred_round == y).sum(dim=1)/test_pred_round.shape[1]).mean().item()


    # Adjust metrics to get average loss and accuracy per batch
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

In [14]:
from tqdm.auto import tqdm

# 1. take in various parameters required for training and test steps
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int = 5):
    """Run `epochs` rounds of train_step followed by test_step and collect the metrics.

    After every epoch a one-line summary is printed.

    Returns:
        dict with keys "train_loss", "train_acc", "test_loss" and "test_acc",
        each mapping to a list holding one value per epoch.
    """
    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")

    # one empty history list per metric
    results = {name: [] for name in metric_names}

    for epoch in tqdm(range(epochs)):
        # one optimisation pass over the training data
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        # followed by an evaluation pass over the test data
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn)

        # report this epoch's metrics
        summary_line = (
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )
        print(summary_line)

        # append this epoch's values to the matching history lists
        epoch_values = (train_loss, train_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_values):
            results[name].append(value)

    return results

In [15]:
# number of batches the training DataLoader yields per epoch
print(len(train_dataloader))

50000


### 2.5 Train and evaluate DNN scheduler

In [None]:
from timeit import default_timer as timer

# Seed the CPU and CUDA generators before training
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Number of passes over the training data
NUM_EPOCHS = 100

# Time the whole training run
start_time = timer()

model_0_results = train(
    model=model_0,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=NUM_EPOCHS,
)

end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/50000 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.1833 | train_acc: 0.9298 | test_loss: 0.0934 | test_acc: 0.9502


Training:   0%|          | 0/50000 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 0.0825 | train_acc: 0.9543 | test_loss: 0.0802 | test_acc: 0.9539


Training:   0%|          | 0/50000 [00:00<?, ?it/s]