In [42]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

Load Behavioural Model

In [52]:
# === Behavioural Model ===
class Behavioural(nn.Module):
    """Fully connected network: two hidden ReLU layers and a sigmoid output layer.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of both hidden layers.
        output_dim: number of outputs per sample.
    """

    def __init__(self, input_dim=5, hidden_dim=32, output_dim=2):
        super().__init__()
        # The position of each layer inside the Sequential determines its
        # state-dict key (model.0.*, model.2.*, model.4.*), so the order
        # below must stay as it is for saved weights to keep loading.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack; every output element lies in (0, 1)."""
        out = self.model(x)
        return out
# Build the network that will receive the saved weights.
behavior_pi = Behavioural(input_dim=5, hidden_dim=32, output_dim=2)

# Load the saved weights into it. map_location="cpu" lets a checkpoint that
# was written from a CUDA device load on a machine without a GPU; the values
# are copied into behavior_pi's (CPU) parameters either way.
# NOTE(review): torch.load unpickles the file, so only load trusted checkpoints.
behavior_pi.load_state_dict(
    torch.load("./Behavioural_model_2.pth", map_location="cpu")
)

<All keys matched successfully>

Perturbed Policy Setup

In [53]:
class Policy(nn.Module):
    """MLP with two ReLU hidden layers of equal width and a sigmoid output.

    Args:
        input_dim: size of each input vector.
        hidden_dim: number of units in each of the two hidden layers.
        output_dim: size of each output vector.
    """

    def __init__(self, input_dim=5, hidden_dim=32, output_dim=2):
        super().__init__()
        # Linear layers are created in the order they are applied and end up
        # at positions 0, 2 and 4 of the Sequential, which fixes the
        # state-dict key names (model.0.*, model.2.*, model.4.*).
        first_hidden = nn.Linear(input_dim, hidden_dim)
        second_hidden = nn.Linear(hidden_dim, hidden_dim)
        head = nn.Linear(hidden_dim, output_dim)
        self.model = nn.Sequential(
            first_hidden, nn.ReLU(),
            second_hidden, nn.ReLU(),
            head, nn.Sigmoid(),
        )

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the sigmoid outputs."""
        return self.model(x)

Perturbation: $\theta = \theta' (1 + \epsilon), \quad \epsilon \sim N(0,\sigma^2)$

In [50]:
def perturb_multi(pi, sigma=0.5):
    """Return a copy of ``pi`` with multiplicative Gaussian noise on its weights.

    Every trainable parameter θ of the copy is replaced by θ * (1 + ε), where
    ε is drawn independently per element from N(0, sigma²). The weights of
    ``pi`` are copied into a fresh ``Policy``; ``pi`` itself is left untouched.

    Args:
        pi: network whose ``state_dict()`` can be loaded into a ``Policy``
            with input_dim=5, hidden_dim=32, output_dim=2.
        sigma: standard deviation of the relative noise ε.

    Returns:
        A new ``Policy`` instance holding the perturbed weights.
    """
    # The clone has to match the layer sizes of the network being copied:
    # load_state_dict raises a size-mismatch RuntimeError otherwise. The
    # behavioural model in this notebook is built with hidden_dim=32.
    pi_perturbed = Policy(input_dim=5, hidden_dim=32, output_dim=2)
    pi_perturbed.load_state_dict(pi.state_dict())  # copies the weight values

    # Apply multiplicative Gaussian noise in place: θ ← θ * (1 + ε).
    # no_grad keeps the in-place update out of autograd's graph.
    with torch.no_grad():
        for param in pi_perturbed.parameters():
            if param.requires_grad:
                noise = torch.randn_like(param) * sigma
                param.mul_(1 + noise)

    return pi_perturbed

# Generate `perturb_size` multiplicatively perturbed copies of behavior_pi
# and write each one to disk.
perturb_size = 1000
sigma = 0.05
pi_list = []  # stays empty: the policies are saved to disk, not kept in memory

# torch.save does not create missing directories, so make sure the target exists.
os.makedirs("./Policys", exist_ok=True)

# NOTE(review): the additive-perturbation cell further down saves to these same
# file names, so running both cells leaves only the additive policies on disk.
for i in range(perturb_size):
    pi_perturbed = perturb_multi(pi = behavior_pi, sigma=sigma)
    # Saves the whole pickled module (not just a state dict).
    torch.save(pi_perturbed, f"./Policys/Perturbed_model_{i}.pth")

In [55]:
def perturb_add(pi, sigma=0.5):
    """Return a copy of ``pi`` with additive Gaussian noise on its weights.

    The weights of ``pi`` are loaded into a default-sized ``Policy``; each
    trainable parameter θ of that copy then becomes θ + ε, with ε drawn
    independently per element from N(0, sigma²).

    Args:
        pi: network whose ``state_dict()`` can be loaded into ``Policy()``.
        sigma: standard deviation of the added noise.

    Returns:
        A new ``Policy`` instance holding the perturbed weights.
    """
    # Fresh network that receives a copy of pi's weight values.
    noisy_policy = Policy()
    noisy_policy.load_state_dict(pi.state_dict())

    # Additive Gaussian noise, applied in place: θ ← θ + ε.
    with torch.no_grad():
        trainable = (p for p in noisy_policy.parameters() if p.requires_grad)
        for weights in trainable:
            weights.add_(torch.randn_like(weights) * sigma)

    return noisy_policy

# Generate `perturb_size` additively perturbed copies of behavior_pi and
# write each one to disk.
perturb_size = 1000
sigma = 0.4
pi_list = []  # stays empty: the policies are saved to disk, not kept in memory

# torch.save does not create missing directories, so make sure the target exists.
os.makedirs("./Policys", exist_ok=True)

# NOTE(review): these file names are the same ones the multiplicative sweep
# above writes, so this loop overwrites any policies saved by that cell.
for i in range(perturb_size):
    pi_perturbed = perturb_add(pi = behavior_pi, sigma=sigma)
    # Saves the whole pickled module (not just a state dict).
    torch.save(pi_perturbed, f"./Policys/Perturbed_model_{i}.pth")