📝 **Author:** Amirhossein Heydari - 📧 **Email:** amirhosseinheydari78@gmail.com - 📍 **Linktree:** [linktr.ee/mr_pylin](https://linktr.ee/mr_pylin)

---

# Dependencies

In [67]:
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from torchinfo import summary
from torchmetrics import Accuracy

In [68]:
# reproducibility: fix the RNG seed and make cuDNN pick deterministic kernels
random_state = 0
torch.manual_seed(random_state)
torch.backends.cudnn.benchmark = False      # no auto-tuning of convolution algorithms
torch.backends.cudnn.deterministic = True   # only deterministic cuDNN algorithms

In [69]:
# use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

# Pre-Processing

## Load Dataset

In [70]:
# synthetic 2D classification problem: 250 points spread over 3 classes
n_samples = 250
n_classes = 3

X, y = make_classification(
    n_samples=n_samples,
    n_classes=n_classes,
    n_clusters_per_class=1,   # one cluster per class
    n_features=2,             # two features in total ...
    n_informative=2,          # ... both of them informative
    n_redundant=0,            # and none redundant
    random_state=random_state,
)

## Split dataset into trainset & testset

In [71]:
# hold out 10% of the samples as the testset (seeded for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=random_state,
)

## Normalization

In [72]:
# learn the per-feature min/max on the trainset only; the target range is [-1, 1]
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)

# log the raw (pre-scaling) per-feature extremes of the trainset
trainset_min = X_train.min(axis=0)
trainset_max = X_train.max(axis=0)
print(f"min of trainset: {trainset_min}")
print(f"max of trainset: {trainset_max}")

min of trainset: [-3.12250035 -2.02918108]
max of trainset: [1.81947391 2.65150478]


In [73]:
# apply the trainset-fitted scaling to both splits
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# scatter both splits, colored by class: circles for train, pixel markers for test
for points, labels, marker, name in (
    (X_train, y_train, 'o', 'trainset'),
    (X_test, y_test, ',', 'testset'),
):
    plt.scatter(points[:, 0], points[:, 1], marker=marker, c=labels, s=25, edgecolor='k', label=name)

plt.legend()
plt.title(f"2D dataset with {n_samples} samples")
plt.xlabel("feature 1")
plt.ylabel("feature 2")
plt.show()

## Dataset

In [75]:
# features become float32 tensors, class labels become int64 tensors
feature_dtype, label_dtype = torch.float32, torch.int64

X_train = torch.tensor(X_train, dtype=feature_dtype)
y_train = torch.tensor(y_train, dtype=label_dtype)
X_test = torch.tensor(X_test, dtype=feature_dtype)
y_test = torch.tensor(y_test, dtype=label_dtype)

# pair every feature row with its label
trainset, testset = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)

## Dataloader

In [76]:
batch_size = 4

# shuffle only the training batches; the testset is iterated in its stored order
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

# Radial Basis Function Networks
   - Radial Basis Function (RBF) networks are a type of artificial neural network.
   - They use radial basis functions as activation functions.
   - The output of the network is a linear combination of radial basis functions of the inputs and neuron parameters.

<figure style="text-align: center;">
    <img src="../assets/images/original/rbf/radial-basis-function-networks.svg" alt="radial-basis-function-networks.svg" style="width: 80%;">
    <figcaption>Radial Basis Functions Network Model</figcaption>
</figure>

<table style="margin-left:auto;margin-right:auto;text-align:center;">
  <thead>
    <tr>
      <th colspan="2">hidden parameters</th>
      <th colspan="2">logits parameters</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>μ</td>
      <td>σ</td>
      <td>Weights</td>
      <td>Biases</td>
    </tr>
    <tr>
      <td>A × B</td>
      <td>B</td>
      <td>B × C</td>
      <td>C</td>
    </tr>
  </tbody>
  <tfoot>
    <tr>
      <td colspan="2">A × B + B</td>
      <td colspan="2">B × C + C</td>
    </tr>
  </tfoot>
</table>


$
X = \begin{bmatrix}
        x_{1}^1 & x_{1}^2 & \cdots & x_{1}^M \\
        x_{2}^1 & x_{2}^2 & \cdots & x_{2}^M \\
        \vdots & \vdots & \ddots & \vdots \\
        x_{Q}^1 & x_{Q}^2 & \cdots & x_{Q}^M \\
    \end{bmatrix}_{Q \times M} \quad \text{(Q: number of samples, M: number of features)} \\
$

$
\phi(X) = 
        % \begin{bmatrix}
        % 1 \\
        % \phi_{1} \\
        % \phi_{2} \\
        % \vdots \\
        % \phi_{N} \\
        % \end{bmatrix}_{(N+1) \times 1}
        \begin{bmatrix}
        1 & 1 & \cdots & 1 \\
        \phi_1(X_1) & \phi_1(X_2) & \cdots & \phi_1(X_Q) \\
        \phi_2(X_1) & \phi_2(X_2) & \cdots & \phi_2(X_Q) \\
        \vdots & \vdots & \ddots & \vdots \\
        \phi_N(X_1) & \phi_N(X_2) & \cdots & \phi_N(X_Q) \\
        \end{bmatrix}_{(N+1) \times Q}
    = 
        \begin{bmatrix}
        1 & 1 & \cdots & 1 \\
        \exp\left(-\frac{\|\mathbf{X_1} - \mathbf{\mu}_1\|^2}{2\sigma_1^2}\right) & \exp\left(-\frac{\|\mathbf{X_2} - \mathbf{\mu}_1\|^2}{2\sigma_1^2}\right) & \cdots & \exp\left(-\frac{\|\mathbf{X_Q} - \mathbf{\mu}_1\|^2}{2\sigma_1^2}\right) \\
        \exp\left(-\frac{\|\mathbf{X_1} - \mathbf{\mu}_2\|^2}{2\sigma_2^2}\right) & \exp\left(-\frac{\|\mathbf{X_2} - \mathbf{\mu}_2\|^2}{2\sigma_2^2}\right) & \cdots & \exp\left(-\frac{\|\mathbf{X_Q} - \mathbf{\mu}_2\|^2}{2\sigma_2^2}\right) \\
        \vdots & \vdots & \ddots & \vdots \\
        \exp\left(-\frac{\|\mathbf{X_1} - \mathbf{\mu}_N\|^2}{2\sigma_N^2}\right) & \exp\left(-\frac{\|\mathbf{X_2} - \mathbf{\mu}_N\|^2}{2\sigma_N^2}\right) & \cdots & \exp\left(-\frac{\|\mathbf{X_Q} - \mathbf{\mu}_N\|^2}{2\sigma_N^2}\right) \\
        \end{bmatrix}_{(N+1) \times Q}
$

$
W = \begin{bmatrix}
        w_{0}^1 & w_{0}^2 & \cdots & w_{0}^{C} \\
        w_{1}^1 & w_{1}^2 & \cdots & w_{1}^{C} \\
        w_{2}^1 & w_{2}^2 & \cdots & w_{2}^{C} \\
        \vdots & \vdots & \ddots & \vdots \\
        w_{N}^1 & w_{N}^2 & \cdots & w_{N}^{C} \\
    \end{bmatrix}_{(N+1) \times C} \quad \text{(N: number of kernels, C: number of output neurons)}
$

$$f(X) = \phi(X)^TW$$

In [77]:
class RBF(nn.Module):
    """Radial-basis-function layer with learnable centers and widths.

    Every one of the ``out_features`` kernels owns a center vector (a row of
    ``centers``, shape ``(out_features, in_features)``) and a scalar width
    (an entry of ``sigmas``, shape ``(out_features,)``).

    Args:
        in_features: size of each input sample.
        out_features: number of kernels (size of each output sample).
        kernel: ``'gaussian'`` computes ``exp(-||x - mu||^2 / (2 * sigma^2))``;
            ``'linear'`` computes ``1 - ||x - mu||``.
    """

    def __init__(self, in_features, out_features, kernel='gaussian'):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # allocated uninitialized here; the values are drawn in reset_parameters()
        self.centers = nn.Parameter(torch.empty(out_features, in_features))
        self.sigmas = nn.Parameter(torch.empty(out_features))
        self.kernel = kernel
        self.reset_parameters()

    def reset_parameters(self):
        """Draw centers uniformly from [-1, 1] and widths uniformly from [0, 1]."""
        nn.init.uniform_(self.centers, -1.0, 1.0)
        nn.init.uniform_(self.sigmas, 0.0, 1.0)

    def forward(self, input):
        """Return the kernel activations for a batch of samples.

        Args:
            input: tensor whose first dimension is the batch and that can be
                expanded to ``(batch, out_features, in_features)``.

        Returns:
            Tensor of shape ``(batch, out_features)``.

        Raises:
            ValueError: if ``self.kernel`` is neither ``'gaussian'`` nor ``'linear'``.
        """
        size = (input.size(0), self.out_features, self.in_features)
        x = input.unsqueeze(1).expand(size)
        c = self.centers.unsqueeze(0).expand(size)
        diff = x - c

        if self.kernel == 'gaussian':
            # Use the squared distance directly. Taking sqrt and squaring it
            # again yields a NaN gradient (0 * inf) whenever a sample sits
            # exactly on a center.
            squared_distances = diff.pow(2).sum(-1)
            return torch.exp(-1.0 * squared_distances / (2 * self.sigmas.unsqueeze(0) ** 2))

        if self.kernel == 'linear':
            # vector_norm has a zero sub-gradient at distance 0, whereas
            # pow(0.5) would back-propagate inf / NaN there.
            return 1.0 - torch.linalg.vector_norm(diff, dim=-1)

        raise ValueError(f"Unknown kernel type {self.kernel}")


class RBFNet(nn.Module):
    """An RBF hidden layer followed by a linear layer that produces the outputs.

    Args:
        in_features: size of each input sample.
        hidden_features: number of RBF kernels in the hidden layer.
        out_features: size of each output sample.
        kernel: kernel name forwarded to the RBF layer.
    """

    def __init__(self, in_features, hidden_features, out_features, kernel='gaussian'):
        super().__init__()
        # hidden layer: kernel activations of the inputs
        self.rbf = RBF(in_features, hidden_features, kernel)
        # output layer: weighted sum of the kernel activations plus a bias
        self.linear = nn.Linear(hidden_features, out_features)

    def forward(self, input):
        activations = self.rbf(input)
        outputs = self.linear(activations)
        return outputs

In [78]:
# the input width is read off the first training sample
first_sample, _ = trainset[0]
in_features = first_sample.shape[0]
num_kernels = 3
out_features = n_classes

# build the network and move its parameters to the selected device
model = RBFNet(in_features, num_kernels, out_features).to(device)
model

RBFNet(
  (rbf): RBF()
  (linear): Linear(in_features=3, out_features=3, bias=True)
)

In [79]:
# per-layer output shapes and parameter counts for one mini-batch sized input
summary_input_size = (batch_size, in_features)
summary(model, input_size=summary_input_size)

Layer (type:depth-idx)                   Output Shape              Param #
RBFNet                                   [4, 3]                    --
├─RBF: 1-1                               [4, 3]                    9
├─Linear: 1-2                            [4, 3]                    12
Total params: 21
Trainable params: 21
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

# Set up remaining Hyper-Parameters

In [80]:
# optimization settings
num_epochs = 10
lr = 0.01

# cross-entropy on the raw model outputs, optimized with Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=lr)

# Train Loop

In [81]:
# plot
def plot():
    """Draw each RBF kernel as a circle (center μ, radius σ) over the train/test scatter."""
    # current kernel parameters as numpy arrays on the CPU
    kernel_centers = model.rbf.centers.detach().cpu().numpy()
    kernel_radii = model.rbf.sigmas.detach().cpu().numpy()

    # one unfilled blue circle per kernel
    axes = plt.gca()
    for mu, sigma in zip(kernel_centers, kernel_radii):
        axes.add_artist(plt.Circle(mu, sigma, color='b', fill=False))

    # both splits, colored by class
    plt.scatter(X_train[:, 0], X_train[:, 1], marker='o', c=y_train, s=25, edgecolor='k', label='trainset')
    plt.scatter(X_test[:, 0], X_test[:, 1], marker=',', c=y_test, s=25, edgecolor='k', label='testset')

    plt.legend()
    plt.title(f"2D dataset with {n_samples} samples")
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.show()

In [82]:
# per-epoch history of the training metrics
train_acc_per_epoch, train_loss_per_epoch = [], []

# running top-1 multiclass accuracy, kept on the same device as the model outputs
train_acc = Accuracy(
    task='multiclass',
    num_classes=n_classes,
    top_k=1,
).to(device)

In [83]:
for epoch in range(num_epochs):

    # train loop
    model.train()
    train_loss = 0

    for x, y in trainloader:

        # move the mini-batch to the selected device (GPU or CPU)
        x, y_true = x.to(device), y.to(device)

        # forward
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # backward
        loss.backward()

        # update parameters, then clear the gradients for the next batch
        optimizer.step()
        optimizer.zero_grad()

        # log loss & accuracy (the loss is a batch mean, so weight it by the batch size)
        train_loss += loss.item() * len(x)
        train_acc.update(y_pred, y_true)

    # metrics of the epoch that just finished
    epoch_loss = train_loss / len(trainset)
    epoch_acc = train_acc.compute().item()
    train_acc.reset()

    train_loss_per_epoch.append(epoch_loss)
    train_acc_per_epoch.append(epoch_acc)

    # log
    # Print the values just computed rather than indexing the history by
    # `epoch`: the history lists are created in another cell, so re-running
    # this cell to keep training would otherwise print stale entries.
    print(f"epoch {epoch:>2}  ->  train[loss: {epoch_loss:.5f} - acc: {epoch_acc:.2f}]")
    print(f"μ :\n{model.rbf.centers.detach().cpu().numpy()}\n")
    print(f"σ :\n{model.rbf.sigmas.detach().cpu().numpy()}\n")
    print(f"Weights :\n{model.linear.weight.detach().cpu()}\n")
    print(f"Bias :\n{model.linear.bias.detach().cpu()}")

    # plot
    plot()

epoch  0  ->  train[loss: 1.05382 - acc: 0.40]
μ :
[[-0.27636856  0.3377376 ]
 [-0.69113076 -0.9321433 ]
 [-0.5146856   0.16163845]]

σ :
[0.4854712  0.8633912  0.37439775]

Weights :
tensor([[ 0.5078,  0.0963,  0.1998],
        [-0.5220, -0.6815, -0.5694],
        [-0.3646,  0.0385,  0.0449]])

Bias :
tensor([-0.2953, -0.0906, -0.0096])
epoch  1  ->  train[loss: 0.89808 - acc: 0.70]
μ :
[[-0.37970242  0.29226032]
 [-0.5599097  -0.93971384]
 [-0.497       0.15001203]]

σ :
[0.5136477  0.84330523 0.47712082]

Weights :
tensor([[ 0.8028,  0.1665,  0.4933],
        [-0.5527, -0.9453, -0.8725],
        [-0.6875,  0.1162, -0.2371]])

Bias :
tensor([-0.4319,  0.0710, -0.0578])
epoch  2  ->  train[loss: 0.81084 - acc: 0.79]
μ :
[[-0.41706774  0.37004578]
 [-0.2719009  -0.95400065]
 [-0.5335405   0.2047996 ]]

σ :
[0.50358945 0.89065677 0.48440617]

Weights :
tensor([[ 1.0246,  0.1028,  0.7300],
        [-0.5789, -1.2537, -1.1003],
        [-0.9351,  0.3676, -0.4634]])

Bias :
tensor([-0.6336,

# Test Loop

In [84]:
# running top-1 multiclass accuracy for the testset, on the same device as the model outputs
test_acc = Accuracy(
    task='multiclass',
    num_classes=n_classes,
    top_k=1,
).to(device)

In [85]:
# evaluation on the testset: no parameter updates, no gradient tracking
model.eval()
test_loss = 0
predictions, targets = [], []

with torch.no_grad():
    for x, y in testloader:

        # move the mini-batch to the selected device
        x = x.to(device)
        y_true = y.to(device)

        # forward pass and batch loss
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # accumulate the batch-size-weighted loss and the running accuracy
        test_loss += loss.item() * len(x)
        test_acc.update(y_pred, y_true)

        # keep the predicted class and the target of every sample on the CPU
        predicted_classes = y_pred.argmax(dim=1)
        predictions.extend(predicted_classes.cpu())
        targets.extend(y_true.cpu())

# log
print(f"test[loss: {test_loss / len(testset):.5f} - acc: {test_acc.compute().item():.2f}]")

test[loss: 0.24104 - acc: 1.00]
