📝 **Author:** Amirhossein Heydari - 📧 **Email:** <amirhosseinheydari78@gmail.com> - 📍 **Origin:** [mr-pylin/pytorch-workshop](https://github.com/mr-pylin/pytorch-workshop)

---


**Table of contents**<a id='toc0_'></a>    
- [Dependencies](#toc1_)    
- [Pre-Processing](#toc2_)    
  - [Load Dataset](#toc2_1_)    
  - [Split dataset into trainset & testset](#toc2_2_)    
  - [Normalization](#toc2_3_)    
  - [Dataset](#toc2_4_)    
  - [Dataloader](#toc2_5_)    
- [Radial Basis Function Networks](#toc3_)    
- [Set up remaining Hyperparameters](#toc4_)    
- [Train & Validation Loop](#toc5_)    
- [Test Loop](#toc6_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

# <a id='toc1_'></a>[Dependencies](#toc0_)


In [1]:
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from torchinfo import summary
from torchmetrics.classification import MulticlassAccuracy

In [2]:
# set a seed for deterministic results
# (the same value is reused below as `random_state` for dataset generation and splitting)
seed = 0
# seed PyTorch's global random number generator
torch.manual_seed(seed)
# ask cuDNN for deterministic algorithms and turn off its auto-tuner,
# which may otherwise select different kernels from run to run
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# check if cuda is available; fall back to the CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# log
# (a bare expression on the last line of a notebook cell is shown as the cell output)
device

# <a id='toc2_'></a>[Pre-Processing](#toc0_)


## <a id='toc2_1_'></a>[Load Dataset](#toc0_)


In [4]:
# generate a 2D classification dataset
n_samples = 250  # total number of samples, before the train/test split
n_classes = 3  # also determines the number of model outputs and the metric classes below

# both features are informative (none redundant) and every class forms a single cluster;
# `random_state=seed` keeps the generated dataset identical across runs
X, y = make_classification(
    n_samples=n_samples,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=n_classes,
    n_clusters_per_class=1,
    random_state=seed,
)

## <a id='toc2_2_'></a>[Split dataset into trainset & testset](#toc0_)


In [5]:
# hold out 10% of the samples as the testset; `random_state=seed` makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=seed)

## <a id='toc2_3_'></a>[Normalization](#toc0_)


In [None]:
# scale every feature to the range [-1, 1]
# the scaler is fitted on the trainset only, so no statistics of the testset are used
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X_train)

# log
# (per-feature min/max of the raw trainset, i.e. before the scaler is applied)
print(f"min of trainset: {X_train.min(axis=0)}")
print(f"max of trainset: {X_train.max(axis=0)}")

In [7]:
# apply the trainset-fitted scaling to both splits
# (testset values may fall slightly outside [-1, 1], since the scaler never saw them)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# plot both splits on the same axes, colored by class label
_splits = (
    (X_train, y_train, "o", "trainset"),
    (X_test, y_test, ",", "testset"),
)
for _points, _labels, _marker, _name in _splits:
    plt.scatter(_points[:, 0], _points[:, 1], marker=_marker, c=_labels, s=25, edgecolor="k", label=_name)

# figure decorations
plt.title(f"2D dataset with {n_samples} samples")
plt.xlabel("feature 1")
plt.ylabel("feature 2")
plt.legend()
plt.show()

## <a id='toc2_4_'></a>[Dataset](#toc0_)


In [9]:
# convert the NumPy arrays to tensors:
#   - features as float32
#   - labels as int64 class indices (used as targets of the cross-entropy loss below)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.int64)
y_test = torch.tensor(y_test, dtype=torch.int64)

# pair every sample with its label, so that dataset[i] -> (features, label)
trainset = TensorDataset(X_train, y_train)
testset = TensorDataset(X_test, y_test)

## <a id='toc2_5_'></a>[Dataloader](#toc0_)


In [10]:
# number of samples per mini-batch
batch_size = 4

# the trainset is reshuffled every epoch; the testset is iterated in a fixed order
trainloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

# <a id='toc3_'></a>[Radial Basis Function Networks](#toc0_)

- Radial Basis Function (RBF) networks are a type of artificial neural network.
- They use radial basis functions as activation functions.
- The output of the network is a linear combination of radial basis functions of the inputs and neuron parameters.


<figure style="text-align: center;">
  <img src="../assets/images/original/rbf/radial-basis-function-networks.svg" alt="radial-basis-function-networks.svg" style="width: 80%;">
  <figcaption>Radial Basis Functions Network Model</figcaption>
</figure>


<table style="margin: 0 auto; text-align:center;">
  <thead>
    <tr>
      <th colspan="2">hidden parameters</th>
      <th colspan="2">logits parameters</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>μ</td>
      <td>σ</td>
      <td>Weights</td>
      <td>Biases</td>
    </tr>
    <tr>
      <td>A × B</td>
      <td>B</td>
      <td>B × C</td>
      <td>C</td>
    </tr>
  </tbody>
  <tfoot>
    <tr>
      <td colspan="2">A × B + B</td>
      <td colspan="2">B × C + C</td>
    </tr>
  </tfoot>
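</table>

A: number of input features, B: number of RBF kernels (hidden units), C: number of output neurons. The footer row gives the number of parameters of each layer.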


$
X = \begin{bmatrix}
        x_{1}^1 & x_{1}^2 & \cdots & x_{1}^D \\
        x_{2}^1 & x_{2}^2 & \cdots & x_{2}^D \\
        \vdots & \vdots & \ddots & \vdots \\
        x_{Q}^1 & x_{Q}^2 & \cdots & x_{Q}^D \\
    \end{bmatrix}_{Q \times D} \quad \text{(Q: number of samples, D: number of features)}
$


$
\phi(X) = 
        % \begin{bmatrix}
        % 1 \\
        % \phi_{1} \\
        % \phi_{2} \\
        % \vdots \\
        % \phi_{N} \\
        % \end{bmatrix}_{(N+1) \times 1}
        \begin{bmatrix}
        1 & 1 & \cdots & 1 \\
        \phi_1(X_1) & \phi_1(X_2) & \cdots & \phi_1(X_Q) \\
        \phi_2(X_1) & \phi_2(X_2) & \cdots & \phi_2(X_Q) \\
        \vdots & \vdots & \ddots & \vdots \\
        \phi_N(X_1) & \phi_N(X_2) & \cdots & \phi_N(X_Q) \\
        \end{bmatrix}_{(N+1) \times Q}
    = 
        \begin{bmatrix}
        1 & 1 & \cdots & 1 \\
        \exp\left(-\frac{\|\mathbf{X_1} - \mathbf{\mu}_1\|^2}{2\sigma_1^2}\right) & \exp\left(-\frac{\|\mathbf{X_2} - \mathbf{\mu}_1\|^2}{2\sigma_1^2}\right) & \cdots & \exp\left(-\frac{\|\mathbf{X_Q} - \mathbf{\mu}_1\|^2}{2\sigma_1^2}\right) \\
        \exp\left(-\frac{\|\mathbf{X_1} - \mathbf{\mu}_2\|^2}{2\sigma_2^2}\right) & \exp\left(-\frac{\|\mathbf{X_2} - \mathbf{\mu}_2\|^2}{2\sigma_2^2}\right) & \cdots & \exp\left(-\frac{\|\mathbf{X_Q} - \mathbf{\mu}_2\|^2}{2\sigma_2^2}\right) \\
        \vdots & \vdots & \ddots & \vdots \\
        \exp\left(-\frac{\|\mathbf{X_1} - \mathbf{\mu}_N\|^2}{2\sigma_N^2}\right) & \exp\left(-\frac{\|\mathbf{X_2} - \mathbf{\mu}_N\|^2}{2\sigma_N^2}\right) & \cdots & \exp\left(-\frac{\|\mathbf{X_Q} - \mathbf{\mu}_N\|^2}{2\sigma_N^2}\right) \\
        \end{bmatrix}_{(N+1) \times Q}
$


$
W = \begin{bmatrix}
        w_{0}^1 & w_{0}^2 & \cdots & w_{0}^{C} \\
        w_{1}^1 & w_{1}^2 & \cdots & w_{1}^{C} \\
        w_{2}^1 & w_{2}^2 & \cdots & w_{2}^{C} \\
        \vdots & \vdots & \ddots & \vdots \\
        w_{N}^1 & w_{N}^2 & \cdots & w_{N}^{C} \\
    \end{bmatrix}_{(N+1) \times C} \quad \text{(N: number of kernels, C: number of output neurons)}
$


$$f(X) = \phi(X)^TW$$


In [11]:
class RBF(nn.Module):
    """Layer of radial basis function (RBF) units.

    Each of the ``out_features`` units owns a learnable center (a point in the
    ``in_features``-dimensional input space) and a learnable width ``sigma``.
    The activation of a unit depends only on the Euclidean distance ``d``
    between the input sample and the center of that unit.

    Args:
        in_features: size of each input sample.
        out_features: number of RBF units (kernels).
        kernel: ``"gaussian"`` for ``exp(-d^2 / (2 * sigma^2))`` or
            ``"linear"`` for ``1 - d``.
    """

    def __init__(self, in_features: int, out_features: int, kernel: str = "gaussian"):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # allocated uninitialized; reset_parameters() fills in the actual values
        self.centers = nn.Parameter(torch.empty(out_features, in_features))
        self.sigmas = nn.Parameter(torch.empty(out_features))
        self.kernel = kernel
        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Draw the centers uniformly between -1 and 1 and the sigmas uniformly between 0 and 1."""
        nn.init.uniform_(self.centers, -1.0, 1.0)
        nn.init.uniform_(self.sigmas, 0.0, 1.0)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Compute the activation of every RBF unit for every sample.

        Args:
            input: tensor of shape ``(batch, in_features)``.

        Returns:
            Tensor of shape ``(batch, out_features)``.

        Raises:
            ValueError: if ``self.kernel`` is neither ``"gaussian"`` nor ``"linear"``.
        """
        # pairwise differences between samples and centers: (batch, out_features, in_features)
        size = (input.size(0), self.out_features, self.in_features)
        diff = input.unsqueeze(1).expand(size) - self.centers.unsqueeze(0)

        if self.kernel == "gaussian":
            # Use the squared distance directly. Taking a square root and squaring it
            # again has an infinite derivative at distance 0, which turns the gradients
            # into NaN as soon as a sample coincides with a center.
            squared_distances = diff.pow(2).sum(-1)
            return torch.exp(-squared_distances / (2 * self.sigmas.unsqueeze(0) ** 2))

        if self.kernel == "linear":
            # vector_norm (unlike a manual `.pow(0.5)`) backpropagates a zero
            # gradient instead of NaN when the distance is exactly 0
            return 1.0 - torch.linalg.vector_norm(diff, dim=-1)

        raise ValueError(f"Unknown kernel type {self.kernel}")


class RBFNet(nn.Module):
    """RBF network: a hidden layer of RBF units followed by a linear output layer."""

    def __init__(self, in_features: int, hidden_features: int, out_features: int, kernel: str = "gaussian"):
        super().__init__()
        # hidden layer: `hidden_features` RBF units operating on the raw input
        self.rbf = RBF(in_features=in_features, out_features=hidden_features, kernel=kernel)
        # output layer: linear combination (plus bias) of the RBF activations
        self.linear = nn.Linear(in_features=hidden_features, out_features=out_features)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Map a batch of samples to one output per `out_features`."""
        activations = self.rbf(input)
        outputs = self.linear(activations)
        return outputs

In [None]:
# number of input features, read from the first sample of the trainset
in_features = trainset[0][0].shape[0]
# number of RBF units in the hidden layer
num_kernels = 3
# one output per class
out_features = n_classes

# initialize the model
# (default "gaussian" kernel; parameters are moved to the selected device)
model = RBFNet(in_features, num_kernels, out_features).to(device)

# log
model

In [None]:
# torchinfo summary of the model for an input of shape (batch_size, in_features)
summary(model, input_size=(batch_size, in_features))

# <a id='toc4_'></a>[Set up remaining Hyperparameters](#toc0_)


In [14]:
# learning rate of the optimizer
lr = 0.01
# cross-entropy between the model outputs (logits) and the integer class labels
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model: RBF centers and sigmas as well as the linear layer
optimizer = Adam(params=model.parameters(), lr=lr)
# number of full passes over the trainset
num_epochs = 10

# <a id='toc5_'></a>[Train & Validation Loop](#toc0_)


In [15]:
# plot
def plot():
    """Show the dataset together with one circle per RBF unit (center = mu, radius = sigma)."""
    # snapshot of the current kernel parameters as NumPy arrays
    mus = model.rbf.centers.detach().cpu().numpy()
    sigmas = model.rbf.sigmas.detach().cpu().numpy()

    # one unfilled blue circle per kernel
    axes = plt.gca()
    for mu, sigma in zip(mus, sigmas):
        axes.add_artist(plt.Circle(mu, sigma, color="b", fill=False))

    # both splits, colored by class label
    splits = (
        (X_train, y_train, "o", "trainset"),
        (X_test, y_test, ",", "testset"),
    )
    for points, labels, marker, name in splits:
        plt.scatter(points[:, 0], points[:, 1], marker=marker, c=labels, s=25, edgecolor="k", label=name)

    # figure decorations
    plt.title(f"2D dataset with {n_samples} samples")
    plt.xlabel("feature 1")
    plt.ylabel("feature 2")
    plt.legend()
    plt.show()

In [16]:
# training history: the training loop appends one value per epoch to each list
train_acc_per_epoch = []
train_loss_per_epoch = []

# running top-1 accuracy over the batches of an epoch, kept on the same device as the model
train_acc = MulticlassAccuracy(num_classes=n_classes, top_k=1).to(device)

In [None]:
for epoch in range(num_epochs):

    # train loop
    model.train()
    train_loss = 0.0

    # the batch labels are bound to `y_true` (not `y`) so that the notebook-level
    # label array `y` of the full dataset is not overwritten by the last mini-batch
    for x, y_true in trainloader:

        # send data to the selected device
        x, y_true = x.to(device), y_true.to(device)

        # forward
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # backward
        loss.backward()

        # update parameters, then clear the gradients for the next iteration
        optimizer.step()
        optimizer.zero_grad()

        # store loss and accuracy per iteration
        # (the batch-mean loss is weighted by the batch size, so a smaller last batch is handled correctly)
        train_loss += loss.item() * len(x)
        train_acc.update(y_pred, y_true)

    # store loss and accuracy per epoch
    train_loss_per_epoch.append(train_loss / len(trainset))
    train_acc_per_epoch.append(train_acc.compute().item())
    train_acc.reset()

    # log
    # (read the values just appended: indexing with `epoch` would print stale entries
    #  if this cell is re-run without re-creating the history lists)
    print(
        f"epoch {epoch+1:0{len(str(num_epochs))}}/{num_epochs} -> train[loss: {train_loss_per_epoch[-1]:7.5f} - acc: {train_acc_per_epoch[-1]*100:5.2f}%]"
    )

    # plot
    plot()

# <a id='toc6_'></a>[Test Loop](#toc0_)


In [18]:
# running top-1 accuracy over the batches of the testset, kept on the same device as the model
test_acc = MulticlassAccuracy(num_classes=n_classes, top_k=1).to(device)

In [None]:
# test loop
model.eval()
test_loss = 0
predictions = []
targets = []

with torch.no_grad():
    for x, y in testloader:

        # send data to GPU
        x, y_true = x.to(device), y.to(device)

        # forward
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # store loss and accuracy per iteration
        test_loss += loss.item() * len(x)
        test_acc.update(y_pred, y_true)

        predictions.extend(y_pred.argmax(dim=1).cpu())
        targets.extend(y_true.cpu())

# log
print(f"test[loss: {test_loss / len(testset):.5f} - acc: {test_acc.compute().item()*100:5.2f}%]")