In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
x = np.random.normal(0, 1, 5000)                  #creating 5000 data points (X) 5000 with a normal distribution N(0, 1)
eps = np.random.normal(0, 0.25, 5000)             #creating a vector(eps) with 5000 observation drawn with a normal distribution N(0, 0.25)
y = -1 + 0.5 * x - 2 * x**2 + 0.3 * x**3 + eps    #Generating the given Y using vectors X and eps

Plotting the input data

In [7]:
import plotly.graph_objects as go

data_trace = go.Scatter(x=x, y=y, mode='markers')
fig = go.Figure(data=[data_trace])
fig.show()

# Adaline and Sigmoid Models

In [8]:
class Adaline(nn.Module):                   # Define the Adaline class
    def __init__(self):
        super(Adaline, self).__init__()     # Call the parent class
        self.linear = nn.Linear(1, 1)       # Create a linear layer

    def forward(self, x):                   # Define the forward propagation function
        return self.linear(x)               # calculate the weighted sum of inputs plus the bias term


class SigmoidNeuron(nn.Module):                   # Define the SigmoidNeuron class
    def __init__(self):
        super(SigmoidNeuron, self).__init__()     # Call the parent class
        self.linear = nn.Linear(1, 1)             # Create a linear layer
        self.sigmoid = nn.Sigmoid()               # Create a Sigmoid function layer

    def forward(self, x):                         # Define the forward propagation function
        x = self.linear(x)                        # calculate the weighted sum of inputs plus the bias term
        x = self.sigmoid(x)
        return x

## Checking models

In [9]:
X_tensor = torch.from_numpy(x.reshape(-1, 1)).float()  # Convert the numpy array to a PyTorch tensor
y_tensor = torch.from_numpy(y.reshape(-1, 1)).float()  # Convert the numpy array to a PyTorch tensor

In [10]:
model_adaline_zero = Adaline()                                     # Instantiate the Adaline model
criterion = nn.MSELoss()                                           # Define the loss function
optimizer = optim.SGD(model_adaline_zero.parameters(), lr=0.01)    # Define the optimization algorithm

In [11]:
loss_memory = []

for epoch in range(1000):
    # Forward pass
    outputs = model_adaline_zero(X_tensor)                         # compute the predicted outputs
    loss = criterion(outputs, y_tensor)                            # Compute the loss
    loss_memory.append(loss.item())

    # Backward pass and optimization
    optimizer.zero_grad()                                          # Zero the gradients to reset
    loss.backward()                                                # compute the gradient of the loss
    optimizer.step()

    # Print the loss after each epoch
    print(f"Epoch {epoch+1}/{1000}, Loss: {loss.item()}")

Epoch 1/1000, Loss: 14.3125
Epoch 2/1000, Loss: 14.063434600830078
Epoch 3/1000, Loss: 13.824137687683105
Epoch 4/1000, Loss: 13.59422492980957
Epoch 5/1000, Loss: 13.37332534790039
Epoch 6/1000, Loss: 13.161090850830078
Epoch 7/1000, Loss: 12.95717716217041
Epoch 8/1000, Loss: 12.761260032653809
Epoch 9/1000, Loss: 12.57302474975586
Epoch 10/1000, Loss: 12.392171859741211
Epoch 11/1000, Loss: 12.218411445617676
Epoch 12/1000, Loss: 12.051464080810547
Epoch 13/1000, Loss: 11.891061782836914
Epoch 14/1000, Loss: 11.736949920654297
Epoch 15/1000, Loss: 11.588882446289062
Epoch 16/1000, Loss: 11.446619987487793
Epoch 17/1000, Loss: 11.3099365234375
Epoch 18/1000, Loss: 11.17861270904541
Epoch 19/1000, Loss: 11.052436828613281
Epoch 20/1000, Loss: 10.931208610534668
Epoch 21/1000, Loss: 10.81473445892334
Epoch 22/1000, Loss: 10.702827453613281
Epoch 23/1000, Loss: 10.595307350158691
Epoch 24/1000, Loss: 10.49200439453125
Epoch 25/1000, Loss: 10.392753601074219
Epoch 26/1000, Loss: 10.29739

In [12]:
data_trace = go.Scatter(x=np.arange(1000), y=loss_memory, mode='markers')
fig = go.Figure(data=[data_trace])

fig.update_layout(
    xaxis_title="epochs",
    yaxis_title="loss"
)

fig.show()

# Adaline
## Stochastic Gradient Descent

In [13]:
from sklearn.model_selection import KFold

criterion = nn.MSELoss()                      # Define the loss function
kfold = KFold(n_splits=10, shuffle=True)      # Initialize a KFold object

In [14]:
eval_results = []                # for storing the test loss for each fold
epoch_num = 10

for fold, (train_indices, test_indices) in enumerate(kfold.split(x)):
    print(f"Fold: {fold + 1}")
    
    # creating the indexes for the i fold
    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    
    # converting the data to tensors
    X_train_tensor = torch.from_numpy(x_train.reshape(-1, 1)).float()
    X_test_tensor = torch.from_numpy(x_test.reshape(-1, 1)).float()
    y_train_tensor = torch.from_numpy(y_train.reshape(-1, 1)).float()
    y_test_tensor = torch.from_numpy(y_test.reshape(-1, 1)).float()
    
    # creating the dataloader to help us implement SGD and BGD
    dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)

    # creating the Adaline model
    model_adaline = Adaline()
    optimizer = torch.optim.SGD(model_adaline.parameters(), lr=0.01)

    for epoch in range(epoch_num):                            # Begin the training process
        for inputs, targets in data_loader:
            # Forward pass
            outputs = model_adaline(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            break

    # Evaluate the model on the test set
    with torch.no_grad():
        model_adaline.eval()
        test_outputs = model_adaline(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        eval_results.append(test_loss.item())

    print(f"Test Loss: {test_loss.item()}")

# Calculate and print the average test loss across all folds
print("Average Test Loss:", sum(eval_results) / epoch_num)

Fold: 1
Test Loss: 11.917847633361816
Fold: 2
Test Loss: 12.234872817993164
Fold: 3
Test Loss: 15.90283203125
Fold: 4
Test Loss: 10.124515533447266
Fold: 5
Test Loss: 9.21599292755127
Fold: 6
Test Loss: 18.89227294921875
Fold: 7
Test Loss: 12.119914054870605
Fold: 8
Test Loss: 16.614957809448242
Fold: 9
Test Loss: 22.531484603881836
Fold: 10
Test Loss: 17.454965591430664
Average Test Loss: 14.70096559524536


In [15]:
data_trace = go.Bar(x=np.arange(10), y=eval_results)
fig = go.Figure(data=[data_trace])

fig.update_layout(
    title='Loss for Adaline with SGD',
    xaxis_title="epoch number",
    yaxis_title="loss"
)

fig.show()

# Adaline
## Batch Gradient Descent (Default GD)

In [16]:
eval_results = []
epoch_num = 10

for fold, (train_indices, test_indices) in enumerate(kfold.split(x)):
    print(f"Fold: {fold + 1}")

    # creating the indexes for the i fold
    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    
    # converting the data to tensors
    X_train_tensor = torch.from_numpy(x_train.reshape(-1, 1)).float()
    X_test_tensor = torch.from_numpy(x_test.reshape(-1, 1)).float()
    y_train_tensor = torch.from_numpy(y_train.reshape(-1, 1)).float()
    y_test_tensor = torch.from_numpy(y_test.reshape(-1, 1)).float()
    
    # creating the dataloader to help us implement SGD and BGD
    dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=len(X_train_tensor), shuffle=True)

    # creating the Adaline model
    model_adaline = Adaline()
    optimizer = torch.optim.SGD(model_adaline.parameters(), lr=0.01)

    for epoch in range(epoch_num):
        for inputs, targets in data_loader:
            # Forward pass
            outputs = model_adaline(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate the model on the test set
    with torch.no_grad():
        model_adaline.eval()
        test_outputs = model_adaline(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        eval_results.append(test_loss.item())

    print(f"Test Loss: {test_loss.item()}")

# Calculate and print the average test loss across all folds
print("Average Test Loss:", sum(eval_results) / epoch_num)

Fold: 1
Test Loss: 16.924732208251953
Fold: 2
Test Loss: 23.719890594482422
Fold: 3
Test Loss: 15.573141098022461
Fold: 4
Test Loss: 19.078615188598633
Fold: 5
Test Loss: 18.63629913330078
Fold: 6
Test Loss: 10.803889274597168
Fold: 7
Test Loss: 14.597661018371582
Fold: 8
Test Loss: 12.209345817565918
Fold: 9
Test Loss: 12.092461585998535
Fold: 10
Test Loss: 16.918485641479492
Average Test Loss: 16.055452156066895


In [17]:
data_trace = go.Bar(x=np.arange(10), y=eval_results)
fig = go.Figure(data=[data_trace])

fig.update_layout(
    title='Loss for Adaline with Batch GD',
    xaxis_title="epoch number",
    yaxis_title="loss"
)

fig.show()

# Sigmoid Neuron
## Stochastic Gradient Descent

In [18]:
eval_results = []
epoch_num = 10

for fold, (train_indices, test_indices) in enumerate(kfold.split(x)):
    print(f"Fold: {fold + 1}")
    
    # creating the indexes for the i fold
    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    
    # converting the data to tensors
    X_train_tensor = torch.from_numpy(x_train.reshape(-1, 1)).float()
    X_test_tensor = torch.from_numpy(x_test.reshape(-1, 1)).float()
    y_train_tensor = torch.from_numpy(y_train.reshape(-1, 1)).float()
    y_test_tensor = torch.from_numpy(y_test.reshape(-1, 1)).float()
    
    # creating the dataloader to help us implement SGD and BGD
    dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)

    # creating the Adaline model
    model_sigmoid = SigmoidNeuron()
    optimizer = torch.optim.SGD(model_sigmoid.parameters(), lr=0.01)

    for epoch in range(epoch_num):
        for inputs, targets in data_loader:
            # Forward pass
            outputs = model_sigmoid(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            break

    # Evaluate the model on the test set
    with torch.no_grad():
        model_sigmoid.eval()
        test_outputs = model_sigmoid(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        eval_results.append(test_loss.item())

    print(f"Test Loss: {test_loss.item()}")

# Calculate and print the average test loss across all folds
print("Average Test Loss:", sum(eval_results) / epoch_num)

Fold: 1
Test Loss: 18.717405319213867
Fold: 2
Test Loss: 20.688501358032227
Fold: 3
Test Loss: 25.538820266723633
Fold: 4
Test Loss: 20.75914764404297
Fold: 5
Test Loss: 24.09957504272461
Fold: 6
Test Loss: 20.33806037902832
Fold: 7
Test Loss: 24.509180068969727
Fold: 8
Test Loss: 22.918550491333008
Fold: 9
Test Loss: 20.004432678222656
Fold: 10
Test Loss: 19.410457611083984
Average Test Loss: 21.6984130859375


In [19]:
data_trace = go.Bar(x=np.arange(10), y=eval_results)
fig = go.Figure(data=[data_trace])

fig.update_layout(
    title='Loss for Sigmoid model with SGD',
    xaxis_title="epoch number",
    yaxis_title="loss"
)

fig.show()

# Sigmoid Neuron
## Batch Gradient Descent (Default GD)

In [20]:
eval_results = []
epoch_num = 10

for fold, (train_indices, test_indices) in enumerate(kfold.split(x)):
    print(f"Fold: {fold + 1}")

    # creating the indexes for the i fold
    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    
    # converting the data to tensors
    X_train_tensor = torch.from_numpy(x_train.reshape(-1, 1)).float()
    X_test_tensor = torch.from_numpy(x_test.reshape(-1, 1)).float()
    y_train_tensor = torch.from_numpy(y_train.reshape(-1, 1)).float()
    y_test_tensor = torch.from_numpy(y_test.reshape(-1, 1)).float()
    
    # creating the dataloader to help us implement SGD and BGD
    dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=len(X_train_tensor), shuffle=True)

    # creating the Adaline model
    model_sigmoid = SigmoidNeuron()
    optimizer = torch.optim.SGD(model_sigmoid.parameters(), lr=0.01)

    for epoch in range(epoch_num):
        for inputs, targets in data_loader:
            # Forward pass
            outputs = model_sigmoid(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate the model on the test set
    with torch.no_grad():
        model_sigmoid.eval()
        test_outputs = model_sigmoid(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        eval_results.append(test_loss.item())

    print(f"Test Loss: {test_loss.item()}")

# Calculate and print the average test loss across all folds
print("Average Test Loss:", sum(eval_results) / epoch_num)

Fold: 1
Test Loss: 21.853601455688477
Fold: 2
Test Loss: 21.815397262573242
Fold: 3
Test Loss: 21.728952407836914
Fold: 4
Test Loss: 18.273632049560547
Fold: 5
Test Loss: 20.3897762298584
Fold: 6
Test Loss: 19.44029426574707
Fold: 7
Test Loss: 17.577478408813477
Fold: 8
Test Loss: 26.689802169799805
Fold: 9
Test Loss: 23.03757667541504
Fold: 10
Test Loss: 25.13196563720703
Average Test Loss: 21.59384765625


In [21]:
data_trace = go.Bar(x=np.arange(10), y=eval_results)
fig = go.Figure(data=[data_trace])

fig.update_layout(
    title='Loss for Sigmoid model with Batch GD',
    xaxis_title="epoch number",
    yaxis_title="loss"
)

fig.show()