<a href="https://colab.research.google.com/github/neel26desai/cmpe258_neural_network_advanced/blob/main/Neural_Networks_with_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [None]:
np.random.seed(42)

# Defining the data
n, d = 1000, 3  # 1000 records, 3 features

# Creating x, values will be between -1 and 1 and the shape will be n, d
x = np.random.uniform(-1, 1, (n, d))

# Creating weights that allow us to get the y_true values
weights_true = np.array([[1], [3], [1]], dtype=np.float32)
bias_true = np.array([1], dtype=np.float32)

# Creating an equation y_true using np.dot for matrix multiplication
y_true = np.dot(x**3, weights_true) + \
         np.dot(x**2, weights_true) + \
         np.dot(x, weights_true) + bias_true

print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')


x: (1000, 3), weights: (3, 1), bias: (1,), y: (1000, 1)


In [None]:
#create a train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y_true, test_size=0.2, random_state=42)

# Base Model

In [None]:
import torch.nn as nn

# Define the model
model = nn.Sequential(
  nn.Linear(3, 128),  # First hidden layer
  nn.ReLU(),  # Activation function
  nn.Linear(128, 64),  # Second hidden layer
  nn.ReLU(),  # Activation function
  nn.Linear(64, 1)  # Output layer
)

# Print the model
print(model)


Sequential(
  (0): Linear(in_features=3, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=1, bias=True)
)


In [7]:
# Define the loss function
loss_fn = nn.L1Loss()

# Define the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Train the model
for epoch in range(10):
  model.train()  # Set the model to training mode
  y_pred = model(torch.tensor(X_train, dtype=torch.float32))
  loss = loss_fn(y_pred, torch.tensor(y_train, dtype=torch.float32))

  optimizer.zero_grad()  # Clear the gradients
  loss.backward()  # Compute the gradients
  optimizer.step()  # Update the parameters

  # Validate the model
  model.eval()  # Set the model to evaluation mode
  with torch.inference_mode():
    y_pred_test = model(torch.tensor(X_test, dtype=torch.float32))
    loss_test = loss_fn(y_pred_test, torch.tensor(y_test, dtype=torch.float32))

    print(f'Epoch: {epoch}, Train Loss: {loss:.3f}, Test Loss: {loss_test:.3f}')



Epoch: 0, Train Loss: 0.328, Test Loss: 0.414
Epoch: 1, Train Loss: 0.417, Test Loss: 0.277
Epoch: 2, Train Loss: 0.300, Test Loss: 0.401
Epoch: 3, Train Loss: 0.398, Test Loss: 0.312
Epoch: 4, Train Loss: 0.338, Test Loss: 0.428
Epoch: 5, Train Loss: 0.430, Test Loss: 0.279
Epoch: 6, Train Loss: 0.302, Test Loss: 0.379
Epoch: 7, Train Loss: 0.374, Test Loss: 0.317
Epoch: 8, Train Loss: 0.347, Test Loss: 0.434
Epoch: 9, Train Loss: 0.432, Test Loss: 0.297


#L1 Regularization

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Parameters
l1_lambda = 0.01
loss_fn = nn.L1Loss()
optimizer = optim.SGD(model.parameters(), lr=0.01)  # Define the optimizer

# Convert training and testing data to tensors once, assuming X_train, y_train, X_test, y_test are defined
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Calculate L1 regularization dynamically
    l1_reg = l1_lambda * sum(p.abs().sum() for p in model.parameters())

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor) + l1_reg

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor) + l1_reg

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')


Epoch: 0, Train Loss: 8.421, Test Loss: 8.107
Epoch: 1, Train Loss: 8.400, Test Loss: 8.088
Epoch: 2, Train Loss: 8.380, Test Loss: 8.070
Epoch: 3, Train Loss: 8.360, Test Loss: 8.052
Epoch: 4, Train Loss: 8.341, Test Loss: 8.034
Epoch: 5, Train Loss: 8.321, Test Loss: 8.016
Epoch: 6, Train Loss: 8.301, Test Loss: 7.999
Epoch: 7, Train Loss: 8.282, Test Loss: 7.981
Epoch: 8, Train Loss: 8.263, Test Loss: 7.964
Epoch: 9, Train Loss: 8.244, Test Loss: 7.947


# L2

In [12]:
# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Parameters
l2_lambda = 0.01
loss_fn = nn.MSELoss()  # Use MSE loss for L2 regularization
optimizer = optim.SGD(model.parameters(), lr=0.01)  # Define the optimizer

# Convert training and testing data to tensors once, assuming X_train, y_train, X_test, y_test are defined
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Calculate L2 regularization dynamically
    l2_reg = l2_lambda / 2 * sum(p.pow(2).sum() for p in model.parameters())

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor) + l2_reg

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor) + l2_reg

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')


Epoch: 0, Train Loss: 19.683, Test Loss: 15.497
Epoch: 1, Train Loss: 18.601, Test Loss: 14.634
Epoch: 2, Train Loss: 17.582, Test Loss: 13.792
Epoch: 3, Train Loss: 16.576, Test Loss: 12.935
Epoch: 4, Train Loss: 15.543, Test Loss: 12.044
Epoch: 5, Train Loss: 14.459, Test Loss: 11.130
Epoch: 6, Train Loss: 13.332, Test Loss: 10.221
Epoch: 7, Train Loss: 12.196, Test Loss: 9.354
Epoch: 8, Train Loss: 11.094, Test Loss: 8.555
Epoch: 9, Train Loss: 10.066, Test Loss: 7.834


In [13]:
# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Dropout(p=0.2),  # Dropout layer with probability 0.2
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Dropout(p=0.2),  # Dropout layer with probability 0.2
    nn.Linear(64, 1)  # Output layer
)

# Parameters
l2_lambda = 0.01
loss_fn = nn.MSELoss()  # Use MSE loss for L2 regularization
optimizer = optim.SGD(model.parameters(), lr=0.01)  # Define the optimizer

# Convert training and testing data to tensors once, assuming X_train, y_train, X_test, y_test are defined
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Calculate L2 regularization dynamically
    l2_reg = l2_lambda / 2 * sum(p.pow(2).sum() for p in model.parameters())

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor) + l2_reg

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor) + l2_reg

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')



Epoch: 0, Train Loss: 18.384, Test Loss: 14.341
Epoch: 1, Train Loss: 17.293, Test Loss: 13.447
Epoch: 2, Train Loss: 16.182, Test Loss: 12.542
Epoch: 3, Train Loss: 15.152, Test Loss: 11.622
Epoch: 4, Train Loss: 13.894, Test Loss: 10.659
Epoch: 5, Train Loss: 12.827, Test Loss: 9.730
Epoch: 6, Train Loss: 11.796, Test Loss: 8.852
Epoch: 7, Train Loss: 10.813, Test Loss: 8.048
Epoch: 8, Train Loss: 9.596, Test Loss: 7.309
Epoch: 9, Train Loss: 8.714, Test Loss: 6.648


Early Stop

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the model with dropout layers
model = nn.Sequential(
    nn.Linear(3, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(64, 1)
)

# Loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Early stopping parameters
patience = 2
best_loss = float('inf')
trigger_times = 0

# Training loop with early stopping
for epoch in range(1500):  # Example with a higher number of maximum epochs
    model.train()
    optimizer.zero_grad()
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')

    # Check if the current loss is lower than the best one seen so far
    if test_loss < best_loss or test_loss<1:
        best_loss = test_loss
        trigger_times = 0  # reset the patience trigger
        # Save the best model, if needed
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        trigger_times += 1
        if trigger_times >= patience:
            print(f"Early stopping triggered at epoch {epoch} with test loss {test_loss:.3f}")
            break


Epoch: 0, Train Loss: 19.713, Test Loss: 15.080
Epoch: 1, Train Loss: 18.364, Test Loss: 14.042
Epoch: 2, Train Loss: 17.024, Test Loss: 13.046
Epoch: 3, Train Loss: 15.798, Test Loss: 12.070
Epoch: 4, Train Loss: 14.707, Test Loss: 11.131
Epoch: 5, Train Loss: 13.454, Test Loss: 10.215
Epoch: 6, Train Loss: 12.314, Test Loss: 9.354
Epoch: 7, Train Loss: 11.497, Test Loss: 8.589
Epoch: 8, Train Loss: 10.574, Test Loss: 7.911
Epoch: 9, Train Loss: 9.689, Test Loss: 7.277
Epoch: 10, Train Loss: 9.318, Test Loss: 6.695
Epoch: 11, Train Loss: 8.510, Test Loss: 6.099
Epoch: 12, Train Loss: 7.550, Test Loss: 5.548
Epoch: 13, Train Loss: 7.568, Test Loss: 5.023
Epoch: 14, Train Loss: 6.694, Test Loss: 4.530
Epoch: 15, Train Loss: 6.338, Test Loss: 4.095
Epoch: 16, Train Loss: 5.745, Test Loss: 3.701
Epoch: 17, Train Loss: 5.678, Test Loss: 3.369
Epoch: 18, Train Loss: 5.485, Test Loss: 3.053
Epoch: 19, Train Loss: 5.145, Test Loss: 2.805
Epoch: 20, Train Loss: 4.957, Test Loss: 2.613
Epoch: 2

#Monte Carlo Dropout

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define a custom dropout module that stays active during evaluation
class MCDropout(nn.Module):
    def __init__(self, p=0.5):
        super(MCDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if self.training or not self.training:
            return F.dropout(x, p=self.p, training=True)
        return x

# Define the model using the custom MCDropout
model = nn.Sequential(
    nn.Linear(3, 128),
    nn.ReLU(),
    MCDropout(0.5),
    nn.Linear(128, 64),
    nn.ReLU(),
    MCDropout(0.5),
    nn.Linear(64, 1)
)

# Example of how to perform MC Dropout during inference
def mc_dropout_inference(model, input_tensor, iterations=100):
    model.train()  # Keep the dropout layers active
    predictions = [model(input_tensor) for _ in range(iterations)]
    predictions = torch.stack(predictions)
    mean_predictions = predictions.mean(0)
    std_predictions = predictions.std(0)
    return mean_predictions, std_predictions

# Example usage
model.eval()  # Not strictly necessary here since dropout is always active
mean_preds, std_preds = mc_dropout_inference(model, X_test_tensor)

print("Mean predictions:", mean_preds)
print("Standard deviation of predictions:", std_preds)


Mean predictions: tensor([[-0.1195],
        [-0.1349],
        [-0.0844],
        [-0.1241],
        [-0.1131],
        [-0.0887],
        [-0.1240],
        [-0.0955],
        [-0.0992],
        [-0.1021],
        [-0.1148],
        [-0.1645],
        [-0.0394],
        [-0.1072],
        [-0.1326],
        [-0.1387],
        [-0.1096],
        [-0.0549],
        [-0.0638],
        [-0.1221],
        [-0.1724],
        [-0.2202],
        [-0.1111],
        [-0.1143],
        [-0.1509],
        [-0.1692],
        [-0.1865],
        [-0.1004],
        [-0.0733],
        [-0.2221],
        [-0.0914],
        [-0.1570],
        [-0.1877],
        [-0.1072],
        [-0.0946],
        [-0.1353],
        [-0.1337],
        [-0.1299],
        [-0.1479],
        [-0.0997],
        [-0.1185],
        [-0.0699],
        [-0.1251],
        [-0.0856],
        [-0.0730],
        [-0.0547],
        [-0.1337],
        [-0.1142],
        [-0.0701],
        [-0.1393],
        [-0.1002],
        [-0.1

# Weight Initialization

## Constant/ Zero weight Initialization

In [24]:
# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Initialize weights and biases with zeros
for layer in model.modules():
    if isinstance(layer, nn.Linear):
        nn.init.constant_(layer.weight, 0)
        nn.init.constant_(layer.bias, 0)
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    #ms

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')



Epoch: 0, Train Loss: 18.756, Test Loss: 15.308
Epoch: 1, Train Loss: 18.459, Test Loss: 15.065
Epoch: 2, Train Loss: 18.173, Test Loss: 14.833
Epoch: 3, Train Loss: 17.899, Test Loss: 14.611
Epoch: 4, Train Loss: 17.636, Test Loss: 14.398
Epoch: 5, Train Loss: 17.383, Test Loss: 14.195
Epoch: 6, Train Loss: 17.140, Test Loss: 14.000
Epoch: 7, Train Loss: 16.906, Test Loss: 13.814
Epoch: 8, Train Loss: 16.682, Test Loss: 13.637
Epoch: 9, Train Loss: 16.467, Test Loss: 13.466


## Uniform


In [27]:


# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Initialize weights and biases with Uniform Distribution
for layer in model.modules():
    if isinstance(layer, nn.Linear):
        nn.init.uniform_(layer.weight, -0.5, 0.5)
        nn.init.uniform_(layer.bias, -0.5, 0.5)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')




Epoch: 0, Train Loss: 23.377, Test Loss: 10.077
Epoch: 1, Train Loss: 11.306, Test Loss: 7.638
Epoch: 2, Train Loss: 8.730, Test Loss: 5.915
Epoch: 3, Train Loss: 6.845, Test Loss: 4.610
Epoch: 4, Train Loss: 5.406, Test Loss: 3.616
Epoch: 5, Train Loss: 4.292, Test Loss: 2.859
Epoch: 6, Train Loss: 3.428, Test Loss: 2.293
Epoch: 7, Train Loss: 2.769, Test Loss: 1.875
Epoch: 8, Train Loss: 2.273, Test Loss: 1.572
Epoch: 9, Train Loss: 1.904, Test Loss: 1.355


## Normal

In [28]:
# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Initialize weights and biases with Normal Distribution
for layer in model.modules():
    if isinstance(layer, nn.Linear):
        nn.init.normal_(layer.weight, mean=0, std=0.01)
        nn.init.normal_(layer.bias, mean=0, std=0.01)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')



Epoch: 0, Train Loss: 18.826, Test Loss: 15.362
Epoch: 1, Train Loss: 18.523, Test Loss: 15.115
Epoch: 2, Train Loss: 18.232, Test Loss: 14.878
Epoch: 3, Train Loss: 17.953, Test Loss: 14.653
Epoch: 4, Train Loss: 17.685, Test Loss: 14.437
Epoch: 5, Train Loss: 17.428, Test Loss: 14.230
Epoch: 6, Train Loss: 17.182, Test Loss: 14.032
Epoch: 7, Train Loss: 16.945, Test Loss: 13.843
Epoch: 8, Train Loss: 16.717, Test Loss: 13.663
Epoch: 9, Train Loss: 16.499, Test Loss: 13.490


##Xavier/Glorot Initialization

In [30]:
# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Initialize weights and biases with Xavier/Glorot Initialization
for layer in model.modules():
    if isinstance(layer, nn.Linear):
        nn.init.xavier_uniform_(layer.weight)
        #nn.init.xavier_uniform_(layer.bias)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')


Epoch: 0, Train Loss: 19.201, Test Loss: 13.669
Epoch: 1, Train Loss: 16.529, Test Loss: 12.090
Epoch: 2, Train Loss: 14.642, Test Loss: 10.893
Epoch: 3, Train Loss: 13.162, Test Loss: 9.926
Epoch: 4, Train Loss: 11.927, Test Loss: 9.155
Epoch: 5, Train Loss: 10.906, Test Loss: 8.553
Epoch: 6, Train Loss: 10.082, Test Loss: 8.073
Epoch: 7, Train Loss: 9.415, Test Loss: 7.663
Epoch: 8, Train Loss: 8.858, Test Loss: 7.287
Epoch: 9, Train Loss: 8.368, Test Loss: 6.920


## He Initialization

In [32]:


# Define the model
model = nn.Sequential(
    nn.Linear(3, 128),  # First hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(128, 64),  # Second hidden layer
    nn.ReLU(),  # Activation function
    nn.Linear(64, 1)  # Output layer
)

# Initialize weights and biases with He Initialization
for layer in model.modules():
    if isinstance(layer, nn.Linear):
        nn.init.kaiming_uniform_(layer.weight)
        #nn.init.kaiming_uniform_(layer.bias)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')



Epoch: 0, Train Loss: 21.994, Test Loss: 6.157
Epoch: 1, Train Loss: 7.179, Test Loss: 3.608
Epoch: 2, Train Loss: 4.024, Test Loss: 2.349
Epoch: 3, Train Loss: 2.628, Test Loss: 1.591
Epoch: 4, Train Loss: 1.834, Test Loss: 1.160
Epoch: 5, Train Loss: 1.379, Test Loss: 0.923
Epoch: 6, Train Loss: 1.119, Test Loss: 0.789
Epoch: 7, Train Loss: 0.966, Test Loss: 0.712
Epoch: 8, Train Loss: 0.870, Test Loss: 0.665
Epoch: 9, Train Loss: 0.807, Test Loss: 0.633


#Batch Normalization

In [33]:
# Define the model with batch normalization
model = nn.Sequential(
    nn.Linear(3, 128),
    nn.BatchNorm1d(128),  # Batch normalization after first linear layer
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.BatchNorm1d(64),  # Batch normalization after second linear layer
    nn.ReLU(),
    nn.Linear(64, 1)
)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(10):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Forward pass and loss computation
    y_pred = model(X_train_tensor)
    loss = loss_fn(y_pred, y_train_tensor)

    # Backward pass and optimization
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the parameters

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Inference mode, no gradient tracking
        y_pred_test = model(X_test_tensor)
        test_loss = loss_fn(y_pred_test, y_test_tensor)

    # Print losses
    print(f'Epoch: {epoch}, Train Loss: {loss.item():.3f}, Test Loss: {test_loss.item():.3f}')


Epoch: 0, Train Loss: 20.253, Test Loss: 14.430
Epoch: 1, Train Loss: 10.908, Test Loss: 13.049
Epoch: 2, Train Loss: 6.110, Test Loss: 11.797
Epoch: 3, Train Loss: 3.254, Test Loss: 10.739
Epoch: 4, Train Loss: 1.725, Test Loss: 9.909
Epoch: 5, Train Loss: 0.991, Test Loss: 9.283
Epoch: 6, Train Loss: 0.654, Test Loss: 8.800
Epoch: 7, Train Loss: 0.490, Test Loss: 8.400
Epoch: 8, Train Loss: 0.400, Test Loss: 8.045
Epoch: 9, Train Loss: 0.342, Test Loss: 7.709
