#### Sequential
- Simple and concise for linear architecture
- Limited to layer stacking.
- Not good with skip connections, custom models.
#### Functional API
- Flexible but requires more code.
- Allows any custom computation logic.

# 1.Sequential

In [1]:
import torch
import torch.nn as nn

In [3]:
# Define the neural network using sequential

# Input as 20 features i.e. X
# Ouput is 5 Taget variable / classes i.e. y

sequential_model = nn.Sequential(
    nn.Linear(20,64), # Input layer (20 -> 64)
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 5),
)

print(sequential_model)

input_data = torch.randn(10, 20) # 10 rows, 20 is my feature size in each row
print(input_data)
print('='*40)
output = sequential_model(input_data)   # pass data through model
print(output)


Sequential(
  (0): Linear(in_features=20, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=5, bias=True)
)
tensor([[ 2.8063e-01, -7.9068e-01, -4.8522e-01,  2.2687e+00, -7.6368e-02,
         -3.3985e-02, -7.2436e-01,  9.4620e-01, -1.0664e+00,  9.4917e-01,
          8.5967e-01, -2.0032e-01,  4.4371e-01, -6.4870e-01, -2.4266e-01,
          1.1356e+00,  2.0589e-01,  2.7088e-01, -6.2396e-01,  1.1146e+00],
        [ 1.4537e-01,  3.7606e-01,  5.5491e-01,  8.5518e-01, -1.2878e+00,
         -1.0719e-02, -2.1461e+00,  8.7564e-01,  5.3515e-01,  1.0100e+00,
          5.3628e-01,  5.7761e-01,  1.5451e+00, -3.4532e-01, -1.3994e+00,
          2.9451e-02, -9.9007e-01, -1.5339e+00,  5.4602e-01, -5.4578e-01],
        [-7.6722e-01,  1.2141e+00,  7.4831e-01,  9.0141e-01,  1.8835e-01,
         -2.2946e-01,  3.5685e-01, -5.3332e-01,  8.4887e-02,  4.5192e-01,
         -9.6046e-01, -1.1967e+00,  2.9536e

We have just passed dataset through NN once and printed output.

In [7]:
print(f"Model weights {sequential_model[0].weight.data}")  # Print all weights
print(f"Model bias {sequential_model[0].bias.data}")       # Print all bias


Model weights tensor([[ 0.1145, -0.1727,  0.0190,  ..., -0.2148, -0.0940, -0.1924],
        [-0.2204,  0.1265,  0.0167,  ..., -0.1176, -0.0264, -0.0343],
        [ 0.2199, -0.1449,  0.0154,  ..., -0.1216,  0.0190,  0.1438],
        ...,
        [ 0.1527, -0.1184,  0.1733,  ..., -0.1287,  0.0823,  0.0137],
        [-0.0205,  0.1466,  0.0523,  ..., -0.2232,  0.0900,  0.0512],
        [-0.0390, -0.1327, -0.1616,  ..., -0.0469, -0.2150, -0.2081]])
Model bias tensor([-0.1180,  0.1718, -0.1929, -0.0837,  0.0298,  0.1231, -0.0505,  0.1697,
        -0.2143, -0.0058,  0.1539,  0.0247, -0.1034,  0.0411, -0.2034, -0.0031,
         0.0463,  0.1848,  0.0988,  0.0549, -0.1261, -0.1989, -0.0619, -0.1244,
        -0.0317, -0.0805,  0.0688,  0.1641,  0.1848,  0.1699,  0.1156,  0.1284,
         0.1026,  0.0458,  0.2203,  0.1285,  0.1728, -0.1079,  0.0267, -0.2059,
         0.0299,  0.0319,  0.0068, -0.1288,  0.0695,  0.1392,  0.0008,  0.1115,
         0.1476, -0.0194,  0.1476,  0.1157, -0.0771, -0.1931,

# Functional API

In [8]:
class FunctionalModel(nn.Module):       # defined class for building model. nn.Module is inherited
  def __init__(self):
    super(FunctionalModel, self).__init__()
    self.fc1 = nn.Linear(20, 64)            # define required layers with inputs and outputs, define relu, dropout, batchnorm, etc required layers
    self.fc2 = nn.Linear(64, 32)
    self.fc3 = nn.Linear(32, 5)
    self.relu = nn.ReLU()

  def forward(self, X):                   # Define forward pass
    x = self.relu(self.fc1(X))           # Pass input data through layer 1 and layer 1 through RELU
    x = self.relu(self.fc2(x))           # Pass output of layer 1 through layer 2 and layer 2 through RELU
    x = self.fc3(x)                      #
    return x

functional_model = FunctionalModel()     # Here we build model architecture

print(functional_model)

input_data = torch.randn(10, 20) # 10 rows, 20 is my feature size in each row
#print(input_data)
output = functional_model(input_data)
print(output)

FunctionalModel(
  (fc1): Linear(in_features=20, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=5, bias=True)
  (relu): ReLU()
)
tensor([[-0.0366, -0.0435, -0.0063,  0.1008, -0.0495],
        [-0.0318, -0.0608, -0.0030,  0.0106, -0.0833],
        [ 0.0032, -0.0469, -0.0633,  0.1335,  0.0071],
        [ 0.0801, -0.0230, -0.1440,  0.0350,  0.0261],
        [ 0.0892, -0.0017, -0.0420,  0.0923, -0.1022],
        [ 0.0695, -0.0452, -0.0458,  0.1448, -0.1315],
        [ 0.0918,  0.0297, -0.0281, -0.0763, -0.1093],
        [ 0.0217,  0.0766,  0.0268,  0.0911, -0.1068],
        [ 0.0550, -0.0559, -0.0565,  0.0579, -0.0282],
        [ 0.0969, -0.0128, -0.0287, -0.1143,  0.0251]],
       grad_fn=<AddmmBackward0>)


# Multi-Class Classification with PyTorch

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [10]:
# Set device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [11]:
# Load and preprocess the dataset
iris = load_iris()
X = iris.data  # Features (numerical data)
y = iris.target  # Labels (3 classes)

In [12]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [13]:
# Convert data to tensors
X = torch.tensor(X, dtype=torch.float32).to(device)
y = torch.tensor(y, dtype=torch.long).to(device)  # Multi-class requires LongTensor for target

In [14]:
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
print(X_test.shape)

print("="*60)

print(X_test[:,:])
print(y_test[:])

print("="*60)
print(X_test[0,:])
print(y_test[0])

torch.Size([30, 4])
tensor([[ 3.1100e-01, -5.9237e-01,  5.3541e-01,  8.7755e-04],
        [-1.7367e-01,  1.7096e+00, -1.1697e+00, -1.1838e+00],
        [ 2.2497e+00, -1.0528e+00,  1.7858e+00,  1.4488e+00],
        [ 1.8983e-01, -3.6218e-01,  4.2173e-01,  3.9577e-01],
        [ 1.1592e+00, -5.9237e-01,  5.9225e-01,  2.6414e-01],
        [-5.3718e-01,  7.8881e-01, -1.2834e+00, -1.0522e+00],
        [-2.9484e-01, -3.6218e-01, -8.9803e-02,  1.3251e-01],
        [ 1.2803e+00,  9.8217e-02,  7.6276e-01,  1.4488e+00],
        [ 4.3217e-01, -1.9736e+00,  4.2173e-01,  3.9577e-01],
        [-5.2506e-02, -8.2257e-01,  8.0709e-02,  8.7755e-04],
        [ 7.9567e-01,  3.2841e-01,  7.6276e-01,  1.0539e+00],
        [-1.2642e+00, -1.3198e-01, -1.3402e+00, -1.4471e+00],
        [-4.1601e-01,  1.0190e+00, -1.3971e+00, -1.3154e+00],
        [-1.1430e+00,  9.8217e-02, -1.2834e+00, -1.4471e+00],
        [-9.0068e-01,  1.7096e+00, -1.2834e+00, -1.1838e+00],
        [ 5.5333e-01,  5.5861e-01,  5.3541e-01,  5

**A. Lets build basic neural network**

In [16]:
# Define the neural network model
class NeuralNetBasic(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNetBasic, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)  # First fully connected layer
        self.relu = nn.ReLU()  # Activation function
        self.fc2 = nn.Linear(hidden_size, num_classes)  # Output layer for classification

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Model parameters
input_size = X_train.shape[1]  # Number of features (4 for Iris)
hidden_size = 16  # Arbitrary hidden layer size
num_classes = 3  # Number of output classes (3 for Iris)

# Instantiate the model
model = NeuralNetBasic(input_size, hidden_size, num_classes).to(device)   # build model and send it to device (CPU/GPU)

**B. Lets build advanced neural network**

(More number of neurons and with drop-out layers after hidden layer)

As we are chosing cross entropy as loss function it automatically selects softmax as activation function in output layer

(No need to define activation function in output layer)

In [17]:
# Neural network model with multiple layers
class NeuralNetAdvance(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, num_classes):
        super(NeuralNetAdvance, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)  # First hidden layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)  # Second hidden layer
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)  # Third hidden layer
        self.fc4 = nn.Linear(hidden_size3, num_classes)  # Output layer
        self.relu = nn.ReLU()  # ReLU activation function
        self.dropout1 = nn.Dropout(p=0.2)  # Dropout for regularization
        self.dropout2 = nn.Dropout(p=0.5)  # Dropout for regularization

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout1(x)  # Apply dropout to the first hidden layer

        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout2(x)  # Apply dropout to the second hidden layer

        x = self.fc3(x)
        x = self.relu(x)

        x = self.fc4(x)  # Output layer (no activation, as CrossEntropyLoss applies softmax)
        return x

# Model parameters
input_size = X_train.shape[1]  # Number of features (4 for Iris)
hidden_size1 = 32  # First hidden layer size
hidden_size2 = 64  # Second hidden layer size
hidden_size3 = 32  # Third hidden layer size
num_classes = 3  # Number of output classes (3 for Iris)

# Instantiate the model
model = NeuralNetAdvance(input_size, hidden_size1, hidden_size2, hidden_size3, num_classes).to(device)


In [18]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Suitable for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

In [19]:
# Train the model
num_epochs = 100  # Number of training iterations
batch_size = 16  # Batch size for training

In [20]:
def train_model(X_train, y_train):
    model.train()
    for epoch in range(num_epochs):
        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        # Backward pass and optimization
        optimizer.zero_grad()  # Clear gradients
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# How the Model Handles Batches?

In the forward pass, PyTorch automatically handles multiple inputs (batches). This happens because the operations like matrix multiplication, addition, activation functions, etc., are all vectorized, meaning they are performed on the entire batch simultaneously.

In [21]:
# Train the model on the training data
train_model(X_train, y_train)

Epoch [10/100], Loss: 1.0625
Epoch [20/100], Loss: 1.0004
Epoch [30/100], Loss: 0.8940
Epoch [40/100], Loss: 0.7656
Epoch [50/100], Loss: 0.5939
Epoch [60/100], Loss: 0.4593
Epoch [70/100], Loss: 0.3865
Epoch [80/100], Loss: 0.3333
Epoch [90/100], Loss: 0.2856
Epoch [100/100], Loss: 0.2343


In [22]:
# Evaluate the model
model.eval()  # Set model to evaluation mode (no gradients)

NeuralNetAdvance(
  (fc1): Linear(in_features=4, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=3, bias=True)
  (relu): ReLU()
  (dropout1): Dropout(p=0.2, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
)

In [23]:
with torch.no_grad():  # No need to compute gradients during testing
    test_outputs = model(X_test)
    _, predicted = torch.max(test_outputs, 1)  # Get the class with highest probability
    accuracy = (predicted == y_test).sum().item() / y_test.size(0)
    print(f'Accuracy on the test set: {accuracy * 100:.2f}%')

Accuracy on the test set: 96.67%


In [24]:
# Example of prediction on new data
new_data = torch.tensor([[5.1, 3.5, 1.4, 0.2], [6.5, 3.0, 5.5, 1.8]], dtype=torch.float32).to(device)
new_data = torch.tensor(scaler.transform(new_data.cpu()), dtype=torch.float32).to(device)
print(new_data)

tensor([[-0.9007,  1.0190, -1.3402, -1.3154],
        [ 0.7957, -0.1320,  0.9901,  0.7907]])


* scaler.transform(new_data.cpu()) scales the data and returns a NumPy array.
* torch.tensor(...) converts the NumPy array back to a PyTorch tensor.
* .to(device) moves the tensor to the appropriate device (CPU or GPU).

In [25]:
with torch.no_grad():
    predictions = model(new_data)
    _, predicted_classes = torch.max(predictions, 1)
    print("Predicted classes for new data:", predicted_classes.cpu().numpy())

Predicted classes for new data: [0 2]
