In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# torch.nn module
The torch.nn module in PyTorch is a core library that provides a wide array of classes and
functions designed to help developers build neural networks efficiently and effectively. It
abstracts the complexity of creating and training neural networks by offering pre-built layers,
loss functions, activation functions, and other utilities, enabling you to focus on designing and
experimenting with model architectures.

## Key Components of torch.nn:
- Modules (Layers):
  - nn.Module: The base class for all neural network modules. Your custom models and
  layers should subclass this class.
  - Common Layers: Includes layers like nn.Linear (fully connected layer), nn.Conv2d
(convolutional layer), nn.LSTM (recurrent layer), and many others.
- Activation Functions: Functions like nn.ReLU, nn.Sigmoid, and nn.Tanh introduce non-linearities to the
model, allowing it to learn complex patterns.
- Loss Functions: Provides loss functions such as nn.CrossEntropyLoss, nn.MSELoss, and nn.NLLLoss to
quantify the difference between the model's predictions and the actual targets.
- Container Modules: nn.Sequential: A sequential container to stack layers in order.
- Regularization and Dropout: Layers like nn.Dropout and nn.BatchNorm2d help prevent overfitting and improve
the model's ability to generalize to new data.

In [2]:
import torch
import torch.nn as nn

In [3]:
# create model class
class Model(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.linear = nn.Linear(num_features, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, features):
        out = self.linear(features)
        out = self.sigmoid(out)
        return out

In [4]:
# create dataset
features = torch.rand((100, 5), dtype=torch.float32)

# create model instance
model = Model(features.shape[1])

In [5]:
# forward pass
## __call__ is overridden by PyTorch to call `forward` method when model(features) is executed. Otherwise model.forward(features) is also fine
model(features)

tensor([[0.4708],
        [0.4849],
        [0.5768],
        [0.4778],
        [0.5298],
        [0.5377],
        [0.5194],
        [0.5111],
        [0.5854],
        [0.5470],
        [0.5516],
        [0.5264],
        [0.5256],
        [0.5449],
        [0.4840],
        [0.6126],
        [0.6050],
        [0.5726],
        [0.5583],
        [0.4539],
        [0.5384],
        [0.5811],
        [0.6277],
        [0.5286],
        [0.6030],
        [0.5917],
        [0.4996],
        [0.5462],
        [0.5400],
        [0.5728],
        [0.5443],
        [0.5039],
        [0.5400],
        [0.5555],
        [0.5313],
        [0.5124],
        [0.5810],
        [0.5254],
        [0.5926],
        [0.5232],
        [0.4827],
        [0.5461],
        [0.5335],
        [0.5278],
        [0.5285],
        [0.5444],
        [0.5696],
        [0.5107],
        [0.5802],
        [0.5262],
        [0.5842],
        [0.5170],
        [0.5444],
        [0.5114],
        [0.5552],
        [0

In [6]:
# show model weights
model.linear.weight

Parameter containing:
tensor([[ 0.2094,  0.2147, -0.2108,  0.1772,  0.2564]], requires_grad=True)

In [7]:
# show model bias
model.linear.bias

Parameter containing:
tensor([-0.1581], requires_grad=True)

In [8]:
# to visualize the network, you can use torchinfo
!pip3 install torchinfo



In [9]:
from torchinfo import summary
summary(model, input_size=(100,5))

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [100, 1]                  --
├─Linear: 1-1                            [100, 1]                  6
├─Sigmoid: 1-2                           [100, 1]                  --
Total params: 6
Trainable params: 6
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

# Hidden layer neural network
We need to make minimal changes in order to improve the neural network by adding one hidden layer

In [10]:
class ModelHiddenLayer(nn.Module):
    def __init__(self, num_features):
        super().__init__()

        # input layer -> (num_features * 3) i.e. hidden layer comprises of 3 neurons
        self.linear1 = nn.Linear(num_features, 3)
        self.relu = nn.ReLU()

        # hidden layer -> (3 * 1) i.e. from hidden layer we get the output
        self.linear2 = nn.Linear(3, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, features):
        out = self.linear1(features)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.sigmoid(out)
        return out

In [11]:
hidden_layer_model = ModelHiddenLayer(features.shape[1])

In [12]:
hidden_layer_model(features)

tensor([[0.4758],
        [0.4769],
        [0.4907],
        [0.4845],
        [0.4779],
        [0.4932],
        [0.4667],
        [0.4932],
        [0.4932],
        [0.4916],
        [0.4883],
        [0.4842],
        [0.4627],
        [0.4694],
        [0.4738],
        [0.4940],
        [0.4876],
        [0.4694],
        [0.4915],
        [0.4776],
        [0.4817],
        [0.4819],
        [0.4818],
        [0.4762],
        [0.4850],
        [0.4940],
        [0.4585],
        [0.4743],
        [0.4709],
        [0.4784],
        [0.4921],
        [0.4857],
        [0.4741],
        [0.4881],
        [0.4702],
        [0.4783],
        [0.4870],
        [0.4866],
        [0.4798],
        [0.4684],
        [0.4736],
        [0.4815],
        [0.4758],
        [0.4906],
        [0.4858],
        [0.4849],
        [0.4841],
        [0.4796],
        [0.4855],
        [0.4721],
        [0.4924],
        [0.4730],
        [0.4764],
        [0.4731],
        [0.4716],
        [0

In [13]:
# layer 1 weights
hidden_layer_model.linear1.weight

Parameter containing:
tensor([[ 0.1699, -0.0333,  0.1229, -0.3370, -0.3473],
        [-0.2409, -0.0861,  0.1881,  0.3290,  0.1883],
        [ 0.1247,  0.0806,  0.4199, -0.2138,  0.0530]], requires_grad=True)

In [14]:
# layer 2 weights
hidden_layer_model.linear2.weight

Parameter containing:
tensor([[-0.0148, -0.0297, -0.2659]], requires_grad=True)

In [15]:
summary(hidden_layer_model, input_size=(100, 5))

Layer (type:depth-idx)                   Output Shape              Param #
ModelHiddenLayer                         [100, 1]                  --
├─Linear: 1-1                            [100, 3]                  18
├─ReLU: 1-2                              [100, 3]                  --
├─Linear: 1-3                            [100, 1]                  4
├─Sigmoid: 1-4                           [100, 1]                  --
Total params: 22
Trainable params: 22
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.01

# Sequential container
Instead of defining every layer separately we can group the layers into containers and execute them as one.

In [16]:
class SequentialContainerModel(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features, 5),
            nn.ReLU(),
            nn.Linear(5, 2),
            nn.ReLU(),
            nn.Linear(2, 1),
            nn.Sigmoid()
        )
    def forward(self, features):
        out = self.network(features)
        return out

In [17]:
# construct dataset
seq_model_features = torch.rand((1000, 10), dtype=torch.float32)

# instantiate model
seq_model = SequentialContainerModel(seq_model_features.shape[1])

In [18]:
# forward pass
seq_model(seq_model_features)

tensor([[0.6177],
        [0.6168],
        [0.6189],
        [0.6193],
        [0.6164],
        [0.6150],
        [0.6190],
        [0.6196],
        [0.6198],
        [0.6154],
        [0.6191],
        [0.6214],
        [0.6189],
        [0.6185],
        [0.6220],
        [0.6188],
        [0.6209],
        [0.6179],
        [0.6213],
        [0.6194],
        [0.6185],
        [0.6215],
        [0.6199],
        [0.6142],
        [0.6172],
        [0.6214],
        [0.6187],
        [0.6193],
        [0.6204],
        [0.6188],
        [0.6201],
        [0.6205],
        [0.6182],
        [0.6195],
        [0.6148],
        [0.6209],
        [0.6216],
        [0.6162],
        [0.6195],
        [0.6194],
        [0.6216],
        [0.6206],
        [0.6173],
        [0.6173],
        [0.6175],
        [0.6203],
        [0.6167],
        [0.6190],
        [0.6181],
        [0.6175],
        [0.6188],
        [0.6162],
        [0.6191],
        [0.6194],
        [0.6194],
        [0

In [19]:
# layer 1 weights
seq_model.network[0].weight

Parameter containing:
tensor([[-0.1664,  0.2345, -0.0071, -0.0297, -0.1617, -0.2879, -0.1954, -0.2489,
          0.0878, -0.0175],
        [-0.0607, -0.1086, -0.0210,  0.3128,  0.2966, -0.0808, -0.0178, -0.2691,
          0.1088,  0.2548],
        [ 0.1215, -0.2674, -0.0120, -0.2665,  0.1027,  0.3068, -0.0850,  0.2138,
         -0.3135,  0.1931],
        [ 0.2721,  0.1405,  0.2086,  0.0420,  0.1005, -0.0737,  0.1891, -0.1844,
          0.2317, -0.1227],
        [ 0.1940, -0.0769,  0.1321, -0.2422, -0.2797,  0.1338,  0.2432, -0.0854,
          0.0285,  0.1507]], requires_grad=True)

In [20]:
# layer 2 weights
seq_model.network[2].weight

Parameter containing:
tensor([[ 0.2592, -0.2565,  0.2632, -0.2128, -0.1587],
        [ 0.2254,  0.1861,  0.3629, -0.1865, -0.0084]], requires_grad=True)

In [21]:
summary(seq_model, input_size=(1000, 10))

Layer (type:depth-idx)                   Output Shape              Param #
SequentialContainerModel                 [1000, 1]                 --
├─Sequential: 1-1                        [1000, 1]                 --
│    └─Linear: 2-1                       [1000, 5]                 55
│    └─ReLU: 2-2                         [1000, 5]                 --
│    └─Linear: 2-3                       [1000, 2]                 12
│    └─ReLU: 2-4                         [1000, 2]                 --
│    └─Linear: 2-5                       [1000, 1]                 3
│    └─Sigmoid: 2-6                      [1000, 1]                 --
Total params: 70
Trainable params: 70
Non-trainable params: 0
Total mult-adds (M): 0.07
Input size (MB): 0.04
Forward/backward pass size (MB): 0.06
Params size (MB): 0.00
Estimated Total Size (MB): 0.10