[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/khetansarvesh/Tabular-Cross-Sectional-Modelling/blob/main/modelling/regression/ANN.ipynb)

In [2]:
import torch
import torch.nn as nn

# **Data Processing**

In [3]:
x = torch.randn(10, 5) # creating sample dataset with just 5 independent features - x1,x2,x3,x4,x5 and 2 rows
x

tensor([[ 4.0891e-01, -4.4023e-01, -8.5300e-01,  9.7805e-01,  5.6652e-02],
        [ 5.5181e-01, -1.3340e+00,  1.2891e+00,  9.7943e-01,  1.3466e-01],
        [-1.6634e+00, -1.0699e+00, -6.9859e-01, -1.6662e+00,  1.3111e+00],
        [-8.4231e-02,  2.6872e-01,  6.7192e-01, -1.3386e+00,  5.7466e-01],
        [ 8.7959e-01, -2.2094e+00,  9.3218e-01,  2.1339e-01, -5.6719e-01],
        [ 8.3101e-01, -2.9186e-01,  6.3879e-01, -7.6856e-01,  1.0273e+00],
        [ 8.6117e-01, -1.3429e+00,  6.0611e-02,  9.8263e-02, -2.2480e-01],
        [-5.7655e-01, -1.7413e-01, -4.1327e-01, -7.0351e-01, -1.1554e+00],
        [ 3.0365e-02, -2.2806e+00, -6.8178e-01, -9.2001e-01, -9.4490e-01],
        [-6.3160e-01, -1.0600e+00, -2.4375e+00,  2.1277e-03, -3.5008e-01]])

In [4]:
# Create the y data
y = torch.randn(10, 1)
y

tensor([[ 0.1520],
        [-2.3906],
        [ 0.7636],
        [-0.4156],
        [-0.3451],
        [ 0.6496],
        [-1.1991],
        [ 0.5058],
        [-1.3132],
        [-0.1949]])

# **Modelling**

## **Without Using Library**

In [None]:
  for i in range(no_of_iterations):

    #-----------------------forward propagation/pass : moving from input layer to output layer-----------------------
    Z1 = Xp.dot(W1.T) + b1 #1xhidden_units
    A1 = sigmoid(Z1) #1xhidden_units
    Z2 = A1.dot(W.T) + b #1x10
    A2 = sigmoid(Z2) #1x10

    #-----------------------backward propagation/pass (BACKPROP) : moving from derivative of error wrt output weights to derivative of error wrt hidden weights-----------------------
    '''
    it is a big myth that backprop is used to find weights, it is used to find derivatives of loss function wrt weight ~BHANU SIR
    '''
    db = (-1)*(Yp/A2)*(sigmoid_derivative(Z2)) + ((1-Yp)/(1-A2))*(sigmoid_derivative(Z2)) #1X10
    dW = db.T.dot(A1)#10X8 = 10X1 X 1X8
    db1 = (db.dot(W))*sigmoid_derivative(Z1)#1X8 = (1X10 X 10X8) X 1X8
    dW1 = (db1.T).dot(Xp)# 8X6 = 8X1 X 1X6

    #-----------------------weight update rule-----------------------
    W = W - (learning_rate)*(dW)
    b = b - (learning_rate)*(db)
    W1 = W1 - (learning_rate)*(dW1)
    b1 = b1 - (learning_rate)*(db1)

## **Using Pytorch Library**

In [None]:
# creating a one hidden layer FFNN for regression
class SarveshANN(nn.Module):

  def __init__(self, input_size = 5, hidden_size = 10, output_size = 1): # 5 cause we have 5 features, 10 hidden units in hiddlen layer1 and just 1 output unit
    super(SarveshANN, self).__init__()
    self.model = nn.Sequential(
        nn.Linear(input_size, hidden_size),nn.ReLU(),
        nn.Linear(hidden_size, output_size),nn.Identity() # nn.Sigmoid() and nn.LeakyReLU()
    )

  def forward(self, x):
    return self.model(x)

#Here is an alternative way to define the same class. You can see that we can replace nn.Sequential by defining the individual layers in the __init__ method and connecting the in the forward function.
#class MultilayerPerceptron(nn.Module):

#  def __init__(self, input_size = 5, hidden_size = 10, output_size = 1):
#    super(MultilayerPerceptron, self).__init__()
#    self.linear = nn.Linear(input_size, hidden_size)
#    self.relu = nn.ReLU()
#    self.linear2 = nn.Linear(hidden_size, output_size)
#    self.sigmoid = nn.Sigmoid()

#  def forward(self, x):
#    linear = self.linear(x)
#    relu = self.relu(linear)
#    linear2 = self.linear2(relu)
#    output = self.sigmoid(linear2)
#    return output

In [None]:
model = SarveshANN()
print(model)

SarveshANN(
  (model): Sequential(
    (0): Linear(in_features=5, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=5, bias=True)
    (3): ReLU()
    (4): Linear(in_features=5, out_features=1, bias=True)
    (5): Identity()
  )
)


In [None]:
list(model.named_parameters()) #alternative to this is model.parameters() function -> these function gives the initial random parameters the model is taking

[('model.0.weight', Parameter containing:
  tensor([[-0.2559,  0.2649, -0.0537, -0.0286,  0.3346],
          [-0.0139,  0.4457,  0.3879,  0.4450, -0.3163],
          [ 0.2954,  0.3767,  0.4416,  0.1291, -0.0380],
          [-0.1830, -0.0354,  0.2354,  0.3366,  0.3842],
          [-0.3997, -0.2525, -0.3182,  0.2405,  0.3490],
          [-0.3522,  0.0324,  0.1795, -0.0540, -0.0234],
          [ 0.3768, -0.0812,  0.3992,  0.3240,  0.4382],
          [ 0.2240,  0.3083,  0.1131,  0.2188,  0.1478],
          [ 0.1752,  0.2188, -0.0049, -0.3777,  0.4376],
          [-0.2581,  0.0958,  0.3011, -0.3008,  0.2002]], requires_grad=True)),
 ('model.0.bias', Parameter containing:
  tensor([-0.2308,  0.1004, -0.3939,  0.1638, -0.0436, -0.1687, -0.0675,  0.2156,
           0.4307, -0.4149], requires_grad=True)),
 ('model.2.weight', Parameter containing:
  tensor([[-0.2661, -0.0350,  0.3118,  0.0334,  0.0921,  0.0074,  0.2894, -0.0901,
            0.0221,  0.2854],
          [-0.2084, -0.1682, -0.2330,

# **Training**

In [None]:
# Define the optimizer
import torch.optim as optim
adam = optim.Adam(model.parameters(), lr=1e-1)

# Define loss using a predefined loss function
loss_function = nn.MSELoss()

In [None]:
# training for 10 epochs
for epoch in range(10):

  #Set the gradients to 0
  adam.zero_grad()

  #forward propagation
  y_pred = model(x)
  loss = loss_function(y_pred, y)
  print(f"Epoch {epoch}: traing loss: {loss}")

  #backward propagation to compute the gradients
  loss.backward()

  #Updating weights - Take a step to optimize the weights
  adam.step()


Epoch 0: traing loss: 0.46382957696914673
Epoch 1: traing loss: 0.41810521483421326
Epoch 2: traing loss: 0.33339884877204895
Epoch 3: traing loss: 0.2539175748825073
Epoch 4: traing loss: 0.2533775269985199
Epoch 5: traing loss: 0.18649843335151672
Epoch 6: traing loss: 0.14389298856258392
Epoch 7: traing loss: 0.10955234616994858
Epoch 8: traing loss: 0.07010544836521149
Epoch 9: traing loss: 0.05134958028793335


In [None]:
list(model.parameters()) #parameters learnt after training

[Parameter containing:
 tensor([[ 0.0919,  0.4043,  0.0661, -0.2225,  0.8393],
         [-0.1179,  1.2639,  0.4499, -0.0146, -0.3028],
         [ 0.0106,  0.4343,  0.2046, -0.2028,  0.1867],
         [ 0.0155, -0.7481,  0.4118, -0.2486, -0.0780],
         [-0.4846, -0.0452, -0.1402,  0.1874,  0.0923],
         [-0.0870, -0.1877, -0.2299, -0.1982,  0.5085],
         [-0.1209,  0.0378,  0.5342,  0.3766,  0.1330],
         [ 0.1514,  0.9357,  0.3862, -0.3541,  0.0459],
         [ 0.4894, -0.0513,  0.0158, -0.6740,  0.0521],
         [-0.3115, -0.4785,  0.4461, -0.8579,  0.1322]], requires_grad=True),
 Parameter containing:
 tensor([-0.0245,  0.6163, -0.4797,  0.7681, -0.2806, -0.5909, -0.3918,  0.6035,
          0.5538, -0.0950], requires_grad=True),
 Parameter containing:
 tensor([[-0.8311, -0.2224,  0.0213, -0.0971, -0.0278,  0.3614,  0.2324, -0.2281,
          -0.5414,  0.0140],
         [-0.2084, -0.1682, -0.2330, -0.1889, -0.2143, -0.1303, -0.1252, -0.1246,
          -0.3036,  0.0225

# **Inference**

In [None]:
# See how our model performs on the training data
y_pred = model(x)
y_pred

tensor([[ 0.9180],
        [-0.5824],
        [-0.5866],
        [ 0.2653],
        [ 0.9249],
        [ 0.0908],
        [ 1.2653],
        [-1.4679],
        [-0.4246],
        [ 0.1427]], grad_fn=<AddmmBackward>)