# Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from IPython.display import clear_output

!pip3 install pyprind

clear_output()

# Instructions

* Clone the notebook to your drive.
* The notebook has to be submitted in the form of a link giving us **view access**. Share this link in your application.

* If you still have any queries, you can reach out to the [core team](https://www.notion.so/Club-Contacts-70a4823e0ae34f35a0aa5d479e449915)



# Common Technical Questionnaire


### Question 1

Supervised learning is a type of machine learning where the inputs and outputs are mapped through
a family of equations, the machine learning model essentially picks the right curve to fit the data.
Quantile Regression is a type of supervised learning technique used in statistics and economics. One
advantage of quantile regression relative to ordinary least squares regression is that the quantile
regression estimates are more robust against outliers in the response measurements.

QuantileLossτ (y, ˆy) =
{

                           τ · (y − ˆy) if y > ˆy

                          (1 − τ ) · (ˆy − y) if y ≤ ˆy
}

where τ is Quantile whose value lies between 0 and 1.
Please use this template provided and make changes accordingly for this question alone.
Implement a simple Neural Network consisting of 4 nodes, one hidden layer consisting of 5 nodes
and output layer consisting of two nodes. Perform quantile regression on the model and observe
the loss.
**Bonus: Play around with the value of τ to find what value achieves convergence quicker.**



An example implementation of a simple manual neural network is provided. You may use this as inspiration to complete the task at hand.

In [None]:
# Imports

import torch
import torch.nn as nn

In [None]:
x = torch.rand((2, 4), requires_grad=False)
y = torch.rand((2, 2), requires_grad=False)
print(x,"\n",y)

tensor([[0.4270, 0.9313, 0.6315, 0.8689],
        [0.6734, 0.5307, 0.5362, 0.5707]]) 
 tensor([[0.7517, 0.8675],
        [0.7582, 0.7644]])


In [None]:
a0 = torch.rand((4, 5), requires_grad=True)  # For the hidden layer
b0 = torch.rand((2, 5), requires_grad=True)

a1 = torch.rand((5, 2), requires_grad=True)  # For the output layer
b1 = torch.rand((2, 2), requires_grad=True)


print(a0,
      "\n\n",b0,
      "\n\n",a1,
      "\n\n",b1)

tensor([[0.9368, 0.7348, 0.2154, 0.9532, 0.8344],
        [0.4325, 0.3455, 0.2061, 0.1738, 0.7149],
        [0.4404, 0.1564, 0.2872, 0.2801, 0.6692],
        [0.2310, 0.9366, 0.4659, 0.5612, 0.1619]], requires_grad=True) 

 tensor([[0.5816, 0.4139, 0.2252, 0.1493, 0.0464],
        [0.2482, 0.5678, 0.7249, 0.9912, 0.5732]], requires_grad=True) 

 tensor([[0.5713, 0.2372],
        [0.9017, 0.5375],
        [0.9669, 0.1608],
        [0.6123, 0.1329],
        [0.8466, 0.3913]], requires_grad=True) 

 tensor([[0.8267, 0.4043],
        [0.7303, 0.3179]], requires_grad=True)


In [None]:
quantiles = [0.2,0.5,0.75,0.9]

def loss_function(output, target, quantile):
    assert 0 < quantile < 1
    errors = target - output
    losses = torch.max((quantile - 1) * errors, quantile * errors)
    return torch.abs(losses).mean()  # Use 'losses' instead of 'loss'

for quantile in quantiles:
    print("Quantile", quantile)
    # Forward Pass 1
    y_1 = x @ a0 + b0
    y_pred1 = y_1 @ a1 + b1

    loss = loss_function(y_pred1, y, quantile)
    print(loss.item())

    loss.backward()

    # Updating Gradients
    with torch.no_grad():
        a0 -= 0.01 * a0.grad
        b0 -= 0.01 * b0.grad
        a1 -= 0.01 * a1.grad
        b1 -= 0.01 * b1.grad

    # Detach gradients after updating parameters
    a0.grad = None
    b0.grad = None
    a1.grad = None
    b1.grad = None

    # Forward Pass 2
    y_2 = x @ a0 + b0
    y_pred2 = y_2 @ a1 + b1

    loss = loss_function(y_pred2, y, quantile)
    print(loss.item())

    loss.backward()

    # Updating Gradients
    with torch.no_grad():
        a0 -= 0.01 * a0.grad
        b0 -= 0.01 * b0.grad
        a1 -= 0.01 * a1.grad
        b1 -= 0.01 * b1.grad

    # Detach gradients after updating parameters
    a0.grad = None
    b0.grad = None
    a1.grad = None
    b1.grad = None

    # Forward Pass 3
    y_3 = x @ a0 + b0
    y_pred3 = y_3 @ a1 + b1

    loss = loss_function(y_pred3, y, quantile)
    print(loss.item())
    print("\n")


Quantile 0.2
3.45569109916687
3.386751890182495
3.3188230991363525


Quantile 0.5
2.0742645263671875
2.048081159591675
2.0221362113952637


Quantile 0.75
1.0110681056976318
1.0046335458755493
0.9982280731201172


Quantile 0.9
0.3992912471294403
0.3982703387737274
0.3972512483596802




The Quantile 0.9 leads to lesser loss and is preferred

 Adding a Activation function ReLU to see if loss improved:

In [None]:
import torch
import torch.nn as nn

# Define trainable parameters


quantiles = [0.2, 0.5, 0.75, 0.9]

# Define the quantile loss function
def loss_function(output, target, quantile):
    assert 0 < quantile < 1
    errors = target - output
    losses = torch.max((quantile - 1) * errors, quantile * errors)
    return torch.abs(losses).mean()

for quantile in quantiles:
    print("Quantile", quantile)

    # Forward Pass 1
    y_1 = torch.matmul(x, a0) + b0
    y_1f = nn.ReLU()
    y_1_relu = y_1f(y_1)
    y_pred1 = torch.matmul(y_1_relu, a1) + b1

    loss = loss_function(y_pred1, y, quantile)
    print(loss.item())

    loss.backward()

    # Updating Gradients
    with torch.no_grad():
        a0 -= 0.01 * a0.grad
        b0 -= 0.01 * b0.grad
        a1 -= 0.01 * a1.grad
        b1 -= 0.01 * b1.grad

    # Detach gradients after updating parameters
    a0.grad = None
    b0.grad = None
    a1.grad = None
    b1.grad = None

    # Forward Pass 2
    y_2 = torch.matmul(x, a0) + b0
    y_2f = nn.ReLU()
    y_2_relu = y_2f(y_2)
    y_pred2 = torch.matmul(y_2_relu, a1) + b1


    loss = loss_function(y_pred2, y, quantile)
    print(loss.item())
    loss.backward()

    # Updating Gradients
    with torch.no_grad():
        a0 -= 0.01 * a0.grad
        b0 -= 0.01 * b0.grad
        a1 -= 0.01 * a1.grad
        b1 -= 0.01 * b1.grad

    # Detach gradients after updating parameters
    a0.grad = None
    b0.grad = None
    a1.grad = None
    b1.grad = None

    # Forward Pass 3
    y_3 = torch.matmul(x, a0) + b0
    y_3f = nn.ReLU()
    y_3_relu = y_3f(y_3)
    y_pred3 = torch.matmul(y_3_relu, a1) + b1

    loss = loss_function(y_pred3, y, quantile)
    print(loss.item())
    print("\n")


Quantile 0.2
3.1780099868774414
3.113114356994629
3.0491526126861572


Quantile 0.5
1.9057204723358154
1.8810606002807617
1.8566211462020874


Quantile 0.75
0.9283105731010437
0.9222484827041626
0.9162132740020752


Quantile 0.9
0.3664852976799011
0.3655233383178711
0.3645630478858948




Not much change

## Another Way of Implmentation

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader

In [None]:
batch_size = 256

# Convert the training data into a PyTorch Dataset
dataset = TensorDataset(x, y)

# Create a DataLoader to handle batching and shuffling
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
quantiles = [0.2,0.5,0.75,0.9]

In [None]:
# Define a simple neural network architecture
import torch.nn as nn

class QuantileNet(nn.Module):
    def __init__(self, output_size):
        super(QuantileNet, self).__init__()
        self.fc1 = nn.Linear(4, 5)  # Input features are 4-dimensional, hidden layer has 5 neurons
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(5, 2)  # Output layer with 2 nodes

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the model for batch training
model = QuantileNet(output_size = 2)

# Define optimizer for batch training
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training parameters
epochs = 100
batch_size = 256

# Convert the training data into a PyTorch Dataset
dataset = TensorDataset(x, y)

# Create a DataLoader to handle batching and shuffling
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
for quantile in quantiles:
  print("Quantile", quantile)
  # Training loop with DataLoader
  model.train()
  for epoch in range(epochs):
      for x_batch, y_batch in dataloader:
          optimizer.zero_grad()
          preds = model(x_batch)
          loss = loss_function(preds, y_batch, quantile)
          loss.backward()
          optimizer.step()


      if epoch % 10 == 0:
          print(f'Epoch {epoch}, Loss: {loss.item()}')


Quantile 0.2
Epoch 0, Loss: 0.1418529897928238
Epoch 10, Loss: 0.13604535162448883
Epoch 20, Loss: 0.13031037151813507
Epoch 30, Loss: 0.12430799752473831
Epoch 40, Loss: 0.11789591610431671
Epoch 50, Loss: 0.11133801192045212
Epoch 60, Loss: 0.10463184118270874
Epoch 70, Loss: 0.09775042533874512
Epoch 80, Loss: 0.09066101908683777
Epoch 90, Loss: 0.08332854509353638
Quantile 0.5
Epoch 0, Loss: 0.18928956985473633
Epoch 10, Loss: 0.16224072873592377
Epoch 20, Loss: 0.13485389947891235
Epoch 30, Loss: 0.11073343455791473
Epoch 40, Loss: 0.08734513819217682
Epoch 50, Loss: 0.06545980274677277
Epoch 60, Loss: 0.0445280447602272
Epoch 70, Loss: 0.024040378630161285
Epoch 80, Loss: 0.016097448766231537
Epoch 90, Loss: 0.012697696685791016
Quantile 0.75
Epoch 0, Loss: 0.016774937510490417
Epoch 10, Loss: 0.01466095820069313
Epoch 20, Loss: 0.010779239237308502
Epoch 30, Loss: 0.00640222430229187
Epoch 40, Loss: 0.006576176732778549
Epoch 50, Loss: 0.006192296743392944
Epoch 60, Loss: 0.0062

---