# Building a regression model in pytorch

## Imports and data download

In [8]:
import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing

In [9]:
# Read data
data = fetch_california_housing()
X, y = data.data, data.target

# train-test split for model evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True)

# Convert to 2D PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

In [4]:
print(X_train.min(axis=0), X_train.max(axis=0) , X_train.mean(axis=0), X_train.std(axis=0))

torch.return_types.min(
values=tensor([   0.4999,    1.0000,    0.8462,    0.3333,    3.0000,    0.6923,
          32.5400, -124.3500]),
indices=tensor([ 2185,  3569,  4502,    35,  5305,  9929, 11203, 14238])) torch.return_types.max(
values=tensor([ 1.5000e+01,  5.2000e+01,  1.4191e+02,  3.4067e+01,  3.5682e+04,
         5.9971e+02,  4.1950e+01, -1.1431e+02]),
indices=tensor([  863,     6, 10274,   586, 11250,  7755,  7578,  9503])) tensor([ 3.8593e+00,  2.8601e+01,  5.4272e+00,  1.0984e+00,  1.4298e+03,
         2.9846e+00,  3.5620e+01, -1.1956e+02]) tensor([1.8682e+00, 1.2559e+01, 2.6983e+00, 5.2668e-01, 1.1472e+03, 5.1250e+00,
        2.1339e+00, 2.0008e+00])


In [5]:
print(y_train.min(), y_train.max() , y_train.mean())

tensor(0.1500) tensor(5.0000) tensor(2.0649)


In [10]:
from sklearn.preprocessing import StandardScaler

scalerX = StandardScaler()
scalerY = StandardScaler()

X_train = scalerX.fit_transform(X_train)
X_test = scalerX.transform(X_test)

y_train = scalerY.fit_transform(y_train)
y_test = scalerY.transform(y_test)

In [7]:
print(X_train.min(axis=0), X_train.max(axis=0) , X_train.mean(axis=0), X_train.std(axis=0))

[-1.79827833 -2.19781881 -1.69781556 -1.45262355 -1.24377606 -0.4472967
 -1.44362408 -2.39448381] [  5.96354843   1.86324794  50.58246156  62.59906806  29.85891845
 116.43843568   2.96630161   2.62361252] [ 1.05128061e-16 -6.63813005e-17 -1.13266264e-16 -7.51362087e-17
  5.80202384e-18 -2.31341291e-17 -1.11424986e-15  2.17392227e-15] [1. 1. 1. 1. 1. 1. 1. 1.]


In [8]:
print(y_train.min(), y_train.max() , y_train.mean())

-1.673581553951286 2.565210915492403 -9.639812847579404e-17


In [11]:
# Convert to 2D PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

## Define a model and learn it

In [12]:
# Define the model
model = nn.Sequential(
      nn.Linear(8,8),
      nn.ReLU(),
      nn.Linear(8, 8),
      nn.ReLU(),
      nn.Linear(8, 1)
)


In [13]:
def reinit_model(model):
  for layer in model.children():
   if hasattr(layer, 'reset_parameters'):
       layer.reset_parameters()

In [14]:
#Useful
epoch =1
batch_size = 32
batch_start = torch.arange(0, len(X_train), batch_size)
with tqdm.tqdm(batch_start, unit="batch", mininterval=0) as bar:
  bar.set_description(f"Epoch {epoch}")
  for start in bar:
    i=0

Epoch 1: 100%|██████████| 452/452 [00:00<00:00, 1175.29batch/s]


In [15]:

def train_model(model, n_epochs, batch_size, X_train, y_train, X_test, y_test, loss_fn_train, loss_fn_test, optimizer):

  history = []
  batch_start = torch.arange(0, len(X_train), batch_size)

  for epoch in range(n_epochs):
      model.train()
      with tqdm.tqdm(batch_start, unit="batch", mininterval=0) as bar:
          bar.set_description(f"Epoch {epoch}")
          running_loss=0.0
          N=1
          for start in bar:
              # take a batch
              model.train()
              X_batch = X_train[start:start+batch_size]
              y_batch = y_train[start:start+batch_size]
              # forward pass
              y_pred = model(X_batch)
              loss = loss_fn_train(y_pred, y_batch)

              # backward pass
              optimizer.zero_grad()
              loss.backward()

              # update weights
              optimizer.step()

              # print progress
              running_loss+=loss
              bar.set_postfix(mse=float(running_loss/(N+0.0)))
              N = N+1
          # evaluate accuracy on the test set at end of each epoch and append it to hiostory
              model.eval()
              y_pred = model(X_test)
              loss = loss_fn_test(y_pred, y_test)
              history.append(loss.item())
          

  return history


# To do

1.   Learn few model with MSE Loss in order to achieve reasonable loss (to be defined). This will allow you to select an architecture with an optimizer, a bacth size etc to perform next experiments
2.   Add a small noise (e.g. gaussian noise with null mean and variance 0.1 or 0.2) to the target to predict, and measure how the performance of above selected predictor degrades when trainign with such data.
3. Define an $\epsilon$-insensitive loss function for the MSE loss. The $\epsilon$-insensitive loss function is defined as $min(mse- \epsilon,0)$. What is the effect if such a loss ?
4. Use the above loss function (with different values of $\epsilon$) for learning while testing is still evaluated with MSE loss. Comment the results obtained.  



In [4]:
reinit_model(model)
batch_size = 32
n_epochs = 10
loss_fn_train = nn.MSELoss()
loss_fn_test = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)


NameError: name 'reinit_model' is not defined

In [33]:
train_model(model, n_epochs, batch_size, X_train, y_train, X_test, y_test, loss_fn_train, loss_fn_test, optimizer)

Epoch 0: 100%|██████████| 452/452 [00:02<00:00, 156.48batch/s, mse=0.392]
Epoch 1: 100%|██████████| 452/452 [00:04<00:00, 106.56batch/s, mse=0.296]
Epoch 2: 100%|██████████| 452/452 [00:03<00:00, 116.18batch/s, mse=0.283]
Epoch 3: 100%|██████████| 452/452 [00:03<00:00, 126.35batch/s, mse=0.278]
Epoch 4: 100%|██████████| 452/452 [00:03<00:00, 116.49batch/s, mse=0.269]
Epoch 5: 100%|██████████| 452/452 [00:04<00:00, 96.41batch/s, mse=0.263] 
Epoch 6: 100%|██████████| 452/452 [00:04<00:00, 112.13batch/s, mse=0.259]
Epoch 7: 100%|██████████| 452/452 [00:03<00:00, 129.28batch/s, mse=0.258]
Epoch 8: 100%|██████████| 452/452 [00:03<00:00, 130.55batch/s, mse=0.254]
Epoch 9: 100%|██████████| 452/452 [00:03<00:00, 135.04batch/s, mse=0.253]


[1.1092199087142944,
 1.1050695180892944,
 1.10002601146698,
 1.0947558879852295,
 1.0895349979400635,
 1.0855776071548462,
 1.0809153318405151,
 1.0760327577590942,
 1.0715341567993164,
 1.0670299530029297,
 1.0627214908599854,
 1.0580672025680542,
 1.0532639026641846,
 1.0480509996414185,
 1.0426621437072754,
 1.0373111963272095,
 1.0321440696716309,
 1.0270891189575195,
 1.0223286151885986,
 1.0176749229431152,
 1.012696623802185,
 1.0077403783798218,
 1.0022815465927124,
 0.9960994124412537,
 0.9892641305923462,
 0.9824613928794861,
 0.9758366346359253,
 0.9673351645469666,
 0.9580157995223999,
 0.9476733207702637,
 0.9356501698493958,
 0.9226036667823792,
 0.9082967042922974,
 0.8928813338279724,
 0.8757085800170898,
 0.8568451404571533,
 0.8361746668815613,
 0.8151617050170898,
 0.7917960286140442,
 0.7683489322662354,
 0.7429015040397644,
 0.7178897857666016,
 0.6956524848937988,
 0.6771388053894043,
 0.6619371771812439,
 0.6535409092903137,
 0.6546143293380737,
 0.6685996055603

In [30]:
reinit_model(model)
batch_size = 32
n_epochs = 10
epsilon = 1e-6
loss_fn_test = nn.MSELoss()
loss_fn_train = epsilonMSELoss(epsilon)
optimizer = optim.Adam(model.parameters(), lr=0.01)



In [31]:
train_model(model, n_epochs, batch_size, X_train, y_train, X_test, y_test, loss_fn_train, loss_fn_test, optimizer)

Epoch 0:   0%|          | 0/452 [00:00<?, ?batch/s]


AttributeError: 'int' object has no attribute 'backward'

In [29]:
# mypy: allow-untyped-defs

from torch.nn.modules.loss import _Loss
from torch import Tensor
from typing import Callable, Optional
from typing_extensions import deprecated
from torch.nn import functional as F






class epsilonMSELoss(_Loss):
    r"""Creates a criterion that measures the mean squared error (squared L2 norm) between
    each element in the input :math:`x` and target :math:`y`.

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = \left( x_n - y_n \right)^2,

    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
    (default ``'mean'``), then:

    .. math::
        \ell(x, y) =
        \begin{cases}
            \operatorname{mean}(L), &  \text{if reduction} = \text{`mean';}\\
            \operatorname{sum}(L),  &  \text{if reduction} = \text{`sum'.}
        \end{cases}

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total
    of :math:`n` elements each.

    The mean operation still operates over all the elements, and divides by :math:`n`.

    The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``.

    Args:
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there are multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (str, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``

    Shape:
        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
        - Target: :math:`(*)`, same shape as the input.

    Examples::

        >>> loss = nn.MSELoss()
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randn(3, 5)
        >>> output = loss(input, target)
        >>> output.backward()
    """
    __constants__ = ['reduction']

    def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None:
        super().__init__(size_average, reduce, reduction)

    def forward(self, input: Tensor, target: Tensor) -> Tensor:
        self.epsilon = 1e-6  
        return min(F.mse_loss(input, target, reduction=self.reduction)-self.epsilon, 0)