# 3. Linear Regression as a Neural Network

In [11]:
import time
import math
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils import data

## Generate Data

In [4]:
def synthetic_data(w, b, num_examples, noise_std=0.01):
    '''
    Function to synthesize linearly related input data X and labels
    y = Xw + b + noise.

    w: 1-D weight tensor; its length sets the number of features.
    b: bias added to every label.
    num_examples: number of rows (examples) to generate.
    noise_std: standard deviation of the zero-mean Gaussian label noise.
        The default 0.01 is a standard deviation (variance 0.0001).

    Returns the tuple (X, y):
    X with shape num_examples × len(w), entries drawn from N(0, 1).
    y with shape num_examples × 1.
    '''
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    # Noise is drawn after X, so the order of random draws matches the
    # original implementation when a seed is set.
    y += torch.normal(0, noise_std, y.shape)
    # matmul with a 1-D w yields a 1-D result; reshape into a column vector.
    return X, y.reshape(-1, 1)

In [6]:
# Ground-truth parameters that the trained model is expected to recover.
true_w = torch.tensor([2, -3.4])
true_b = 4.2

num_examples = 1000

# Generate the dataset used by the data iterator and the training loop.
# Without this assignment `features` and `labels` are undefined on a fresh
# kernel and the later cells raise NameError.
features, labels = synthetic_data(true_w, true_b, num_examples)

## Data Iterator

In [8]:
def load_array(data_arrays, batch_size, is_train=True):
    '''
    Wrap in-memory tensors in a mini-batch PyTorch DataLoader.

    data_arrays: tuple of tensors sharing the same first dimension.
    batch_size: number of examples per mini-batch.
    is_train: when True the examples are reshuffled on every pass.
    '''
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader

In [9]:
# Mini-batch size used for every SGD step.
batch_size = 10
# Shuffled training loader over the (features, labels) pairs.
data_iter = load_array(
    (features, labels),
    batch_size,
    is_train=True,
)

In [10]:
# Pull a single mini-batch from the loader to inspect it: a [features, labels]
# pair. The loader shuffles, so the batch shown differs between runs.
next(iter(data_iter))

[tensor([[ 0.7527,  0.9974],
         [ 0.5937, -1.3569],
         [-1.2931,  0.3148],
         [-1.9019,  1.7125],
         [ 0.6632, -0.4633],
         [ 0.4504,  0.7735],
         [ 0.9808, -0.1554],
         [-0.2836, -0.8983],
         [ 0.0759,  0.2149],
         [-0.0897,  1.2245]]),
 tensor([[ 2.3259],
         [ 9.9995],
         [ 0.5398],
         [-5.4337],
         [ 7.0965],
         [ 2.4871],
         [ 6.6942],
         [ 6.6952],
         [ 3.6243],
         [-0.1271]])]

## Net Model

Our neural network consists of only a single **linear layer** with **2 input neurons** and **1 output neuron**:

In [12]:
# A single fully connected layer: 2 input features -> 1 output.
net = nn.Sequential(
    nn.Linear(in_features=2, out_features=1),
)

## Initialize Weights

In [13]:
# Weights start as small Gaussian values (mean 0, std 0.01).
net[0].weight.data.normal_(mean=0, std=0.01)
# The bias starts at exactly zero; the zeroed tensor is the cell's output.
net[0].bias.data.zero_()

tensor([0.])

## Loss Function

This is actually the cost function: with its default `reduction='mean'`, `nn.MSELoss` returns the mean of the individual squared-error losses over the batch:

In [14]:
# Mean squared error; 'mean' is the default reduction, spelled out here.
loss = nn.MSELoss(reduction='mean')

## Optimizer

In [15]:
# Plain mini-batch stochastic gradient descent over all model parameters.
trainer = torch.optim.SGD(
    params=net.parameters(),
    lr=0.03,
)

## Training

In [16]:
# Number of full passes over the training data.
num_epochs = 3

for epoch in range(num_epochs):
    for X, y in data_iter:
        batch_loss = loss(net(X), y)
        trainer.zero_grad()          # zero out the gradients
        batch_loss.backward()        # compute the gradients with backpropagation
        trainer.step()               # update the parameters
    # Report the loss on the full dataset after each epoch. This is evaluation
    # only, so gradient tracking is disabled to avoid building an unused graph.
    with torch.no_grad():
        epoch_loss = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {epoch_loss:f}')

epoch 1, loss 0.000199
epoch 2, loss 0.000094
epoch 3, loss 0.000092


In [17]:
# Learned weights of the linear layer; compare against true_w.
net[0].weight.data

tensor([[ 2.0003, -3.4004]])

In [18]:
# Learned bias of the linear layer; compare against true_b.
net[0].bias.data

tensor([4.1997])