<a href="https://colab.research.google.com/github/tric4112/HW-1/blob/main/Perceptron_and_DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HW3. Perceptron and DNN


In [4]:
import os
import numpy
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


## Create random data points

* Creating random data points using Scikit Learn’s make_blobs function and assign binary labels {0,1}



In [None]:
from sklearn.datasets import make_blobs

def blob_label(y, label, loc): # assign labels
  target = numpy.copy(y)
  for l in loc:
    target[y == l] = label
  return target

x_train, y_train = make_blobs(n_samples=40, n_features=2, cluster_std=1.5, shuffle=True)
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(blob_label(y_train, 0, [0]))
y_train = torch.FloatTensor(blob_label(y_train, 1, [1,2,3]))

x_test, y_test = make_blobs(n_samples=10, n_features=2, cluster_std=1.5, shuffle=True)
x_test = torch.FloatTensor(x_test)
y_test = torch.FloatTensor(blob_label(y_test, 0, [0]))
y_test = torch.FloatTensor(blob_label(y_test, 1, [1,2,3]))

## Perceptron

### **Task1. Complete the code below to create a perception that includes a linear layer and an activation function (Tanh or ReLU)**

In [3]:
class Perceptron(torch.nn.Module):
  def __init__(self):
    super(Perceptron, self).__init__()
    self.fc = torch.nn.Linear(1, 1) # linear layer
    self.relu = torch.nn.ReLU() # activation function

  def forward(self, x):
    output = self.fc(x)
    output = self.relu(output)
    return output

NameError: ignored

## Multi Layer Perceptron (Neural Network) - Feedforward

In [2]:
class Feedforward(torch.nn.Module):
  def __init__(self, input_size, hidden_size):
    super(Feedforward, self).__init__()
    self.input_size = input_size
    self.hidden_size  = hidden_size
    self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size) # hidden layer
    self.relu = torch.nn.ReLU()                                   # activation
    self.fc2 = torch.nn.Linear(self.hidden_size, 1)               # output layer
    self.sigmoid = torch.nn.Sigmoid()                             # activation
  
  def forward(self, x):
    hidden = self.fc1(x)
    relu = self.relu(hidden)
    output = self.fc2(relu)
    output = self.sigmoid(output)
    return output

NameError: ignored

In [1]:
model = Feedforward(2, 10)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

NameError: ignored

In [None]:
model.eval()
y_pred = model(x_test)
before_train = criterion(y_pred.squeeze(), y_test)
print('Test loss before training' , before_train.item())

Test loss before training 0.6270946264266968


In [None]:
model.train()
epoch = 200
for epoch in range(1, epoch+1):
  optimizer.zero_grad()
  # Forward pass
  y_pred = model(x_train)
  # Compute Loss
  loss = criterion(y_pred.squeeze(), y_train)

  if epoch == 1 or epoch % 1000 == 0:
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
  # Backward pass
  loss.backward()
  optimizer.step()

Epoch 1: train loss: 0.46179771423339844
Epoch 1000: train loss: 0.033059485256671906
Epoch 2000: train loss: 0.014550670981407166


In [None]:
model.eval()
y_pred = model(x_test)
after_train = criterion(y_pred.squeeze(), y_test) 
print('Test loss after Training' , after_train.item())

Test loss after Training 1.4359853267669678


## Build a Deep Neural Network

* Neural networks comprise of layers/modules that perform operations on data.
* The [`torch.nn`](https://pytorch.org/docs/stable/nn.html>) namespace provides all the building blocks you need to build your own neural network. Every module in PyTorch subclasses the [`nn.Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html).
* A neural network is a module itself that consists of other modules (layers). 
* This nested structure allows for building and managing complex architectures easily.

### Define the Class

* We define our neural network by subclassing `nn.Module`, and
initialize the neural network layers in `__init__`. 
* Every `nn.Module` subclass implements the operations on input data in the `forward` method.



### **Task2. Complete the code below to create a deep neural network**
  1. Linear layer: 28x28d input and 512d output
  2. ReLU activation fuction
  3. Linear layer: 512d input and 512d output
  4. ReLU activation fuction
  5. Linear layer: 512d input and 10d output

In [None]:
class NeuralNetwork(nn.Module):
  def __init__(self):
    super(NeuralNetwork, self).__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
      torch.nn.Linear(28*28, 512) , # Linear layer 28x28d input and 512d output
      torch.nn.ReLU(), # ReLU activation fuction
      torch.nn.Linear(512, 512), # Linear layer 512d input and 512d output
      torch.nn.ReLU(), # ReLU activation fuction
      torch.nn.Linear(512, 10), # Linear layer 512d input and 10d output
      
    )

  def forward(self, x):
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

We create an instance of ``NeuralNetwork``, and move it to the ``device``, and print
its structure.



In [None]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


* To use the model, we pass it the input data. This executes the model's `forward`, along with some [`background operations`](https://github.com/pytorch/pytorch/blob/270111b7b611d174967ed204776985cefca9c144/torch/nn/modules/module.py#L866).
* Do not call `model.forward()` directly!
  

&nbsp;
* Calling the model on the input returns a 10-dimensional tensor with raw predicted values for each class.
* We get the prediction probabilities by passing it through an instance of the `nn.Softmax` module.



In [None]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6], device='cuda:0')


### Model Layers

Let's break down the layers in the FashionMNIST model. To illustrate it, we
will take a sample minibatch of 3 images of size 28x28 and see what happens to it as
we pass it through the network.



In [None]:
input_image = torch.rand(3,28,28)
print(input_image.size())

torch.Size([3, 28, 28])


#### nn.Flatten
We initialize the [`nn.Flatten`](https://pytorch.org/docs/stable/generated/torch.nn.Flatten.html>) layer to convert each 2D 28x28 image into a contiguous array of 784 pixel values (the minibatch dimension (at dim=0) is maintained).



In [None]:
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


#### nn.Linear

The [`linear layer`](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html>) is a module that applies a linear transformation on the input using its stored weights and biases.




In [None]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


#### nn.ReLU

Non-linear activations are what create the complex mappings between the model's inputs and outputs.
They are applied after linear transformations to introduce *nonlinearity*, helping neural networks
learn a wide variety of phenomena.

In this model, we use [`nn.ReLU`](https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html>) between our linear layers, but there's other activations to introduce non-linearity in your model.



In [None]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.7755, -0.0077, -0.5341,  0.2664,  0.0128, -0.3345,  0.1125,  0.1688,
          0.3853,  0.0526, -0.0649, -0.0454,  0.0508, -0.2085, -0.6878,  0.0876,
         -0.4228, -0.1164, -0.1676, -0.8830],
        [-0.7386, -0.0374, -0.1294,  0.2914,  0.0307, -0.1254,  0.2041,  0.2000,
          0.2992, -0.1224,  0.0335,  0.0922,  0.1875, -0.1159, -0.7949,  0.2605,
         -0.6964, -0.0189, -0.2484, -0.7415],
        [-0.8393, -0.0947, -0.2897,  0.5096, -0.0736, -0.3794, -0.2635,  0.1074,
          0.0516, -0.0466,  0.2494, -0.1060,  0.2265, -0.3372, -0.6848,  0.3778,
         -0.0957, -0.0304, -0.5149, -0.6913]], grad_fn=<AddmmBackward>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.2664, 0.0128, 0.0000, 0.1125, 0.1688, 0.3853,
         0.0526, 0.0000, 0.0000, 0.0508, 0.0000, 0.0000, 0.0876, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.2914, 0.0307, 0.0000, 0.2041, 0.2000, 0.2992,
         0.0000, 0.0335, 0.0922, 0.1875, 0.0000, 0.000

#### nn.Sequential

[`nn.Sequential`](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html>) is an ordered container of modules. The data is passed through all the modules in the same order as defined. You can use
sequential containers to put together a quick network like `seq_modules`.



In [None]:
seq_modules = nn.Sequential(
  flatten,
  layer1,
  nn.ReLU(),
  nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

#### nn.Softmax

The last linear layer of the neural network returns `logits` - raw values in [-\infty, \infty] - which are passed to the
[`nn.Softmax`](https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html>) module. The logits are scaled to values [0, 1] representing the model's predicted probabilities for each class. `dim` parameter indicates the dimension along which the values must sum to 1.



In [None]:
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
pred_probab

## Model Parameters

Many layers inside a neural network are *parameterized*, i.e. have associated weights and biases that are optimized during training. Subclassing `nn.Module` automatically tracks all fields defined inside your model object, and makes all parameters accessible using your model's `parameters()` or `named_parameters()` methods.

In this example, we iterate over each parameter, and print its size and a preview of its values.




In [None]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
  print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0084, -0.0010,  0.0050,  ...,  0.0341,  0.0044,  0.0208],
        [ 0.0176, -0.0235, -0.0299,  ...,  0.0109, -0.0022, -0.0141]],
       device='cuda:0', grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0297, 0.0265], device='cuda:0', grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0244, -0.0440,  0.0254,  ...,  0.0433,  0.0307,  0.0046],
        [-0.0095, -0.0343, -0.0347,  ...,  0.0346, -0.0295,  0.0402]],
       device='cuda:0', grad_fn=<Slic

## Download data

In [None]:
from torchvision.transforms import ToTensor, Lambda

learning_rate = 1e-3
batch_size = 64
epochs = 5

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


### Define train_loop and test_loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  for batch, (X, y) in enumerate(dataloader):
    X = X.to(device)
    y = y.to(device)
    # Compute prediction and loss
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 100 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  test_loss, correct = 0, 0

  with torch.no_grad():
    for X, y in dataloader:
      X = X.to(device)
      y = y.to(device)
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

## Model traininig

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train_loop(train_dataloader, model, loss_fn, optimizer)
  test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.303256  [    0/60000]
loss: 2.293856  [ 6400/60000]
loss: 2.270924  [12800/60000]
loss: 2.263391  [19200/60000]
loss: 2.255043  [25600/60000]
loss: 2.213623  [32000/60000]
loss: 2.234897  [38400/60000]
loss: 2.194386  [44800/60000]
loss: 2.193148  [51200/60000]
loss: 2.168237  [57600/60000]
Test Error: 
 Accuracy: 41.7%, Avg loss: 2.150582 

Epoch 2
-------------------------------
loss: 2.160332  [    0/60000]
loss: 2.152463  [ 6400/60000]
loss: 2.094164  [12800/60000]
loss: 2.110678  [19200/60000]
loss: 2.060083  [25600/60000]
loss: 1.995009  [32000/60000]
loss: 2.031892  [38400/60000]
loss: 1.948898  [44800/60000]
loss: 1.954193  [51200/60000]
loss: 1.893059  [57600/60000]
Test Error: 
 Accuracy: 57.4%, Avg loss: 1.873372 

Epoch 3
-------------------------------
loss: 1.903510  [    0/60000]
loss: 1.871337  [ 6400/60000]
loss: 1.760513  [12800/60000]
loss: 1.804410  [19200/60000]
loss: 1.690118  [25600/60000]
loss: 1.637441  [32000/600