In [None]:
#importing required libraries .. 
import torch 
import numpy as np 
import torch.nn as nn 
import torch.optim as optim 
import torch.nn.functional as F 
from torchvision import datasets, transforms 
from torch.utils.data.sampler import SubsetRandomSampler #for validation test

In [None]:
#Define a transform to convert to images to tensor and normalize
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,),)])
#Load the data: train and test sets 
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data', download=True, train=True, transform=transform) 
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data', download=True, train=False, transform=transform)
#Preparing! for validaion test
indices = list(range(len(trainset)))
np.random.shuffle(indices) 
#to get 20% of the train set
split = int(np.floor(0.2*len(trainset)))
train_sample = SubsetRandomSampler(indices[:split])
valid_sample = SubsetRandomSampler(indices[split:])
#Data Loader  
trainloader = torch.utils.data.DataLoader(trainset, sampler=train_sample, batch_size=64)
validloader = torch.utils.data.DataLoader(trainset, sampler=valid_sample, batch_size=64)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) 

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw



In [None]:
class Classifier(nn.Module):
    def __init__(self):
      super().__init__()
      self.fc1 = nn.Linear(784,256)
      self.fc1_bn=nn.BatchNorm1d(256)
      self.fc2 = nn.Linear(256,128)
      self.fc3 = nn.Linear(128,64)
      self.fc4 = nn.Linear(64,10)
      #defining the 20% dropout
      self.dropout= nn.Dropout(0.2)
    def forward(self,x):
      x = x.view(x.shape[0],-1)
      x = self.dropout(F.relu(self.fc1_bn(self.fc1(x))))
      x = self.dropout(F.relu(self.fc2(x)))
      x = self.dropout(F.relu(self.fc3(x)))
      #not using dropout on output layer
      x = F.log_softmax(x, dim=1)
      return x 

∙ Report the depth effect of different hidden layers.
the shape of neural network and depth effect of hidden layers is in depth effect png file that is uploaded with homeworks

In [10]:
model = Classifier()
#defining the loss function 
criterion=nn.NLLLoss() 
optimizer = optim.SGD(model.parameters(), lr=0.01)
valid_loss_min = np.Inf #using this high value to make sure the update the weight first time 
epochs=40
steps = 0 
model.train() #prep model for training 
train_losses, valid_losses = [],[]
for e in range(epochs):
  running_loss =0 
  valid_loss = 0 
  # train the model# 
  for images, labels in trainloader:
    optimizer.zero_grad()
    log_ps = model(images)
    loss = criterion(log_ps, labels) 
    loss.backward()
    optimizer.step()
    running_loss += loss.item()*images.size(0)
  for images, labels in validloader:
    log_ps = model(images) 
    loss = criterion(log_ps, labels) 
    valid_loss += loss.item()*images.size(0)
  running_loss = running_loss/len(trainloader.sampler) 
  valid_loss = valid_loss/len(validloader.sampler)
  train_losses.append(running_loss) 
  valid_losses.append(valid_loss) 
  print('Epoch: {} \tTraining loss: {:.6f} \tValidation Loss: {:.6f}'.format(e+1, running_loss, valid_loss))
  if valid_loss <= valid_loss_min:
    print('validation loss decreased({:.6f} -->{:.6f}). Saving Model ...'.format(valid_loss_min, valid_loss))
    torch.save(model.state_dict(), 'model.pt') 
    valid_loss_min = valid_loss 

Epoch: 1 	Training loss: 2.613956 	Validation Loss: 1.834539
validation loss decreased(inf -->1.834539). Saving Model ...
Epoch: 2 	Training loss: 1.703396 	Validation Loss: 1.633211
validation loss decreased(1.834539 -->1.633211). Saving Model ...
Epoch: 3 	Training loss: 1.568669 	Validation Loss: 1.547629
validation loss decreased(1.633211 -->1.547629). Saving Model ...
Epoch: 4 	Training loss: 1.468287 	Validation Loss: 1.495579
validation loss decreased(1.547629 -->1.495579). Saving Model ...
Epoch: 5 	Training loss: 1.469109 	Validation Loss: 1.453356
validation loss decreased(1.495579 -->1.453356). Saving Model ...
Epoch: 6 	Training loss: 1.398859 	Validation Loss: 1.432605
validation loss decreased(1.453356 -->1.432605). Saving Model ...
Epoch: 7 	Training loss: 1.343869 	Validation Loss: 1.427290
validation loss decreased(1.432605 -->1.427290). Saving Model ...
Epoch: 8 	Training loss: 1.384457 	Validation Loss: 1.398472
validation loss decreased(1.427290 -->1.398472). Saving

∙ Analyze the dropout technique and report its results.

این برنامه تمام نتایج مربوط به مدل و دقت آن را بیان میکند میتوان برای جواب این سوال از متد دوم داخل کلاس شبکه عصبی مان تمام توابع دراپ اوت را برداریم و نتیجه را ببینیم خواهیم دید که احتمالا شبکه در اپیزود های بالا دچار بیش انطباقی میشود

∙ Use early stopping criteria.

همانگونه که میبینید برنامه در اپیزود های مختلف نتیجه حاصل بر روی داده آموزش و داده اعتبار که بخشی از داده آموزش است را مینویسد در اولین اپیزودی که خطا بر روی داده آموزش کم شد ولی در داده اعتبار خطا بالا رفت باید آموزش را متوقف کرد و تعداد اپیزود های برنامه را تنطیم کرد

∙ Become familiar with batch normalization and report its effects.

این نرمالسازی به جای آنکه بر داده اولیه عمل کند بر خود نورون ها عمل میکند این نرمالسازی بر داده هر نورون قبل از آنکه به تابع عملگر نورون برود اعمال میشود
و پارامتر های آن از خروجی های نورون های همان لایه ( خروجی هایی که هنوز تابع عملگر بر آنها اعمال نشده است) استفاده میکند. این نرمال سازی آموزش را سریعتر و از بیش انطباقی جلوگیری میکند در اینجا در یک خط این نرمالسازی را اعمال کرده ایم میتوان نتایج را با وجود این نرمالسازی و بدون وجود این نرمالسازی بررسی کرد

∙ The model should be tested for L1 and L2 regularization.

برای نرمالسازی ال یک باید قبل از خط

loss.backward()

قطعه کد زیر را افزود

l2_lambda = 0.001

l2_norm = sum(p.abs().sum() for p in model.parameters())

loss = loss + l2_lambda * l2_norm

برای نرمالسازی ال دو باید قبل از خط

loss.backward()

قطعه کد زیر را افزود

l2_lambda = 0.

l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())

loss = loss + l2_lambda * l2_norm

∙ Add a regularization term for the weight parameter using the following sample code:

کافیست قبل از خط

loss.backward()

قطعه کد زیر را بیفزاییم

loss = loss+torch.norm(model.layer.weight, p=2)

In [9]:
#track the test loss 
test_loss = 0 
class_correct = list(0. for i in range(10)) 
class_total = list(0. for i in range(10)) 
model.eval() 
for images, labels in testloader:
  #forword pass 
  output= model(images) 
  #calculate the loss
  loss= criterion(output, labels)
  #update the test loss 
  test_loss += loss.item()*images.size(0) 
  # convert output probabilities to predicted class 
  _, pred = torch.max(output, 1) 
  #compare predictions to the true labes 
  correct = np. squeeze(pred.eq(labels.data.view_as(pred))) 
  #cal culate test accuracy for each object class 
  for i in range(len(labels)):
    label= labels.data[i] 
    class_correct[label] += correct[i].item() 
    class_total[label] +=1
#calcau late and print test loss 
test_loss = test_loss/len(testloader.sampler) 
print('Test Loss: {:.6f}\n'.format(test_loss)) 
for i in range(10):
  if class_total[i] > 0:
      print('Test Accuracy of %5s: %2d%% (%2d/%2d)'%
            (str(i), 100*class_correct[i]/class_total[i],
             np.sum(class_correct), np.sum(class_total[i])))
  else: 
    print('Te st Accuracy of %5s: N/A(no training examples)' % classes[i])
print('\nTest Accuracy (ove rall): %2d%% (%2d/%2d )' % ( 
    100. * np.sum(class_correct) / np.sum(class_total), np.sum(class_correct), np.sum(class_total))) 

Test Loss: 0.571335

Test Accuracy of     0: 79% (8575/1000)
Test Accuracy of     1: 96% (8575/1000)
Test Accuracy of     2: 76% (8575/1000)
Test Accuracy of     3: 85% (8575/1000)
Test Accuracy of     4: 82% (8575/1000)
Test Accuracy of     5: 92% (8575/1000)
Test Accuracy of     6: 60% (8575/1000)
Test Accuracy of     7: 92% (8575/1000)
Test Accuracy of     8: 96% (8575/1000)
Test Accuracy of     9: 94% (8575/1000)

Test Accuracy (ove rall): 85% (8575/10000 )
