<a href="https://colab.research.google.com/github/nikodrum/rd-homework/blob/master/Intro_to_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# выбрать device на котором запускать
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size = 100
num_classes = 10
input_size = 784

In [2]:
import random
import numpy

def set_seed(seed: int, n_gpu: int):
    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)
        
set_seed(42, 1)

In [3]:
# MNIST dataset 
batch_size = 100

train_dataset = torchvision.datasets.MNIST(root="../data", 
                                           train=True, 
                                           transform=transforms.ToTensor(),  
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root="../data", 
                                          train=False, 
                                          transform=transforms.ToTensor())

# загрузка данных
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
def train(conf):

  criterion = conf["criterion"]
  optimizer = conf["optimizer"]

  total_step = len(train_loader)
  for epoch in range(num_epochs):
      for i, (images, labels) in enumerate(train_loader):  
          # перенос тензоров на определенный device
          images = images.reshape(-1, 28*28).to(device)
          labels = labels.to(device)
          
          # Forward pass
          outputs = model(images)
          loss = criterion(outputs, labels)
          
          # Backpropagation and optimization
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
          
      if epoch % 2 == 0:
          print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

  print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
          .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
  return model
        
def evaluate(model):
  with torch.no_grad():
      correct = 0
      total = 0
      for images, labels in test_loader:
          images = images.reshape(-1, 28*28).to(device)
          labels = labels.to(device)
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

      print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))





In [5]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [6]:
# с чего мы начинаем
hidden_size = 500
num_epochs = 5

learning_rate = 0.001

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
model.train()

model = train({
  "optimizer": torch.optim.Adam(model.parameters(), lr=learning_rate),
  "criterion": nn.CrossEntropyLoss()
})
model.eval()
evaluate(model)

Epoch [1/5], Step [600/600], Loss: 0.0851
Epoch [3/5], Step [600/600], Loss: 0.0494
Epoch [5/5], Step [600/600], Loss: 0.0256
Epoch [5/5], Step [600/600], Loss: 0.0256
Accuracy of the network on the 10000 test images: 98.14 %


In [7]:
# попробуем уменьшить количество весов
hidden_size = 50
num_epochs = 10

learning_rate = 0.01

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
model.train()

model = train({
  "optimizer": torch.optim.Adam(model.parameters(), lr=learning_rate),
  "criterion": nn.CrossEntropyLoss()
})
model.eval()
evaluate(model)

Epoch [1/10], Step [600/600], Loss: 0.1993
Epoch [3/10], Step [600/600], Loss: 0.1694
Epoch [5/10], Step [600/600], Loss: 0.0658
Epoch [7/10], Step [600/600], Loss: 0.0410
Epoch [9/10], Step [600/600], Loss: 0.1383
Epoch [10/10], Step [600/600], Loss: 0.1770
Accuracy of the network on the 10000 test images: 96.62 %


In [8]:
# испробуем стохастический градиентный спуск
hidden_size = 50
num_epochs = 30

learning_rate = 0.01

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
model.train()

model = train({
  "optimizer": torch.optim.SGD(model.parameters(), lr=learning_rate),
  "criterion": nn.CrossEntropyLoss()
})
model.eval()
evaluate(model)

Epoch [1/30], Step [600/600], Loss: 0.8334
Epoch [3/30], Step [600/600], Loss: 0.4566
Epoch [5/30], Step [600/600], Loss: 0.3658
Epoch [7/30], Step [600/600], Loss: 0.2163
Epoch [9/30], Step [600/600], Loss: 0.3385
Epoch [11/30], Step [600/600], Loss: 0.2153
Epoch [13/30], Step [600/600], Loss: 0.2958
Epoch [15/30], Step [600/600], Loss: 0.3524
Epoch [17/30], Step [600/600], Loss: 0.4144
Epoch [19/30], Step [600/600], Loss: 0.2666
Epoch [21/30], Step [600/600], Loss: 0.2217
Epoch [23/30], Step [600/600], Loss: 0.1942
Epoch [25/30], Step [600/600], Loss: 0.1542
Epoch [27/30], Step [600/600], Loss: 0.2179
Epoch [29/30], Step [600/600], Loss: 0.0890
Epoch [30/30], Step [600/600], Loss: 0.2996
Accuracy of the network on the 10000 test images: 93.92 %


In [9]:
# попробуем другой активационный слой
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.act = nn.Tanh()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.act(out)
        out = self.fc2(out)
        return out

In [10]:
# определение гиперпараметров 
hidden_size = 500
num_epochs = 10

learning_rate = 0.001

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
model.train()

model = train({
  "optimizer": torch.optim.Adam(model.parameters(), lr=learning_rate),
  "criterion": nn.CrossEntropyLoss()
})
model.eval()
evaluate(model)

Epoch [1/10], Step [600/600], Loss: 0.2129
Epoch [3/10], Step [600/600], Loss: 0.0776
Epoch [5/10], Step [600/600], Loss: 0.0542
Epoch [7/10], Step [600/600], Loss: 0.0911
Epoch [9/10], Step [600/600], Loss: 0.0214
Epoch [10/10], Step [600/600], Loss: 0.0202
Accuracy of the network on the 10000 test images: 98.02 %


In [11]:
# попробуем докинуть слой с сразу добавим дропаут чтобы не переобучиться
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, hidden_size_2, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.act = nn.Tanh()
        self.fc2 = nn.Linear(hidden_size, hidden_size_2)
        self.drop = nn.Dropout(0.4)
        self.fc3 = nn.Linear(hidden_size_2, num_classes)

    
    def forward(self, x):
        out = self.fc1(x)
        out = self.act(out)
        out = self.fc2(out)
        return out

In [12]:
hidden_size = 200
hidden_size_2 = 100
num_epochs = 15

learning_rate = 0.001

model = NeuralNet(input_size, hidden_size,hidden_size_2,  num_classes).to(device)
model.train()

model = train({
  "optimizer": torch.optim.Adam(model.parameters(), lr=learning_rate),
  "criterion": nn.CrossEntropyLoss()
})
model.eval()
evaluate(model)

Epoch [1/15], Step [600/600], Loss: 0.2051
Epoch [3/15], Step [600/600], Loss: 0.1817
Epoch [5/15], Step [600/600], Loss: 0.0939
Epoch [7/15], Step [600/600], Loss: 0.1005
Epoch [9/15], Step [600/600], Loss: 0.0133
Epoch [11/15], Step [600/600], Loss: 0.0088
Epoch [13/15], Step [600/600], Loss: 0.0135
Epoch [15/15], Step [600/600], Loss: 0.0058
Epoch [15/15], Step [600/600], Loss: 0.0058
Accuracy of the network on the 10000 test images: 97.92 %


In [13]:
# определение гиперпараметров 
hidden_size = 100
hidden_size_2 = 50
num_epochs = 20

learning_rate = 0.01

model = NeuralNet(input_size, hidden_size, hidden_size_2, num_classes).to(device)
model.train()

model = train({
  "optimizer": torch.optim.SGD(model.parameters(), lr=learning_rate),
  "criterion": nn.CrossEntropyLoss()
})
model.eval()
evaluate(model)

Epoch [1/20], Step [600/600], Loss: 1.0277
Epoch [3/20], Step [600/600], Loss: 0.5700
Epoch [5/20], Step [600/600], Loss: 0.3890
Epoch [7/20], Step [600/600], Loss: 0.3000
Epoch [9/20], Step [600/600], Loss: 0.5126
Epoch [11/20], Step [600/600], Loss: 0.3978
Epoch [13/20], Step [600/600], Loss: 0.2190
Epoch [15/20], Step [600/600], Loss: 0.3093
Epoch [17/20], Step [600/600], Loss: 0.2333
Epoch [19/20], Step [600/600], Loss: 0.2116
Epoch [20/20], Step [600/600], Loss: 0.2828
Accuracy of the network on the 10000 test images: 93.36 %


In [13]:
# бейзлан был хорош, так что получилось только его повторить :(