In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import time
import torchvision
 
device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
train_dataset = torchvision.datasets.MNIST('../dataset/', train=True, download=True)
test_dataset = torchvision.datasets.MNIST('../dataset/', train=False)

In [2]:
x_train_orig = train_dataset.train_data
y_train_orig = train_dataset.train_labels
x_test_orig = test_dataset.test_data
y_test_orig = test_dataset.test_labels
print('x_train shape : ', x_train_orig.shape)
print('y_train shape : ', y_train_orig.shape)
print('x_test  shape : ', x_test_orig.shape)
print('y_test  shape : ', y_test_orig.shape)


x_train shape :  torch.Size([60000, 28, 28])
y_train shape :  torch.Size([60000])
x_test  shape :  torch.Size([10000, 28, 28])
y_test  shape :  torch.Size([10000])




In [3]:
# data normalization
x_train = (x_train_orig[10000:] / 255.).to(device=device)
y_train = y_train_orig[10000:].to(device=device)
x_val = (x_train_orig[:10000] / 255.).to(device=device)
y_val = y_train_orig[:10000].to(device=device)
x_test = x_test_orig / 255.
y_test = y_test_orig.to(device=device)
 
print('x_train shape : ', x_train.size())
print('y_train shape : ', y_train.size())
print('x_train shape : ', x_val.size())
print('y_train shape : ', y_val.size())
print('x_test  shape : ', x_test.size())
print('y_test  shape : ', y_test.size())

x_train shape :  torch.Size([50000, 28, 28])
y_train shape :  torch.Size([50000])
x_train shape :  torch.Size([10000, 28, 28])
y_train shape :  torch.Size([10000])
x_test  shape :  torch.Size([10000, 28, 28])
y_test  shape :  torch.Size([10000])


In [4]:
trainingset = torch.utils.data.TensorDataset(x_train, y_train)
train_loader = torch.utils.data.DataLoader(trainingset, batch_size=32, shuffle=True)


In [5]:
class Model1(torch.nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.fc1 = torch.nn.Linear(784, 256)
        self.fc2 = torch.nn.Linear(256, 256)
        self.fc3 = torch.nn.Linear(256, 256)
        self.fc4 = torch.nn.Linear(256, 128)
        self.fc5 = torch.nn.Linear(128, 128)
        self.fc6 = torch.nn.Linear(128, 10)
        self.relu = torch.nn.ReLU()
    
    def forward(self, x):
        x = x.view(-1, 784)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.relu(self.fc5(x))
        x = torch.nn.functional.dropout(x, training=self.training)
        x = self.fc6(x)
        return x
 
m1 = Model1()
m1.to('cuda')

Model1(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=128, bias=True)
  (fc5): Linear(in_features=128, out_features=128, bias=True)
  (fc6): Linear(in_features=128, out_features=10, bias=True)
  (relu): ReLU()
)

In [17]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(m1.parameters(), lr=0.001)

for epoch in range(10):
    start = time.time()
    total_loss = 0
 
    for xb, yb in train_loader:
        xb.to('cuda')
        yb.to('cuda')
        pred = m1(xb)
        loss = criterion(pred, yb)
 
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
 
        total_loss += loss.item()
    
    with torch.no_grad():
        pred = m1(x_train)
        acc = pred.data.max(1)[1].eq(y_train.data).sum()/len(x_train) * 100
        loss = criterion(pred, y_train)
    print(f"{time.time() - start} sec - loss : {loss} / acc : {acc}")

7.373476028442383 sec - loss : 0.14350922405719757 / acc : 96.15999603271484
7.841026782989502 sec - loss : 0.11628090590238571 / acc : 96.93599700927734
7.807612895965576 sec - loss : 0.08688635379076004 / acc : 97.6780014038086
7.796353101730347 sec - loss : 0.05559922009706497 / acc : 98.50199890136719
7.862710952758789 sec - loss : 0.04879147559404373 / acc : 98.65599822998047
7.751830816268921 sec - loss : 0.045437026768922806 / acc : 98.75599670410156
7.72198486328125 sec - loss : 0.03608589619398117 / acc : 98.9959945678711
7.6609251499176025 sec - loss : 0.038856394588947296 / acc : 98.86399841308594
9.198440551757812 sec - loss : 0.028890419751405716 / acc : 99.25399780273438
8.619903087615967 sec - loss : 0.028300121426582336 / acc : 99.10999298095703


In [18]:
pred = m1(x_test.to(device=device))
acc = pred.data.max(1)[1].eq(y_test.to(device=device).data).sum()/len(x_test) * 100
loss = criterion(pred, y_test.to(device=device))
print(f"loss : {loss} / acc : {acc}")

loss : 0.11915184557437897 / acc : 97.50999450683594


In [18]:
class CNN_Model(torch.nn.Module):
    def __init__(self):
        super(CNN_Model, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, 3, 1, padding=1)
        self.conv2 = torch.nn.Conv2d(32, 16, 2, 1, padding=1)
        self.fc1 = torch.nn.Linear(784, 10)
    
    def forward(self, x):
        x = x.view(-1, 1, 28, 28)
        x = torch.nn.functional.relu(self.conv1(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = torch.nn.functional.relu(self.conv2(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = torch.nn.Flatten()(x)
        x = self.fc1(x)
        return x
 
m2 = CNN_Model()

In [19]:
x_train.size()

torch.Size([50000, 28, 28])

In [6]:
import torchsummary
import os
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
torchsummary.summary(m1, input_size=x_train.size())
# m2.cuda()
# torchsummary.summary(m2, input_size=x_train.size())

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]         200,960
              ReLU-2                  [-1, 256]               0
            Linear-3                  [-1, 256]          65,792
              ReLU-4                  [-1, 256]               0
            Linear-5                  [-1, 256]          65,792
              ReLU-6                  [-1, 256]               0
            Linear-7                  [-1, 128]          32,896
              ReLU-8                  [-1, 128]               0
            Linear-9                  [-1, 128]          16,512
             ReLU-10                  [-1, 128]               0
           Linear-11                   [-1, 10]           1,290
Total params: 383,242
Trainable params: 383,242
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 149.54
Forwar

In [15]:
m1.__dict__

{'training': True,
 '_parameters': OrderedDict(),
 '_buffers': OrderedDict(),
 '_non_persistent_buffers_set': set(),
 '_backward_hooks': OrderedDict(),
 '_is_full_backward_hook': None,
 '_forward_hooks': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_state_dict_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_load_state_dict_post_hooks': OrderedDict(),
 '_modules': OrderedDict([('fc1',
               Linear(in_features=784, out_features=256, bias=True)),
              ('fc2', Linear(in_features=256, out_features=256, bias=True)),
              ('fc3', Linear(in_features=256, out_features=256, bias=True)),
              ('fc4', Linear(in_features=256, out_features=128, bias=True)),
              ('fc5', Linear(in_features=128, out_features=128, bias=True)),
              ('fc6', Linear(in_features=128, out_features=10, bias=True)),
              ('relu', ReLU())])}

In [20]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(m1.parameters(), lr=0.001)

for epoch in range(10):
    start = time.time()
    total_loss = 0
 
    for xb, yb in train_loader:
        xb.to('cuda')
        yb.to('cuda')
        pred = m2(xb)
        loss = criterion(pred, yb)
 
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
 
        total_loss += loss.item()
    
    with torch.no_grad():
        pred = m1(x_train)
        acc = pred.data.max(1)[1].eq(y_train.data).sum()/len(x_train) * 100
        loss = criterion(pred, y_train)
    print(f"{time.time() - start} sec - loss : {loss} / acc : {acc}")

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [None]:
pred = m1(x_test.to(device=device))
acc = pred.data.max(1)[1].eq(y_test.to(device=device).data).sum()/len(x_test) * 100
loss = criterion(pred, y_test.to(device=device))
print(f"loss : {loss} / acc : {acc}")