In [1]:
import os
import torch
from torch import nn ,optim
from torch.utils.data import DataLoader,Dataset, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose,transforms
import matplotlib.pyplot as plt
import pandas as pd
from torchvision.io import read_image
import numpy as np

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
#device = 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [3]:
from pathlib import Path
import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
        content = requests.get(URL + FILENAME).content
        (PATH / FILENAME).open("wb").write(content)

In [4]:
import pickle
import gzip

with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
y_train

array([5, 0, 4, ..., 8, 4, 8], dtype=int64)

In [5]:
x_train, y_train, x_valid, y_valid = map(
    torch.cuda.FloatTensor, (x_train, y_train, x_valid, y_valid))

In [6]:
size = x_train.shape[0]
size2 = x_valid.shape[0]

In [7]:
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=64)
test_ds = TensorDataset(x_valid, y_valid)
test_dl = DataLoader(test_ds, batch_size=64)

In [14]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(32, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
            nn.ReLU()
        )
    
    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = nn.functional.relu(self.conv1(xb))
        xb = nn.functional.relu(self.conv2(xb))
        xb = nn.functional.relu(self.conv3(xb))
        xb = nn.functional.avg_pool2d(xb, 4)
        xb = self.flatten(xb)
        xb = self.linear_relu_stack(xb)
        return xb.view(-1, xb.size(1))


In [15]:
lr = 0.01
model = CNN().to(device)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
softmax = nn.LogSoftmax(dim=1)
loss_fn = nn.NLLLoss()

https://stackoverflow.com/questions/59013109/runtimeerror-input-type-torch-floattensor-and-weight-type-torch-cuda-floatte/59013131

https://jbencook.com/pytorch-numpy-conversion/

https://neptune.ai/blog/pytorch-loss-functions

https://discuss.pytorch.org/t/runtimeerror-expected-object-of-scalar-type-long-but-got-scalar-type-float-when-using-crossentropyloss/30542

In [16]:
for epoch in range(10):
    for batch, (xb, yb) in enumerate(train_dl):
        #print(len(xb))
        X, y = xb.to(device), yb.to(device)
        # Compute prediction error
        pred = model(X)
        #print(pred.shape,y.shape)
        loss = loss_fn(softmax(pred), y.long())
        loss.backward()
        with torch.no_grad():
            for p in model.parameters():
                p -= p.grad * lr
            model.zero_grad()
        if batch % 64 == 0 and epoch%2 ==0:
            loss, current = loss.item(), (batch)*len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
       

loss: 2.306357  [    0/50000]
loss: 2.306239  [ 4096/50000]
loss: 2.303768  [ 8192/50000]
loss: 2.305526  [12288/50000]
loss: 2.303134  [16384/50000]
loss: 2.303462  [20480/50000]
loss: 2.304765  [24576/50000]
loss: 2.302166  [28672/50000]
loss: 2.305210  [32768/50000]
loss: 2.301979  [36864/50000]
loss: 2.305450  [40960/50000]
loss: 2.302274  [45056/50000]
loss: 2.302349  [49152/50000]
loss: 2.303608  [    0/50000]
loss: 2.304202  [ 4096/50000]
loss: 2.301545  [ 8192/50000]
loss: 2.303653  [12288/50000]
loss: 2.302799  [16384/50000]
loss: 2.303573  [20480/50000]
loss: 2.304729  [24576/50000]
loss: 2.302855  [28672/50000]
loss: 2.304685  [32768/50000]
loss: 2.302290  [36864/50000]
loss: 2.304857  [40960/50000]
loss: 2.302264  [45056/50000]
loss: 2.302271  [49152/50000]
loss: 2.303539  [    0/50000]
loss: 2.304112  [ 4096/50000]
loss: 2.301568  [ 8192/50000]
loss: 2.303594  [12288/50000]
loss: 2.302756  [16384/50000]
loss: 2.303523  [20480/50000]
loss: 2.304657  [24576/50000]
loss: 2.30

KeyboardInterrupt: 

In [11]:
test_loss, correct = 0, 0
print(size2)
for (xb, yb) in test_dl:
    X, y = xb.to(device), yb.to(device)
    pred = model(X)
    loss = loss_fn(softmax(pred), y.long())
    test_loss += loss.item()
    correct += (pred.argmax(1) == y).type(torch.float).sum().item()

test_loss /= size2
correct /= size2
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


10000
Test Error: 
 Accuracy: 9.9%, Avg loss: 0.036151 



In [12]:
torch.save(model, 'CNN.pth')