In [2]:
import torch

In [3]:

import numpy as np
import torchvision
import torch.nn as nn
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [5]:
#download dataset, but i already have it downloaded
dataset = MNIST(root='data/', download=True)
dataset = MNIST(root='data/', train=True, transform=ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!




In [6]:

def split_indices(n, val_pct):
    pct_gone = int(n*val_pct)
    print(pct_gone)
    idxs = np.random.permutation(n)
    #so from 12,000 images to the end - we set the training dataset to those random permutations
    return idxs[pct_gone:], idxs[:pct_gone]

In [7]:
train_indices, val_indices = split_indices(len(dataset), 0.2)

12000


In [8]:
#we will randomize the position of the data, in order to lessen the biases
#we will create batches out of the indices that are specified below
train_sampler = SubsetRandomSampler(train_indices)
#when we do 10 batches, or 4800 images per batch - it kinda sucks
train_loader = DataLoader(dataset, 100, sampler=train_sampler)

In [9]:
val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(dataset, 100, sampler=val_sampler)

In [14]:
#this has one layer so far, imma create two - the layers will be in - hidden/2 hidden /2 - hidden - hidden - out

#layers have specific weights and biases that are optimized whenever the data is passed through the linput fields whenever we optimize them with opt.step()
#784 - 16 - 16 - 10
#weights and the biases are better
class MnistModel(nn.Module):
    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        hidden_half = int(hidden_size/2)
        #hidden layer
        self.linear1 = nn.Linear(in_size, hidden_half)
        #output layer
        self.linear2 = nn.Linear(hidden_half,hidden_size)

        self.linear3 = nn.Linear(hidden_size, out_size)
    def forward(self, xb):
        xb = xb.view(xb.size(0), -1)
        out = self.linear1(xb)
        out = F.relu(out)
        out=self.linear2(out)

        out = F.relu(out)

        out = self.linear3(out)
        return out


In [15]:
input_size = 28**2
hidden_size = 64
num_classes = 10

model = MnistModel(input_size, hidden_size, num_classes)

for t in model.parameters():
    print(t.shape)

torch.Size([32, 784])
torch.Size([32])
torch.Size([64, 32])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [16]:
for img, lbl in train_loader:
    out = model(img)
    loss = F.cross_entropy(out, lbl)
    print(loss.item())
    break

2.298042058944702


In [17]:
#use the gpu if cuda is available
def def_dev():
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')


In [18]:
device = def_dev()

In [19]:
device

device(type='cuda')

In [20]:
def to_device(data, device):
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    # print(type (data))
    return data.to(device, non_blocking = True)


In [21]:
#make sure that the images get ported over to using the gpu
for images, labels in train_loader:
    print(images.shape)
    images = to_device(images, device)
    print(images.device)
    break

torch.Size([100, 1, 28, 28])
cuda:0


In [22]:
#__iter__ means that when you iterate through this will be the response and __len__ means when len() is called
class DeviceDataLoader:
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
    def __iter__(self):
        for b in self.dl:
            #pauses the function - function but remembering the previous value and iteration, so that the next iteration will yield a different item
            # kinda like you are appending to a list while iterating - but there is no list and there is no appending?
            yield to_device(b, self.device)
    def __len__(self):
        return len(self.dl)

In [23]:
train_dl = DeviceDataLoader(train_loader, device)
valid_dl = DeviceDataLoader(val_loader, device)

In [24]:
len(valid_dl)

120

In [25]:
for xb, yb in valid_dl:
    #as you can see it has now put a lot of stuff on the GPU rather than most of it being on the cpu
    print(xb.device)
    print(yb)

cuda:0
tensor([7, 1, 4, 8, 0, 9, 8, 2, 2, 7, 3, 2, 6, 4, 6, 5, 5, 7, 1, 6, 1, 5, 6, 7,
        0, 3, 7, 8, 7, 1, 5, 2, 5, 1, 7, 2, 7, 1, 7, 8, 9, 8, 3, 7, 9, 3, 5, 3,
        4, 5, 3, 0, 2, 3, 8, 0, 1, 5, 3, 7, 8, 0, 9, 2, 4, 8, 8, 2, 0, 0, 0, 8,
        6, 9, 8, 3, 6, 9, 5, 9, 0, 4, 3, 5, 2, 0, 6, 0, 2, 0, 9, 0, 5, 6, 8, 6,
        4, 8, 8, 8], device='cuda:0')
cuda:0
tensor([3, 5, 8, 1, 2, 3, 7, 2, 6, 2, 8, 1, 0, 7, 2, 7, 8, 7, 9, 3, 7, 6, 2, 5,
        9, 0, 5, 8, 9, 8, 9, 5, 8, 8, 9, 6, 2, 8, 7, 3, 1, 5, 5, 1, 3, 3, 3, 9,
        6, 0, 1, 8, 9, 4, 9, 0, 6, 4, 0, 3, 4, 5, 1, 7, 3, 6, 5, 4, 5, 4, 0, 9,
        1, 2, 5, 2, 4, 2, 2, 5, 7, 9, 3, 4, 4, 0, 0, 3, 5, 0, 3, 5, 1, 4, 7, 0,
        3, 9, 3, 6], device='cuda:0')
cuda:0
tensor([3, 7, 7, 6, 4, 0, 8, 3, 1, 7, 6, 4, 3, 4, 0, 2, 6, 4, 0, 9, 0, 6, 8, 2,
        0, 1, 7, 3, 5, 9, 9, 4, 9, 2, 1, 7, 0, 5, 9, 3, 8, 3, 3, 1, 3, 0, 7, 6,
        8, 7, 1, 4, 7, 6, 4, 8, 7, 8, 5, 5, 8, 3, 4, 3, 2, 8, 6, 8, 2, 7, 1, 3,
        9, 8, 5, 7, 7, 

In [26]:
#Training the model
def loss_batch(model, loss_fn, xb, yb, opt):
    out = model(xb)
    loss = loss_fn(out, yb)
    loss.backward()
    opt.step()
    opt.zero_grad()

In [27]:
def fit(epochs, lr, model, loss_fn, train_dl):
    losses, metrics = [], []
    opt = torch.optim.SGD(model.parameters(), lr)
    for epoch in range (epochs):
        for xb, yb in train_dl:
            loss_batch(model, loss_fn, xb, yb, opt)

In [28]:
model = MnistModel(input_size, hidden_size=32, out_size = num_classes)
to_device(model, device)
fit(5, 0.5, model, F.cross_entropy, train_dl)
fit(5, 0.1, model, F.cross_entropy, train_dl)
fit(5, 0.05, model, F.cross_entropy, train_dl)

In [29]:
acc_arr = []
for xb, yb in valid_dl:
    out = model(xb)
    loss = F.cross_entropy(out, yb)
    probs, preds = (torch.max(out, dim=1))
    prediction = (preds[0].item())
    correct_answer = (yb[0].item())
    acc_arr.append(int(prediction == correct_answer))
    

#it seems that adding the extra layer didn't really do anything but it seems that it went up a little bit atleast
print(sum(acc_arr)/len(acc_arr))

0.9416666666666667


In [37]:
test_dataset = MNIST(root='data/', train=False, transform=ToTensor())
print(device)

for xb, yb in test_dataset:
    to_device(xb, device)


print(test_dataset[0][0].device)

cuda
cpu


In [None]:
def make_pred(img, label):
    out = model(img)
    probs, preds = torch.max(out, dim=1)
    print(preds)




In [31]:
print(device)

cuda


In [38]:
#we don't actually need sampling because we are not training the data on anything
test_dl = DataLoader(test_dataset, 100)

print(device)
test_dl = DeviceDataLoader(test_dl, device)

cuda


In [62]:
as you can see, all of the data is now on the gpu - that is great so that we can train the model on it

def make_pred(img, label):
    out = model(img)
    probs, preds = (torch.max(out, dim=1))
    print(preds)
    print(label)

count = 0
for xb, yb in test_dl:
    if count == 2:
        break

    count += 1



<class 'torch.Tensor'>


AttributeError: ignored