In [1]:
import torch
from torch import nn,optim
from torchvision import datasets,transforms
from torch.utils.data import dataloader
import matplotlib.pyplot as plt
from tqdm import tqdm






In [2]:
my_transform = transforms.Compose([transforms.ToTensor(),transforms.Lambda(lambda img: img.reshape(784))])

In [3]:
training_set = datasets.MNIST(root="data",train=True,transform=my_transform,download=True)
testing_set  = datasets.MNIST(root="data",train=False,transform=my_transform,download=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 34509165.83it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 118644166.33it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 35673565.23it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 5834771.45it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [32]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 16

In [33]:
class PyTeen(nn.Module):
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(784,512),
        nn.ReLU(),
        nn.Linear(512,512),
        nn.ReLU(),
        nn.Linear(512,10)
    )
    self.loss = nn.CrossEntropyLoss()
    self.optimizer = optim.Adam(self.parameters())
   #self.to(torch.device(DEVICE)) #gpu

  def forward(self,input):
    return(self.layers(input))

  def predict(self,input):
    with torch.no_grad():
      pred = self.forward(input)
      return(torch.argmax(pred,axis=-1))

  def train(self,input,label):
    self.optimizer.zero_grad()
    pred = self.forward(input)
    loss = self.loss(pred,label)
    loss.backward()
    self.optimizer.step()
    return(loss)




In [34]:
training_loader = dataloader.DataLoader(training_set,batch_size=BATCH_SIZE,shuffle=True)
testing_loader = dataloader.DataLoader(testing_set,batch_size=BATCH_SIZE,shuffle=False)

In [35]:
network = PyTeen()
network.to(torch.device(DEVICE))


PyTeen(
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
  (loss): CrossEntropyLoss()
)

In [36]:
# training loop
# we need to make the tensors all .to(torch.device('cuda'))
EPOCHS = 10

for i in range(EPOCHS):
  total_loss = 0
  for input,label in tqdm(training_loader):
      input = input.to(torch.device(DEVICE))
      label = label.to(torch.device(DEVICE))

      loss = network.train(input,label)
      total_loss += loss
  print("EPOCH:",i+1,": ",total_loss)





100%|██████████| 3750/3750 [00:14<00:00, 251.75it/s]


EPOCH: 1 :  tensor(725.2255, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 253.66it/s]


EPOCH: 2 :  tensor(322.3915, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 254.74it/s]


EPOCH: 3 :  tensor(236.0755, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 251.77it/s]


EPOCH: 4 :  tensor(183.6461, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 250.05it/s]


EPOCH: 5 :  tensor(153.6123, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 253.59it/s]


EPOCH: 6 :  tensor(125.8582, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:15<00:00, 246.56it/s]


EPOCH: 7 :  tensor(115.1066, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:15<00:00, 241.00it/s]


EPOCH: 8 :  tensor(106.9534, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 250.57it/s]


EPOCH: 9 :  tensor(95.5028, device='cuda:0', grad_fn=<AddBackward0>)


100%|██████████| 3750/3750 [00:14<00:00, 254.66it/s]

EPOCH: 10 :  tensor(85.6999, device='cuda:0', grad_fn=<AddBackward0>)





In [37]:
# evaluation loop
# we need to make the tensors all .to(torch.device('cuda'))
num_corrects = 0
for input,label in tqdm(testing_loader):
  input = input.to(torch.device(DEVICE))
  label = label.to(torch.device(DEVICE))
  pred = network.predict(input)
  for i in range(len(pred)):
    if(pred[i] == label[i]):
      num_corrects += 1
print('\n',num_corrects/(len(testing_loader)*BATCH_SIZE))


100%|██████████| 625/625 [00:02<00:00, 287.44it/s]


 0.9818



