# PyTorch - MNIST
### Date:  10/2/18
### Read in smaller file (2500 records) and use csv file

References:
https://nextjournal.com/gkoehler/pytorch-mnist


In [1]:
import torch
import torchvision

In [2]:
%matplotlib inline

In [3]:
import pdb

In [4]:
import time

In [5]:
path = "/Users/reshamashaikh/ds/my_repos/pytorch_work"

In [6]:
t0 = time.time()

## Numpy

In [18]:
file_name="https://s3.amazonaws.com/assets.datacamp.com/production/course_3524/datasets/mnist_2500.csv"

In [22]:
import numpy as np

xy = np.loadtxt(file_name, delimiter=',' , dtype=np.float32)
x_data = Variable(torch.from_numpy(xy[:, 1:]))
y_data = Variable(torch.from_numpy(xy[:, 0]))

NameError: name 'Variable' is not defined

## Pandas

In [8]:
import pandas as pd

In [9]:
mnist = pd.read_csv(file_name, header=None)
print(len(mnist))
print(type(mnist))

2501
<class 'pandas.core.frame.DataFrame'>


In [10]:
mnist.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,5,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.608,0.609,0.61,0.611,0.612,0.613,0.614,0.615,0.616,0.617
1,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,7,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# batch means: how many chunks should the dataset be split it into?  
# batch size is number of training examples in one forward/backward pass
# the larger the batch size, the more memory is required

batch_size_train = 64
batch_size_test = 1000

In [12]:
# We normalize the data using this (mean, std) (0.1307,), (0.3081,).  
# That is the mean, std computed on training set.

### Reference
https://datasciencehongkong.files.wordpress.com/2018/02/slides.pdf

In [None]:
train_loader = torch.utils.data.DataLoader(torchvision.datasets.MNIST
                                           (file_name,  train=True, download=True,
                                            transform=torchvision.transforms.Compose([
                                                torchvision.transforms.ToTensor(),
                                                torchvision.transforms.Normalize((0.1307,), (0.3081,))
                                            ])),
                                           batch_size=batch_size_train, shuffle=True)

In [None]:
test_loader = torch.utils.data.DataLoader(torchvision.datasets.MNIST
                                          (file_name, train=False, download=True,
                                           transform=torchvision.transforms.Compose([
                                               torchvision.transforms.ToTensor(),
                                               torchvision.transforms.Normalize((0.1307,), (0.3081,))
                                           ])),
                                          batch_size=batch_size_test, shuffle=True)

In [None]:
len(train_loader)
type(train_loader)

In [None]:
len(test_loader)
type(test_loader)

In [None]:
# Enumerate() method adds a counter to an iterable and returns it in a form of enumerate object. 
# This enumerate object can then be used directly in for loops or be converted into a list of tuples using list() method.

examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [None]:
type(examples)

In [None]:
i=1
for item in examples:
    if i==1:
        print(item[1])
    i+=1

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure()
for i in range(6):
  plt.subplot(2,3,i+1)
  plt.tight_layout()
  plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
  plt.title("Ground Truth: {}".format(example_targets[i]))
  plt.xticks([])
  plt.yticks([])

In [None]:
# number of forward/backward passes of *all* data
n_epochs = 3

learning_rate = 0.01
momentum = 0.5
log_interval = 10

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

In [None]:
network = Net()
optimizer = optim.SGD(network.parameters(), 
                      lr=learning_rate,
                      momentum=momentum
                     )

In [None]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [None]:
def train(epoch):
  network.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = network(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))
      train_losses.append(loss.item())
      train_counter.append(
        (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
      torch.save(network.state_dict(), path+'/results/model.pth')
      torch.save(optimizer.state_dict(), path+'/results/optimizer.pth')

In [None]:
def test():
  network.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = network(data)
      test_loss += F.nll_loss(output, target, size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [None]:
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()

In [None]:
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.scatter(test_counter, test_losses, color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')
fig

---

In [None]:
# number of forward/backward passes of *all* data
n_epochs = 3

learning_rate = 0.01
momentum = 0.5
log_interval = 10

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

In [None]:
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('../mnist_data',  train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('../mnist_data', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [None]:
len(train_loader)
type(train_loader)

In [None]:
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [None]:
print(batch_idx, example_data, example_targets)

In [None]:
example_data.shape

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure()
for i in range(6):
  plt.subplot(2,3,i+1)
  plt.tight_layout()
  plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
  plt.title("Ground Truth: {}".format(example_targets[i]))
  plt.xticks([])
  plt.yticks([])


In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

In [None]:
network = Net()
optimizer = optim.SGD(network.parameters(), 
                      lr=learning_rate,
                      momentum=momentum
                     )

In [None]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [None]:
def train(epoch):
  network.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    optimizer.zero_grad()
    output = network(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))
      train_losses.append(loss.item())
      train_counter.append(
        (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
      torch.save(network.state_dict(), path+'/results/model.pth')
      torch.save(optimizer.state_dict(), path+'/results/optimizer.pth')

In [None]:
def test():
  network.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = network(data)
      test_loss += F.nll_loss(output, target, size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [None]:
!pwd

In [None]:
!ls -alt results/

In [None]:
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()

In [None]:
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.scatter(test_counter, test_losses, color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')
fig

In [None]:
with torch.no_grad():
  output = network(example_data)

In [None]:
fig = plt.figure()
for i in range(6):
  plt.subplot(2,3,i+1)
  plt.tight_layout()
  plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
  plt.title("Prediction: {}".format(
    output.data.max(1, keepdim=True)[1][i].item()))
  plt.xticks([])
  plt.yticks([])
fig

In [None]:
continued_network = Net()
continued_optimizer = optim.SGD(network.parameters(), lr=learning_rate,
                                momentum=momentum)

In [None]:
network_state_dict = torch.load(path+"/results/model.pth")
continued_network.load_state_dict(network_state_dict)

optimizer_state_dict = torch.load(path+"/results/optimizer.pth")
continued_optimizer.load_state_dict(optimizer_state_dict)

In [None]:
for i in range(4,9):
  test_counter.append(i*len(train_loader.dataset))
  train(i)
  test()

In [None]:
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue')
plt.scatter(test_counter, test_losses, color='red')
plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')
fig

In [None]:
print(type(train_loader))
print(len(train_loader))

print(type(test_loader))
print(len(test_loader))

https://www.aiworkbox.com/lessons/load-mnist-dataset-from-pytorch-torchvision

In [None]:
import torch

In [None]:
print(torch.__version__)

In [None]:
import torchvision

In [None]:
import torchvision.datasets as datasets

In [None]:
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=None)

In [None]:
len(mnist_trainset)

In [None]:
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=None)

In [None]:
len(mnist_testset)

In [None]:
items=0
for data, target in mnist_trainset:
    if items < 5:
        print(target)
        items+=1





https://www.aiworkbox.com/lessons/examine-mnist-dataset-from-pytorch-torchvision

In [None]:
mnist_trainset[0]

In [None]:
# items=0
# for batch_id, (data, label) in enumerate(mnist_trainset):
#     data = Variable(data)
#     target = Variable(label)
#     if items < 5:
#         print (data, target)

In [None]:
type(mnist_trainset[0])

In [None]:
train_image_zero, train_target_zero = mnist_trainset[0]

In [None]:
train_image_zero.show()

In [None]:
print(train_target_zero)