In [8]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms



def create_nn(batch_size=200, learning_rate=0.001, epochs=5,
              log_interval=5):

    #Loading the dataset into the train and test tensors
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=batch_size, shuffle=True)

    class Net(nn.Module):
        # Create a neural network of your choice
        # You can begin with 2 hidden layers and one output layer 
        # With 200 units for each hidden layer and 10 output units
        # insert ReLU activations between the hidden layers
        # and softmax for the output
       
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(28 * 28, 200)
            self.fc2 = nn.Linear(200, 200)
            self.fc3 = nn.Linear(200, 10)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return F.log_softmax(x)

    net = Net()
    print(net)

    # create a stochastic gradient descent optimizer
    
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    # create a loss function
    criterion = nn.NLLLoss()
    
    #global_step = tf.Variable(0, trainable=False)
    #starter_learning_rate = 0.05
    #learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
     #                                  1000, 0.96, staircase=True)

    #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9, use_nesterov=True).minimize(cost, global_step=global_step)
    
    # create a stochastic gradient descent optimizer/ try different optimizers
    # here like ADAM AdaGrad Momentum


    # create a loss function use an NLL loss that mimics crossentropy


    # run the main training loop
    # Every iteration over the complete training set is called an epoch
    for epoch in range(epochs):

        train_loss = 0
        # Train over the dataset for each minibatch
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = Variable(data), Variable(target)
            # resize data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
            data = data.view(-1, 28*28)
            optimizer.zero_grad()
            net_out = net(data)
            loss = criterion(net_out, target)
            loss.backward()
            optimizer.step()
            train_loss+=loss
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                           100. * batch_idx / len(train_loader),
                    loss.data[0]/log_interval))

                train_loss = 0

        # run a test loop
        test_loss = 0
        correct = 0
        for data, target in test_loader:
            data, target = Variable(data), Variable(target)
            data = data.view(-1, 28 * 28)
            net_out = net(data)
            # sum up batch loss
            test_loss += criterion(net_out, target).data[0]
            pred = net_out.data.max(1)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data).sum()

        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


if __name__ == "__main__":
    create_nn()

Net(
  (fc1): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fc3): Linear(in_features=200, out_features=10, bias=True)
)





Test set: Average loss: 0.0041, Accuracy: 7022/10000 (70%)


Test set: Average loss: 0.0032, Accuracy: 7697/10000 (77%)


Test set: Average loss: 0.0028, Accuracy: 8003/10000 (80%)


Test set: Average loss: 0.0026, Accuracy: 8104/10000 (81%)


Test set: Average loss: 0.0025, Accuracy: 8185/10000 (82%)

