In [4]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import random

In [8]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

# If you use GPU, the device should be cuda
#device = 'cpu'
print('Using device:', device)

Using device: cpu


In [3]:
#Loading Minst Dataset
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
train_dataloader = DataLoader(training_data, batch_size=64,shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64)

In [6]:
class NeuralNetwork(nn.Module):
    def __init__(self,input_dim,hidden_dim,dropout):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.dropout = dropout
        ############# Your code here ############
        ## Note:
        ##1. Create an ANN with 1 hidden layer with ReLU activation
        ##(~3 lines of code)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 10),
        )
        #########################################

    def forward(self, x):
        ############# Your code here ############
        ## Note:
        ##1. Flatten the input and pass it through your ANN
        ##2. Add a dropout using torch.nn.functional
        ##(~ 3 lines of code)
        x = self.flatten(x)
        x = self.linear_relu_stack(x)
        logits=F.dropout(x, p=self.dropout, training=self.training)
        #########################################
        return logits

In [9]:
args = {
    'device': device,
    'input_dim':28*28 ,
    'hidden_dim': 64,
    'dropout': 0.2,
    'lr': 0.01,
    'epochs': 10,
}
args

{'device': device(type='cpu'),
 'input_dim': 784,
 'hidden_dim': 64,
 'dropout': 0.2,
 'lr': 0.01,
 'epochs': 1}

In [10]:
############# Your code here ############
## Note:
## 1. Set model to custom class with appropriate inputs
## 2. Print the model
##(~2 lines of code)
model = NeuralNetwork(args['input_dim'], args['hidden_dim'],dropout=args['dropout']).to(device)
print(model)
#########################################
next(model.parameters()).device 

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)


device(type='cpu')

In [11]:
def train_loop(dataloader, model, loss_fn, optimizer,dev):
    # TODO: Implement this function that trains the model by 
    # using the given optimizer and loss_fn.
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        #Shifting data to device
        X,y= X.to(dev),y.to(dev)
        
        ############# Your code here ############
        ## Note:
        ## 1. Zero grad the optimizer
        ## 2. Feed the data into the model
        ## 3. Calculate the loss fn using output and label
        ## 4. Implement Backpropagation
        ## (~5 lines of code)
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        #########################################

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


In [12]:
def test_loop(dataloader, model, loss_fn,dev):
    # The output of model on all data
    # TODO: Implement this function that tests the model
  
    model.eval()

    # The output of model on all data
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            #Shifting data to device
            X,y= X.to(dev),y.to(dev)
            ############# Your code here ############
            ## Note:
            ## Calculate the ouptut
            ## (~1 line of code)
            pred = model(X)
            #########################################
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    #print("size",size)
    #print("before returning corr",correct)
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct


In [13]:
%%time
import copy

############# Your code here ############
## Note:
## 1.Set the loss_fn to CE Loss
##2.Set the optimizer
##(~2 lines of code)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=args['lr'])
#########################################

best_model = None
best_test_acc = 0


for t in range(args['epochs']):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer,device)
    test_acc=test_loop(test_dataloader, model, loss_fn,device)
    
    ############# Your code here ############
    ## Note:
    ##1.Set a condition to determine the best_test_acc
    ##2. Using copy.deepcopy() save the best_model
    ##(~3 lines of code)
    if test_acc > best_test_acc:
        best_test_acc = test_acc
        best_model = copy.deepcopy(model)
    #########################################
print("Done!")


Epoch 1
-------------------------------
loss: 2.300904  [    0/60000]
loss: 2.226557  [ 6400/60000]
loss: 2.077682  [12800/60000]
loss: 1.795405  [19200/60000]
loss: 1.569941  [25600/60000]
loss: 1.386552  [32000/60000]
loss: 1.185731  [38400/60000]
loss: 1.162593  [44800/60000]
loss: 1.357060  [51200/60000]
loss: 1.020014  [57600/60000]
Test Error: 
 Accuracy: 68.6%, Avg loss: 0.946629 

Done!
CPU times: user 18.5 s, sys: 0 ns, total: 18.5 s
Wall time: 9.3 s
