In [3]:
import torch
import torch.nn as nn
import torchtext

In [5]:
## In addition to linear function, on FF NN we add a non-linear function (activation function)
## Ex.: ReLU, sigmoid, Tanh, etc

## Model 1 - 1 hidden layer

In [6]:
## Steps 
# Step 1 - Load the dataset
# Step 2 - Making the dataset iterable
# Step 3 - Create model class
# Step 4 - Instantiate model class
# Step 5 - Instantiate loss class
# Step 6 - Instantiate optimizer class
# Step 7 - Train model

In [7]:
import torchvision.transforms as transforms
import torchvision.datasets as dataset
from torch.autograd import Variable

In [8]:
## Step 1 - loading the dataset
train_dataset = dataset.MNIST(root = './data/',
                             train = True,
                             transform = transforms.ToTensor(),
                             download = True)
test_dataset = dataset.MNIST(root = './data/',
                             train = False,
                             transform = transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [32]:
## Step 2 - Making the dataset iterable
batch_size = 100
n_iters = 5000
num_epochs = n_iters/(len(train_dataset)/batch_size)
num_epochs = int(num_epochs)
num_epochs

8

In [10]:
train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                        batch_size = batch_size,
                                        shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                        batch_size = batch_size,
                                        shuffle = False)

In [11]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x10a0d6668>

In [16]:
## Step 3 - Create model class
class FFNeuralNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FFNeuralNet, self).__init__()
        ## Linear function
        self.fcl = nn.Linear(input_dim, hidden_dim)
        ## Non-linearity
        self.sigmoid = nn.Sigmoid()
        ## Linear function (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        ## Linear function
        out = self.fcl(x)
        ## Non-linearity
        out = self.sigmoid(out)
        ## Linear function (readout)
        out = self.fc2(out)
        return out

In [17]:
## Step 4 - Instantiate model class
input_dim = 28 * 28
input_dim

784

In [19]:
hidden_dim =100
output_dim = 10
model = FFNeuralNet(input_dim, hidden_dim, output_dim)
model

FFNeuralNet (
  (fcl): Linear (784 -> 100)
  (sigmoid): Sigmoid ()
  (fc2): Linear (100 -> 10)
)

In [21]:
## Step 5 - Instantiate the loss class
criterion = nn.CrossEntropyLoss()

In [23]:
## Step 6 - Instantiate the optimizer class
learning_rate = 0.01
optim = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [29]:
print(model.parameters())
print(len(list(model.parameters())))
## Hidden layer
print(list(model.parameters())[0].size())
## FC1 Bias
print(list(model.parameters())[1].size())
## FC2 parameters
print(list(model.parameters())[2].size())
## FC2 bias parameters
print(list(model.parameters())[3].size())

<generator object Module.parameters at 0x10a763b48>
4
torch.Size([100, 784])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])


In [36]:
list(model.parameters())[0]

Parameter containing:
 1.2122e-02 -1.7747e-02 -6.8608e-03  ...  -1.4843e-02 -2.7559e-02 -9.5374e-03
 5.8333e-03  7.9092e-03  4.4225e-03  ...   3.0566e-02  3.3487e-02  2.4239e-02
-2.8226e-02 -2.2675e-02 -2.7151e-03  ...  -1.3625e-02 -1.8539e-02  2.5054e-02
                ...                   ⋱                   ...                
 1.9190e-02 -2.5270e-02 -1.1787e-02  ...   8.9276e-03 -1.9335e-04 -1.9708e-02
 3.2956e-02  3.0581e-02 -1.2646e-03  ...   1.8732e-02 -3.0821e-02  3.2544e-02
-2.4731e-02  2.0252e-03 -2.7786e-02  ...   3.1968e-02 -2.2849e-02 -2.7611e-02
[torch.FloatTensor of size 100x784]

In [33]:
## Step 7 - Train the model
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        ## Load images as variables
        images = Variable(images.view(-1,28*28))
        labels = Variable(labels)

        ## Clear gradients
        optim.zero_grad()

        ## Forward pass to get output
        outputs = model.forward(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iter +=1

        if iter %500 ==0:
            ## Calculate accuracy
            correct = 0
            total =0

            ## Iterate through test dataset
            for images, labels in test_loader:
                ## Variablea
                images = Variable(images.view(-1,28*28))
                ## Forward
                output = model.forward(images)
                ## Get predictions
                _, predicted = torch.max(output.data,1)
                ## Total number of labels
                total += labels.size(0)
                ## Total correct predictions
                correct += (predicted == labels).sum()
            accuracy = 100*correct/total
            ## Print loss
            print('Iteration {}, Loss {}, Accuracy {}'.format(iter,loss.data[0],accuracy))

Iteration 500, Loss 0.8536343574523926, Accuracy 83.19
Iteration 1000, Loss 0.7695441246032715, Accuracy 84.26
Iteration 1500, Loss 0.7384187579154968, Accuracy 85.44
Iteration 2000, Loss 0.746574342250824, Accuracy 86.21
Iteration 2500, Loss 0.5106121897697449, Accuracy 86.8
Iteration 3000, Loss 0.43554842472076416, Accuracy 87.32
Iteration 3500, Loss 0.608245849609375, Accuracy 87.76
Iteration 4000, Loss 0.4576670527458191, Accuracy 88.13
Iteration 4500, Loss 0.4395284950733185, Accuracy 88.37


In [40]:
list(model.parameters())[0][0:-1]

Variable containing:
 1.2122e-02 -1.7747e-02 -6.8608e-03  ...  -1.4843e-02 -2.7559e-02 -9.5374e-03
 5.8333e-03  7.9092e-03  4.4225e-03  ...   3.0566e-02  3.3487e-02  2.4239e-02
-2.8226e-02 -2.2675e-02 -2.7151e-03  ...  -1.3625e-02 -1.8539e-02  2.5054e-02
                ...                   ⋱                   ...                
-1.1572e-02 -1.1441e-02  2.2753e-02  ...  -3.1234e-02 -1.4218e-02 -2.9890e-02
 1.9190e-02 -2.5270e-02 -1.1787e-02  ...   8.9276e-03 -1.9335e-04 -1.9708e-02
 3.2956e-02  3.0581e-02 -1.2646e-03  ...   1.8732e-02 -3.0821e-02  3.2544e-02
[torch.FloatTensor of size 99x784]

## Model B - with Tanh

In [43]:
class FFNeuralNet_tanh(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FFNeuralNet_tanh, self).__init__()
        ## Linear function
        self.fcl = nn.Linear(input_dim, hidden_dim)
        ## Non-linearity
        self.tanh = nn.Tanh()
        ## Linear function (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        ## Linear function
        out = self.fcl(x)
        ## Non-linearity
        out = self.tanh(out)
        ## Linear function (readout)
        out = self.fc2(out)
        return out

In [45]:
model_tanh = FFNeuralNet_tanh(input_dim, hidden_dim, output_dim)
learning_rate = 0.01
optim = torch.optim.SGD(model_tanh.parameters(), lr = learning_rate)

In [46]:
## Step 7 - Train the model
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        ## Load images as variables
        images = Variable(images.view(-1,28*28))
        labels = Variable(labels)

        ## Clear gradients
        optim.zero_grad()

        ## Forward pass to get output
        outputs = model_tanh.forward(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iter +=1

        if iter %500 ==0:
            ## Calculate accuracy
            correct = 0
            total =0

            ## Iterate through test dataset
            for images, labels in test_loader:
                ## Variablea
                images = Variable(images.view(-1,28*28))
                ## Forward
                output = model_tanh.forward(images)
                ## Get predictions
                _, predicted = torch.max(output.data,1)
                ## Total number of labels
                total += labels.size(0)
                ## Total correct predictions
                correct += (predicted == labels).sum()
            accuracy = 100*correct/total
            ## Print loss
            print('Iteration {}, Loss {}, Accuracy {}'.format(iter,loss.data[0],accuracy))

Iteration 500, Loss 0.9005258083343506, Accuracy 82.19
Iteration 1000, Loss 0.5544187426567078, Accuracy 86.61
Iteration 1500, Loss 0.4598257541656494, Accuracy 88.45
Iteration 2000, Loss 0.39953848719596863, Accuracy 89.17
Iteration 2500, Loss 0.47001609206199646, Accuracy 89.73
Iteration 3000, Loss 0.28009968996047974, Accuracy 90.13
Iteration 3500, Loss 0.448478102684021, Accuracy 90.37
Iteration 4000, Loss 0.2781924307346344, Accuracy 90.95
Iteration 4500, Loss 0.49368488788604736, Accuracy 91.23


## Model C - with ReLU

In [52]:
class FFNeuralNet_relu(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FFNeuralNet_relu, self).__init__()
        ## Linear function
        self.fcl = nn.Linear(input_dim, hidden_dim)
        ## Non-linearity
        self.relu = nn.ReLU()
        ## Linear function (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        ## Linear function
        out = self.fcl(x)
        ## Non-linearity
        out = self.relu(out)
        ## Linear function (readout)
        out = self.fc2(out)
        return out

In [53]:
model_relu = FFNeuralNet_relu(input_dim, hidden_dim, output_dim)
learning_rate = 0.01
optim = torch.optim.SGD(model_relu.parameters(), lr = learning_rate)

In [54]:
## Step 7 - Train the model
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        ## Load images as variables
        images = Variable(images.view(-1,28*28))
        labels = Variable(labels)

        ## Clear gradients
        optim.zero_grad()

        ## Forward pass to get output
        outputs = model_relu.forward(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iter +=1

        if iter %500 ==0:
            ## Calculate accuracy
            correct = 0
            total =0

            ## Iterate through test dataset
            for images, labels in test_loader:
                ## Variablea
                images = Variable(images.view(-1,28*28))
                ## Forward
                output = model_relu.forward(images)
                ## Get predictions
                _, predicted = torch.max(output.data,1)
                ## Total number of labels
                total += labels.size(0)
                ## Total correct predictions
                correct += (predicted == labels).sum()
            accuracy = 100*correct/total
            ## Print loss
            print('Iteration {}, Loss {}, Accuracy {}'.format(iter,loss.data[0],accuracy))

Iteration 500, Loss 1.0430890321731567, Accuracy 81.68
Iteration 1000, Loss 0.5451216697692871, Accuracy 86.72
Iteration 1500, Loss 0.4751785397529602, Accuracy 88.75
Iteration 2000, Loss 0.5545175671577454, Accuracy 89.44
Iteration 2500, Loss 0.5666353702545166, Accuracy 90.1
Iteration 3000, Loss 0.42353352904319763, Accuracy 90.42
Iteration 3500, Loss 0.38164427876472473, Accuracy 90.79
Iteration 4000, Loss 0.2604667544364929, Accuracy 91.01
Iteration 4500, Loss 0.5295299291610718, Accuracy 91.4


## Model D - 2 hiddel with ReLU

In [56]:
class FFNeuralNet_relu2(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FFNeuralNet_relu2, self).__init__()
        ## Linear function
        self.fcl = nn.Linear(input_dim, hidden_dim)
        ## Non-linearity
        self.relu1 = nn.ReLU()
        ## Linear function 
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        ## Non-linearity
        self.relu2 = nn.ReLU()
        ## Linear function (readout)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        ## Linear function
        out = self.fcl(x)
        ## Non-linearity
        out = self.relu1(out)
        ## Linear function 
        out = self.fc2(out)
        ## Non-linear 2
        out = self.relu2(out)
        ## Linear (readout)
        out = self.fc3(out)
        return out

In [61]:
model_relu2 = FFNeuralNet_relu2(input_dim, hidden_dim, output_dim)
learning_rate = 0.01
hidden_dim = 200
optim = torch.optim.SGD(model_relu2.parameters(), lr = learning_rate)

In [62]:
## Step 7 - Train the model
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        ## Load images as variables
        images = Variable(images.view(-1,28*28))
        labels = Variable(labels)

        ## Clear gradients
        optim.zero_grad()

        ## Forward pass to get output
        outputs = model_relu2.forward(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iter +=1

        if iter %500 ==0:
            ## Calculate accuracy
            correct = 0
            total =0

            ## Iterate through test dataset
            for images, labels in test_loader:
                ## Variablea
                images = Variable(images.view(-1,28*28))
                ## Forward
                output = model_relu2.forward(images)
                ## Get predictions
                _, predicted = torch.max(output.data,1)
                ## Total number of labels
                total += labels.size(0)
                ## Total correct predictions
                correct += (predicted == labels).sum()
            accuracy = 100*correct/total
            ## Print loss
            print('Iteration {}, Loss {}, Accuracy {}'.format(iter,loss.data[0],accuracy))

Iteration 500, Loss 1.6175955533981323, Accuracy 71.9
Iteration 1000, Loss 0.6780022382736206, Accuracy 81.95
Iteration 1500, Loss 0.4977650046348572, Accuracy 86.57
Iteration 2000, Loss 0.500176727771759, Accuracy 88.56
Iteration 2500, Loss 0.38205471634864807, Accuracy 89.61
Iteration 3000, Loss 0.3401462137699127, Accuracy 90.3
Iteration 3500, Loss 0.3249531686306, Accuracy 90.68
Iteration 4000, Loss 0.3053443431854248, Accuracy 91.25
Iteration 4500, Loss 0.36587414145469666, Accuracy 91.7


## From CPU to GPU

In [70]:
if torch.cuda.is_available():
    print('GPU available')
    model_relu2.cuda()

In [71]:
## Step 7 - Train the model
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        ## Load images as variables
        if torch.cuda.is_available():
            images = Variable(images.view(-1,28*28).cuda())
            labels = Variable(labels.cuda())
        else:
            images = Variable(images.view(-1,28*28))
            labels = Variable(labels)

        ## Clear gradients
        optim.zero_grad()

        ## Forward pass to get output
        outputs = model_relu2.forward(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iter +=1

        if iter %500 ==0:
            ## Calculate accuracy
            correct = 0
            total =0

            ## Iterate through test dataset
            for images, labels in test_loader:
                ## Variablea
                if torch.cuda.is_available():
                    images = Variable(images.view(-1,28*28).cuda())
                else:
                    images = Variable(images.view(-1,28*28))
                ## Forward
                output = model_relu2.forward(images)
                ## Get predictions
                _, predicted = torch.max(output.data,1)
                ## Total number of labels
                total += labels.size(0)
                ## Total correct predictions
                correct += (predicted == labels).sum()
            accuracy = 100*correct/total
            ## Print loss
            print('Iteration {}, Loss {}, Accuracy {}'.format(iter,loss.data[0],accuracy))

Iteration 500, Loss 0.07540509104728699, Accuracy 96.71
Iteration 1000, Loss 0.16496196389198303, Accuracy 96.81


KeyboardInterrupt: 