In [1]:
import torch
import torch.nn as nn
import torchtext

In [2]:
## In addition to the linear function, a feed-forward NN applies a non-linear function (the activation function)
## E.g.: ReLU, sigmoid, tanh, etc.

## Model A - 1 hidden layer with sigmoid

In [3]:
## Steps 
# Step 1 - Load the dataset
# Step 2 - Making the dataset iterable
# Step 3 - Create model class
# Step 4 - Instantiate model class
# Step 5 - Instantiate loss class
# Step 6 - Instantiate optimizer class
# Step 7 - Train model

In [4]:
import torchvision.transforms as transforms
import torchvision.datasets as dataset
from torch.autograd import Variable

In [5]:
## Step 1 - loading the dataset
# Both splits live under ./data/ and are converted with ToTensor();
# download=True is passed for the training split only.
train_dataset = dataset.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = dataset.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
)

In [7]:
# Display the raw image tensor stored on the training dataset object.
# NOTE(review): recent torchvision releases expose this tensor as `.data` and
# warn on `.train_data` - confirm against the installed version.
train_dataset.train_data


(  0  ,.,.) = 
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
      ...         ⋱        ...      
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0

(  1  ,.,.) = 
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
      ...         ⋱        ...      
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0

(  2  ,.,.) = 
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
      ...         ⋱        ...      
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
 ...  

(59997,.,.) = 
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
    0    0    0  ...     0    0    0
      ...         ⋱        ...      
    

In [None]:
## Step 2 - Making the dataset iterable
batch_size = 50
n_iters = 5000
# One pass over the training set takes len(train_dataset) / batch_size
# iterations; the epoch count is n_iters divided by that, truncated to an int.
batches_per_epoch = len(train_dataset) / batch_size
num_epochs = int(n_iters / batches_per_epoch)
num_epochs

In [None]:
# Training batches are reshuffled on every pass; test batches keep their order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Show the DataLoader object itself; this does not iterate over any batches.
train_loader

In [None]:
## Step 3 - Create model class
class FFNeuralNet(nn.Module):
    """Feed-forward network with one sigmoid hidden layer.

    Data flows through ``fcl`` (linear), a sigmoid, and ``fc2`` (linear
    readout). No softmax is applied; ``forward`` returns the raw output of
    the readout layer.

    Args:
        input_dim: number of features in each input row.
        hidden_dim: number of units in the hidden layer.
        output_dim: number of values produced per input row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # input -> hidden projection
        self.fcl = nn.Linear(input_dim, hidden_dim)
        # element-wise non-linearity on the hidden activations
        self.sigmoid = nn.Sigmoid()
        # hidden -> output projection (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout-layer output for the batch ``x``."""
        hidden = self.sigmoid(self.fcl(x))
        return self.fc2(hidden)

In [None]:
## Step 4 - Instantiate model class
# Images are flattened to 28 * 28 values per sample before the first linear
# layer, so that product is the network's input width.
IMAGE_SIDE = 28
input_dim = IMAGE_SIDE * IMAGE_SIDE
input_dim

In [None]:
# 100 hidden units, 10 outputs per input row
hidden_dim, output_dim = 100, 10
model = FFNeuralNet(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
model

In [None]:
## Step 5 - Instantiate the loss class
# The models in this notebook end at a linear readout (no softmax layer);
# this criterion is applied directly to those outputs and the loader's labels.
criterion = nn.CrossEntropyLoss()

In [None]:
## Step 6 - Instantiate the optimizer class
# Plain stochastic gradient descent over every parameter of `model`.
learning_rate = 0.01
optim = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# model.parameters() is a generator, so printing it only shows the generator
# object; materialise it once to look at the individual tensors.
print(model.parameters())
params = list(model.parameters())
print(len(params))
# Unpack in the order the generator yields them: first linear layer's weight
# and bias, then the readout layer's weight and bias.
fc1_weight, fc1_bias, fc2_weight, fc2_bias = params
## FC1 weights (hidden layer)
print(fc1_weight.size())
## FC1 bias
print(fc1_bias.size())
## FC2 weights (readout)
print(fc2_weight.size())
## FC2 bias
print(fc2_bias.size())

In [None]:
# Show the first tensor yielded by model.parameters() (labelled as the hidden
# layer's parameters in the size printout) before any training has happened.
list(model.parameters())[0]

In [None]:
## Step 7 - Train the model
# Mini-batch SGD over train_loader. Every 500 optimizer steps the loss of the
# latest training batch and the accuracy over all of test_loader are printed.
# The epoch count comes from num_epochs (computed in Step 2) instead of a
# hard-coded literal, so the schedule follows n_iters / batch_size.
iteration = 0  # not named `iter`, which would shadow the builtin
for epoch in range(num_epochs):
    for images, labels in train_loader:
        ## Flatten every image to one row of 28*28 values
        images = images.view(-1, 28 * 28)

        ## Clear gradients accumulated by the previous step
        optim.zero_grad()

        ## Forward pass (calling the module rather than .forward runs hooks too)
        outputs = model(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iteration += 1

        if iteration % 500 == 0:
            ## Calculate accuracy on the test set
            correct = 0
            total = 0

            ## No gradients are needed while evaluating
            with torch.no_grad():
                ## Separate names so the training batch is not overwritten
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.view(-1, 28 * 28))
                    ## Index of the largest output is the predicted class
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    ## .item() keeps the running count a plain Python int
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            ## loss is a 0-dim tensor; .item() extracts its Python float
            print('Iteration {}, Loss {}, Accuracy {}'.format(iteration, loss.item(), accuracy))

In [None]:
# Show the first parameter tensor again, sliced to every row except the last.
list(model.parameters())[0][0:-1]

## Model B - with Tanh

In [None]:
class FFNeuralNet_tanh(nn.Module):
    """Feed-forward network with one tanh hidden layer.

    Data flows through ``fcl`` (linear), a tanh, and ``fc2`` (linear
    readout). ``forward`` returns the raw output of the readout layer.

    Args:
        input_dim: number of features in each input row.
        hidden_dim: number of units in the hidden layer.
        output_dim: number of values produced per input row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # input -> hidden projection
        self.fcl = nn.Linear(input_dim, hidden_dim)
        # element-wise non-linearity on the hidden activations
        self.tanh = nn.Tanh()
        # hidden -> output projection (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout-layer output for the batch ``x``."""
        hidden = self.tanh(self.fcl(x))
        return self.fc2(hidden)

In [None]:
# Fresh tanh model with its own SGD optimizer; `optim` is rebound so the
# training cell that follows updates this model's parameters.
learning_rate = 0.01
model_tanh = FFNeuralNet_tanh(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
optim = torch.optim.SGD(params=model_tanh.parameters(), lr=learning_rate)

In [None]:
## Step 7 - Train the model
# Mini-batch SGD over train_loader for model_tanh. Every 500 optimizer steps
# the loss of the latest training batch and the accuracy over all of
# test_loader are printed.
iteration = 0  # not named `iter`, which would shadow the builtin
for epoch in range(num_epochs):
    for images, labels in train_loader:
        ## Flatten every image to one row of 28*28 values
        images = images.view(-1, 28 * 28)

        ## Clear gradients accumulated by the previous step
        optim.zero_grad()

        ## Forward pass (calling the module rather than .forward runs hooks too)
        outputs = model_tanh(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iteration += 1

        if iteration % 500 == 0:
            ## Calculate accuracy on the test set
            correct = 0
            total = 0

            ## No gradients are needed while evaluating
            with torch.no_grad():
                ## Separate names so the training batch is not overwritten
                for test_images, test_labels in test_loader:
                    test_outputs = model_tanh(test_images.view(-1, 28 * 28))
                    ## Index of the largest output is the predicted class
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    ## .item() keeps the running count a plain Python int
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            ## loss is a 0-dim tensor; .item() extracts its Python float
            print('Iteration {}, Loss {}, Accuracy {}'.format(iteration, loss.item(), accuracy))

## Model C - with ReLU

In [None]:
class FFNeuralNet_relu(nn.Module):
    """Feed-forward network with one ReLU hidden layer.

    Data flows through ``fcl`` (linear), a ReLU, and ``fc2`` (linear
    readout). ``forward`` returns the raw output of the readout layer.

    Args:
        input_dim: number of features in each input row.
        hidden_dim: number of units in the hidden layer.
        output_dim: number of values produced per input row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # input -> hidden projection
        self.fcl = nn.Linear(input_dim, hidden_dim)
        # element-wise non-linearity on the hidden activations
        self.relu = nn.ReLU()
        # hidden -> output projection (readout)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout-layer output for the batch ``x``."""
        hidden = self.relu(self.fcl(x))
        return self.fc2(hidden)

In [None]:
# Fresh ReLU model with its own SGD optimizer; `optim` is rebound so the
# training cell that follows updates this model's parameters.
learning_rate = 0.01
model_relu = FFNeuralNet_relu(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
optim = torch.optim.SGD(params=model_relu.parameters(), lr=learning_rate)

In [None]:
## Step 7 - Train the model
# Mini-batch SGD over train_loader for model_relu. Every 500 optimizer steps
# the loss of the latest training batch and the accuracy over all of
# test_loader are printed.
iteration = 0  # not named `iter`, which would shadow the builtin
for epoch in range(num_epochs):
    for images, labels in train_loader:
        ## Flatten every image to one row of 28*28 values
        images = images.view(-1, 28 * 28)

        ## Clear gradients accumulated by the previous step
        optim.zero_grad()

        ## Forward pass (calling the module rather than .forward runs hooks too)
        outputs = model_relu(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iteration += 1

        if iteration % 500 == 0:
            ## Calculate accuracy on the test set
            correct = 0
            total = 0

            ## No gradients are needed while evaluating
            with torch.no_grad():
                ## Separate names so the training batch is not overwritten
                for test_images, test_labels in test_loader:
                    test_outputs = model_relu(test_images.view(-1, 28 * 28))
                    ## Index of the largest output is the predicted class
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    ## .item() keeps the running count a plain Python int
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            ## loss is a 0-dim tensor; .item() extracts its Python float
            print('Iteration {}, Loss {}, Accuracy {}'.format(iteration, loss.item(), accuracy))

## Model D - 2 hidden layers with ReLU

In [None]:
class FFNeuralNet_relu2(nn.Module):
    """Feed-forward network with two ReLU hidden layers of equal width.

    Data flows through ``fcl`` -> ReLU -> ``fc2`` -> ReLU -> ``fc3``, where
    ``fc3`` is the linear readout. ``forward`` returns the raw readout output.

    Args:
        input_dim: number of features in each input row.
        hidden_dim: number of units in each of the two hidden layers.
        output_dim: number of values produced per input row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # first hidden layer: input -> hidden, followed by ReLU
        self.fcl = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        # second hidden layer: hidden -> hidden, followed by ReLU
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        # readout: hidden -> output
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the readout-layer output for the batch ``x``."""
        first_hidden = self.relu1(self.fcl(x))
        second_hidden = self.relu2(self.fc2(first_hidden))
        return self.fc3(second_hidden)

In [None]:
# The wider hidden size has to be set BEFORE the model is built; assigning it
# afterwards leaves the network at the previous hidden_dim value.
hidden_dim = 200
learning_rate = 0.01
model_relu2 = FFNeuralNet_relu2(input_dim, hidden_dim, output_dim)
# `optim` is rebound so the training cell that follows updates this model.
optim = torch.optim.SGD(model_relu2.parameters(), lr = learning_rate)

In [None]:
## Step 7 - Train the model
# Mini-batch SGD over train_loader for model_relu2. Every 500 optimizer steps
# the loss of the latest training batch and the accuracy over all of
# test_loader are printed.
iteration = 0  # not named `iter`, which would shadow the builtin
for epoch in range(num_epochs):
    for images, labels in train_loader:
        ## Flatten every image to one row of 28*28 values
        images = images.view(-1, 28 * 28)

        ## Clear gradients accumulated by the previous step
        optim.zero_grad()

        ## Forward pass (calling the module rather than .forward runs hooks too)
        outputs = model_relu2(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iteration += 1

        if iteration % 500 == 0:
            ## Calculate accuracy on the test set
            correct = 0
            total = 0

            ## No gradients are needed while evaluating
            with torch.no_grad():
                ## Separate names so the training batch is not overwritten
                for test_images, test_labels in test_loader:
                    test_outputs = model_relu2(test_images.view(-1, 28 * 28))
                    ## Index of the largest output is the predicted class
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    ## .item() keeps the running count a plain Python int
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            ## loss is a 0-dim tensor; .item() extracts its Python float
            print('Iteration {}, Loss {}, Accuracy {}'.format(iteration, loss.item(), accuracy))

## From CPU to GPU

In [None]:
# Move the two-hidden-layer model to the GPU when CUDA is usable; on a
# CPU-only machine this cell changes nothing.
if torch.cuda.is_available():
    print('GPU available')
    model_relu2.cuda()
    # The torch.optim documentation asks for the optimizer to be constructed
    # after the model has been moved, so rebuild it over the moved parameters.
    optim = torch.optim.SGD(model_relu2.parameters(), lr = learning_rate)

In [None]:
## Step 7 - Train the model
# Same loop as the CPU version for model_relu2, but every batch is moved to
# the GPU when CUDA is available. Every 500 optimizer steps the loss of the
# latest training batch and the accuracy over all of test_loader are printed.
# NOTE(review): model_relu2 is not re-created here, so this continues from
# whatever state earlier cells left it in - confirm that is intended.
use_cuda = torch.cuda.is_available()  # checked once instead of per batch
iteration = 0  # not named `iter`, which would shadow the builtin
for epoch in range(num_epochs):
    for images, labels in train_loader:
        ## Flatten every image to one row of 28*28 values
        images = images.view(-1, 28 * 28)
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        ## Clear gradients accumulated by the previous step
        optim.zero_grad()

        ## Forward pass (calling the module rather than .forward runs hooks too)
        outputs = model_relu2(images)

        ## Calculate loss function
        loss = criterion(outputs, labels)

        ## Backpropagate
        loss.backward()

        ## Update parameters
        optim.step()

        iteration += 1

        if iteration % 500 == 0:
            ## Calculate accuracy on the test set
            correct = 0
            total = 0

            ## No gradients are needed while evaluating
            with torch.no_grad():
                ## Separate names so the training batch is not overwritten
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28 * 28)
                    if use_cuda:
                        test_images = test_images.cuda()
                        ## Labels must sit on the same device as the predictions,
                        ## otherwise the == comparison below fails
                        test_labels = test_labels.cuda()
                    test_outputs = model_relu2(test_images)
                    ## Index of the largest output is the predicted class
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    ## .item() keeps the running count a plain Python int
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct / total
            ## loss is a 0-dim tensor; .item() extracts its Python float
            print('Iteration {}, Loss {}, Accuracy {}'.format(iteration, loss.item(), accuracy))