In [1]:
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2019, Numenta, Inc.  Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program.  If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------

In [2]:
# Uncomment the following line to install nupic.torch
#!pip install -e git+https://github.com/numenta/nupic.torch.git#egg=nupic.torch

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Seed PyTorch's default random number generator
torch.manual_seed(18)

# Pick the compute device: CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
def train(model, loader, optimizer, criterion):
    """
    Run one optimization pass over every batch produced by ``loader``.
    Intended to be called once per epoch.

    :param model: pytorch model to be trained
    :type model: torch.nn.Module
    :param loader: dataloader configured for the epoch.
    :type loader: :class:`torch.utils.data.DataLoader`
    :param optimizer: Optimizer object used to train the model.
    :type optimizer: :class:`torch.optim.Optimizer`
    :param criterion: loss function, called as ``criterion(output, target)``
    :type criterion: function
    """
    # Put the model in training mode before iterating
    model.train()
    for inputs, labels in loader:
        # Each batch is moved to the notebook-level ``device``
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()


def test(model, loader, criterion):
    """
    Evaluate pre-trained model using given dataset loader.
    Called on every epoch.
    :param model: Pretrained pytorch model
    :type model: torch.nn.Module
    :param loader: dataloader configured for the epoch.
    :type loader: :class:`torch.utils.data.DataLoader`
    :param criterion: loss function to use
    :type criterion: function
    :return: Dict with "accuracy", "loss" and "total_correct"
    """
    model.eval()
    loss = 0
    total_correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss += criterion(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            total_correct += pred.eq(target.view_as(pred)).sum().item()
    
    return {"accuracy": total_correct / len(loader.dataset), 
            "loss": loss / len(loader.dataset), 
            "total_correct": total_correct}

### Parameters

In [5]:
# --- Network architecture (MLP) ---
INPUT_SIZE = 28 * 28         # input width of the first linear layer
HIDDEN_SIZE = 1000           # width of each hidden layer
OUTPUT_SIZE = 10             # width of the output layer

# --- Weight sparsity ---
WEIGHT_SPARSITY = 0.3        # passed to SparseWeights as `weightSparsity`

# --- K-Winners activation ---
K = 100                      # passed to KWinners as `k`
K_INFERENCE_FACTOR = 1.5     # passed to KWinners as `kInferenceFactor`
BOOST_STRENGTH = 1.0         # passed to KWinners as `boostStrength`
BOOST_STRENGTH_FACTOR = 0.9  # passed to KWinners as `boostStrengthFactor`

# --- Optimization ---
LEARNING_RATE = 0.01         # SGD learning rate
MOMENTUM = 0.5               # SGD momentum
EPOCHS = 10                  # total epochs, counting the small-batch first epoch
FIRST_EPOCH_BATCH_SIZE = 4   # batch size used for the first epoch only
TRAIN_BATCH_SIZE = 64        # batch size for the remaining training epochs
TEST_BATCH_SIZE = 1000       # batch size used during evaluation

###  Sparse Linear Model
Create a sparse MLP network with 2 hidden layers. We apply 30% weight sparsity (`WEIGHT_SPARSITY`) to every hidden layer and use a k-winners activation after each of them.

In [6]:
from nupic.torch.modules import KWinners, SparseWeights, Flatten

# Assemble the layer stack in forward order, then wrap it in nn.Sequential
layers = [Flatten()]

# Both hidden layers follow the same recipe: a linear layer wrapped in
# SparseWeights, followed by a KWinners activation. Only the input width
# differs between the first and the second hidden layer.
for in_features in (INPUT_SIZE, HIDDEN_SIZE):
    layers.append(
        SparseWeights(nn.Linear(in_features, HIDDEN_SIZE), weightSparsity=WEIGHT_SPARSITY)
    )
    layers.append(
        KWinners(n=HIDDEN_SIZE, k=K, kInferenceFactor=K_INFERENCE_FACTOR,
                 boostStrength=BOOST_STRENGTH, boostStrengthFactor=BOOST_STRENGTH_FACTOR)
    )

# Output layer: dense projection to the classes, then log-probabilities
layers.append(nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE))
layers.append(nn.LogSoftmax(dim=1))

sparseNN = nn.Sequential(*layers).to(device)

### Load MNIST Dataset

In [7]:
# Convert each image to a tensor, then normalize it with mean 0.1307 and std 0.3081
normalize = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Both splits are read from the local 'data' directory; only the training
# split requests a download.
train_dataset = datasets.MNIST(root='data', train=True, download=True, transform=normalize)
test_dataset = datasets.MNIST(root='data', train=False, transform=normalize)

# Configure data loaders
# NOTE(review): test() aggregates over all samples, so shuffling the test set
# looks unnecessary; it is kept here exactly as in the original.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=True,
)
# Same training data, but served in the small batches used for the first epoch
first_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=FIRST_EPOCH_BATCH_SIZE,
    shuffle=True,
)

### Train
During the first epoch we use a smaller batch size to calculate the duty cycles used by the k-winners function. Once the duty cycles stabilize, we can use larger batch sizes.

In [8]:
# A single SGD optimizer is created here and reused for all later epochs
sgd = optim.SGD(
    sparseNN.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

# First epoch: train on the small-batch loader, then evaluate on the test set
train(sparseNN, first_loader, sgd, F.nll_loss)
results = test(sparseNN, test_loader, F.nll_loss)
print(results)

{'accuracy': 0.955, 'loss': 0.1485403091430664, 'total_correct': 9550}


At this point the duty cycles should be stable, and we can train on larger batch sizes.

In [9]:
# The first epoch already ran with small batches, so EPOCHS - 1 passes remain;
# each one trains on the regular loader and reports the test-set metrics.
for epoch in range(1, EPOCHS):
    train(sparseNN, train_loader, sgd, F.nll_loss)
    results = test(sparseNN, test_loader, F.nll_loss)
    print(results)

{'accuracy': 0.9622, 'loss': 0.12122083358764649, 'total_correct': 9622}
{'accuracy': 0.9636, 'loss': 0.12100773010253907, 'total_correct': 9636}
{'accuracy': 0.9623, 'loss': 0.11938942947387696, 'total_correct': 9623}
{'accuracy': 0.9639, 'loss': 0.1144769775390625, 'total_correct': 9639}
{'accuracy': 0.9633, 'loss': 0.11765272369384766, 'total_correct': 9633}
{'accuracy': 0.9651, 'loss': 0.11464618072509766, 'total_correct': 9651}
{'accuracy': 0.9646, 'loss': 0.11385433578491211, 'total_correct': 9646}
{'accuracy': 0.9643, 'loss': 0.1140714729309082, 'total_correct': 9643}
{'accuracy': 0.9657, 'loss': 0.11362770690917968, 'total_correct': 9657}
