<a href="https://colab.research.google.com/github/littell/scratchwork/blob/master/Tsetlin_Lottery_Machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing PyTorch 1.1.0

While we don't need autograd during the reinforcement learning phase, we can make use of the GPU acceleration, and having the whole thing be a PyTorch module means we can train it as an ordinary neural network afterward.

In [0]:
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag

platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\10/'    
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'
print(f"Device = {accelerator}")
version='1.1.0'
torch_url=f"http://download.pytorch.org/whl/{accelerator}/torch-{version}-{platform}-linux_x86_64.whl"
!pip install -U {torch_url} torchvision

Device = cu100
Collecting torch==1.1.0 from http://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
[?25l  Downloading http://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl (770.7MB)
[K     |████████████████████████████████| 770.7MB 2.1MB/s 
[?25hRequirement already up-to-date: torchvision in /usr/local/lib/python3.6/dist-packages (0.3.0)
Installing collected packages: torch
  Found existing installation: torch 1.1.0
    Uninstalling torch-1.1.0:
      Successfully uninstalled torch-1.1.0
Successfully installed torch-1.1.0


## Import libraries into Python, then build the dataset loaders

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.autograd import Variable
from torchvision import datasets, transforms

In [0]:
class DatasetLoader(object):
  
    def __init__(self, dataset='MNIST', dataset_dir='./data', batch_size=100):
        dataset_ = {
            'MNIST': datasets.MNIST,
            'CIFAR10': datasets.CIFAR10
        }[dataset]
        
        self.size = { 
            'MNIST': 28*28, 
            'CIFAR10': 3*32*32 
        }[dataset]

        transform = {
            'MNIST': transforms.ToTensor(),
            'CIFAR10': transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                ])
        }[dataset]

        train_dataset = dataset_(root=dataset_dir,
                                 train=True,
                                 transform=transform,
                                 download=True)

        self.train = data.DataLoader(dataset=train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True)

        test_dataset = dataset_(root=dataset_dir,
                                 train=False,
                                 transform=transform,
                                 download=True)

        self.test = data.DataLoader(dataset=test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False)
        

## Make the module that we're going to train

In [0]:

class TsetlinSelector(nn.Module):
  
    def __init__(self, size, max=20):
        super(TsetlinSelector, self).__init__()
        self.size = size
        self.max = max
        
        dtype = torch.FloatTensor
        
        # initialize a vector of the appropriate size, drawn from a Bernoulli distribution
        self.automata = nn.Parameter((torch.randn(size) > 0.5).type(dtype), requires_grad=False)
      
    def forward(self, x):
        return x * (self.automata > 0.0).float()
      
    def reward(self, p):
        ''' Reward p% of automata '''
        automata_on = (self.automata > 0.0).float()                  # 1.0 for automata that are "on", 0.0 otherwise
        reward_mask = (torch.rand_like(self.automata) < p).float()   # Only reward p% of the automata
        self.automata += (automata_on * 2 - 1) * reward_mask         # Add 1.0 for automata that are "on", -1.0 otherwise
        self.automata.clamp_(-self.max, self.max+1)
        
    def penalize(self, p):
        ''' Penalize p% of automata '''
        automata_on = (self.automata > 0.0).float()                  # 1.0 for automata that are "on", 0.0 otherwise
        reward_mask = (torch.rand_like(self.automata) < p).float()   # Only penalize p% of the automata
        self.automata -= (automata_on * 2 - 1) * reward_mask         # Add 1.0 for automata that are "on", -1.0 otherwise
        
    def __repr__(self):
        return f"Automata={self.automata.data}, size={self.automata.shape}, type={type(self.automata)}"


In [0]:
import torch.nn.functional as F

      
class Net(nn.Module):
  
    def __init__(self, image_size=28*28, h1_size = 500, h2_size = 100, output_size=10, dropout_rate=0.1):
        super(Net, self).__init__()
        self.image_size, self.output_size = image_size, output_size
        self.h1_size, self.h2_size = h1_size, h2_size
        self.dropout_rate = dropout_rate
        
        """3-Layer Fully-connected NN"""
        #self.net = nn.Sequential(
            #nn.Linear(image_dim, 500),
            #dropout(0.2, 0.19, 500, dropout_method),
            #nn.ReLU(),
            #nn.Linear(500, 100),
            #dropout(0.5, 0.4, 100, dropout_method),
            #nn.ReLU(),
            #nn.Linear(100, 10)
        #)
        ''' The above was the original network, a standard architecture for testing
        regularization methods.  I decomposed it below in order to have more direct
        access to the parameters '''
        
        dtype = torch.FloatTensor
        
        self.m1 = TsetlinSelector(h1_size)
        self.m2 = TsetlinSelector(h2_size)
        
        self.W1 = nn.Parameter(torch.randn(h1_size, image_size).type(dtype), requires_grad=True)
        #torch.nn.init.xavier_uniform_(self.W1.data)
        self.b1 = nn.Parameter(torch.randn(h1_size).type(dtype) * 0.001, requires_grad=True)
        self.W2 = nn.Parameter(torch.randn(h2_size, h1_size).type(dtype), requires_grad=True)
        #torch.nn.init.xavier_uniform_(self.W2.data)
        self.b2 = nn.Parameter(torch.randn(h2_size).type(dtype) * 0.001, requires_grad=True)
        self.W3 = nn.Parameter(torch.randn(output_size, h2_size).type(dtype), requires_grad=True)
        #torch.nn.init.xavier_uniform_(self.W3.data)
        self.b3 = nn.Parameter(torch.randn(output_size).type(dtype) * 0.001, requires_grad=True)
        
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)
        
    
    def forward(self, x):
        x = F.linear(x, self.W1, self.b1)
        #x = self.dropout1(x)
        x = self.m1(x)
        x = nn.ReLU()(x)
        x = F.linear(x, self.W2, self.b2)
        #x = self.dropout2(x)
        x = self.m2(x)
        x = nn.ReLU()(x)
        x = F.linear(x, self.W3, self.b3)
        return x
      
    def reward(self, p):
        self.m1.reward(p)
        self.m2.reward(p)
        
    def penalize(self, p):
        self.m1.penalize(p)
        self.m2.penalize(p)

In [0]:
class Solver(object):
  
    def __init__(self, net, dataset, dropout_rate=0.1, n_epochs=100, lr=0.001):
        self.n_epochs = n_epochs
        self.dataset = dataset
        self.net = net.cuda()
        self.loss_fn = nn.CrossEntropyLoss().cuda()
        self.optimizer = optim.Adam(self.net.parameters(), lr=lr)
             
    def step(self, logits, labels):
        loss = self.loss_fn(logits, labels)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return float(loss.data)
            
    def train(self):
        self.net.train()
        
        for epoch_i in (range(self.n_epochs)):
            epoch_loss = 0
            for images, labels in self.dataset.train:
                images = Variable(images).view(-1, self.net.image_size).cuda()
                labels = Variable(labels).cuda()

                logits = self.net(images)
                
                epoch_loss += self.step(logits, labels)

            epoch_loss /= len(self.dataset.train.dataset)
            print('Epoch %s | loss: %s' % (epoch_i, epoch_loss))
            
    def evaluate(self):
        
        total = 0.
        correct = 0.
        self.net.eval()
        for images, labels in self.dataset.test:
            images = Variable(images).view(-1, self.net.image_size).cuda()

            logits = self.net(images)

            _, predicted = torch.max(logits.data, 1)
            total += labels.size(0) * 1.
            correct += (predicted.cpu().data == labels).sum().item()
        print('=========================================')
        print('Test accuracy: %s' % float(100.0 * correct / total))
                

In [0]:
class ReinforcementSolver(Solver):
  

    def step(self, logits, labels):
        _, predicted = torch.max(logits.data, 1)
        incorrect = (predicted != labels.data).sum().item()
        percent_incorrect = incorrect / len(predicted)
        self.net.reward(0.9 * (1. - percent_incorrect))
        self.net.penalize(0.1 * percent_incorrect)
        return incorrect
          

In [0]:
import random

class MixedSolver(Solver):
  
    def adam_step(self, logits, labels):
        loss = self.loss_fn(logits, labels)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return float(loss.data)
      
    def reinforcement_step(self, logits, labels):
        _, predicted = torch.max(logits.data, 1)
        incorrect = (predicted != labels.data).sum().item()
        percent_incorrect = incorrect / len(predicted)
        self.net.reward(0.9 * (1. - percent_incorrect))
        self.net.penalize(0.1 * percent_incorrect)
        return incorrect
      
    def step(self, logits, labels):
        if random.random() > 0.5:
            return self.adam_step(logits, labels)
        else:
            return self.reinforcement_step(logits, labels)

In [0]:
dataset = DatasetLoader('MNIST', batch_size=10)
net = Net(image_size=28*28, h1_size=1000, h2_size=600)
standard_solver = MixedSolver(net, dataset, n_epochs=100)
standard_solver.train()
standard_solver.evaluate()

Epoch 0 | loss: 7.475450570703335
Epoch 1 | loss: 1.7961884170142026
Epoch 2 | loss: 1.1536926459478587
Epoch 3 | loss: 0.7362711226151829
Epoch 4 | loss: 0.5706952831190033
Epoch 5 | loss: 0.43342907724315
Epoch 6 | loss: 0.36825820870570725
Epoch 7 | loss: 0.2909423584133289
Epoch 8 | loss: 0.24558685042870715
Epoch 9 | loss: 0.20688970821643768
Epoch 10 | loss: 0.17706360710580613
Epoch 11 | loss: 0.14983166409963516
Epoch 12 | loss: 0.1673409612043659
Epoch 13 | loss: 0.1259439014232203
Epoch 14 | loss: 0.10925592018023629
Epoch 15 | loss: 0.11155467434674211
Epoch 16 | loss: 0.10661213779965728
Epoch 17 | loss: 0.09809046063778523
Epoch 18 | loss: 0.10653187368728216
Epoch 19 | loss: 0.08198832884632681
Epoch 20 | loss: 0.0781372562187759
Epoch 21 | loss: 0.08132444439005743
Epoch 22 | loss: 0.08408074555088649
Epoch 23 | loss: 0.07266684997335617
Epoch 24 | loss: 0.0778233510460457
Epoch 25 | loss: 0.06719883419621422
Epoch 26 | loss: 0.05784094525367488
Epoch 27 | loss: 0.051496