# FizzBuzz implementation as a multilayer perceptron with pytorch

Model based on http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/

In [32]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import metrics

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader


N_DIGITS = 10
TRAINING_RANGE = range(101, 1024)
TEST_RANGE = range(1, 100)
BATCH_SIZE = 128
LEARNING_RATE = 0.01
DISPLAY_RATE = 1000

PASSES = 1500


class Net(nn.Module):
    def __init__(self, w_x=N_DIGITS, num_hidden=100, ratio=1):
        super(Net, self).__init__()
        self.num_hidden = num_hidden
        self.main = nn.Sequential(
            nn.Linear(w_x, num_hidden),
            nn.ReLU(True),
            nn.Linear(int(num_hidden / ratio), 4)
        )

    def forward(self, x):
        return self.main(x)


def binary_encode(i, num_digits):
    return torch.Tensor([i >> d & 1 for d in range(num_digits)])


def binary_decode(digits):
    return int(sum([(2 ** i) * x for i, x in enumerate(digits)]))


def get_class(i):
    return int(i % 3 == 0) + int(i % 5 == 0) * 2

def get_fizzbuzz(class_, x):
    return {0: str(x), 1: "fizz", 2: "buzz", 3: "fizzbuzz"}[class_]

def weight_init(m):
    if hasattr(m, 'weight'):
        m.weight.data.normal_(0.0, 0.01)
    if hasattr(m, 'bias'):
        m.bias.data.fill_(0)


if __name__ == "__main__":
    net = Net()
    net.apply(weight_init)
    training_X = torch.stack([binary_encode(i, N_DIGITS) for i in TRAINING_RANGE], 0)
    training_Y = torch.LongTensor([[get_class(i)] for i in TRAINING_RANGE])

    dataset = TensorDataset(training_X, training_Y)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    assert len(training_X) == len(training_Y)

    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

    running_loss = 0.0
    net.train()
    try:
        epoch = 0
        for p in range(PASSES):
            for i, data in enumerate(dataloader, 0):
                x, y = data
                x, y = Variable(x), Variable(y.squeeze())

                output = net(x)
                criterion = nn.CrossEntropyLoss()

                optimizer.zero_grad()
                loss = criterion(output, y)
                loss.backward()
                optimizer.step()

                running_loss += float(loss.data)
                if epoch % DISPLAY_RATE == 0:
                    print("epoch:{:8d}\trunning loss: {:.10f}\tloss: {:.10f}" .format(epoch, running_loss / DISPLAY_RATE, float(loss.data)))
                    running_loss = 0.0
                
                epoch = epoch + 1
    except KeyboardInterrupt:
        pass

    net.eval()
    true = []
    pred = []
    print("testing model on numbers from 0 to 100")
    print("predicted:", end="")
    failures = []
    for i, i_bin in [(i, binary_encode(i, N_DIGITS)) for i in TEST_RANGE]:
        x = Variable(i_bin)
        output = net(x)
        true.append(get_class(i))
        pred.append(np.argmax(output.data.numpy()))
        print(get_fizzbuzz(np.argmax(output.data.numpy()), i) + " ", end="")
        if get_class(i) != np.argmax(output.data.numpy()):
            failures.append((i, get_fizzbuzz(get_class(i), i), get_fizzbuzz(np.argmax(output.data.numpy()), i)))
    print("")
    
    print("failures:")
    for (i, ytrue, ypred) in failures:
          print("i:{}\ty_truth:{}\ty_pred:{}".format(i, ytrue, ypred))

    print("Jaccard similarity score: {:.2f}".format(metrics.jaccard_similarity_score(true, pred)))

epoch:       0	running loss: 0.0013862756	loss: 1.3862756491
epoch:    1000	running loss: 0.9142203544	loss: 0.4237289131
epoch:    2000	running loss: 0.2524000379	loss: 0.1094664186
epoch:    3000	running loss: 0.0928775487	loss: 0.0572104305
epoch:    4000	running loss: 0.0385754984	loss: 0.0231338292
epoch:    5000	running loss: 0.0198464577	loss: 0.0121519035
epoch:    6000	running loss: 0.0116750320	loss: 0.0075830524
epoch:    7000	running loss: 0.0151903483	loss: 0.0055109733
epoch:    8000	running loss: 0.0046500189	loss: 0.0038128782
epoch:    9000	running loss: 0.0035680280	loss: 0.0026144478
epoch:   10000	running loss: 0.0109901348	loss: 0.0015988385
epoch:   11000	running loss: 0.0020416695	loss: 0.0016909739
testing model on numbers from 0 to 100
predicted:1 2 fizz 4 buzz fizz 7 8 fizz buzz 11 fizz 13 buzz fizzbuzz 16 17 fizz 19 buzz fizz 22 23 fizz buzz 26 fizz 28 29 fizzbuzz 31 32 fizz 34 buzz fizz 37 buzz fizz buzz 41 42 43 44 fizzbuzz 46 47 fizz 49 buzz fizz 52 53 fiz