### 0. Import Libraries

In [1]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

### 1. Define Parameters

In [2]:
params = {
    'model_size': 16,  # how big our models will be
    'num_epochs': 2,  # one epoch = one loop through the dataset
    'batch_size': 32,  # size of one batch of training examples
    'eval_every': 20,  # frequency of evaluations, in # of batches
}

### 2. Generating the Data

In [3]:
def get_all_models(length):
    return np.array(list(
        itertools.product([0, 1], repeat=length)
    ))

get_all_models(3)

array([[0, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 1],
       [1, 0, 0],
       [1, 0, 1],
       [1, 1, 0],
       [1, 1, 1]])

In [4]:
def most(model):
    return int(sum(model) > 0.5*len(model))  # integers are useful later

def batch_apply(models, quantifier):
    """Applies quantifier function to 2-D array of models,
    where each row corresponds to one model."""
    return np.apply_along_axis(quantifier, 1, models)

batch_apply(get_all_models(3), most)

array([0, 0, 0, 1, 0, 1, 1, 1])

In [5]:
def shuffle_data(models, labels):
    """Shuffles the order of an array of models and of labels."""
    assert len(models) == len(labels), "models and labels must be of same length"
    permutation = np.random.permutation(len(models))
    return models[permutation], labels[permutation]

In [6]:
def get_data(model_size, quantifier, train_split=0.75, shuffle=True):
    """Gets training and test data for quantifier."""
    # get all models and labels
    models = get_all_models(model_size)
    labels = batch_apply(models, quantifier)
    # shuffle them
    if shuffle:
        models, labels = shuffle_data(models, labels)
    # split into train/test
    split_index = int(len(models) * train_split)  # int returns floor / rounds down
    train_models = models[:split_index]  # up to index, not including
    train_labels = labels[:split_index]
    test_models = models[split_index:]  # from index, including
    test_labels = labels[split_index:]
    return train_models, train_labels, test_models, test_labels

In [7]:
np.unique(batch_apply(get_all_models(16), lambda seq: sum(seq) >=3), return_counts=True)

(array([False,  True]), array([  137, 65399]))

In [8]:
get_data(3, most)

(array([[1, 1, 1],
        [0, 1, 1],
        [1, 0, 0],
        [1, 1, 0],
        [1, 0, 1],
        [0, 0, 1]]), array([1, 1, 0, 1, 1, 0]), array([[0, 0, 0],
        [0, 1, 0]]), array([0, 0]))

### 3. Build Model

In [9]:
class FFNN(nn.Module):  # all models in PyTorch extend nn.Module
    
    def __init__(self, input_size, output_size):
        super(FFNN, self).__init__()
        
        self.layer1 = nn.Linear(input_size, 32)  # first hidden layer has 32 units
        self.layer2 = nn.Linear(32, 32)  # as does second
        self.output = nn.Linear(32, output_size)
        
    def forward(self, models):  # note: forward can take any number of arguments
        x = torch.as_tensor(models, dtype=torch.float)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = self.output(x)
        return F.softmax(x, dim=1)  # softmax converts to a probability distribution

### 4. Train Model

In [10]:
# get the data
train_models, train_labels, test_models, test_labels = get_data(
    params['model_size'], most)
# get the model
model = FFNN(params['model_size'], 2)  # 2 outputs: False/True

In [11]:
model

FFNN(
  (layer1): Linear(in_features=16, out_features=32, bias=True)
  (layer2): Linear(in_features=32, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=2, bias=True)
)

In [12]:
model(train_models)

tensor([[0.4879, 0.5121],
        [0.4775, 0.5225],
        [0.4724, 0.5276],
        ...,
        [0.5016, 0.4984],
        [0.4793, 0.5207],
        [0.4723, 0.5277]], grad_fn=<SoftmaxBackward>)

In [13]:
num_batches = int(len(train_models) / params['batch_size'])
# get an optimizer
opt = torch.optim.Adam(model.parameters())

for epoch in range(params['num_epochs']):
    # shuffle the training data each epoch
    train_models, train_labels = shuffle_data(train_models, train_labels)
    model.train()  # for our model, this has no effect, but is good practice
    
    # individual training steps!
    for batch_num in range(num_batches):
        # batch the data
        batch_models = train_models[batch_num*params['batch_size']:(batch_num+1)*params['batch_size']]
        batch_labels = train_labels[batch_num*params['batch_size']:(batch_num+1)*params['batch_size']]
        
        # get model's output
        model_probs = model(batch_models)  # calls .forward
        
        # zero the gradients
        opt.zero_grad()
        # calculate loss
        loss = F.cross_entropy(model_probs,
                               torch.as_tensor(batch_labels))
        loss.backward()  # computes the gradients!
        opt.step()  # updates the parameters
        
        if (batch_num + 1) % params['eval_every'] == 0:
            with torch.no_grad():  # speeds things up
                model.eval()  # again, no effect on our model, but good practice
                model_probs = model(test_models).numpy()
                model_predictions = model_probs.argmax(axis=1).flatten()
                # 1 if correct prediction, 0 otherwise
                correct = (model_predictions == test_labels).astype(int)
                print('Test set accuracy; after epoch {}, batch {}: {}'.format(
                    epoch, batch_num+1,
                    sum(correct) / len(correct)
                ))
            model.train()

Test set accuracy; after epoch 0, batch 20: 0.5694580078125
Test set accuracy; after epoch 0, batch 40: 0.6025390625
Test set accuracy; after epoch 0, batch 60: 0.6021728515625
Test set accuracy; after epoch 0, batch 80: 0.67486572265625
Test set accuracy; after epoch 0, batch 100: 0.73822021484375
Test set accuracy; after epoch 0, batch 120: 0.852294921875
Test set accuracy; after epoch 0, batch 140: 0.7327880859375
Test set accuracy; after epoch 0, batch 160: 0.868896484375
Test set accuracy; after epoch 0, batch 180: 0.8724365234375
Test set accuracy; after epoch 0, batch 200: 0.9051513671875
Test set accuracy; after epoch 0, batch 220: 0.9010009765625
Test set accuracy; after epoch 0, batch 240: 0.90936279296875
Test set accuracy; after epoch 0, batch 260: 0.9344482421875
Test set accuracy; after epoch 0, batch 280: 0.9317626953125
Test set accuracy; after epoch 0, batch 300: 0.94281005859375
Test set accuracy; after epoch 0, batch 320: 0.95306396484375
Test set accuracy; after epo