In [36]:
import torchvision
import torch
import random
from torchvision import transforms

# Seed Python's RNG so the shuffle below (and any later use of the `random`
# module) gives the same result after a kernel restart.
random.seed(0)

convert_tensor = transforms.ToTensor()
ds = torchvision.datasets.MNIST('mnist/', download=True)

# Binary task: digit 8 is the positive class (+1), every other digit is the
# negative class (-1). Each record is (flattened tensor, label, original item x).
# Both lists are filled in a single pass so the dataset is only iterated once.
data_negative = []
data_positive = []
for x, y in ds:
    if y == 8:
        data_positive.append((convert_tensor(x).flatten(), 1, x))
    else:
        data_negative.append((convert_tensor(x).flatten(), -1, x))

all_data = data_negative + data_positive
random.shuffle(all_data)

In [37]:
def reset(data):
    """Unpack ``(feature, label, image)`` records into batched containers.

    Args:
        data: iterable of ``(feature_tensor, label, image)`` triples. The
            feature tensors must share one shape so they can be stacked.

    Returns:
        A ``(features, labels, images)`` tuple:
          * ``features`` -- the feature tensors stacked along a new first
            dimension (one row per record), placed on the GPU when CUDA is
            available and on the CPU otherwise. It never shares storage with
            the tensors in ``data``.
          * ``labels`` -- a tensor built from the labels with
            ``torch.as_tensor``; it is not moved to the GPU.
          * ``images`` -- list of the third element of every record, in order.
    """
    features = []
    labels = []
    images = []
    for feature, label, image in data:
        features.append(feature)
        labels.append(label)
        images.append(image)

    # Stack first (this already copies the data), then do ONE device transfer
    # instead of one small transfer per sample.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    features = torch.stack(features).to(device)
    labels = torch.as_tensor(labels)
    return features, labels, images

In [38]:
def prepare_data(features, labels, num_additional_features, train_fraction=0.80):
    """Augment features with random pairwise products and a bias column, then split.

    For each of ``num_additional_features`` new columns, two original column
    indices are drawn with ``random.randrange`` (they may coincide) and the new
    column is the element-wise product of those two original columns. A final
    column of ones is appended after the products.

    Args:
        features: 2-D tensor, one row per sample.
        labels: sequence/tensor with one entry per row of ``features``.
        num_additional_features: number of product columns to append (may be 0).
        train_fraction: fraction of rows (taken from the front) that go into the
            training split; the remainder is the validation split.

    Returns:
        ``(training, validation)``, each a ``(features, labels)`` pair. The
        augmented feature matrix has ``dim + num_additional_features + 1``
        columns and lives on the same device as the input ``features``.
    """
    size = features.shape[0]
    dim = features.shape[1]

    # Draw the index pairs in the same interleaved order (first, second, first,
    # second, ...) so a seeded `random` yields the same pairs as before.
    first_cols = []
    second_cols = []
    for _ in range(num_additional_features):
        first_cols.append(random.randrange(dim))
        second_cols.append(random.randrange(dim))

    device = features.device
    first_idx = torch.tensor(first_cols, dtype=torch.long, device=device)
    second_idx = torch.tensor(second_cols, dtype=torch.long, device=device)

    # Selecting columns by index gives the same values as multiplying by a
    # one-hot selection matrix, without building the matrices or doing a matmul.
    products = features[:, first_idx] * features[:, second_idx]

    # Bias column of ones, created with the same dtype/device as the input.
    bias = features.new_ones(size, 1)
    features = torch.cat((features, products, bias), 1)

    split = int(size * train_fraction)
    training = (features[:split], labels[:split])
    validation = (features[split:], labels[split:])
    return training, validation

In [43]:
def train(data, num_iterations):
    """Fit a linear separator with the perceptron update rule.

    Starting from a zero weight vector, the samples are scanned in order;
    whenever ``label * dot(model, sample) <= 0`` the sample (scaled by its
    label) is added to the weights. Scanning repeats until a full pass makes
    no update or ``num_iterations`` updates have been applied.

    Args:
        data: ``(features, labels)`` where ``features`` is a 2-D tensor with one
            row per sample and ``labels[i]`` is the target for row ``i``.
        num_iterations: maximum number of weight UPDATES (not passes over the
            data). A value <= 0 returns the zero vector.

    Returns:
        1-D weight tensor with ``features.shape[1]`` entries, on the same
        device and with the same dtype as ``features``.
    """
    features, labels = data
    size = features.shape[0]
    dim = features.shape[1]

    # Allocate the weights where the features live instead of assuming a GPU.
    model = torch.zeros(dim, dtype=features.dtype, device=features.device)

    iteration = 0
    updated = True
    while iteration < num_iterations and updated:
        updated = False
        for i in range(size):
            sample = features[i]
            target = labels[i]
            margin = target * torch.dot(model, sample)
            if margin.item() <= 0:
                # Misclassified (or exactly on the boundary): move the weights
                # toward this sample's side.
                model += target * sample
                iteration += 1
                updated = True
                if iteration >= num_iterations:
                    break

    return model

In [44]:
def validate(data, model):
    """Return the misclassification rate of a linear model on a data set.

    A sample is predicted as ``+1`` when ``dot(model, sample) >= 0`` and as
    ``-1`` otherwise; the prediction is wrong when it differs from the label.

    Args:
        data: ``(features, labels)`` where ``features`` is a 2-D tensor with one
            row per sample and ``labels`` is a tensor with one entry per row.
        model: 1-D weight tensor with ``features.shape[1]`` entries.

    Returns:
        Fraction of misclassified samples as a Python float in ``[0, 1]``, or
        ``nan`` when the data set is empty (instead of dividing by zero).
    """
    features, labels = data
    size = features.shape[0]
    if size == 0:
        return float("nan")

    # Score every sample in one matrix-vector product rather than one dot
    # product (plus host round-trips) per row.
    scores = features @ model

    # Labels may live on a different device than the features; bring them over.
    labels = labels.to(scores.device)

    # Map the boolean "score >= 0" to +1 / -1 in the labels' dtype.
    predictions = (scores >= 0).to(labels.dtype) * 2 - 1

    incorrect = int((predictions != labels).sum().item())
    error_rate = float(incorrect) / size
    return error_rate

In [45]:
def train_and_validate(num_additional_features, num_iterations=10000):
    """Run one experiment and print its training and validation error.

    Rebuilds the tensors from the module-level ``all_data``, augments them with
    ``num_additional_features`` extra columns via ``prepare_data``, trains a
    model on the training split and evaluates it on both splits.

    Args:
        num_additional_features: number of extra feature columns to request
            from ``prepare_data``.
        num_iterations: update budget handed to ``train``.

    Returns:
        None. The result is reported with a single ``print``.
    """
    # The images returned by reset() are not needed for training/evaluation.
    features, labels, _ = reset(all_data)
    training, validation = prepare_data(features, labels, num_additional_features)

    model = train(training, num_iterations)
    training_error = validate(training, model)
    validation_error = validate(validation, model)

    print(f"Num_additional_features = {num_additional_features}, training_error = {training_error:2.2%}, validation_error = {validation_error:2.2%}")

In [48]:
# Sweep the number of extra features from 0 up to (but not including) 100000
# in steps of 1000, printing the errors for each setting.
for i in range(0, 100000, 1000):
    train_and_validate(i)

Num_additional_features = 0, training_error = 6.15%, validation_error = 6.29%
Num_additional_features = 1000, training_error = 4.69%, validation_error = 4.92%
Num_additional_features = 2000, training_error = 4.36%, validation_error = 4.29%


RuntimeError: ignored