In [None]:
import numpy as np
import copy

from lut import *

## Toy example from paper

In [None]:
# Toy training set from the paper: seven samples with four boolean columns each.
X = np.asarray(
    [
        (0, 0, 0, 0),
        (0, 0, 0, 1),
        (0, 0, 0, 1),
        (0, 0, 1, 1),
        (1, 0, 0, 0),
        (1, 1, 0, 0),
        (1, 1, 0, 1),
    ],
    dtype=np.bool_,
)

In [None]:
# LUT constructed with size argument 2 and trained on columns 0 and 1 of the toy set.
# NOTE(review): presumably the last column of X is used as the target -- confirm in lut.py.
lut_0 = Lut(2)
lut_0.train(X, cols=[0, 1])
lut_0  # bare last expression: display the trained LUT

In [None]:
# Second LUT, same size argument, trained on columns 0 and 2 of the toy set.
lut_1 = Lut(2)
lut_1.train(X, cols=[0, 2])
lut_1  # bare last expression: display the trained LUT

In [None]:
# Derive a new training set from the two trained LUTs and the original data.
# NOTE(review): presumably the LUT outputs become the new input columns and the
# target column is carried over -- confirm against training_set_from_luts in lut.py.
new_X = training_set_from_luts([lut_0, lut_1], X)
new_X

In [None]:
# LUT trained on the set derived from lut_0 and lut_1 (no explicit `cols`, so
# train() falls back to its default column selection).
lut_3 = Lut(2)
lut_3.train(new_X)
lut_3  # bare last expression: display the trained LUT

In [None]:
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

# Colors of the active matplotlib property cycle, in cycle order, so plots can
# pick a specific cycle color by index.
color_list = [x["color"] for x in plt.rcParams["axes.prop_cycle"]]

from matplotlib.ticker import MaxNLocator

## Iris Dataset (differentiating class 1 from 2)

In [None]:
# Load the Iris dataset bundled with scikit-learn (features in .data, class ids in .target).
iris = datasets.load_iris()

In [None]:
# Drop class 0, rescale every feature of the remaining samples to [0, 1];
# the features are binarized at 0.5 further down.
scaler = MinMaxScaler(feature_range=(0, 1))
X_iris_scaled = scaler.fit_transform(iris.data[iris.target != 0])

# Boolean target column: True for class 2, False for class 1.
target = (iris.target[iris.target != 0] == 2)[:, None]

# Binarized features followed by the target as the last column.
X_iris = np.concatenate((X_iris_scaled > 0.5, target), axis=1)
X_iris.shape

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test = train_test_split(X_iris, test_size=0.33, random_state=42)

In [None]:
# Single LUT with size argument 4 (X_iris has four binarized feature columns
# plus the target column), trained on the training split.
lut = Lut(4)
lut.train(X_train)
lut  # bare last expression: display the trained LUT

In [None]:
# Accuracy on the training split; the last column of X_train holds the true labels.
# Arguments follow scikit-learn's documented order: accuracy_score(y_true, y_pred).
preds = lut.predict(X_train)
accuracy_score(X_train[:, -1], preds)

In [None]:
# Accuracy on the held-out split; the last column of X_test holds the true labels.
# Arguments follow scikit-learn's documented order: accuracy_score(y_true, y_pred).
preds = lut.predict(X_test)
accuracy_score(X_test[:, -1], preds)

## Custom dataset

In [None]:
num_examples = 1000
dist = 4

# Seeded generator so the two clusters (and every accuracy computed from them)
# are reproducible on Restart & Run All.
cluster_rng = np.random.default_rng(42)

# Cluster "a": both coordinates drawn from N(-dist/2, 1), label 0.
ax1 = cluster_rng.normal(loc=-dist / 2, scale=1.0, size=num_examples)
ax2 = cluster_rng.normal(loc=-dist / 2, scale=1.0, size=num_examples)
ay = np.zeros((num_examples,), dtype=int)

# Binarize each coordinate at 0 and append the label as the last column.
a = np.hstack(((ax1[:, None] > 0.0).astype(int), (ax2[:, None] > 0.0).astype(int), ay[:, None]))

# Cluster "b": both coordinates drawn from N(+dist/2, 1), label 1.
bx1 = cluster_rng.normal(loc=dist / 2, scale=1.0, size=num_examples)
bx2 = cluster_rng.normal(loc=dist / 2, scale=1.0, size=num_examples)
by = np.ones((num_examples,), dtype=int)

b = np.hstack(((bx1[:, None] > 0.0).astype(int), (bx2[:, None] > 0.0).astype(int), by[:, None]))

# Scatter plot of the raw (not yet binarized) coordinates of both clusters.
fig, ax = plt.subplots(1, 1)

ax.scatter(ax1, ax2, label="a")
ax.scatter(bx1, bx2, label="b", alpha=0.5)
ax.legend();

In [None]:
# Stack both clusters row-wise into one dataset (label in the last column).
X_ab = np.concatenate((a, b), axis=0)

# Shuffled 67/33 split with a fixed seed.
X_train, X_test = train_test_split(
    X_ab,
    test_size=0.33,
    random_state=42,
    shuffle=True,
)

# LUT with size argument 2 (two binarized coordinates) trained on the training split.
lut = Lut(2)
lut.train(X_train)
lut

In [None]:
# Accuracy on the training split; the last column of X_train holds the true labels.
# Arguments follow scikit-learn's documented order: accuracy_score(y_true, y_pred).
preds = lut.predict(X_train)
accuracy_score(X_train[:, -1], preds)

In [None]:
# Accuracy on the held-out split; the last column of X_test holds the true labels.
# Arguments follow scikit-learn's documented order: accuracy_score(y_true, y_pred).
preds = lut.predict(X_test)
accuracy_score(X_test[:, -1], preds)

Here the two clusters lie symmetrically around 0, so the criterion used to binarize the dataset ($>0$) is well chosen. If the criterion is chosen badly, the performance of the LUTs can drop significantly.

## MNIST

In [None]:
# One-off download that produced MNIST.npz (kept for provenance; needs network access):
# from sklearn.datasets import fetch_openml

# X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
# y = np.array([int(x) for x in y])
# np.savez("MNIST.npz", X=X, y=y)

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code -- only load MNIST.npz from a trusted source. If the archive holds plain
# numeric arrays (as the snippet above would produce), allow_pickle=False should
# suffice -- confirm and switch.
# The context manager closes the archive's file handle once both arrays are read.
with np.load("MNIST.npz", allow_pickle=True) as data:
    X = data["X"]
    y = data["y"]

In [None]:
%%time
bits = 9

pca = PCA(n_components=bits)
X_pca = pca.fit_transform(X)

# fig, ax = plt.subplots(1, 1)
# X_back = pca.inverse_transform(X_pca)
# ax.imshow(X_back[10].reshape((28,28)), cmap="gray")

scaler = MinMaxScaler(feature_range=(0, 1))
X_tf = scaler.fit_transform(X_pca)

X_mnist = np.hstack(
    (
        (X_tf > 0.5).astype(bool),
        ((y == 0) | (y == 1) | (y == 2) | (y == 3) | (y == 4)).astype(bool)[:, None],
    )
)

X_train, X_test = train_test_split(X_mnist, test_size=0.33, random_state=42, shuffle=True)

lut = Lut(bits)
lut.train(X_train)

preds = lut.predict(X_train)
print(f"Accuracy on training set: {accuracy_score(preds, X_train[:, -1]):.2f}%")

preds = lut.predict(X_test)
print(f"Accuracy on test set: {accuracy_score(preds, X_test[:, -1]):.2f}%")

print(f"{lut.rnd.sum() / len(lut.rnd.ravel()) * 100:.2f}% of lut entries are random")

In [None]:
# Sweep the number of LUT input bits and record train/test accuracy plus the
# percentage of LUT entries flagged in lut.rnd for each setting.
bit_arr = list(range(2, 21))
train_arr = []
test_arr = []
rnd_arr = []

# Seeded generator: the random labels (and hence the curves plotted from this
# sweep) are reproducible on Restart & Run All.
label_rng = np.random.default_rng(42)

for bits in tqdm(bit_arr):
    # Same preprocessing as the single-LUT experiment: PCA -> [0, 1] scaling -> threshold at 0.5.
    pca = PCA(n_components=bits)
    X_pca = pca.fit_transform(X)
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_tf = scaler.fit_transform(X_pca)
    X_mnist = np.hstack(
        (
            (X_tf > 0.5).astype(bool),
            # Control experiment: random 0/1 targets instead of the real labels.
            # For the real 0-4 vs. 5-9 task use this column instead:
#             ((y == 0) | (y == 1) | (y == 2) | (y == 3) | (y == 4)).astype(bool)[:, None],
            label_rng.integers(0, 2, size=len(y))[:, None],
        )
    )
    X_train, X_test = train_test_split(X_mnist, test_size=0.33, random_state=42, shuffle=True)
    lut = Lut(bits)
    lut.train(X_train)
    # Arguments follow scikit-learn's documented order: accuracy_score(y_true, y_pred).
    preds = lut.predict(X_train)
    train_arr.append(accuracy_score(X_train[:, -1], preds))
    preds = lut.predict(X_test)
    test_arr.append(accuracy_score(X_test[:, -1], preds))
    rnd_arr.append(lut.rnd.sum() / len(lut.rnd.ravel()) * 100)

In [None]:
fig, ax = plt.subplots(1, 1)

# Left axis: train/test accuracy versus the number of LUT input bits.
ax.plot(bit_arr, train_arr, label="Train")
ax.plot(bit_arr, test_arr, "--", label="Test")
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.set(xlabel="Number of Bits", ylabel="Accuracy")
ax.set_title(
    "Performance of a single lut on 0-4 vs. 5-9 MNIST classification\n"
    "Labels (targets) random\n"
    "(PCA used to reduce dimensions to corresponding bit size)",
    pad=20,
)
ax.grid()
ax.legend()

# Right axis (shared x): percentage of LUT entries recorded in rnd_arr.
ax2 = ax.twinx()
ax2.plot(
    bit_arr,
    rnd_arr,
    "-.",
    label="Percentage of\nlut entries\nrandom\n(tie-breaking)",
    color=color_list[2],
)
ax2.legend(bbox_to_anchor=(1.1, 1), loc="upper left");

In [None]:
# Write the most recently created figure to disk; bbox_inches="tight" keeps the
# legend placed outside the axes inside the saved image.
fig.savefig("single_lut_performance_random.jpg", dpi=100, bbox_inches="tight")