In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import copy

from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

color_list = [x["color"] for x in plt.rcParams["axes.prop_cycle"]]

from matplotlib.ticker import MaxNLocator

import multiprocessing
import os

In [3]:
from lut import *

## Custom dataset

In [None]:
from sklearn.datasets import make_moons

In [None]:
# Two interleaving half-moons with a little Gaussian noise. The seed is fixed so
# the generated dataset is identical on every Restart & Run All (the original
# random_state=None produced a different dataset on each run).
X_float, y_float = make_moons(n_samples=1000, shuffle=True, noise=0.05, random_state=42)

In [None]:
# Rescale every feature to [0, 1]. The fitted scaler and the scaled floats are
# both kept: the scaled floats are reused as plot coordinates further down.
scaler = MinMaxScaler(feature_range=(0, 1))
X_tf = scaler.fit_transform(X_float)

# Boolean targets: any non-zero label becomes True.
y = y_float.astype(bool)

# Boolean inputs: a feature is "on" when it lies above the midpoint of its
# scaled range.
X = np.greater(X_tf, 0.5)

In [None]:
# NOTE(review): the arguments are presumably (input bits per LUT, LUTs per
# layer) — confirm against lut.py.
lut = Lut(2, [100, 100, 100, 100])
preds_train = lut.train(X, y)

# accuracy_score returns a fraction in [0, 1]; the "%" presentation type scales
# it by 100 so the printed percent sign is truthful (previously 0.89 was shown
# as "0.89%"). Arguments follow sklearn's documented (y_true, y_pred) order.
print(f"Accuracy on training set: {accuracy_score(y, preds_train):.2%}")

In [None]:
# Side-by-side comparison on the scaled (pre-threshold) coordinates:
# left panel is coloured by the true labels, right panel by the model's
# training-set predictions. Titles make the two panels distinguishable.
fig, axs = plt.subplots(1, 2, figsize=(8, 4))

ax = axs[0]
ax.scatter(X_tf[:, 0], X_tf[:, 1], c=y_float, cmap="bwr")
ax.set_title("Ground truth")

ax = axs[1]
ax.scatter(X_tf[:, 0], X_tf[:, 1], c=preds_train, cmap="bwr")
ax.set_title("LUT predictions (training set)")

# Trailing semicolon suppresses the return-value repr in the cell output.
plt.tight_layout();

## MNIST

In [6]:
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# execute arbitrary code — only load this archive from a trusted source.
data = np.load("data/lut/MNIST.npz", allow_pickle=True)
X_ = data["X"]
y_ = data["y"]

# Scale features to [0, 1], then threshold at 0.5 to obtain boolean inputs.
scaler = MinMaxScaler(feature_range=(0, 1))
X_tf = scaler.fit_transform(X_)
X = X_tf > 0.5  # the comparison already yields a fresh boolean array

# Binary target: True where the label equals one of 0, 1, 2, 3 or 4.
y = np.logical_or.reduce([y_ == digit for digit in (0, 1, 2, 3, 4)])

# Keep a seeded random subset of 10,000 samples.
X, y = shuffle(X, y, n_samples=10_000, random_state=100)

# The subset is already shuffled, so the split is taken in order
# (first 80% train, last 20% test).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False, random_state=42
)

In [78]:
%%time
lut = Lut(8, [1024] * 5)
preds_train = lut.train(X_train, y_train)
preds_test = lut.predict(X_test)

print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}%")
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}%")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [08:25<00:00, 101.05s/it]


Accuracy on training set: 0.89%
Accuracy on test set: 0.87%
CPU times: user 24.6 s, sys: 34.4 s, total: 59 s
Wall time: 9min 42s


In [79]:
# Presumably one collection of accuracies per layer, evaluated on the training
# set — confirm in lut.py. The recorded run of this call took about 8.5 minutes.
acc = lut.get_accuracies_per_layer(X_train, y_train)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [08:36<00:00, 103.40s/it]


In [80]:
# One row per entry of `acc`: 1-based position, a constant 0.001 column
# (NOTE(review): meaning of this constant is not evident here), then the mean
# and standard deviation of that entry's values.
for idx in range(len(acc)):
    layer_acc = acc[idx]
    mean_acc = np.mean(layer_acc)
    std_acc = np.std(layer_acc)
    print(f"{idx + 1}  {0.001}   {mean_acc:.4f}  {std_acc:.4f}")

1  0.001   0.6028  0.0412
2  0.001   0.7398  0.0193
3  0.001   0.8275  0.0067
4  0.001   0.8633  0.0031
5  0.001   0.8781  0.0013
6  0.001   0.8871  0.0000


## Column selection functions

In [7]:
%%time
lut = Lut(8, [100] * 5)
preds_train = lut.train(X_train, y_train)
preds_test = lut.predict(X_test)

print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}%")
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}%")

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:06<00:00,  1.30s/it]


Accuracy on training set: 0.93%
Accuracy on test set: 0.84%
CPU times: user 2.57 s, sys: 2.31 s, total: 4.89 s
Wall time: 8.25 s


In [10]:
# Shape of the first entry of the trained model's cols_arr_ (recorded output:
# (100, 8)); presumably (LUTs in the first layer, input bits per LUT) — confirm
# in lut.py.
lut.cols_arr_[0].shape

(100, 8)

In [22]:
# Scratch arithmetic: 784 inputs (presumably the 28x28 MNIST pixels — confirm)
# grouped 8 at a time.
784 / 8

98.0

In [23]:
# Scratch arithmetic, next reduction step: 98 grouped 7 at a time.
98 / 7

14.0

In [24]:
# Scratch arithmetic, next reduction step: 14 grouped 7 at a time.
14 / 7

2.0

In [25]:
# Scratch arithmetic, final reduction step: 2 grouped 2 at a time.
2 / 2

1.0

In [None]:
num_luts_prev = 100
num_luts_current = 100
bits = 4

# `rows` and `cols` were never defined in this cell, so it raised NameError on
# a fresh kernel. NOTE(review): assumed to mean one row per LUT in the current
# layer and one column per input bit — confirm against lut.py.
rows, cols = num_luts_current, bits

# Draw rows * cols indices (with replacement, the np.random.choice default)
# from 0..1023. NOTE(review): the population may have been meant to be
# range(num_luts_prev) rather than the hard-coded 1024 — confirm.
np.random.choice(range(1024), size=rows * cols)