In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell runs, so edits to local modules are picked up without a kernel
# restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import copy

from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

# Colours of matplotlib's active property cycle, collected into a plain list.
color_list = [
    cycle_entry["color"] for cycle_entry in plt.rcParams["axes.prop_cycle"]
]

from matplotlib.ticker import MaxNLocator

import multiprocessing
import os

In [None]:
from lut import *

## Custom dataset

In [None]:
from sklearn.datasets import make_moons

In [None]:
# Two-moons toy dataset: 1000 noisy 2-D points with 0/1 labels.
# A fixed seed (instead of random_state=None) makes the generated points
# identical on every "Restart Kernel -> Run All".
X_float, y_float = make_moons(
    n_samples=1000,
    shuffle=True,
    noise=0.05,
    random_state=42,
)

In [None]:
# Rescale each feature column to [0, 1] so that a single 0.5 threshold can be
# applied to both coordinates.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X_float)
X_tf = scaler.transform(X_float)

# Binarise the features at the midpoint and cast the labels to booleans.
X = np.greater(X_tf, 0.5)
y = y_float.astype(bool)

In [None]:
# Fit a LUT network on the binarised moons and report its training accuracy.
# The list gives the number of LUTs in each hidden layer (four layers of 100).
# NOTE(review): the first argument (2) is presumably the number of inputs per
# LUT -- confirm against the lut module.
lut = Lut(2, [100, 100, 100, 100])
preds_train = lut.train(X, y)

# accuracy_score takes (y_true, y_pred) and returns a fraction in [0, 1];
# the ".2%" format scales it by 100 so the printed "%" is actually correct.
print(f"Accuracy on training set: {accuracy_score(y, preds_train):.2%}")

In [None]:
# Side-by-side scatter of the scaled moons: true labels (left) versus the LUT
# network's training-set predictions (right). Each panel gets a title so the
# figure can be read without looking at the code.
fig, axs = plt.subplots(1, 2, figsize=(8, 4))

panels = [("Ground truth", y_float), ("LUT predictions (train)", preds_train)]
for ax, (title, colours) in zip(axs, panels):
    ax.scatter(X_tf[:, 0], X_tf[:, 1], c=colours, cmap="bwr")
    ax.set_title(title)

plt.tight_layout();

## MNIST

In [None]:
# Load the MNIST arrays. For an .npz archive np.load returns a lazily-read
# NpzFile that keeps the file open, so it is used as a context manager to
# release the handle once both arrays have been read into memory.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code -- only use this with a trusted MNIST.npz, and
# switch to allow_pickle=False if the stored arrays are not object-typed.
with np.load("MNIST.npz", allow_pickle=True) as data:
    X_ = data["X"]
    y_ = data["y"]

# Rescale every feature column to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
X_tf = scaler.fit_transform(X_)

In [None]:
# Binarise the scaled features at 0.5 (the comparison already yields booleans;
# the explicit cast is retained).
X = np.greater(X_tf, 0.5).astype(bool)

# Binary target: True where the label is one of 0, 1, 2, 3, 4.
y = y_ == 0
for digit in (1, 2, 3, 4):
    y = y | (y_ == digit)

# Reproducibly shuffle and keep a 10,000-sample subset.
X, y = shuffle(X, y, random_state=100, n_samples=10_000)

# Ordered split (shuffle=False) with 33% of the subset held out for testing.
split = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False)
X_train, X_test, y_train, y_test = split

In [None]:
%%time
# Fit a LUT network with three hidden layers of 100 LUTs on the MNIST training
# split, then predict on the held-out split.
# NOTE(review): the first argument (6) is presumably the number of inputs per
# LUT -- confirm against the lut module.
lut = Lut(6, [100, 100, 100])
preds_train = lut.train(X_train, y_train)
preds_test = lut.predict(X_test)

# accuracy_score takes (y_true, y_pred) and returns a fraction in [0, 1];
# the ".2%" format scales it by 100 so the printed "%" is actually correct.
print(f"Accuracy on training set: {accuracy_score(y_train, preds_train):.2%}")
print(f"Accuracy on test set: {accuracy_score(y_test, preds_test):.2%}")

## Percentage of random LUT entries

In [None]:
# Sweep the network depth from 2 to 10 hidden layers (100 LUTs each). After
# training each network, store for every array in `lut.rnd_arr_` the ratio of
# its element sum to its element count.
# NOTE(review): `rnd_arr_` presumably flags randomly-filled LUT entries --
# confirm against the lut module.
rnd_arr = []
for n_hidden in range(2, 11):
    lut = Lut(6, [100] * n_hidden)
    preds_train = lut.train(X_train, y_train)

    layer_ratios = []
    for mask in lut.rnd_arr_:
        layer_ratios.append(mask.sum() / len(mask.ravel()))
    rnd_arr.append(layer_ratios)

In [None]:
# One curve per network depth: share of random LUT entries for each layer.
# The first element of rnd_arr belongs to the network with 2 hidden layers,
# hence the enumeration starts at 2.
fig, ax = plt.subplots(1, 1)
for n_hidden, rnd in enumerate(rnd_arr, start=2):
    layers = range(2, len(rnd) + 2)
    # The stored values are fractions (sum / count); scale by 100 so the data
    # matches the "Percentage" wording of the axis label and title.
    ax.plot(layers, [100 * frac for frac in rnd], label=n_hidden)

ax.grid()
ax.set_xticks(range(2, 13))
ax.set_xticklabels(range(2, 13))
ax.set_xlabel("Hidden layer")
ax.set_ylabel("Percentage of lut entries random")
ax.legend(bbox_to_anchor=(1, 1), loc="upper left", title="Number of\nhidden layers\nof respective\nlut network")
ax.set_title("Percentage of random entries for different architectures\nEach hidden layer has 100 luts");