In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import copy

from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

color_list = [x["color"] for x in plt.rcParams["axes.prop_cycle"]]

from matplotlib.ticker import MaxNLocator

import multiprocessing
import os
import pickle
import scipy

In [3]:
# Load the train/test split from the project-local `mnist` helper module.
# NOTE(review): later cells reshape each row of X to 1x28x28 and train with
# binary cross-entropy, so X is presumably flattened 784-pixel images and y
# holds 0/1 labels — confirm against load_mnist_binary.
from mnist import load_mnist_binary

X_train, X_test, y_train, y_test = load_mnist_binary()

# Nearest Neighbors

In [6]:
from sklearn.neighbors import KNeighborsClassifier

In [10]:
# Baseline k-nearest-neighbours classifier with k=5. Every hyper-parameter is
# written out so the exact configuration is visible in the notebook.
clf = KNeighborsClassifier(
    n_neighbors=5,
    weights="uniform",
    algorithm="auto",
    leaf_size=30,
    p=2,
    metric="minkowski",
    metric_params=None,
    n_jobs=-1,
)
# The trailing semicolon keeps the estimator repr out of the cell output.
clf.fit(X_train, y_train);

In [12]:
# Predict on both splits first, then report the two accuracies.
preds_train = clf.predict(X_train)
preds_test = clf.predict(X_test)
print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}")
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}")

Accuracy on training set: 0.99
Accuracy on test set: 0.97


In [13]:
# Same k-NN setup with a single neighbour (k=1).
clf = KNeighborsClassifier(
    n_neighbors=1,
    weights="uniform",
    algorithm="auto",
    leaf_size=30,
    p=2,
    metric="minkowski",
    metric_params=None,
    n_jobs=-1,
)
clf.fit(X_train, y_train)

# Predict on both splits first, then report the two accuracies.
preds_train = clf.predict(X_train)
preds_test = clf.predict(X_test)
print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}")
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}")

Accuracy on training set: 1.00
Accuracy on test set: 0.97


# Random Forest

In [46]:
from sklearn.ensemble import RandomForestClassifier

In [47]:
# Random forest with 10 trees; all hyper-parameters are listed explicitly.
clf = RandomForestClassifier(
    n_estimators=10,
    criterion="gini",
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    # "auto" meant sqrt(n_features) for classifiers and has been deprecated and
    # then removed in newer scikit-learn releases; "sqrt" is the equivalent,
    # version-independent spelling.
    max_features="sqrt",
    max_leaf_nodes=None,
    min_impurity_decrease=0.0,
    bootstrap=True,
    oob_score=False,
    n_jobs=None,
    # NOTE: no seed is fixed, so the fitted forest (and its size) varies between runs.
    random_state=None,
    verbose=0,
    warm_start=False,
    class_weight=None,
    ccp_alpha=0.0,
    max_samples=None,
).fit(X_train, y_train)

# Report accuracy on both splits.
preds_train = clf.predict(X_train)
print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}")
preds_test = clf.predict(X_test)
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}")

Accuracy on training set: 1.00
Accuracy on test set: 0.96


# Logistic Regression

In [30]:
from sklearn.linear_model import LogisticRegression

In [31]:
# L2-regularised logistic regression fitted with L-BFGS.
# `multi_class` is intentionally not passed: the previous explicit value
# ("auto") is the default, and the argument itself is deprecated in recent
# scikit-learn releases, so omitting it keeps the behaviour and avoids the
# deprecation warning / future TypeError.
clf = LogisticRegression(
    penalty="l2",
    dual=False,
    tol=0.0001,
    C=1.0,
    fit_intercept=True,
    intercept_scaling=1,
    class_weight=None,
    random_state=None,
    solver="lbfgs",
    max_iter=10_000,  # generous cap so the solver can converge on raw pixel inputs
    verbose=0,
    warm_start=False,
    n_jobs=-1,
    l1_ratio=None,
).fit(X_train, y_train)

# Report accuracy on both splits.
preds_train = clf.predict(X_train)
print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}")
preds_test = clf.predict(X_test)
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}")

Accuracy on training set: 0.87
Accuracy on test set: 0.87


# Naïve Bayes

In [10]:
from sklearn.naive_bayes import BernoulliNB

In [11]:
# Bernoulli naive Bayes; inputs are thresholded at `binarize` before fitting.
clf = BernoulliNB(
    alpha=1.0,
    binarize=0.0,
    fit_prior=True,
    class_prior=None,
)
clf.fit(X_train, y_train)

# Predict on both splits first, then report the two accuracies.
preds_train = clf.predict(X_train)
preds_test = clf.predict(X_test)
print(f"Accuracy on training set: {accuracy_score(preds_train, y_train):.2f}")
print(f"Accuracy on test set: {accuracy_score(preds_test, y_test):.2f}")

Accuracy on training set: 0.77
Accuracy on test set: 0.77


# CNN

In [11]:
from torch import nn
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [12]:
class CNN(nn.Module):
    """Small convolutional binary classifier for single-channel 28x28 images.

    Two conv + ReLU + 2x2 max-pool stages are followed by three fully
    connected layers; the final sigmoid yields one probability per sample.
    """

    def __init__(self):
        # Initialise nn.Module exactly once (the base initialiser was
        # previously invoked twice in a row).
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, (5, 5), padding=2)
        self.conv2 = nn.Conv2d(64, 32, (5, 5))
        # For 28x28 inputs: 28 -> pool 14 -> conv (no padding) 10 -> pool 5,
        # hence 32 channels * 5 * 5 features feed the first dense layer.
        self.fc1 = nn.Linear(32 * 5 * 5, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)

    def forward(self, x):
        """Map a batch shaped (N, 1, 28, 28) to probabilities shaped (N, 1)."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        # Flatten everything except the batch dimension for the dense layers.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of all non-batch dims)."""
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [13]:
class MNISTDataset(Dataset):
    """In-memory dataset of 28x28 single-channel images with one label each.

    Parameters
    ----------
    X : array-like, shape (n_samples, 784) or anything reshapeable to
        (n_samples, 1, 28, 28). Stored as a float32 tensor.
    y : array-like, shape (n_samples,). Stored as a float32 tensor of shape
        (n_samples, 1).
    transform : callable, optional
        If given, applied to the image tensor each time an item is fetched.
        The label is returned unchanged.
    """

    def __init__(self, X, y, transform=None):
        X = np.asarray(X)
        self.X = torch.tensor(X.reshape((X.shape[0], 1, 28, 28)), dtype=torch.float32)
        self.y = torch.tensor(np.expand_dims(y, axis=1), dtype=torch.float32)
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image = self.X[idx]
        # The transform used to be stored but silently ignored; apply it here.
        if self.transform is not None:
            image = self.transform(image)
        return image, self.y[idx]

def evaluate(network, data, metric):
    """Compute `metric` for every batch of `data` without tracking gradients.

    The network is switched to eval mode (and left there). Returns a 1-D
    tensor with one metric value per batch, in iteration order.
    """
    network.eval()
    with torch.no_grad():
        per_batch = torch.zeros(len(data))
        for batch_idx, batch in enumerate(data):
            inputs, targets = batch
            per_batch[batch_idx] = metric(network(inputs), targets)
    return per_batch

def update(network: nn.Module, data: DataLoader, loss_fn: nn.Module,
           opt: torch.optim.Optimizer) -> torch.Tensor:
    """Run one optimisation pass over `data` and return the per-batch losses.

    The network is put into train mode (and left there). For each batch the
    gradients are reset, the loss is computed and back-propagated, and the
    optimiser takes one step.

    Returns
    -------
    torch.Tensor
        1-D tensor of length ``len(data)`` holding the detached loss of each
        batch, in iteration order. (The annotation previously claimed ``list``.)
    """
    network.train()
    losses = torch.zeros((len(data)), requires_grad=False)

    for idx, (x, y) in enumerate(data):
        opt.zero_grad()
        preds = network(x)
        loss = loss_fn(preds, y)
        # Store a detached copy so the bookkeeping tensor stays out of the graph.
        losses[idx] = loss.detach()
        loss.backward()
        opt.step()

    return losses

In [17]:
# Training data: shuffled mini-batches; the last partial batch is dropped.
train_ds = MNISTDataset(X_train, y_train)
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, drop_last=True)

# Test data: keep every sample and a fixed order. With drop_last=True up to 31
# test samples were silently excluded from the reported test loss/accuracy, and
# shuffling made the excluded subset differ from run to run.
test_ds = MNISTDataset(X_test, y_test)
test_dl = DataLoader(test_ds, batch_size=32, shuffle=False, drop_last=False)

# The CNN ends in a sigmoid, so plain binary cross-entropy on probabilities is used.
loss_fn = nn.functional.binary_cross_entropy

In [28]:
# Train a freshly initialised CNN with Adam.
model = CNN()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

epochs = 1
for epoch in range(epochs):
    # One full pass over train_dl; returns the loss of every batch.
    train_losses_ = update(model, train_dl, loss_fn, opt)

    # Per-batch loss on test_dl; evaluate() also leaves the model in eval mode.
    test_losses = evaluate(model, test_dl, loss_fn)

In [60]:
# Collect rounded predictions and targets batch by batch and join them once at
# the end (repeated np.append re-copies the whole array on every iteration and
# needed a special case for the first batch).
model.eval()  # make inference mode explicit instead of relying on earlier cells
batch_preds, batch_targets = [], []
with torch.no_grad():
    for x, y in train_dl:
        preds = model(x)
        # Sigmoid outputs are rounded to hard 0/1 labels.
        batch_preds.append(torch.round(preds).numpy().ravel())
        batch_targets.append(y.numpy().ravel())

ps = np.concatenate(batch_preds)
ys = np.concatenate(batch_targets)

acc_train = accuracy_score(ys, ps)
print(f"Train accuracy: {acc_train:.2f}")

Train accuracy: 0.99


In [61]:
# Collect rounded predictions and targets batch by batch and join them once at
# the end (repeated np.append re-copies the whole array on every iteration and
# needed a special case for the first batch).
model.eval()  # make inference mode explicit instead of relying on earlier cells
batch_preds, batch_targets = [], []
with torch.no_grad():
    for x, y in test_dl:
        preds = model(x)
        # Sigmoid outputs are rounded to hard 0/1 labels.
        batch_preds.append(torch.round(preds).numpy().ravel())
        batch_targets.append(y.numpy().ravel())

ps = np.concatenate(batch_preds)
ys = np.concatenate(batch_targets)

acc_test = accuracy_score(ys, ps)
print(f"Test accuracy: {acc_test:.2f}")

Test accuracy: 0.99


# Model size

In [4]:
# NOTE: get_lut_size and viz_size are defined in the next cell; run it first.
# Five layers of 1024 two-input LUTs, with the result scaled by 1024
# (NOTE(review): the meaning of the 1024 factor is not stated in this notebook — confirm).
viz_size(get_lut_size(2, [1024] * 5) * 1024)

2.621 MB
2621 KB
2621440 B


In [4]:
def get_lut_size(bits, layers):
    """Return the storage required by a stack of lookup tables, in bytes.

    Each LUT with `bits` inputs holds 2**bits one-bit entries; `layers` is an
    iterable giving the number of LUTs in each layer.
    """
    bits_per_lut = 2 ** bits
    total_bits = sum(count * bits_per_lut for count in layers)
    return total_bits / 8  # bits -> bytes

def viz_size(size):
    """Print a byte count in MB, KB and B (decimal units: 1 MB = 1e6 B)."""
    megabytes = size / 1e6
    kilobytes = size / 1e3
    report = [
        f"{megabytes:.3f} MB",
        f"{kilobytes:.0f} KB",
        f"{size:.0f} B",
    ]
    print("\n".join(report))

## LUT

In [7]:
# Five layers of 1024 two-input LUTs plus a final single-LUT layer; the result
# is scaled by 1024 (NOTE(review): factor is not explained in this notebook — confirm).
viz_size(get_lut_size(2, [1024] * 5 + [1]) * 1024)

2.622 MB
2622 KB
2621952 B


In [9]:
# Five layers of 64 two-input LUTs plus a final single-LUT layer, scaled by
# 1024, plus 1024 additional 4-byte (32-bit) values.
# NOTE(review): what the scaling factor and the extra values represent is not
# stated in this notebook — confirm.
viz_size(get_lut_size(2, [64] * 5 + [1]) * 1024 + 32 / 8 * 1024)

0.168 MB
168 KB
168448 B


In [60]:
# Five layers of 256 ten-input LUTs plus a final single-LUT layer.
viz_size(get_lut_size(10, [256] * 5 + [1]))

0.164 MB
164 KB
163968 B


In [6]:
# Five layers of 1024 twelve-input LUTs plus a final single-LUT layer.
viz_size(get_lut_size(12, [1024] * 5 + [1]))

2.622 MB
2622 KB
2621952 B


In [27]:
# Five layers of 1024 ten-input LUTs plus a final single-LUT layer.
viz_size(get_lut_size(10, [1024] * 5 + [1]))

0.655 MB
655 KB
655488 B


In [4]:
# Five layers of 256 eight-input LUTs plus a final single-LUT layer.
viz_size(get_lut_size(8, [256] * 5 + [1]))

0.041 MB
41 KB
40992 B


In [28]:
# Five layers of 1024 eight-input LUTs plus a final single-LUT layer.
viz_size(get_lut_size(8, [1024] * 5 + [1]))

0.164 MB
164 KB
163872 B


## CNN

In [29]:
# Count parameters on a throwaway instance: rebinding `model` here would
# replace the trained network from the training cell with an untrained one.
size_probe = CNN()

pytorch_total_params = sum(p.numel() for p in size_probe.parameters() if p.requires_grad)

# Parameters are stored as 32-bit floats, i.e. 32 / 8 = 4 bytes each.
viz_size(pytorch_total_params * 32 / 8)

1.164 MB
1164 KB
1163908 B


## Logistic Regression

In [32]:
# Public attribute names of the current classifier (leading-underscore names skipped).
# NOTE: `clf` must be the fitted LogisticRegression when this cell runs; later
# cells rebind `clf` to other models.
methods = [name for name in dir(clf) if not name.startswith("_")]

In [34]:
# Estimate the model footprint assuming every float, int and array element
# takes 32 bits (4 bytes).
# NOTE: reads attributes from the global `clf`; it must still be the fitted
# LogisticRegression when this cell runs.
z = 0

for name in methods:
    value = getattr(clf, name)
    kind = type(value)
    if kind == np.ndarray:
        # every element of the array counts as one 32-bit value
        z += value.size * (32 / 8)
    elif kind == float or kind == int:
        z += (32 / 8)
    # anything else (bools, strings, callables, ...) is not counted

scikit-learn stores its parameters as 64-bit floats, but for the thesis we assume 32 bits per value.

In [36]:
# Print the byte total accumulated in `z` by the size-estimation loop.
viz_size(z)

0.003 MB
3 KB
3180 B


## Naïve Bayes

In [20]:
# Public attribute names of the current classifier (leading-underscore names skipped).
# NOTE: `clf` must be the fitted BernoulliNB when this cell runs.
methods = [name for name in dir(clf) if not name.startswith("_")]

In [27]:
# Estimate the model footprint assuming every float, int and array element
# takes 32 bits (4 bytes).
# NOTE: reads attributes from the global `clf`; it must be the fitted
# BernoulliNB when this cell runs.
z = 0

for name in methods:
    value = getattr(clf, name)
    kind = type(value)
    if kind == np.ndarray:
        # every element of the array counts as one 32-bit value
        z += value.size * (32 / 8)
    elif kind == float or kind == int:
        z += (32 / 8)
    # anything else (bools, strings, callables, ...) is not counted

In [29]:
# Print the byte total accumulated in `z` by the size-estimation loop.
viz_size(z)

0.016 MB
16 KB
15724 B


## Random Forest

In [113]:
# Show the docstring of the low-level tree object behind the first tree of the forest.
# NOTE: requires `clf` to currently be the fitted RandomForestClassifier.
print(clf.estimators_[0].tree_.__doc__)

Array-based representation of a binary decision tree.

    The binary tree is represented as a number of parallel arrays. The i-th
    element of each array holds information about the node `i`. Node 0 is the
    tree's root. You can find a detailed description of all arrays in
    `_tree.pxd`. NOTE: Some of the arrays only apply to either leaves or split
    nodes, resp. In this case the values of nodes of the other type are
    arbitrary!

    Attributes
    ----------
    node_count : int
        The number of nodes (internal nodes + leaves) in the tree.

    capacity : int
        The current capacity (i.e., size) of the arrays, which is at least as
        great as `node_count`.

    max_depth : int
        The depth of the tree, i.e. the maximum depth of its leaves.

    children_left : array of int, shape [node_count]
        children_left[i] holds the node id of the left child of node i.
        For leaves, children_left[i] == TREE_LEAF. Otherwise,
        children_left[i] > i.

In [50]:
# Estimate the forest footprint by summing, over every tree, all public
# float/int attributes and array elements of its `tree_` object at 32 bits
# (4 bytes) each.
# NOTE: requires `clf` to currently be the fitted RandomForestClassifier.
z = 0

for est in clf.estimators_:
    tr = est.tree_
    methods = [name for name in dir(tr) if not name.startswith("_")]
    for name in methods:
        value = getattr(tr, name)
        kind = type(value)
        if kind == np.ndarray:
            # every element of the array counts as one 32-bit value
            z += value.size * (32 / 8)
        elif kind == float or kind == int:
            z += (32 / 8)
        # anything else (bools, strings, callables, ...) is not counted

In [52]:
# Print the byte total accumulated in `z` over all trees of the forest.
viz_size(z)

3.797 MB
3797 KB
3797488 B
