In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash 
! pip install -U -r requirements.txt
```

```bash
! pip install -U numpy
! pip install -U scikit-learn
```

## Update repository

In [None]:
! git pull

## Add import path

In [None]:
import os
import sys
import gc

In [None]:
# Put the parent directory on the import path (once) so project modules can be imported.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
del module_path

## Organize imports

In [None]:
import multiprocessing
from pathlib import Path

In [None]:
from collections import OrderedDict

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
from tqdm import tqdm

In [None]:
import plotly.express as px

In [None]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis, 
    QuadraticDiscriminantAnalysis
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from sklearn.datasets import (
    load_iris,
    load_wine
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    StandardScaler
)
from sklearn.metrics import (
    classification_report,
    confusion_matrix
)

In [None]:
from scipy import stats
from scipy.interpolate import interp1d

In [None]:
import torch

In [None]:
from torch import nn
from torchvision import transforms
from torchvision.datasets import MNIST

#### Number of CPU cores

In [None]:
workers = multiprocessing.cpu_count()
workers

In [None]:
SEED = 2024

## Initialize Path

In [None]:
# Root directory for the notebook's data, relative to the current working directory.
PATH = Path('data')
# Directory that holds the model checkpoint (it is not created here).
model_dir = PATH / 'models'
# NOTE(review): 'chechpoint' looks like a typo for 'checkpoint'; the name must match
# the file on disk, so confirm before renaming.
model_path = model_dir / 'model_chechpoint.ckpt'
# Root directory handed to the MNIST dataset below; created here if missing.
images_path = PATH / 'images'
images_path.mkdir(exist_ok=True, parents=True)
# Excel workbook read with pd.read_excel in the "simple dataset" section.
pumpkin_path = PATH / 'Pumpkin_Seeds_Dataset.xlsx'

## Model wrapper

In [None]:
class NetWrapper:
    """Callable adapter: preprocess an input, then run a prefix of a network.

    The network is put into evaluation mode on construction. It is sliced as
    ``net[:k]``, so it must support slicing (as ``nn.Sequential`` does).
    """

    def __init__(self, net, transform):
        """Store the preprocessing callable and the network in eval mode."""
        self.transform = transform
        self.net = net.eval()

    @torch.inference_mode()
    def forward(self, x, k=6):
        """Return the output of the first ``k`` layers for ``transform(x)``.

        Runs under ``torch.inference_mode()``, so no autograd state is recorded.
        """
        prepared = self.transform(x)
        return self.net[:k](prepared)

    def __call__(self, *args, **kwargs):
        """Delegate to :meth:`forward` so the wrapper can be called like a function."""
        return self.forward(*args, **kwargs)
    

## Load the model

In [None]:
# Load the saved checkpoint with all tensors mapped to the CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source. Depending on the installed torch
# version, an explicit weights_only=... argument may be needed — confirm.
model = torch.load(model_path, map_location='cpu')

In [None]:
model

In [None]:
def clear_state_dict(state_dict):
    """Rename checkpoint keys in place so they match the local layer names.

    Every occurrence of ``'model.1.'``, ``'model.4.'`` and ``'model.7.'`` in a
    key is replaced by ``'fc1.'``, ``'fc2.'`` and ``'fc3.'`` respectively.

    Args:
        state_dict: Mutable mapping from parameter names to values. It is
            modified in place.

    Returns:
        The same ``state_dict`` object, with renamed keys.
    """
    renames = (
        ('model.1.', 'fc1.'),
        ('model.4.', 'fc2.'),
        ('model.7.', 'fc3.'),
    )
    for old, new in renames:
        # Snapshot the keys first: the dict is mutated while we walk over it.
        for name in list(state_dict.keys()):
            state_dict[name.replace(old, new)] = state_dict.pop(name)

    return state_dict

In [None]:
state_dict = clear_state_dict(model['state_dict'])

In [None]:
state_dict

In [None]:
# Input image geometry: one channel, 28 x 28 pixels.
channels = 1
width = 28
height = 28
# Output width of the first two linear layers of the network built below.
hidden_size = 16
# Output width of the last linear layer.
num_classes = 10
# Number of values in one flattened input image.
in_features = channels * width * height

In [None]:
# Fully connected classifier: flatten -> fc1 -> ReLU -> fc2 -> ReLU -> fc3.
# The layer names match the keys produced by clear_state_dict so the renamed
# checkpoint weights can be loaded directly.
net = nn.Sequential(OrderedDict([
    ('flatten', nn.Flatten()),
    # Reuse the precomputed flattened size instead of repeating the product.
    ('fc1', nn.Linear(in_features, hidden_size)),
    ('act1', nn.ReLU()),
    ('fc2', nn.Linear(hidden_size, hidden_size)),
    ('act2', nn.ReLU()),
    ('fc3', nn.Linear(hidden_size, num_classes)),
]))

In [None]:
net

In [None]:
net.load_state_dict(state_dict)

In [None]:
net = net.eval()
net

## Helper functions

In [None]:
def layer_V(data, net, k=5):
    """Collect the output of ``net(x, k=k)`` for every sample in ``data``.

    Args:
        data: Iterable of ``(x, y)`` pairs; only ``x`` is used.
        net: Callable accepting ``(x, k=...)`` and returning a tensor.
        k: Forwarded to ``net`` as the keyword argument ``k``.

    Returns:
        A tuple ``(V, X)``: ``V`` is an array stacking the first row of each
        output, ``X`` is the list of inputs in the same order.
    """
    activations, inputs = [], []
    with tqdm(data) as progress:
        for sample, _label in progress:
            output = net(sample, k=k)
            # Keep only the first row of the output.
            activations.append(output.detach().numpy()[0])
            inputs.append(sample)

    return np.array(activations), inputs

In [None]:
def loop_maxes(V, func, *args, **kwargs):
    """Call ``func(i, v, *args, **kwargs)`` for each item ``v`` of ``V`` at index ``i``.

    Iteration is wrapped in a tqdm progress bar; return values of ``func`` are
    discarded.
    """
    with tqdm(V) as progress:
        for indexed in enumerate(progress):
            func(*indexed, *args, **kwargs)

In [None]:
def find_v_x(V, mrng, idx):
    """Find the row among ``V[mrng]`` with the smallest value in column ``idx``.

    Args:
        V: Array-like of row vectors.
        mrng: Sequence of row indices into ``V`` to search.
        idx: Column whose minimum selects the row.

    Returns:
        A tuple ``(v_x, x_id)``: the selected row of ``V`` and its index in ``V``.
    """
    candidates = np.array(V)[mrng]
    # Position (within the candidate subset) of the minimum of every column.
    winners = np.argmin(candidates, axis=0)
    x_id = mrng[winners[idx]]

    return V[x_id], x_id

In [None]:
def find_v_A(V, mrng):
    """Return the element-wise minimum over the rows of ``V`` selected by ``mrng``."""
    selected = np.array(V)[mrng]
    return np.min(selected, axis=0)

In [None]:
def find_G_x(V, v_x):
    """Return the indices of the rows of ``V`` that dominate ``v_x`` component-wise.

    A row ``v`` is kept when ``v_x <= v`` holds for every component.
    """
    matches = []
    with tqdm(V) as progress:
        for position, row in enumerate(progress):
            if np.all(v_x <= row):
                matches.append(position)
        G_x = np.array(matches)

    return G_x

## Initialize MNIST dataset

In [None]:
# Preprocessing applied to every image before it is fed to the network:
# convert to a tensor, then normalize the single channel.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean and standard
# deviation used at training time — confirm against the training pipeline.
transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
)

In [None]:
data = MNIST(images_path, train=False, download=True)

In [None]:
data[0]

In [None]:
net[:5], net

In [None]:
im = data[0][0]

In [None]:
net[:3](transform(im))

In [None]:
net[:2](transform(data[0][0]))

In [None]:
def pnet(x, k=6):
    """Run ``x`` through the notebook-level ``transform`` and the first ``k`` layers of ``net``.

    Unlike ``NetWrapper.forward`` this does not disable autograd.
    """
    prepared = transform(x)
    return net[:k](prepared)

In [None]:
pnet(data[0][0])

In [None]:
wnet = NetWrapper(net, transform)

In [None]:
pnet(data[0][0], k=5) == wnet.forward(data[0][0], k=5)

In [None]:
# Softmax over the class dimension. The network output has shape (1, num_classes)
# because nn.Flatten keeps the leading dimension, so dim=1 is the class axis.
# Passing dim explicitly avoids the deprecated implicit-dimension behaviour.
sf = nn.Softmax(dim=1)

In [None]:
sf(pnet(data[0][0]))

In [None]:
# For every test image, record the activations after the first five layers
# (up to and including 'act2') together with the image itself.
# This reuses the layer_V helper with the inference-mode wrapper instead of a
# hand-written copy of the same loop, so no autograd graph is built per image.
max_stimul, max_images = layer_V(data, wnet, k=5)
    

In [None]:
max_stimul = np.array(max_stimul)

In [None]:
max_stimul.shape

In [None]:
np.argmax(max_stimul, axis=0)

In [None]:
np.max(max_stimul, axis=0)

In [None]:
max_stimul[3290, :]

In [None]:
plt.imshow(max_images[8140])

## Analyze maximum stimulus

In [None]:
# Indices of the test images whose activation in unit 2 is at least 18.
# The threshold test is vectorized over the whole activation matrix; the result
# stays a plain list of ints so it can still be indexed like the original.
max_range_2 = np.flatnonzero(max_stimul[:, 2] >= 18).tolist()

In [None]:
max_range_2

In [None]:
v_a, a_id = find_v_x(max_stimul, max_range_2, 2)
v_a, a_id

In [None]:
plt.imshow(max_images[a_id])

In [None]:
v_A = find_v_A(max_stimul, max_range_2)
v_A

In [None]:
# Indices of the test images whose activation in unit 5 is at least 10.
# The threshold test is vectorized over the whole activation matrix; the result
# stays a plain list of ints so it can still be indexed like the original.
max_range_5 = np.flatnonzero(max_stimul[:, 5] >= 10).tolist()

In [None]:
max_range_5

In [None]:
v_b, b_id = find_v_x(max_stimul, max_range_5, 5)
v_b, b_id

In [None]:
plt.imshow(max_images[b_id])

In [None]:
v_B = find_v_A(max_stimul, max_range_5)
v_B

In [None]:
G_A = find_G_x(max_stimul, v_A)
G_A

In [None]:
G_B = find_G_x(max_stimul, v_B)
G_B

In [None]:
inter_idx = np.intersect1d(G_A, G_B)
inter_idx

In [None]:
plt.imshow(data[6794][0])

In [None]:
v_D = np.maximum(v_A, v_B)
v_D

In [None]:
G_5_D = find_G_x(max_stimul, v_D)
G_5_D

In [None]:
wnet.net[:6]

In [None]:
U_X, X_U = layer_V(data, wnet, k=6)

In [None]:
u_D = find_v_A(U_X, inter_idx)
u_D

In [None]:
U_X[inter_idx]

In [None]:
G_D = find_G_x(U_X, u_D)
G_D

In [None]:
plt.imshow(data[9390][0])

## Initialize simple dataset

In [None]:
iris = load_iris()

In [None]:
wines = load_wine()

In [None]:
df = pd.read_excel(pumpkin_path)

In [None]:
X = iris['data']
y = iris['target']

In [None]:
X = wines['data']
y = wines['target']

In [None]:
# Pumpkin seeds: every column except 'Class' is a feature, 'Class' is the target.
X = df.drop(columns=['Class'])
y = df['Class']

In [None]:
y

In [None]:
# Encode the two class names as integer labels.
# An explicit mapping plus a cast does not depend on replace() silently
# downcasting an object column to integers, and it fails loudly (NaN cannot be
# cast to int) if the column contains a class name other than these two.
class_codes = {'Çerçevelik': 0, 'Ürgüp Sivrisi': 1}
y = y.map(class_codes).astype('int64')

In [None]:
y = y.to_numpy()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=SEED
)

In [None]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
scaler = MaxAbsScaler()
X_train = scaler.fit_transform(X_train)

In [None]:
X_test = scaler.transform(X_test)

## Fitting Logistic Regression

In [None]:
# Candidate classifier 1: one-vs-rest logistic regression, parallelised over
# `workers` jobs.
# NOTE(review): the following cells reassign `mlc` to LDA and then QDA, so on a
# top-to-bottom run this estimator is never fitted.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases —
# confirm it is still accepted by the installed version.
mlc = LogisticRegression(
    multi_class='ovr',
    n_jobs=workers
)

In [None]:
mlc = LinearDiscriminantAnalysis()

In [None]:
mlc = QuadraticDiscriminantAnalysis()

In [None]:
mlc = mlc.fit(X_train, y_train)

In [None]:
mlc.classes_

In [None]:
V_test = mlc.predict_log_proba(X_test)
y_pred = mlc.predict_proba(X_test)
y_hats = mlc.predict(X_test)

In [None]:
print(classification_report(y_test, y_hats))

In [None]:
confusion_matrix(y_test, y_hats)

In [None]:
# Log-probabilities for the training split. The original evaluated X_test here,
# which made V_train a copy of V_test.
V_train = mlc.predict_log_proba(X_train)

In [None]:
I_X = [[x, v_x] for x, v_x in zip(X_test, V_test)]

In [None]:
np.sum(V_test, axis=1)

In [None]:
np.sum(y_pred, axis=1)

In [None]:
y_pred[-1]

In [None]:
x = X_test[-1]
v_x = V_test[-1]

In [None]:
def upset(v, X=None, V=None):
    """Return the points whose score vector dominates ``v`` component-wise.

    Args:
        v: Reference score vector.
        X: Points to filter. Defaults to the notebook-level ``X_test``.
        V: Score vectors aligned with ``X``. Defaults to the notebook-level
            ``V_test``.

    Returns:
        A tuple ``(B, v_B)``: the points ``x_t`` for which ``v <= v_t`` holds in
        every component, and their score vectors, both in input order.
    """
    # Fall back to the notebook globals only when no explicit data is given,
    # so existing calls of the form upset(v) keep working unchanged.
    if X is None:
        X = X_test
    if V is None:
        V = V_test

    B, v_B = list(), list()
    for x_t, v_t in zip(X, V):
        if np.all(v <= v_t):
            B.append(x_t)
            v_B.append(v_t)

    return B, v_B

In [None]:
# Compute the upset of every test point's log-probability vector.
# Bs[j] holds the dominating points, v_Bs[j] their log-probability vectors.
# Only V_test is iterated (the paired x was unused), and the loop variable no
# longer reuses the name `v_a` from the MNIST section.
Bs = list()
v_Bs = list()
for v_ref in V_test:
    B_ref, v_B_ref = upset(v_ref)
    Bs.append(B_ref)
    v_Bs.append(v_B_ref)

In [None]:
len(Bs)

In [None]:
# Total number of members summed over all upsets.
i = sum(len(B_c) for B_c in Bs)
i

## Clustering lattices

In [None]:
# Split the test points (A, B, C) and their log-probability vectors
# (V_0, V_1, V_2) by true class label 0, 1, 2.
# The loop variables carry an `_i` suffix so they do not overwrite the
# notebook-level `x`, `v_x` and, importantly, the label array `y`.
V_0 = list()
A = list()
V_1 = list()
B = list()
V_2 = list()
C = list()
for x_i, v_i, y_i in zip(X_test, V_test, y_test):
    if y_i == 0:
        A.append(x_i)
        V_0.append(v_i)
    elif y_i == 1:
        B.append(x_i)
        V_1.append(v_i)
    elif y_i == 2:
        C.append(x_i)
        V_2.append(v_i)

In [None]:
len(V_0)

In [None]:
V_0[0]

In [None]:
# np.array(V_0)

In [None]:
v_A = np.min(np.array(V_0), axis = 0)
v_B = np.min(np.array(V_1), axis = 0)
# v_C = np.min(np.array(V_2))

In [None]:
v_A, v_B

In [None]:
# Collect, as (point, label) pairs, the test points whose log-probability
# vector dominates v_A (resp. v_B) in every component.
# The loop variables carry an `_i` suffix so the label array `y` and the
# notebook-level `x` / `v_x` are not overwritten.
G_v_A = list()
G_v_B = list()
for x_i, v_i, y_i in zip(X_test, V_test, y_test):
    if np.all(v_A <= v_i):
        G_v_A.append((x_i, y_i))
    if np.all(v_B <= v_i):
        G_v_B.append((x_i, y_i))

In [None]:
# G_v_A holds (point, label) pairs; sum the labels (the number of class-1
# members when labels are 0/1) and print: label sum, set size, remainder.
# The original unpacked the point into a variable named `v_x` and reused `y`
# as the loop variable, overwriting the notebook-level names.
i_A = sum(label for _point, label in G_v_A)
print(f'{i_A} {len(G_v_A)} {len(G_v_A) - i_A}')

In [None]:
# G_v_B holds (point, label) pairs; sum the labels (the number of class-1
# members when labels are 0/1) and print: label sum, set size, remainder.
# The original unpacked the point into a variable named `v_x` and reused `y`
# as the loop variable, overwriting the notebook-level names.
i_B = sum(label for _point, label in G_v_B)
print(f'{i_B} {len(G_v_B)} {len(G_v_B) - i_B}')