In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash 
! pip install -U -r requirements.txt
```

```bash
! pip install -U numpy
! pip install -U scikit-learn
```

## Update repository

In [None]:
! git pull

## Add import path

In [None]:
import os
import sys
import gc

In [None]:
# Put the parent of the current working directory on the import path
# (presumably so that the `src.*` packages imported below resolve).
module_path = os.path.abspath(os.pardir)
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

In [None]:
del module_path

## Organize imports

In [None]:
import multiprocessing
from pathlib import Path

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
import plotly.express as px

In [None]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis, 
    QuadraticDiscriminantAnalysis
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from sklearn.datasets import (
    load_iris,
    load_wine,
    load_breast_cancer
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    StandardScaler
)
from sklearn.metrics import (
    classification_report,
    confusion_matrix
)

In [None]:
from scipy import stats
from scipy.interpolate import interp1d

In [None]:
from src.lattmc.fca.utils import *
from src.lattmc.fca.data_utils import *
from src.lattmc.fca.image_utils import *
from src.lattmc.fca.models import *
from src.lattmc.fca.fca_utils import *
from src.lattmc.fca.image_gens import *

#### Number of CPU cores

In [None]:
# Number of CPU cores reported by the system; passed as `n_jobs` to the
# logistic-regression classifier further down.
workers = multiprocessing.cpu_count()
workers

In [None]:
SEED = 2024

## Initialize Path

In [None]:
# All data files live under the relative `data/` directory.
PATH = Path('data')
# Folder for images; created on demand together with any missing parents.
images_path = PATH.joinpath('images')
images_path.mkdir(parents=True, exist_ok=True)
# Location of the pumpkin seeds spreadsheet.
pumpkin_path = PATH.joinpath('Pumpkin_Seeds_Dataset.xlsx')

## Initialize simple dataset

In [None]:
iris = load_iris()

In [None]:
wines = load_wine()

In [None]:
df = pd.read_excel(pumpkin_path)

In [None]:
X = iris['data']
y = iris['target']

In [None]:
X = wines['data']
y = wines['target']

In [None]:
# Split the pumpkin frame into the label column and the feature columns.
y = df['Class']
X = df.drop(columns='Class')

In [None]:
y

In [None]:
# Encode the two class names as integers 0 / 1.
# An explicit mapping followed by an explicit integer cast is used instead of
# chained `.replace()` calls: `replace` on an object column relies on pandas'
# implicit downcasting to obtain an integer dtype, which is deprecated.
# Any label missing from the mapping becomes NaN and makes the cast raise,
# so an unexpected class name is reported here rather than at fit time.
y = y.map({'Çerçevelik': 0, 'Ürgüp Sivrisi': 1}).astype('int64')

In [None]:
y = y.to_numpy()

In [None]:
# Hold out 20% of the samples as a test split; `random_state=SEED` makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=SEED
)

In [None]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
# Fit the scaler on the training split only and rebind X_train to its scaled
# version; the fitted `scaler` object is kept for transforming other data.
scaler = MaxAbsScaler()
X_train = scaler.fit_transform(X_train)

In [None]:
X_test = scaler.transform(X_test)

## Fitting a classifier (Logistic Regression, LDA or QDA)

In [None]:
multi_class='ovr'

In [None]:
multi_class='auto'

In [None]:
# Logistic-regression classifier. `multi_class` is whichever value the cells
# above assigned last; `n_jobs` uses every detected CPU core.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm it is still accepted by the installed version.
mlc = LogisticRegression(
    multi_class=multi_class,
    max_iter=1000000,
    n_jobs=workers
)

In [None]:
mlc = LinearDiscriminantAnalysis()

In [None]:
mlc = QuadraticDiscriminantAnalysis()

In [None]:
mlc = mlc.fit(X_train, y_train)

In [None]:
mlc.classes_

In [None]:
# Score the test split three ways with the fitted classifier.
V_test = mlc.predict_log_proba(X_test)  # per-class log-probabilities
y_pred = mlc.predict_proba(X_test)  # per-class probabilities (not labels, despite the name)
y_hats = mlc.predict(X_test)  # predicted class labels

In [None]:
print(classification_report(y_test, y_hats))

In [None]:
confusion_matrix(y_train, mlc.predict(X_train))

In [None]:
confusion_matrix(y_test, y_hats)

## GV build

In [None]:
mlc.coef_

In [None]:
# Column indices of the coefficients split by sign; a coefficient equal to
# zero is counted on the non-negative side.
# NOTE(review): this needs an estimator that exposes `coef_`; the
# QuadraticDiscriminantAnalysis cell above presumably does not — confirm
# which classifier cell was executed last.
pos_idx = np.nonzero(mlc.coef_ >= 0)[1]
neg_idx = np.nonzero(mlc.coef_ < 0)[1]
neg_idx, pos_idx

In [None]:
# Training samples grouped by class label (0 and 1).
X_trains = {label: X_train[np.where(y_train == label)] for label in (0, 1)}

In [None]:
# Test samples grouped by class label (0 and 1).
X_tests = {label: X_test[np.where(y_test == label)] for label in (0, 1)}

In [None]:
y_test[np.where(y_test == 1)]

In [None]:
v_A = find_v_A(X_train, np.where(y_train == 1), pos_idx=pos_idx, neg_idx=neg_idx)

In [None]:
v_A

In [None]:
G_A = find_G_x(X_train, v_A, pos_idx=pos_idx, neg_idx=neg_idx)

In [None]:
y_train[G_A]

In [None]:
np.count_nonzero(y_train[G_A] == 0) / np.count_nonzero(y_train == 1)

In [None]:
y_train.shape, y_train[G_A].shape

In [None]:
np.where(y_train[G_A] == 1)[0].shape, np.where(y_train[G_A] == 0)[0].shape

In [None]:
# Per-class log-probabilities of the *training* split. The original cell
# scored X_test here, which made V_train a duplicate of V_test.
V_train = mlc.predict_log_proba(X_train)

In [None]:
# Pair every test sample with its log-probability vector: [[x, v_x], ...].
I_X = list(map(list, zip(X_test, V_test)))

In [None]:
np.sum(V_test, axis=1)

In [None]:
np.sum(y_pred, axis=1)

In [None]:
y_pred[-1]

In [None]:
x = X_test[-1]
v_x = V_test[-1]

In [None]:
def upset(v, X=None, V=None):
    """Collect the samples whose value vector is component-wise >= ``v``.

    Parameters
    ----------
    v : array-like
        Reference vector; compared with every row of ``V`` via ``v <= row``.
    X : sequence of samples, optional
        Samples aligned with ``V``. Defaults to the notebook-level ``X_test``.
    V : sequence of vectors, optional
        One vector per sample in ``X``. Defaults to the notebook-level
        ``V_test``.

    Returns
    -------
    (B, v_B) : tuple of two lists
        ``B`` holds the selected samples and ``v_B`` their vectors, in the
        order in which they appear in ``X`` / ``V``.
    """
    # Resolve the defaults at call time (not at definition time) so that a
    # later re-assignment of X_test / V_test in the notebook is picked up,
    # exactly as with the original global look-ups.
    if X is None:
        X = X_test
    if V is None:
        V = V_test

    B, v_B = list(), list()
    for x_t, v_t in zip(X, V):
        # Keep the sample only when *every* component of v is <= v_t.
        if np.all(v <= v_t):
            B.append(x_t)
            v_B.append(v_t)

    return B, v_B

In [None]:
# For every test sample, collect its up-set: the samples (Bs) and their
# log-probability vectors (v_Bs) returned by `upset`.
Bs, v_Bs = [], []
for x_a, v_a in zip(X_test, V_test):
    B_a, v_B_a = upset(v_a)
    Bs += [B_a]
    v_Bs += [v_B_a]

In [None]:
len(Bs)

In [None]:
# Total number of samples over all collected up-sets.
i = sum(map(len, Bs))
i

## Clustering lattices

In [None]:
# Split the test samples (A, B, C) and their log-probability vectors
# (V_0, V_1, V_2) by true class label 0, 1 and 2.
V_0, A = list(), list()
V_1, B = list(), list()
V_2, C = list(), list()
# The loop variables carry a `_t` suffix so that the notebook-level `x`,
# `v_x` and, above all, the label array `y` are not overwritten by the loop.
for x_t, v_t, y_t in zip(X_test, V_test, y_test):
    if y_t == 0:
        A.append(x_t)
        V_0.append(v_t)
    elif y_t == 1:
        B.append(x_t)
        V_1.append(v_t)
    elif y_t == 2:
        C.append(x_t)
        V_2.append(v_t)

In [None]:
len(V_0)

In [None]:
V_0[0]

In [None]:
# np.array(V_0)

In [None]:
# Component-wise minimum of the log-probability vectors of class 0 and of
# class 1. Note that this rebinds the `v_A` computed earlier by `find_v_A`.
v_A = np.asarray(V_0).min(axis=0)
v_B = np.asarray(V_1).min(axis=0)
# v_C = np.min(np.array(V_2))

In [None]:
v_A, v_B

In [None]:
# (sample, label) pairs of the test samples whose log-probability vector is
# component-wise >= v_A (respectively v_B).
# Comprehensions keep the iteration variables local, so the notebook-level
# `x`, `v_x` and the label array `y` are no longer overwritten.
G_v_A = [
    (x_t, y_t)
    for x_t, v_t, y_t in zip(X_test, V_test, y_test)
    if np.all(v_A <= v_t)
]
G_v_B = [
    (x_t, y_t)
    for x_t, v_t, y_t in zip(X_test, V_test, y_test)
    if np.all(v_B <= v_t)
]

In [None]:
# Sum of the labels in G_v_A (the class-1 count when labels are 0/1),
# printed as "<label sum> <total> <total - label sum>".
# A generator expression replaces the loop so that `y` and `v_x` are not
# rebound; G_v_A holds (sample, label) pairs, so the first item is ignored.
i_A = sum(label for _, label in G_v_A)
print(f'{i_A} {len(G_v_A)} {len(G_v_A) - i_A}')

In [None]:
# Sum of the labels in G_v_B (the class-1 count when labels are 0/1),
# printed as "<label sum> <total> <total - label sum>".
# A generator expression replaces the loop so that `y` and `v_x` are not
# rebound; G_v_B holds (sample, label) pairs, so the first item is ignored.
i_B = sum(label for _, label in G_v_B)
print(f'{i_B} {len(G_v_B)} {len(G_v_B) - i_B}')