In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task
from pytorch_tabnet.metrics import Metric
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold

from emb_generating import imgs_to_emb

In [None]:
# Build one embedding frame per class folder; the glob patterns are passed
# straight to imgs_to_emb (seal first, then no_seal).
df_train_seal, df_train_no_seal = (
    imgs_to_emb(pattern)
    for pattern in ('../data/train/seal/*', '../data/train/no_seal/*')
)

In [None]:
# Stack both class frames under a fresh 0..n-1 index, then discard the
# 'Image name' column so only the remaining columns go on to modelling.
class_frames = [df_train_seal, df_train_no_seal]
df_train = pd.concat(class_frames, ignore_index=True).drop(columns=['Image name'])
df_train

In [None]:
# Separate the label column from the feature columns.
target_column = 'Target'
y = df_train[target_column]
X = df_train.drop(columns=[target_column])

In [None]:
# 80 % of the rows go to training, the rest to validation; the fixed seed
# keeps the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
)

In [None]:
log_reg = LogisticRegression()

log_reg_C_values = [0.01, 0.1, 1, 10, 100]

# The search space is split into two sub-grids because not every solver
# supports every penalty: 'elasticnet' is only implemented by 'saga' and also
# needs an explicit l1_ratio. A single flat grid would pair 'elasticnet' with
# every solver and no l1_ratio, so all of those candidates would fail to fit
# and be scored as NaN.
log_reg_param_grid = [
    {
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'penalty': ['l2'],
        'C': log_reg_C_values,
    },
    {
        'solver': ['saga'],
        'penalty': ['elasticnet'],
        'l1_ratio': [0.25, 0.5, 0.75],
        'C': log_reg_C_values,
    },
]

# 5-fold shuffled CV on the training split, selecting by F1.
fold = KFold(n_splits=5, shuffle=True, random_state=43)
log_reg_grid = GridSearchCV(
    estimator=log_reg,
    param_grid=log_reg_param_grid,
    scoring='f1',
    cv=fold,
    n_jobs=-1,
    verbose=True,
)
log_reg_grid.fit(X_train, y_train)

In [None]:
# Show the hyper-parameter combination that the grid search selected.
log_reg_grid.best_params_

In [None]:
# Score the held-out validation rows with the refit best estimator and print
# the per-class precision / recall / F1 table.
log_reg_preds = log_reg_grid.predict(X_val)
log_reg_report = classification_report(y_val, log_reg_preds)
print(log_reg_report)

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the actual classes and
# columns the predicted ones, which is what the axis labels below claim.
cm_log_reg = confusion_matrix(y_val, log_reg_preds)
# Normalise each row so every cell is the share of that actual class.
cm_normalizes_log_reg = np.round(cm_log_reg / cm_log_reg.sum(axis=1, keepdims=True), 2)

cm_plot_log_reg = sns.heatmap(cm_normalizes_log_reg, cmap='OrRd_r', annot=True)
# NOTE(review): tick labels assume the sorted class order is 0 = no seal, 1 = seal — confirm.
cm_plot_log_reg.yaxis.set_ticklabels(['No seal', 'Seal'])
cm_plot_log_reg.xaxis.set_ticklabels(['No seal', 'Seal'])

plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Logistic Regression Confusion Matrix')
plt.show()

In [None]:
# Settings for the LightAutoML experiment.
RANDOM_STATE = 42   # seed for numpy and the AutoML reader
TEST_SIZE = 0.2     # not referenced by the cells shown in this notebook section
N_FOLDS = 5         # passed to the AutoML reader as 'cv'
N_THREADS = 6       # torch threads, AutoML cpu_limit and reader n_jobs
TIMEOUT = 600       # passed to TabularAutoML as `timeout`

torch.set_num_threads(N_THREADS)
np.random.seed(RANDOM_STATE)

def f1_metric(y_true, y_pred, **kwargs):
    """Return the F1 score of `y_pred` binarised at a 0.5 threshold.

    Values strictly greater than 0.5 become class 1, everything else class 0.
    Extra keyword arguments are forwarded unchanged to `f1_score`.
    """
    hard_labels = (y_pred > 0.5).astype(int)
    return f1_score(y_true, hard_labels, **kwargs)

# Binary task optimised against the thresholded F1 defined above.
task = Task('binary', metric=f1_metric)

roles = {
    'target': 'Target',
}

lama = TabularAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=N_THREADS,
    reader_params={'n_jobs': N_THREADS, 'cv': N_FOLDS, 'random_state': RANDOM_STATE},
)

# Fit on the training rows only. Fitting on the whole of df_train would let
# the model see the X_val rows it is evaluated on afterwards, inflating the
# validation metrics. X_train keeps df_train's index labels, so .loc selects
# exactly the training rows together with their 'Target' column.
lama_train = df_train.loc[X_train.index]
lama.fit_predict(lama_train, roles=roles)

In [None]:
lama_preds_temp = lama.predict(X_val)

# Threshold the raw scores at 0.5 to obtain hard 0/1 labels. The scores are
# read from lama_preds_temp (looping over the still-empty lama_preds list
# would produce no labels at all).
# NOTE(review): relies on LightAutoML returning a dataset whose `.data` array
# holds the positive-class score in column 0 for a binary task — confirm.
lama_scores = np.asarray(lama_preds_temp.data)[:, 0]
lama_preds = (lama_scores > 0.5).astype(int).tolist()

print(classification_report(y_val, lama_preds))

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the actual classes and
# columns the predicted ones, which is what the axis labels below claim.
cm_lama = confusion_matrix(y_val, lama_preds)
# Normalise each row so every cell is the share of that actual class.
cm_normalizes_lama = np.round(cm_lama / cm_lama.sum(axis=1, keepdims=True), 2)

cm_plot_lama = sns.heatmap(cm_normalizes_lama, cmap='OrRd_r', annot=True)
# NOTE(review): tick labels assume the sorted class order is 0 = no seal, 1 = seal — confirm.
cm_plot_lama.yaxis.set_ticklabels(['No seal', 'Seal'])
cm_plot_lama.xaxis.set_ticklabels(['No seal', 'Seal'])

plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('LightAutoML Confusion Matrix')
plt.show()

In [None]:
class Gini(Metric):
    """Custom TabNet evaluation metric: Gini coefficient, 2 * AUC - 1, floored at 0."""

    def __init__(self):
        self._name = "gini"
        self._maximize = True

    def __call__(self, y_true, y_score):
        """Compute the Gini coefficient from the column-1 scores of `y_score`.

        ROC AUC is used because `y_score[:, 1]` is a continuous score;
        `f1_score` only accepts hard labels and raises on such input.
        """
        auc = roc_auc_score(y_true, y_score[:, 1])
        return max(2*auc - 1, 0.)


# pytorch-tabnet evaluates `eval_metric` and applies `patience` only on the
# entries of `eval_set`; without one, neither has any effect. A slice of the
# training split is held out for early stopping so that X_val stays untouched
# for the final evaluation.
X_fit, X_stop, y_fit, y_stop = train_test_split(
    X_train.values,
    y_train.values,
    test_size=0.2,
    random_state=42,
    stratify=y_train.values,  # keep both classes in the early-stopping slice
)

tab_net = TabNetClassifier()
tab_net.fit(
  X_fit, y_fit,
  eval_set=[(X_stop, y_stop)],
  eval_metric=[Gini],
  max_epochs=1000,
  patience=200,
)

In [None]:
# TabNet is fed the raw numpy values of the validation frame; print the
# per-class precision / recall / F1 table for its predictions.
X_val_values = X_val.values
tab_net_preds = tab_net.predict(X_val_values)
tab_net_report = classification_report(y_val, tab_net_preds)
print(tab_net_report)

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the actual classes and
# columns the predicted ones, which is what the axis labels below claim.
cm_tab_net = confusion_matrix(y_val, tab_net_preds)
# Normalise each row so every cell is the share of that actual class.
cm_normalizes_tab_net = np.round(cm_tab_net / cm_tab_net.sum(axis=1, keepdims=True), 2)

cm_plot_tab_net = sns.heatmap(cm_normalizes_tab_net, cmap='OrRd_r', annot=True)
# NOTE(review): tick labels assume the sorted class order is 0 = no seal, 1 = seal — confirm.
cm_plot_tab_net.yaxis.set_ticklabels(['No seal', 'Seal'])
cm_plot_tab_net.xaxis.set_ticklabels(['No seal', 'Seal'])

plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('TabNet Confusion Matrix')
plt.show()