In [None]:
import pathlib
from joblib import load
import json
import sys
import os

import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
import numpy as np

sys.path.append("../")
from src.classification.classification import show_params, prepare_data_for_training
from src.data_preparation.preprocessing import run_preprocessing
from src.data_preparation.features import extract_features, make_final_df

In [None]:
# Directory under which each experiment folder lives.
EXPERIMENTS_PATH = pathlib.Path("../experiments")
# Despite its name, `date` is the full experiment folder name (timestamp
# followed by a configuration tag), not only a date.
date = "01_23_12_53_06_svm_nclusters_2000_no_binary"
# Folder from which the model, clustering model, features and params are loaded.
experiment_path = EXPERIMENTS_PATH.joinpath(date)

In [None]:
def load_model(experiment_path):
    """Load the object stored in ``model.joblib`` inside an experiment folder.

    Args:
        experiment_path: Path object supporting the ``/`` operator
            (e.g. ``pathlib.Path``) that points at the experiment directory.

    Returns:
        Whatever ``joblib.load`` deserialises from ``model.joblib``.
    """
    model_file = experiment_path / "model.joblib"
    return load(model_file)

def load_clustering_model(experiment_path):
    """Load the object stored in ``clustering_model.joblib`` inside an experiment folder.

    Args:
        experiment_path: Path object supporting the ``/`` operator
            (e.g. ``pathlib.Path``) that points at the experiment directory.

    Returns:
        Whatever ``joblib.load`` deserialises from ``clustering_model.joblib``.
    """
    clustering_file = experiment_path / "clustering_model.joblib"
    return load(clustering_file)

def load_final_df(experiment_path):
    """Read ``final_df.csv`` from an experiment folder into a DataFrame.

    Args:
        experiment_path: Path object supporting the ``/`` operator
            (e.g. ``pathlib.Path``) that points at the experiment directory.

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV with default
        ``read_csv`` settings.
    """
    csv_file = experiment_path / "final_df.csv"
    return pd.read_csv(csv_file)

def load_params(experiment_path):
    """Read ``params.json`` from an experiment folder.

    Args:
        experiment_path: Path object supporting the ``/`` operator
            (e.g. ``pathlib.Path``) that points at the experiment directory.

    Returns:
        The parsed JSON content (a ``dict`` for a JSON object).

    Raises:
        FileNotFoundError: If ``params.json`` does not exist in the folder.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; pass the encoding explicitly so the
    # result does not depend on the platform's locale encoding.
    with open(experiment_path / "params.json", encoding="utf-8") as file:
        return json.load(file)

In [None]:
# Load everything saved for the selected experiment: the model, the
# clustering model, the stored feature table and the run parameters.
model = load_model(experiment_path)
clustering_model = load_clustering_model(experiment_path)
final_df = load_final_df(experiment_path)
params = load_params(experiment_path)

In [None]:
# Show the parameters loaded from params.json using the project's helper.
show_params(params)

In [None]:
# Display the feature table loaded from final_df.csv.
final_df

# Metrics on Kaggle dataset

In [None]:
# Build model inputs from the stored feature table; the "path" column is
# dropped before the table is handed to the project's preparation helper.
X_scaled, y = prepare_data_for_training(
    final_df.drop(columns="path"),
    binary=params["binary"],
)
# Hold out one third of the rows with a fixed seed.
# NOTE(review): this is only a true held-out set if training used the same
# test_size / random_state on the same data — confirm against the training code.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=1 / 3,
    random_state=42,
)

In [None]:
# Predict labels for the held-out third of the data with the loaded model.
y_pred = model.predict(X_test)

In [None]:
def show_matrix(y_pred, y):
    """Plot and return the cat/dog confusion matrix.

    Note the argument order: predictions first, ground truth second.

    Args:
        y_pred: Predicted labels.
        y: True labels.

    Returns:
        The 2x2 matrix from ``sklearn.metrics.confusion_matrix`` with class
        order ``["cat", "dog"]``; following scikit-learn's convention, rows
        are true classes and columns are predicted classes.
    """
    class_names = ["cat", "dog"]
    matrix = confusion_matrix(y, y_pred, labels=class_names)
    display = ConfusionMatrixDisplay(matrix, display_labels=class_names)
    display.plot()
    return matrix

In [None]:
def show_metrics(confusion_matrix_):
    """Print accuracy and per-class precision/recall for a cat/dog confusion matrix.

    The matrix is expected in scikit-learn's layout with class order
    ``["cat", "dog"]``: rows are true classes, columns are predicted classes
    (this is how ``show_matrix`` builds it). Therefore:

    * precision of a class = diagonal cell / its *column* sum
      (of everything predicted as that class, how much was right);
    * recall of a class = diagonal cell / its *row* sum
      (of everything that truly is that class, how much was found).

    Args:
        confusion_matrix_: 2x2 array indexable as ``m[i, j]`` (e.g. a NumPy array).

    Returns:
        None. The metrics are printed as percentages.
    """
    # Name the four cells as <true class>_as_<predicted class>.
    cat_as_cat = confusion_matrix_[0, 0]
    cat_as_dog = confusion_matrix_[0, 1]
    dog_as_cat = confusion_matrix_[1, 0]
    dog_as_dog = confusion_matrix_[1, 1]

    accuracy = np.trace(confusion_matrix_) / np.sum(confusion_matrix_)

    # Precision divides by the column total (all samples predicted as the class).
    cat_precision = cat_as_cat / (cat_as_cat + dog_as_cat)
    dog_precision = dog_as_dog / (dog_as_dog + cat_as_dog)

    # Recall divides by the row total (all samples that truly are the class).
    cat_recall = cat_as_cat / (cat_as_cat + cat_as_dog)
    dog_recall = dog_as_dog / (dog_as_dog + dog_as_cat)

    # Unweighted mean of the two class precisions (macro average); this is not
    # the area-under-PR-curve metric that is also called "average precision".
    average_precision = (cat_precision + dog_precision) / 2

    print(
        f"""\n
        Accuracy: {accuracy*100:.2f} % 

        Cat precision: {cat_precision*100:.2f} %
        Cat recall: {cat_recall*100:.2f} %

        Dog precision: {dog_precision*100:.2f} %
        Dog recall: {dog_recall*100:.2f} %

        Average precision: {average_precision*100:.2f} %
        """
    )

In [None]:
# Plot the confusion matrix for the held-out split and keep the raw counts.
# show_matrix takes predictions first, true labels second.
confusion_matrix_ = show_matrix(y_pred, y_test)

In [None]:
# Print accuracy and per-class precision/recall for the held-out split.
show_metrics(confusion_matrix_)

# Metrics on PASCAL dataset

In [None]:
# Show the feature table loaded from final_df.csv again (the same frame that
# was displayed earlier in the notebook).
final_df

In [None]:
# Locations of the PASCAL evaluation data, relative to the notebook folder.
pascal_path = pathlib.Path("../data/pascal")
images_path = pascal_path.joinpath("images")
labels_path = pascal_path.joinpath("labels.csv")

In [None]:
# Run the project's preprocessing over the PASCAL image folder.
# The last argument is the number of entries in that folder; os.listdir counts
# every entry, not only image files.
# NOTE(review): the meaning of the positional `1` and of "MSER" (presumably the
# feature detector to use) is defined by run_preprocessing — confirm there.
descriptors_dict = run_preprocessing(images_path, 1, "MSER", len(os.listdir(images_path)))

In [None]:
# Build features from the PASCAL descriptors with the clustering model that
# was loaded from the experiment folder (it is not refit on PASCAL here).
features = extract_features(descriptors_dict, clustering_model)

In [None]:
# Read the PASCAL labels file and combine it with the extracted features
# through the project's make_final_df helper.
labels = pd.read_csv(labels_path)
pascal_df = make_final_df(features, labels)

In [None]:
# Display the PASCAL table built by make_final_df.
pascal_df

In [None]:
# Prepare the PASCAL table the same way as the Kaggle one: drop the "path"
# column and pass the rest to the project's preparation helper.
# NOTE(review): the Kaggle output of this helper is named X_scaled; if the
# helper fits its scaling on the data it receives, PASCAL is scaled with
# different statistics than the data the model was trained on — confirm.
X_pascal, y_pascal = prepare_data_for_training(
    pascal_df.drop(columns="path"),
    binary=params["binary"],
)

In [None]:
# Predict PASCAL labels with the model loaded from the experiment folder.
y_pascal_pred = model.predict(X_pascal)

In [None]:
# Plot the PASCAL confusion matrix and keep the raw counts.
# show_matrix takes predictions first, true labels second.
confusion_matrix_pascal = show_matrix(y_pascal_pred, y_pascal)

In [None]:
# Print accuracy and per-class precision/recall for the PASCAL predictions.
show_metrics(confusion_matrix_pascal)