In [None]:
from pathlib import Path

import joblib
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from tqdm.auto import tqdm

from vacation.data import GalaxyDataset, CLASS_NAMES
from vacation.evaluation.visualizations import (
    plot_example_matrix,
    plot_confusion_matrix,
    plot_hyperparameter_importance,
)
from vacation.model.random_forest import hog_features

In [None]:
# Serialized classifier to evaluate in this notebook.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
MODEL_PATH = "../../best_models/rf_optimized.sav"

rf = joblib.load(MODEL_PATH)
# Bare expression: show the estimator's repr as the cell output.
rf

In [None]:
# Test split used for the evaluation (judging by the file name: processed Galaxy10 DECaLS).
# NOTE(review): absolute, user-specific path — the notebook only runs where this file exists.
TEST_SET_PATH = "/scratch/tgross/vacation_data/reduced_size/Galaxy10_DECals_proc_test.h5"

dataset = GalaxyDataset(TEST_SET_PATH, device="cpu", cache_loaded=True)

# hog_features returns a 3-tuple; only its first element (the feature matrix) is used here.
# The features are extracted twice: once with augmented=False and once with augmented=True.
features_no_preprocess, _, _ = hog_features(dataset, augmented=False)
features, _, _ = hog_features(dataset, augmented=True)

In [None]:
# Predict class labels for both feature variants (augmented=False first, then augmented=True).
y_pred_no_preprocess, y_pred = (
    rf.predict(features_no_preprocess),
    rf.predict(features),
)

In [None]:
# Ground-truth labels of the loaded test set, used as reference for every metric below.
# Later cells call `.cpu().numpy()` on this value and pass it alongside torch tensors,
# so it is presumably a torch.Tensor — TODO confirm against GalaxyDataset.get_labels.
y_true = dataset.get_labels()

In [None]:
# Text report of per-class metrics for the predictions made on the augmented=False features.
print("REPORT - WITHOUT PREPROCESS")
report_no_preprocess = classification_report(
    y_true.cpu().numpy(),  # scikit-learn expects array-likes, so convert the label tensor
    y_pred_no_preprocess,
)
print(report_no_preprocess)

In [None]:
# Text report of per-class metrics for the predictions made on the augmented=True features.
print("REPORT - WITH PREPROCESS")
report_preprocess = classification_report(
    y_true.cpu().numpy(),  # scikit-learn expects array-likes, so convert the label tensor
    y_pred,
)
print(report_preprocess)

In [None]:
# Normalized confusion matrix for the predictions made on the augmented=False features.
print("CONFUSION MATRIX - WITHOUT PREPROCESS")
# The predictions are a NumPy array; hand them to the plotting helper as a tensor, like y_true.
y_pred_no_preprocess_tensor = torch.from_numpy(y_pred_no_preprocess)
plot_confusion_matrix(y_true=y_true, y_pred=y_pred_no_preprocess_tensor, normalize=True)

In [None]:
# Normalized confusion matrix for the predictions made on the augmented=True features.
print("CONFUSION MATRIX - WITH PREPROCESS")
# The predictions are a NumPy array; hand them to the plotting helper as a tensor, like y_true.
y_pred_tensor = torch.from_numpy(y_pred)
plot_confusion_matrix(
    y_true=y_true,
    y_pred=y_pred_tensor,
    normalize=True,
)

In [None]:
# Grid of example test images with the model's predictions, also written to disk.
EXAMPLES_PATH = "./build/examples.png"

# Nothing else in this notebook creates the output directory, so make sure it exists
# before the figure is saved; on a fresh checkout the save could otherwise fail.
Path(EXAMPLES_PATH).parent.mkdir(parents=True, exist_ok=True)

plot_example_matrix(
    dataset=dataset,
    y_pred=torch.from_numpy(y_pred),  # predictions are a NumPy array; pass them as a tensor
    layout=(3, 3),
    figsize=(7, 7),
    seed=42,  # fixed seed passed to the helper so re-runs give the same figure — TODO confirm
    save_path=EXAMPLES_PATH,
)