In [1]:
import sys

from loader import load
from reduce import autoencode, pca, lda, cluster_reduce
from preprocess import default_preprocess
from process import DTree, LGBM, RForest, MLP
from fusion import fusion
from sklearn.model_selection import train_test_split

# Registry of feature preprocessing steps, keyed by a short name.
# Each value is called with the feature data X and returns the data to use downstream.
PREPROCESSORS = {
    'default': default_preprocess,
    'none': lambda X: X,    # identity: hand the features back untouched
}

# Tunables for the reductions below, named once so the two entries that must
# agree on the reduced size cannot drift apart. Both are set arbitrarily for now.
PCA_N_COMPONENTS = 0.95         # forwarded to pca() as n_components
REDUCED_N_COMPONENTS = 53       # the 53 here is what PCA usually chooses at 95%

# Registry of dimensionality-reduction methods, keyed by a short name.
# Every value is called as method(X, Y). `lda` receives both arguments directly;
# the lambdas accept the second argument only to share that call shape and ignore it.
DIM_REDUCTIONS = {
    'lda': lda,
    'pca': lambda X, _: pca(X, n_components=PCA_N_COMPONENTS),
    'autoencoder': lambda X, _: autoencode(X, n_components=REDUCED_N_COMPONENTS, save=True),
    'hcluster': lambda X, _: cluster_reduce(X, n_components=REDUCED_N_COMPONENTS, linkage='ward'),
}

# Registry of classifiers, keyed by a short name.
# get_predictions calls each one as classifier(X_reduced, Y, test_size=...) and
# unpacks the return value as (auc, y_pred).
PROCESSORS = {
    'lgbm': LGBM,
    'randomforest': RForest,
    'mlp': MLP,
}

[AUTOENCODER] Num GPUs Available:  0


In [None]:
def get_predictions(X, Y, test_size, dim_reduce_methods, classifiers):
    """Run every classifier on every dimensionality-reduced view of the data.

    Each reduction is computed once and shared by all classifiers, rather than
    being recomputed for every classifier.

    Args:
        X: Feature data handed to each reduction method.
        Y: Labels, handed to each reduction method and to each classifier.
        test_size: Forwarded to each classifier as the ``test_size`` keyword.
        dim_reduce_methods: Mapping of name -> callable ``f(X, Y)`` that
            returns the reduced features.
        classifiers: Mapping of name -> callable
            ``f(X_reduced, Y, test_size=...)`` that returns ``(auc, y_pred)``.

    Returns:
        dict mapping each classifier name to a list of
        ``(dr_name, auc, y_pred)`` tuples, one per reduction method, in the
        iteration order of ``dim_reduce_methods``.
    """
    # NOTE(review): every reduction is fitted on the full X (and Y, for methods
    # that use it) before the classifier receives test_size. If the classifiers
    # hold out data internally, this may leak held-out samples into the
    # reduction -- confirm against the process/reduce modules.
    reduced_by_method = {}   # dr_name -> reduced features, filled on first use
    results = {}

    for classifier_name, classifier in classifiers.items():
        print(f"[PROCESS] Running {classifier_name}")
        classifier_results = []

        for dr_name, dr_method in dim_reduce_methods.items():
            # Printed on every pass (even on a cache hit) because it is the only
            # line that labels which reduction the following AUC belongs to.
            print(f"[DIM REDUCE] Applying {dr_name}")
            if dr_name not in reduced_by_method:
                reduced_by_method[dr_name] = dr_method(X, Y)
            # The same object is shared across classifiers; this presumes the
            # classifiers do not modify their input in place -- TODO confirm.
            X_reduced = reduced_by_method[dr_name]

            auc, y_pred = classifier(X_reduced, Y, test_size=test_size)
            print(f"AUC: {auc}")
            classifier_results.append((dr_name, auc, y_pred))

        results[classifier_name] = classifier_results

    return results

In [None]:
# Value forwarded to every classifier as its test_size argument; also reused
# by the later cell that re-creates the split to obtain y_val.
test_size = 0.2

# Load the data and preprocess the features before any reduction is applied.
X, Y = load()
X = default_preprocess(X)

# Score every (classifier, dimensionality-reduction) pair.
# `results` maps classifier name -> list of (dr_name, auc, y_pred) tuples.
results = get_predictions(
    X,
    Y,
    test_size,
    dim_reduce_methods=DIM_REDUCTIONS,
    classifiers=PROCESSORS,
)





In [None]:
# Re-create the split only to obtain the held-out labels (the last returned item).
# NOTE(review): this presumes the classifiers in the process module split with
# the same test_size and random_state=42, so that y_val lines up with their
# predictions -- confirm.
*_, y_val = train_test_split(X, Y, test_size=test_size, random_state=42)

# Fusing same classifier, different DRs
for classifier_name, classifier_results in results.items():
    print(f"\n[FUSION] Fusing predictions for classifier: {classifier_name}")
    # Each entry is (dr_name, auc, y_pred); only the predictions are fused.
    prob_predictions = [y_pred for _, _, y_pred in classifier_results]
    auc_fused = fusion(prob_predictions, y_val)
    print(f"Fused AUC for {classifier_name}: {auc_fused:.4f}")

In [None]:
# Fusing different classifiers, same DR
for dr_name in DIM_REDUCTIONS:
    print(f"\n[FUSION] Fusing predictions for DR method: {dr_name}")
    # Each stored entry is (dr_name, auc, y_pred). Collect the full prediction
    # array of every classifier for this reduction. The tuple is already
    # unpacked here, so the predictions must not be indexed again -- doing so
    # would pass a single element of each array to fusion().
    prob_predictions = [
        y_pred
        for classifier_results in results.values()
        for dr, _, y_pred in classifier_results
        if dr == dr_name
    ]
    auc_fused = fusion(prob_predictions, y_val)
    print(f"Fused AUC for {dr_name}: {auc_fused:.4f}")