In [3]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from Clustering import Clustering
from DataNoiseAdder import DataNoiseAdder
from DatasetCorruptor import DatasetCorruptor
from DecisionTreeEnsemble import DecisionTreeEnsemble
from SyntheticDataGenerator import SyntheticDataGenerator
from EnsembleDiversity import EnsembleDiversity
from EnsembleMetrics import EnsembleMetrics

from utils import get_dataset, get_ensemble_preds_from_models, get_precision_recall_auc, auprc_threshs
from utils import plot_precision_recall, plot_aroc_at_curve, fitness_scatter
from utils import compute_metrics_in_buckets, flatten_df, compute_cluster_metrics

warnings.filterwarnings('ignore')

### Experiment Args

In [4]:
# Experiment configuration consumed by the cells below.
#
# The dataset directory used to be pinned to one machine's absolute path. It can
# now be overridden with the OOD_ENSEMBLES_DATASET_PATH environment variable; when
# that variable is unset or empty, the original location is used unchanged.
DEFAULT_DATASET_PATH = "/Users/scottmerrill/Documents/UNC/Research/OOD-Ensembles/datasets"

args = {
    'ntrls': 10,          # presumably the number of trials -- confirm against the experiment loop
    'ensemble_size': 10,
    'dataset_path': os.environ.get('OOD_ENSEMBLES_DATASET_PATH') or DEFAULT_DATASET_PATH,
    'dataset_name': 'german',

    # Decision Tree/Model Pool Params
    # (the first six are passed positionally to DecisionTreeEnsemble, in this order)
    'num_classifiers': 100,
    'feature_fraction': 0.5,
    'data_fraction': 0.8,
    'max_depth': 10,
    'min_samples_leaf': 5,
    'random_state': 1,
    'clusters_list': [3],
    'shift_feature_count': 5,
}

# NOTE(review): looks like the cut-offs at which AUC-style metrics are evaluated --
# confirm against the helpers in utils.
AUCTHRESHS = np.array([0.1, 0.2, 0.3, 0.4, 1.0])

In [5]:
# Load the configured dataset; get_dataset returns six objects that are unpacked
# into a training split plus two validation splits (suffixes `_id` and `_ood`).
dataset_splits = get_dataset(args['dataset_path'], args['dataset_name'])
(x_train, y_train,
 x_val_id, y_val_id,
 x_val_ood, y_val_ood) = dataset_splits

# Second axis of the training inputs.
num_features = x_train.shape[1]

In [6]:
# Instantiate the DecisionTreeEnsemble model pool. The constructor receives its
# settings positionally, so the key order here is the argument order.
pool_arg_names = (
    'num_classifiers',
    'feature_fraction',
    'data_fraction',
    'max_depth',
    'min_samples_leaf',
    'random_state',
)
model_pool = DecisionTreeEnsemble(*(args[name] for name in pool_arg_names))

# Train the pool on the training split.
model_pool.train(x_train, y_train)

In [10]:
# Produce three synthetic datasets from the training split, one per generator
# method, each requested with as many samples as there are training rows.
generator = SyntheticDataGenerator(x_train, y_train)
n_synthetic = x_train.shape[0]

interp_x, interp_y = generator.interpolate(n_synthetic)
gmm_x, gmm_y = generator.gaussian_mixture(n_synthetic)
dt_x, dt_y = generator.decision_tree(n_synthetic)

In [12]:
# Sanity check: dimensions of the inputs returned by generator.decision_tree.
dt_x.shape

(790, 58)

In [13]:
# Sanity check: dimensions of the targets returned by generator.decision_tree;
# the leading dimension should match dt_x.
dt_y.shape

(790,)