In [1]:
#import os
from typing import Callable, List, Tuple, Dict

from imodels.util.data_util import get_clean_dataset
from joblib import Parallel, delayed
from tqdm import tqdm

import pandas as pd
import numpy as np
from _simulate_data import simulate_data
from _util import TreeBasedModel
from shap import TreeExplainer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from _run_single_replication import run_single_replication, _get_best_lambda
from _run_experiment import run_experiment
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from adhs import ShrinkageClassifier
import ipdb


## Find the optimal lambda for the simulated data



In [9]:
# Dimensions passed to simulate_data(N, p)
# (presumably number of samples and number of features -- TODO confirm).
# For a quick smoke test, temporarily use N, p = 100, 12 instead.
N, p = 1000, 50
X, y, rlvFtrs = simulate_data(N, p)
# Keep X as a plain NumPy array for the cells below.
X = X.to_numpy()

### Single Tree

In [4]:
# Candidate lambda values handed to run_single_replication.
lambdas = [0.0, 0.1, 1.0, 10.0, 25.0, 50.0, 100.0]

# Re-create the simulated data inside this cell so it does not depend on
# variables left over from another cell, then convert X to a NumPy array.
N, p = 1000, 50
X, y, rlvFtrs = simulate_data(N, p)
X = X.to_numpy()

# Single decision tree as base estimator. The call is the cell's last
# expression, so its return value is what the notebook displays.
run_single_replication(
    X,
    y,
    DecisionTreeClassifier(),  # base_estimator
    ["hs", "hs_entropy", "hs_permutation"],  # shrink_modes
    lambdas,  # lambdas
    "classification",  # problem_type
    roc_auc_score,  # score_fn
    individual_trees=True,
)

[{'shrink_mode': 'hs',
  'lambda': 100.0,
  'num_trees': 1,
  'ROC AUC': 0.5450321395775941},
 {'shrink_mode': 'hs_entropy',
  'lambda': 100.0,
  'num_trees': 1,
  'ROC AUC': 0.5537190082644627},
 {'shrink_mode': 'hs_permutation',
  'lambda': 100.0,
  'num_trees': 1,
  'ROC AUC': 0.5369880624426078}]

### Forest

In [9]:
# Candidate lambda values handed to run_single_replication.
lambdas = [0.0, 0.1, 1.0, 10.0, 25.0, 50.0, 100.0]

# Re-create the simulated data inside this cell and convert X to a NumPy array.
N, p = 1000, 50
X, y, rlvFtrs = simulate_data(N, p)
X = X.to_numpy()

# Random forest with 10 trees as base estimator; the return value is kept in
# `results` so a later cell can inspect individual entries.
results = run_single_replication(
    X,
    y,
    RandomForestClassifier(n_estimators=10),  # base_estimator
    ["hs", "hs_entropy", "hs_permutation"],  # shrink_modes
    lambdas,  # lambdas
    "classification",  # problem_type
    roc_auc_score,  # score_fn
    individual_trees=True,
)

In [14]:
# Show a single entry of the forest replication results (index 2).
# Only the last expression of a cell is displayed, so nothing else is
# evaluated here.
results[2]

{'shrink_mode': 'hs',
 'lambda': 0.1,
 'num_trees': 3,
 'ROC AUC': 0.45334558823529414}

In [None]:
# Candidate lambda values handed to run_single_replication.
lambdas = [0.0, 0.1, 1.0, 10.0, 25.0, 50.0, 100.0]

# Re-create the simulated data inside this cell and convert X to a NumPy array.
N, p = 1000, 50
X, y, rlvFtrs = simulate_data(N, p)
X = X.to_numpy()

# Same 10-tree random forest setup as the earlier forest cell, but with
# individual_trees switched off. The call is the cell's last expression, so
# its return value is what the notebook displays.
run_single_replication(
    X,
    y,
    RandomForestClassifier(n_estimators=10),  # base_estimator
    ["hs", "hs_entropy", "hs_permutation"],  # shrink_modes
    lambdas,  # lambdas
    "classification",  # problem_type
    roc_auc_score,  # score_fn
    individual_trees=False,
)

In [2]:
# Run the "classification_rf" experiment on the "heart" dataset with a
# default-configured random forest and ROC AUC as the score function.
# NOTE(review): the meaning of the positional arguments below is not visible
# in this notebook -- confirm against run_experiment's signature.
run_experiment(
    [("heart", "heart", "imodels")],  # dataset triple(s)
    RandomForestClassifier(),
    ["hs", "hs_entropy", "hs_permutation"],  # shrink modes
    [1, 10],  # presumably the lambda grid -- TODO confirm
    "classification",
    roc_auc_score,
    1,  # NOTE(review): undocumented magic number -- confirm meaning
    2,  # NOTE(review): undocumented magic number -- confirm meaning
    "output",  # presumably the output location -- TODO confirm
    "classification_rf",  # name that appears in the progress-bar label
)

Running classification_rf:   0%|          | 0/1 [00:00<?, ?it/s, dataset=heart]

fetching heart from imodels


Running classification_rf: 100%|██████████| 1/1 [01:58<00:00, 118.25s/it, dataset=heart]


In [3]:
# Run the "classification_rf" experiment on the "sim" dataset with a
# default-configured random forest and ROC AUC as the score function.
# NOTE(review): the meaning of the positional arguments below is not visible
# in this notebook -- confirm against run_experiment's signature.
run_experiment(
    [("sim", "sim", "sim")],  # dataset triple(s)
    RandomForestClassifier(),
    ["hs", "hs_entropy", "hs_permutation"],  # shrink modes
    [1, 10],  # presumably the lambda grid -- TODO confirm
    "classification",
    roc_auc_score,
    1,  # NOTE(review): undocumented magic number -- confirm meaning
    2,  # NOTE(review): undocumented magic number -- confirm meaning
    "output",  # presumably the output location -- TODO confirm
    "classification_rf",  # name that appears in the progress-bar label
)

Running classification_rf: 100%|██████████| 1/1 [00:53<00:00, 53.57s/it, dataset=sim]
