In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [12]:
import math
import os
import sys
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
from joblib import Parallel, delayed
from scipy.integrate import nquad, quad, simps
from scipy.stats import entropy, gamma, multivariate_normal, norm
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import normalize
from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import FeatureImportanceForestClassifier, build_coleman_forest
from sktree.tree import DecisionTreeClassifier

In [3]:
# Keyword overrides for each numbered simulation setting.
# Setting "1" uses no overrides; "3" and "4" add an "ar" band with rho=0.5,
# and "2" and "4" set m_factor=1.
# NOTE(review): presumably forwarded to make_trunk_classification -- confirm.
SIMULATIONS = {
    "1": dict(),
    "2": dict(m_factor=1),
    "3": dict(band_type="ar", rho=0.5),
    "4": dict(band_type="ar", m_factor=1, rho=0.5),
}

In [6]:
# Shared forest size and random seed referenced by the model configurations.
n_estimators, seed = 500, 1234

# `n_jobs_trees` is the value stored under the "n_jobs" key of every model
# configuration; `n_jobs` itself is not referenced in the visible cells.
n_jobs_trees = 1
n_jobs = -1

In [7]:
# Settings per model name for the variants that run with bootstrap disabled.
# NOTE(review): presumably each inner dict is forwarded as keyword arguments
# to the forest / hypothesis-test constructor -- confirm against the caller.
NON_OOB_MODEL_NAMES = {
    "might-honestfraction05-og": {
        # Use the shared constant (as OOB_MODEL_NAMES does) rather than a
        # hard-coded 500, so changing `n_estimators` updates every model.
        "n_estimators": n_estimators,
        "random_state": seed,
        "honest_fraction": 0.5,
        "n_jobs": n_jobs_trees,
        "bootstrap": False,
        "stratify": True,
        # "max_samples" is not set for this variant.
        "permute_per_tree": False,
    },
}

# Settings per model name for the variants that run with bootstrap enabled.
# Only the "permute once" variant is currently active.
OOB_MODEL_NAMES = {
    "might-honestfraction05-bootstrap-permuteonce": dict(
        n_estimators=n_estimators,
        random_state=seed,
        honest_fraction=0.5,
        n_jobs=n_jobs_trees,
        bootstrap=True,
        stratify=True,
        max_samples=1.6,
        permute_per_tree=False,
    ),
    # Disabled variants. Each one matches the active entry above except for
    # "permute_per_tree": True and the honest fraction listed here:
    #   "might-honestfraction05-bootstrap":  "honest_fraction": 0.5
    #   "might-honestfraction025-bootstrap": "honest_fraction": 0.25
    #   "might-honestfraction075-bootstrap": "honest_fraction": 0.75
}

In [9]:
# Sample sizes scanned by the experiment: 2**8 through 2**11.
SAMP_SIZES = [2**exponent for exponent in range(8, 12)]
# Sample sizes used on the plot axis: twice 2**8 and twice 2**9.
SAMP_SIZES_plot = [2 ** (exponent + 1) for exponent in range(8, 10)]

# NOTE(review): DIM and P hold the same value (4096) -- confirm whether both
# names are still needed.
DIM = 2**12
P = 4096

REPS = range(100, 1000)
n_repeats = 200

SIM_TYPES = ["trunk", "trunk-overlap"]

# Root directory of the saved power-experiment outputs.
# SAVE_PATH = "/Volumes/Extreme Pro/cancer/output/power_exp/might-honestfraction05-bootstrap-maxsamples16-permutepertree"
SAVE_PATH = "/Volumes/Extreme Pro/cancer/output/power_exp/"

print(SAMP_SIZES)

[256, 512, 1024, 2048]


In [11]:
# fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(28, 14))
# Gather the stored MIGHT p-value from every saved result file into `results`,
# a column-name -> list mapping with one entry per file found on disk.
# Files are looked up as
#   SAVE_PATH / <model_name> / <sim_name> / might_<sim_name>_<n_samples>_<n_dims>_<idx>.npz
results = defaultdict(list)
n_dims = 4096


REPs = 20
ALPHA = 0.05
for model_name in OOB_MODEL_NAMES:
    # The loop variable is named `sim_name` because the path and the results
    # column below use that name (the original bound `sim` and then read an
    # undefined `sim_name`).
    for sim_name in SIM_TYPES:
        # NOTE(review): `power_MIGHT` and `pvals` are reset here but never
        # populated in this cell -- confirm where the power is computed.
        power_MIGHT = []
        for n_samples in SAMP_SIZES:
            pvals = []
            for idx in range(n_repeats):
                file_name = (
                    Path(SAVE_PATH)
                    / model_name
                    / sim_name
                    / f"might_{sim_name}_{n_samples}_{n_dims}_{idx}.npz"
                )

                # Repetitions without a saved file are skipped silently.
                if not file_name.exists():
                    continue

                # Close the archive as soon as the value has been read, so
                # the loop does not hold one open file handle per result.
                with np.load(file_name) as data:
                    pval = data["might_pvalue"]

                results["pvalue"].append(pval)
                results["n_samples"].append(n_samples)
                # results['n_dims'].append(n_dims)
                results["sim_name"].append(sim_name)
                results["model_name"].append(model_name)
                results["idx"].append(idx)

NameError: name 'Path' is not defined

In [236]:

# plt.rcParams["font.size"] = 10
# fig, axs = plt.subplots(1, 1, figsize=(15, 5))
# axs.plot(
#     SAMP_SIZES_plot,
#     power_MIGHT,
#     "blue",
#     marker="o",
#     markersize=2,
#     label="MIGHT",
#     linewidth=2,
#     alpha=0.5,
# )
# # axs[int(sim) - 1].plot(
# #     SAMP_SIZES_plot,
# #     power_MIGHT_Coleman,
# #     "green",
# #     marker="o",
# #     markersize=2,
# #     label="MIGHT_Coleman",
# #     linewidth=2,
# #     alpha=0.5,
# # )
# # axs[int(sim) - 1].plot(
# #     SAMP_SIZES_plot,
# #     power_MIGHT_Coleman_pertree,
# #     "red",
# #     marker="o",
# #     markersize=2,
# #     label="MIGHT_Coleman_Pertree",
# #     linewidth=2,
# #     alpha=0.5,
# # )
# axs.plot(
#     SAMP_SIZES_plot,
#     ALPHA * np.ones(len(SAMP_SIZES_plot)),
#     "black",
#     linestyle="dashed",
#     marker="o",
#     markersize=2,
#     label="Alpha",
#     linewidth=2,
#     alpha=0.5,
# )
# axs.set(
#     ylim=[0, 1.05],
#     xticks=[200, 500, 1000],
#     xlabel="Sample Size",
# )
# # axs[int(sim) - 1].set_ylim(0, 1.05)
# # axs[int(sim) - 1].set_yticks([])
# # axs[int(sim) - 1].set_xticks([])
# # axs[int(sim) - 1].set_yticks([0, 1])
# # axs[int(sim) - 1].set_xticks([200, 500, 1000])
# # axs[int(sim) - 1].set_xlabel("Sample Size")

# if sim == "1":
#     axs[int(sim) - 1].set_ylabel("Power at 4096 Dimensions")
#     axs[int(sim) - 1].set_title("TRUNK")
# if sim == "2":
#     axs.set_title("TRUNK-Overlap with MIGHT-withbootstrap-Coleman-permutepertree")

# if sim == "3":
# axs[int(sim) - 1].set_title("TRUNK-Banded")
# axs[int(sim) - 1].legend(bbox_to_anchor=(1.8, 1), loc="upper right")
# # np.savetxt("{}/null_dist_{}_{}_{}.txt".format(SAVE_PATH, sim, 'MIGHT',rep), obs_stats_samplesize, delimiter=",")
# # np.savetxt("{}/null_dist_{}_{}_{}.txt".format(SAVE_PATH, sim, 'MIGHT',rep), alt_stats_samplesize, delimiter=",")

[0.03482587064676617, 0.0845771144278607, 0.03980099502487562, 0.04477611940298507]


In [239]:
# Show the current contents of `power_MIGHT`.
# NOTE(review): the visible aggregation cell only resets this list and never
# appends to it -- confirm which cell populates it before trusting this output.
print(power_MIGHT)

[0.06965174129353234, 0.06965174129353234, 0.06467661691542288, 0.05970149253731343]
