In [1]:
import warnings
warnings.simplefilter('ignore', FutureWarning)
import numpy as np
import scipy as scp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import (permutation_test_score, learning_curve, LeaveOneGroupOut,
                                     KFold, cross_val_score, cross_val_predict, cross_validate,
                                     train_test_split)
from sklearn.utils import parallel_backend
from sklearn.base import clone
from sklearn import datasets
from joblib.parallel import Parallel, delayed
import pickle
from permutation_helpers import random_data_gen, post_hoc_permutation, simulate
from dask.distributed import progress, Client



## Setup

In [None]:
## IRIS dataset
# X_iris, y_iris = datasets.load_iris(return_X_y=True)
# mask = y_iris < 2
# X_iris, y_iris = X_iris[mask], y_iris[mask]

In [None]:
X = np.load("random_data_X.npy")
y = np.load("random_data_y.npy")

In [None]:
estimator = LogisticRegressionCV(class_weight='balanced', Cs=4)
cv = KFold(n_splits=3, shuffle=True, random_state=0)
n_permutations = 5000

## Pre-training permutation

In [None]:
score, null, p = permutation_test_score(estimator=estimator,
                                        X=X, y=y,
                                        cv = cv,
                                        n_permutations=n_permutations,
                                        scoring='roc_auc',
                                        verbose=True, n_jobs=-1
                                       )

In [None]:
null

## Post-training permutation

In [None]:
y_pred = cross_val_predict(clone(estimator), X, y, cv=cv, method='predict_proba')
# post_score, post_null, post_p = post_hoc_permutation(y, y_pred[:, 1], n_jobs=-1, verbose=True, n_permutations=n_permutations)

In [None]:
cross_val_score(clone(estimator), X, y, cv=cv, scoring="roc_auc", verbose=1)

Above permutes across CV folds. If we want to permute within CV folds we need to use something like the below and run the permutation function each time, then aggregate

In [None]:
from permutation_helpers import post_hoc_permutation_cv

## Running simulations

In [3]:
@simulate(parameter_range=np.linspace(0, 5), n_sim=10, client=Client())
def scale(param=None, x=None, intercept=None, seed=None):
    # if np.random.randn()>0:
    #     return None
    return param*x + intercept

Running 10 simulations
using dask client at http://127.0.0.1:8787/status


In [4]:
# try:
#     client.shutdown()
#     client = Client()
# except:
#     client = Client()
result = scale(x=np.array([1, 2, 4]), intercept=2)
result

Running 10 simulations
Using dask client at http://127.0.0.1:8787/status
500 parallel jobs


([<Future: pending, key: scale-5161944533a84565037563cef54d136e>,
  <Future: pending, key: scale-41e5df86842a69c850466cf2d272c8b1>,
  <Future: pending, key: scale-35defe9616b5941a8b2f79f1f963ecbe>,
  <Future: pending, key: scale-b89a8ab1bf32d2057a460ea187d36eea>,
  <Future: pending, key: scale-68729f502bddf463e135e312eeebfce1>,
  <Future: pending, key: scale-5b655d7839af9f601342441efe6980be>,
  <Future: pending, key: scale-be3e2127192820188cb0f780689d7171>,
  <Future: pending, key: scale-57da071cff37646d151f621b2e47b451>,
  <Future: pending, key: scale-eef3efac0adc9bb1f0524a369c678fb3>,
  <Future: pending, key: scale-bafd0731cf79175ca3863c0766b9acfe>,
  <Future: pending, key: scale-b6d01cee9f491ad0e46b97a8d3951809>,
  <Future: pending, key: scale-85ffaa4655c22402321c0eab112802d3>,
  <Future: pending, key: scale-ae9be0ea083d3f291921a07c3e784dd7>,
  <Future: pending, key: scale-dd540a8d8ce9fc42ecfdc79b5f8b5bb7>,
  <Future: pending, key: scale-cdcf82b7bcc4619609bc403e72ee34a3>,
  <Future:

In [6]:
result[1](result[0])

{0.0: {0: array([2., 2., 2.]),
  1: array([2., 2., 2.]),
  2: array([2., 2., 2.]),
  3: array([2., 2., 2.]),
  4: array([2., 2., 2.]),
  5: array([2., 2., 2.]),
  6: array([2., 2., 2.]),
  7: array([2., 2., 2.]),
  8: array([2., 2., 2.]),
  9: array([2., 2., 2.])},
 0.10204081632653061: {0: array([2.10204082, 2.20408163, 2.40816327]),
  1: array([2.10204082, 2.20408163, 2.40816327]),
  2: array([2.10204082, 2.20408163, 2.40816327]),
  3: array([2.10204082, 2.20408163, 2.40816327]),
  4: array([2.10204082, 2.20408163, 2.40816327]),
  5: array([2.10204082, 2.20408163, 2.40816327]),
  6: array([2.10204082, 2.20408163, 2.40816327]),
  7: array([2.10204082, 2.20408163, 2.40816327]),
  8: array([2.10204082, 2.20408163, 2.40816327]),
  9: array([2.10204082, 2.20408163, 2.40816327])},
 0.20408163265306123: {0: array([2.20408163, 2.40816327, 2.81632653]),
  1: array([2.20408163, 2.40816327, 2.81632653]),
  2: array([2.20408163, 2.40816327, 2.81632653]),
  3: array([2.20408163, 2.40816327, 2.816

In [None]:
with open("simulation_results.pkl", "wb") as f:
    pickle.dump(result, f)

## Simulate permutations with different params

In [2]:
import cmldask.CMLDask as da
rhino_client = da.new_dask_client(
    job_name="simulations",
    memory_per_job="2.5GB",
    max_n_jobs=150, threads_per_job=10, 
    adapt=False,
    local_directory="/home1/jrudoler/dask-worker-space",
    log_directory="/home1/jrudoler/logs/",
    resource_spec="h_vmem=2.5G,s_vmem=2.5G"
    )

Unique port for jrudoler is 51360
{'dashboard_address': ':51360'}
To view the dashboard, run: 
`ssh -fN jrudoler@rhino2.psych.upenn.edu -L 8000:192.168.86.146:51360` in your local computer's terminal (NOT rhino) 
and then navigate to localhost:8000 in your browser
You've chosen to scale your cluster manually. This means workers will continue to run until you manually shut them down. Remember to run `client.shutdown` after you're done computing and no longer need to reserve resources.


In [3]:
"hi"

'hi'

In [20]:
rhino_client.shutdown()

In [3]:
rhino_client.cluster.scale(50)

In [4]:
@simulate(parameter_range=np.logspace(2, 5, 5).astype(int), n_sim=500)
def simulate_samplesize(param=None, seed=None):
    X, y = random_data_gen(n_samples=param, n_feats=10, maha=0., ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
#     estimator = LogisticRegressionCV(class_weight='balanced', Cs=6)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
#     cv = KFold(n_splits=5, shuffle=True, random_state=0)
    n_permutations = 5000
    estimator.fit(X=X_train, y=y_train)
    y_pred = estimator.predict_proba(X_test)[:, 1]
    score, permutation_scores, pvalue = post_hoc_permutation(
        y_true=y_test, y_score=y_pred,
        n_permutations=n_permutations, n_jobs=-1,
        )
    return score, permutation_scores, pvalue

@simulate(parameter_range=np.linspace(0., 1.5, 5), n_sim=500)
def simulate_maha(param=None, seed=None):
    X, y = random_data_gen(n_samples=1000, n_feats=10, maha=param, ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
#     estimator = LogisticRegressionCV(class_weight='balanced', Cs=6)
#     estimator = LogisticRegression(class_weight='balanced', C=1e-3)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
#     cv = KFold(n_splits=5, shuffle=True, random_state=0)
    n_permutations = 5000
    estimator.fit(X=X_train, y=y_train)
    y_pred = estimator.predict_proba(X_test)[:, 1]
    score, permutation_scores, pvalue = post_hoc_permutation(
        y_true=y_test, y_score=y_pred,
        n_permutations=n_permutations, n_jobs=-1,
        )
    return score, permutation_scores, pvalue

@simulate(parameter_range=np.logspace(1, 10, 5, base=2).astype(int), n_sim=500)
def simulate_nfeats(param=None, seed=None):
    X, y = random_data_gen(n_samples=1000, n_feats=param, maha=0., ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
#     estimator = LogisticRegressionCV(class_weight='balanced', Cs=6)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
#     cv = KFold(n_splits=5, shuffle=True, random_state=0)
    n_permutations = 5000
    estimator.fit(X=X_train, y=y_train)
    y_pred = estimator.predict_proba(X_test)[:, 1]
    score, permutation_scores, pvalue = post_hoc_permutation(
        y_true=y_test, y_score=y_pred,
        n_permutations=n_permutations, n_jobs=-1,
        )
    return score, permutation_scores, pvalue

@simulate(parameter_range=np.linspace(.1, .9, 5), n_sim=500)
def simulate_ratio(param=None, seed=None):
    X, y = random_data_gen(n_samples=1000, n_feats=10, maha=0., ratio=param, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
#     estimator = LogisticRegressionCV(class_weight='balanced', Cs=6)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
#     cv = KFold(n_splits=5, shuffle=True, random_state=0)
    n_permutations = 5000
    estimator.fit(X=X_train, y=y_train)
    y_pred = estimator.predict_proba(X_test)[:, 1]
    score, permutation_scores, pvalue = post_hoc_permutation(
        y_true=y_test, y_score=y_pred,
        n_permutations=n_permutations, n_jobs=-1,
        )
    return score, permutation_scores, pvalue
    
    

Running 500 simulations
using dask client at http://192.168.86.146:51360/status
Running 500 simulations
using dask client at http://192.168.86.146:51360/status
Running 500 simulations
using dask client at http://192.168.86.146:51360/status
Running 500 simulations
using dask client at http://192.168.86.146:51360/status


In [5]:
samplesize_futures, samplesize_gather = simulate_samplesize()
nfeats_futures, nfeats_gather = simulate_nfeats()
maha_futures, maha_gather = simulate_maha()
ratio_futures, ratio_gather = simulate_ratio()

Running 500 simulations
Using dask client at http://192.168.86.146:51360/status
2500 parallel jobs
Running 500 simulations
Using dask client at http://192.168.86.146:51360/status
2500 parallel jobs
Running 500 simulations
Using dask client at http://192.168.86.146:51360/status
2500 parallel jobs
Running 500 simulations
Using dask client at http://192.168.86.146:51360/status
2500 parallel jobs


In [34]:
samplesize_result = samplesize_gather(samplesize_futures) 
df_result = pd.DataFrame(samplesize_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_samplesize_post.pkl")

In [35]:
nfeats_result = nfeats_gather(nfeats_futures)
df_result = pd.DataFrame(nfeats_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_nfeats_post.pkl")

In [36]:
maha_result = maha_gather(maha_futures)
df_result = pd.DataFrame(maha_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_maha_post.pkl")

In [37]:
ratio_result = ratio_gather(ratio_futures)
df_result = pd.DataFrame(ratio_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_ratio_post.pkl")

In [7]:
rhino_client.shutdown()

2023-01-22 16:53:00,147 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


## Compare with original permutation test

In [5]:
def _train_score(estimator, X_train, X_test, y_train, y_test, 
                score_func, shuffle_labels=False):
    if shuffle_labels:
        indices = np.random.default_rng().permutation(len(y_train))
        y_train = y_train[indices]
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict_proba(X_test)[:,1]
    score = score_func(y_true=y_test, y_score=y_pred)
    return score



def pre_training_permutation(estimator, X_train, X_test, y_train, y_test,
                            n_permutations, score_func, verbose=False, n_jobs=None):
    score = _train_score(
        clone(estimator), X_train, X_test, y_train, y_test, score_func, shuffle_labels=False
    )
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_train_score)(
            clone(estimator),
            X_train, X_test, y_train, y_test,
            score_func,
            shuffle_labels=True,
        )
        for _ in range(n_permutations)
    )
    permutation_scores = np.array(permutation_scores)
    pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)
    return score, permutation_scores, pvalue


In [6]:
@simulate(parameter_range=np.linspace(0., 1.5, 5), n_sim=100)
def simulate_maha_pre(param=None, seed=None):
    X, y = random_data_gen(n_samples=1000, n_feats=10, maha=param, ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
    n_permutations = 5000
    score, null, p = pre_training_permutation(
        estimator,
        X_train, X_test, y_train, y_test,
        n_permutations=n_permutations,
        score_func=roc_auc_score,
        verbose=True, n_jobs=-1
    )
    return score, null, p

@simulate(parameter_range=np.logspace(1, 10, 5, base=2).astype(int), n_sim=100)
def simulate_nfeats_pre(param=None, seed=None):
    X, y = random_data_gen(n_samples=1000, n_feats=param, maha=0., ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
    n_permutations = 5000
    score, null, p = pre_training_permutation(
        estimator,
        X_train, X_test, y_train, y_test,
        n_permutations=n_permutations,
        score_func=roc_auc_score,
        verbose=True, n_jobs=-1
    )
    return score, null, p

@simulate(parameter_range=np.linspace(.1, .9, 5), n_sim=100)
def simulate_ratio_pre(param=None, seed=None):
    X, y = random_data_gen(n_samples=1000, n_feats=10, maha=0., ratio=param, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
    n_permutations = 5000
    score, null, p = pre_training_permutation(
        estimator,
        X_train, X_test, y_train, y_test,
        n_permutations=n_permutations,
        score_func=roc_auc_score,
        verbose=True, n_jobs=-1
    )
    return score, null, p

@simulate(parameter_range=np.logspace(2, 5, 5).astype(int), n_sim=100)
def simulate_samplesize_pre(param=None, seed=None):
    X, y = random_data_gen(n_samples=param, n_feats=10, maha=0., ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
    estimator = LogisticRegression(class_weight='balanced', C=1e-3)
    n_permutations = 5000
    score, null, p = pre_training_permutation(
        estimator,
        X_train, X_test, y_train, y_test,
        n_permutations=n_permutations,
        score_func=roc_auc_score,
        verbose=True, n_jobs=-1
    )
    return score, null, p

Running 100 simulations
using dask client at http://192.168.86.143:51360/status
Running 100 simulations
using dask client at http://192.168.86.143:51360/status
Running 100 simulations
using dask client at http://192.168.86.143:51360/status
Running 100 simulations
using dask client at http://192.168.86.143:51360/status


In [7]:
maha_futures_pre, maha_gather = simulate_maha_pre()
samplesize_futures_pre, samplesize_gather = simulate_samplesize_pre()
nfeats_futures_pre, nfeats_gather = simulate_nfeats_pre()
ratio_futures_pre, ratio_gather = simulate_ratio_pre()

Running 100 simulations
Using dask client at http://192.168.86.143:51360/status
500 parallel jobs
Running 100 simulations
Using dask client at http://192.168.86.143:51360/status
500 parallel jobs
Running 100 simulations
Using dask client at http://192.168.86.143:51360/status
500 parallel jobs
Running 100 simulations
Using dask client at http://192.168.86.143:51360/status
500 parallel jobs


In [39]:
maha_result = maha_gather(maha_futures)
df_result = pd.DataFrame(maha_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_maha_pre.pkl")

In [13]:
da.get_exceptions(maha_futures, range(len(maha_futures)))

Unnamed: 0_level_0,exception,traceback_obj
param,Unnamed: 1_level_1,Unnamed: 2_level_1
5,KilledWorker('simulate_maha_pre-1ba5d3d93c306f...,
22,KilledWorker('simulate_maha_pre-9b29c0ce724cd9...,
24,KilledWorker('simulate_maha_pre-8ba680caabb617...,
26,KilledWorker('simulate_maha_pre-874dbed58779d2...,
27,KilledWorker('simulate_maha_pre-abd7c0f6e90a7f...,
...,...,...
2485,KilledWorker('simulate_maha_pre-d5208410d26bab...,
2486,KilledWorker('simulate_maha_pre-c18beba6601b72...,
2489,KilledWorker('simulate_maha_pre-1071e9dc45356a...,
2495,KilledWorker('simulate_maha_pre-6db98636abb197...,


In [9]:
rhino_client.shutdown()

2023-01-20 10:25:17,215 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [29]:
rhino_client.rebalance()

In [28]:
rhino_client.cancel(nfeats_futures_pre + samplesize_futures_pre)

In [24]:
samplesize_result = samplesize_gather(samplesize_futures_pre) 
df_result = pd.DataFrame(samplesize_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_samplesize_pre.pkl")

In [25]:
nfeats_result = nfeats_gather(nfeats_futures_pre)
df_result = pd.DataFrame(nfeats_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_nfeats_pre.pkl")

In [9]:
maha_result = maha_gather(maha_futures_pre)
df_result = pd.DataFrame(maha_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_maha_pre.pkl")

In [10]:
ratio_result = ratio_gather(ratio_futures_pre)
df_result = pd.DataFrame(ratio_result).melt(var_name="param")
df_result[["score", "perm_scores", "pval"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')
df_result.to_pickle("simulate_ratio_pre.pkl")

## Next steps
* Re-rerun post-hoc simulations with null maha distance
* Check out over/under-fitting with more or less regularization values. Compare to theoretical auc based on mahalanobis distance
* Check bias of null model (proportion of false positives - at different $\alpha$ thresholds [.1, .05, .01, .005])
* Check power of the test for a non-null model (positive mahalanobis distance) (proportion of predicted positives)
* Compare bias and power/sensitivity to the pre-trained model

## Farther along
* See how class balance, number of samples, etc. effect the above

In [17]:
import xarray as xr

In [None]:
[maha for data in ]

In [32]:
df_result._metadata = {"hi":"joey"}

In [36]:
df_result.to_csv("test_metadata.csv")

In [35]:
pd.read_pickle("test_metadata.pkl")._metadata

{'hi': 'joey'}

# Hyperparameter tuning

In [2]:
import cmldask.CMLDask as da
rhino_client = da.new_dask_client(
    job_name="C_tuning",
    memory_per_job="1.5GB",
    max_n_jobs=5,
    threads_per_job=1, 
    adapt=False,
    local_directory="/home1/jrudoler/",
    log_directory="/home1/jrudoler/logs/",
    resource_spec="h_vmem=1.5G,s_vmem=1.5G",
    job_extra=["-M jrudoler56@gmail.com", "-m b"]
)

Unique port for jrudoler is 51360
{'dashboard_address': ':51360'}
To view the dashboard, run: 
`ssh -fN jrudoler@rhino2.psych.upenn.edu -L 8000:192.168.86.146:51360` in your local computer's terminal (NOT rhino) 
and then navigate to localhost:8000 in your browser
You've chosen to scale your cluster manually. This means workers will continue to run until you manually shut them down. Remember to run `client.shutdown` after you're done computing and no longer need to reserve resources.


In [3]:
rhino_client.cluster.scale(50)

In [66]:
@simulate(parameter_range=np.linspace(0., 1.5, 5), n_sim=1000)
def test_best_C(param=None, seed=None):
    X, y = random_data_gen(n_samples=5000, n_feats=10, maha=param, ratio=0.5, seed=seed)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, shuffle=True)
    estimator = LogisticRegressionCV(class_weight='balanced', Cs=np.logspace(np.log10(1e-4), np.log10(1e4), 10))
    estimator.fit(X_train, y_train)
    pred = estimator.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_true=y_test, y_score=pred), estimator.C_[0]

Running 1000 simulations
using dask client at http://192.168.86.146:51360/status


In [67]:
C_futures, C_gather = test_best_C()

Running 1000 simulations
Using dask client at http://192.168.86.146:51360/status
5000 parallel jobs


In [6]:
rhino_client.shutdown()

In [68]:
from dask.distributed import progress
progress(C_futures)

VBox()

In [69]:
C_results = C_gather(C_futures)

In [70]:
df_result = pd.DataFrame(C_results).melt(var_name="param")
df_result[["score", "C"]] = df_result['value'].apply(pd.Series)
df_result = df_result.drop(columns='value')

In [71]:
pd.options.display.max_rows=None
df_result.groupby(["param", "C"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,score,score,score,score,score,score,score,score
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
param,C,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
0.0,0.0001,309.0,0.499892,0.01861,0.44717,0.48731,0.50062,0.513995,0.550985
0.0,0.000774,149.0,0.498415,0.01835,0.451009,0.485321,0.498518,0.509826,0.551207
0.0,0.005995,150.0,0.500224,0.017105,0.455061,0.487642,0.499109,0.512641,0.546532
0.0,0.046416,125.0,0.498583,0.017024,0.454648,0.48756,0.498983,0.509701,0.551041
0.0,0.359381,99.0,0.500658,0.017665,0.467458,0.487938,0.497407,0.511088,0.552959
0.0,2.782559,62.0,0.498147,0.02035,0.459579,0.483843,0.498679,0.510677,0.545119
0.0,21.544347,44.0,0.497653,0.018439,0.46221,0.48549,0.497513,0.510053,0.536289
0.0,166.810054,37.0,0.494963,0.02079,0.461919,0.477921,0.492195,0.506316,0.543879
0.0,1291.549665,17.0,0.505258,0.01771,0.480168,0.491336,0.502444,0.514601,0.542907
0.0,10000.0,8.0,0.48616,0.025528,0.445049,0.480551,0.485583,0.48937,0.537311


In [53]:
np.logspace(np.log10(1e-4), np.log10(1e4), 10)

array([1.00000000e-04, 7.74263683e-04, 5.99484250e-03, 4.64158883e-02,
       3.59381366e-01, 2.78255940e+00, 2.15443469e+01, 1.66810054e+02,
       1.29154967e+03, 1.00000000e+04])