In [4]:
from ddks.data import *
from ddks.data.openimages_dataset import LS
from ddks import methods
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
import tqdm
import pickle
import os
# Apply the matplotlib style sheet named 'ah' to every figure created afterwards.
# NOTE(review): 'ah' does not appear to be a style shipped with matplotlib, so it is
# presumably a user-defined style sheet that must be installed locally — confirm.
plt.style.use('ah')
# No figure is created in the visible code before this call, so there appears to be
# nothing for it to display here.
plt.show()

In [12]:
# Two-sample tests under comparison. Each entry is later invoked as
# method(p, t, permutations) and is expected to expose a `.name` attribute.
methods_list = [
    methods.ddks_method,
    methods.onedks_method,
    methods.hotelling_method,
    methods.kldiv_method,
]

# Dataset classes to benchmark; each is instantiated with
# `dimension=` and `sample_size=` keyword arguments.
datasets = [DVU, LS]

# Dimensionalities to scan: 2 through 10 inclusive.
dimensions = np.arange(2, 11)

def within(x, y, eps=1.0E-3):
    """Tell whether `x` and `y` lie strictly closer together than `eps`.

    Parameters
    ----------
    x, y : scalars or array-likes supporting subtraction
    eps : tolerance; the absolute difference must be strictly below it

    Returns
    -------
    The result of `|x - y| < eps` (elementwise when arrays are given).
    """
    gap = np.abs(x - y)
    return gap < eps

def return_mean_significance(_method, Dataset, dimension, sample_size, permutations=100):
    """Score one freshly drawn sample pair with `_method`.

    Builds `Dataset(dimension=..., sample_size=...)`, takes its next item
    (which must unpack into exactly two objects) and forwards both, together
    with `permutations`, to `_method` as positional arguments.

    Returns
    -------
    Whatever `_method(p, t, permutations)` returns.
    """
    sampler = Dataset(dimension=dimension, sample_size=sample_size)
    drawn = next(sampler)
    p, t = drawn
    return _method(p, t, permutations)

def bisection(_method, Dataset, dimension, permutations=100, trials=10, max_sample_size=100, alpha=0.05):
    """Estimate the sample size at which `_method` reports a significance of `alpha`.

    For each of `trials` runs, a bisection search over integer sample sizes is
    started from the bracket [2, 100]. The search assumes that the value
    returned by `_method` (presumably a p-value — confirm against the method
    implementation) is above `alpha` at the lower end of a bracket and below
    `alpha` at the upper end, and halves the bracket until the midpoint value
    is within the default tolerance of `within` around `alpha`, or the bracket
    is at most 2 wide.

    Parameters
    ----------
    _method : callable invoked as `_method(p, t, permutations)` through
        `return_mean_significance`.
    Dataset : callable invoked as `Dataset(dimension=..., sample_size=...)`
        whose result yields a `(p, t)` pair via `next`.
    dimension : forwarded to `Dataset`.
    permutations : forwarded to `_method`.
    trials : number of independent searches.
    max_sample_size : when the three probes do not bracket `alpha`, the upper
        bound is raised by 50 only while it is below this value; otherwise
        the trial is recorded as NaN.
    alpha : target significance level (default 0.05, the value that was
        previously hard-coded).

    Returns
    -------
    (mean, std, count) : mean and standard deviation of the per-trial sample
        sizes ignoring NaN trials, and the number of non-NaN trials. If no
        trial produced a finite value, `(nan, nan, 0)` is returned.
    """
    mids = []
    for _trial in tqdm.tqdm(np.arange(trials)):
        low = 2
        # NOTE(review): the initial upper bound is fixed at 100 regardless of
        # `max_sample_size`; with the default max_sample_size=100 the
        # bracket-widening branch below is therefore never taken.
        high = 100
        mid = int((low + high) / 2)
        low_sig = return_mean_significance(_method, Dataset, dimension, low, permutations)
        high_sig = return_mean_significance(_method, Dataset, dimension, high, permutations)
        mid_sig = return_mean_significance(_method, Dataset, dimension, mid, permutations)
        while not within(mid_sig, alpha):
            if (high - low) <= 2:
                # Bracket cannot shrink further: report whichever of the three
                # probed sizes landed closest to alpha (ties favour mid, then low).
                low_err = np.abs(low_sig - alpha)
                mid_err = np.abs(mid_sig - alpha)
                high_err = np.abs(high_sig - alpha)
                if high_err < mid_err and high_err < low_err:
                    mid = high
                elif low_err < mid_err:
                    # high is not strictly the closest here, so low wins
                    # whenever it beats mid.
                    mid = low
                break
            if low_sig > alpha and mid_sig < alpha:
                # Crossing lies in the lower half: mid becomes the new upper bound.
                new_mid = int((low + mid) / 2)
                high = mid
                high_sig = mid_sig
                mid = new_mid
                mid_sig = return_mean_significance(_method, Dataset, dimension, mid, permutations)
            elif mid_sig > alpha and high_sig < alpha:
                # Crossing lies in the upper half: mid becomes the new lower bound.
                new_mid = int((mid + high) / 2)
                low = mid
                low_sig = mid_sig
                mid = new_mid
                mid_sig = return_mean_significance(_method, Dataset, dimension, mid, permutations)
            else:
                # The probes do not bracket alpha: widen the bracket if allowed,
                # re-probing all three points, otherwise give up on this trial.
                if high < max_sample_size:
                    high = high + 50
                    mid = int((low + high) / 2)
                    low_sig = return_mean_significance(_method, Dataset, dimension, low, permutations)
                    high_sig = return_mean_significance(_method, Dataset, dimension, high, permutations)
                    mid_sig = return_mean_significance(_method, Dataset, dimension, mid, permutations)
                else:
                    mid = np.nan
                    break
        mids.append(mid)

    converged = np.sum(np.isfinite(mids))
    if converged == 0:
        # Avoid the RuntimeWarnings numpy raises for nanmean/nanstd on input
        # with no finite entries; the reported values are the same.
        return np.nan, np.nan, converged
    return np.nanmean(mids), np.nanstd(mids), converged
        

In [13]:
# After the loop, `data` holds the results of the last dataset processed:
# data[str(dimension)][method name] -> tuple returned by bisection().
data = {}
for dataset in datasets:
    print(dataset)
    cache_path = f'ddks_dims_{dataset.name}.pkl'
    if os.path.exists(cache_path):
        # CAUTION: unpickling can execute arbitrary code; only load cache
        # files that were written by this notebook.
        with open(cache_path, 'rb') as cache_file:
            data = pickle.load(cache_file)
    else:
        # Start from an empty dict so that entries left over from a previously
        # loaded or computed dataset cannot end up in this dataset's cache file.
        data = {}
        for dimension in dimensions:
            data[str(dimension)] = dict()
            for metric in methods_list:
                data[str(dimension)][metric.name] = bisection(metric, dataset, dimension=dimension)
                print(str(dimension), metric.name, data[str(dimension)][metric.name])
        # The context manager guarantees the cache is flushed and closed.
        with open(cache_path, 'wb') as cache_file:
            pickle.dump(data, cache_file)


100%|██████████| 10/10 [00:18<00:00,  1.89s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

2 ddKS (19.2, 4.894895300208167, 10)


100%|██████████| 10/10 [00:17<00:00,  1.72s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

2 OneDKS (21.9, 12.003749414245533, 10)


100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

2 Hotelling-T2 (10.3, 4.539823785126467, 10)


100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

2 KLDiv (61.2, 22.489108474992957, 5)


100%|██████████| 10/10 [00:13<00:00,  1.40s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

3 ddKS (18.2, 7.180529228406497, 10)


100%|██████████| 10/10 [00:22<00:00,  2.24s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

3 OneDKS (21.7, 15.440531078949325, 10)


100%|██████████| 10/10 [00:06<00:00,  1.51it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

3 Hotelling-T2 (19.3, 5.950630218724736, 10)


100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

3 KLDiv (67.66666666666667, 20.973528288985833, 6)


100%|██████████| 10/10 [01:32<00:00,  9.21s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

4 ddKS (28.3, 12.712592182556632, 10)


100%|██████████| 10/10 [00:29<00:00,  2.99s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

4 OneDKS (20.4, 3.32264954516723, 10)


100%|██████████| 10/10 [00:07<00:00,  1.30it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

4 Hotelling-T2 (15.0, 5.138093031466052, 10)


100%|██████████| 10/10 [00:12<00:00,  1.24s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

4 KLDiv (93.0, 7.0, 2)


100%|██████████| 10/10 [03:50<00:00, 23.05s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

5 ddKS (24.1, 10.454185764563398, 10)


100%|██████████| 10/10 [00:35<00:00,  3.59s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

5 OneDKS (21.9, 4.90815647672321, 10)


100%|██████████| 10/10 [00:08<00:00,  1.23it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

5 Hotelling-T2 (10.7, 3.689173349139344, 10)


100%|██████████| 10/10 [00:32<00:00,  3.25s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

5 KLDiv (nan, nan, 0)


100%|██████████| 10/10 [08:24<00:00, 50.47s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

6 ddKS (24.5, 4.780167361086848, 10)


100%|██████████| 10/10 [00:38<00:00,  3.83s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

6 OneDKS (18.7, 6.229767250869008, 10)


100%|██████████| 10/10 [00:07<00:00,  1.35it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

6 Hotelling-T2 (10.3, 3.0347981810987035, 10)


 20%|██        | 2/10 [01:21<05:24, 40.59s/it]
2021-04-13  16:02:05 ERROR Internal Python error in the inspect module.
Below is the traceback from this internal error.

2021-04-13  16:02:05 INFO 
Unfortunately, your original traceback can not be constructed.



Traceback (most recent call last):
  File "/Users/hage581/sw/miniconda/envs/ddks/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-d1567e03f165>", line 9, in <module>
    data[str(dimension)][metric.name] = bisection(metric, dataset, dimension=dimension)
  File "<ipython-input-12-77eb87273b89>", line 20, in bisection
    high_sig = return_mean_significance(_method, Dataset, dimension, high, permutations)
  File "<ipython-input-12-77eb87273b89>", line 11, in return_mean_significance
    return _method(p, t, permutations)
  File "/Users/hage581/projects/belle2/ddks/ddks/methods/__init__.py", line 46, in __call__
    return self.significance_function(p, t, j=j)
  File "/Users/hage581/projects/belle2/ddks/ddks/methods/__init__.py", line 25, in __call__
    scores.append(self.score_function(_p, _t))
  File "/Users/hage581/projects/belle2/ddks/ddks/methods/__init__.py", line 86

TypeError: object of type 'NoneType' has no len()