In [None]:
"""
Compute intrinsic dimension of different datasets
"""

In [None]:
# environment setup

# imports
from src.vizutils import *
from src.dataset import *
from src.utils import *
import os
import random
from tqdm import tqdm

# torch
import torch

# GPUs
# device indices exposed to this process through CUDA_VISIBLE_DEVICES
device_ids = [0]
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, device_ids))
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f'running on {device}')

# set random seed
# the same seed is applied to Python's `random` and to torch (CPU and CUDA)
seed = 1337
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [None]:
# load dataset and loader
# dataset_name / img_size / dataset_size are reused by the logging cell and
# (dataset_name) by estimate_intrinsic_dim's save path.
dataset_name = 'prostate'
img_size = 224
dataset_size = 20000
dataset = get_datasets(
    dataset_name,
    subset_size=dataset_size,
    img_size=img_size,
)
# single-image, shuffled loader used only for the visual sanity check
dataloader_viz = DataLoader(dataset, shuffle=True, batch_size=1)

In [None]:
# logging
# One log directory per dataset; exist_ok=True replaces the separate
# exists()-then-makedirs() check, so re-running the cell (or a concurrent run
# creating the directory first) cannot fail with FileExistsError.
log_dir = 'logs/dimensionality/{}'.format(dataset_name)
os.makedirs(log_dir, exist_ok=True)
logger = Logger('custom', log_dir)
# record the run configuration at the top of the log
logger.write_msg('extrinsic size = {}\tdataset size = {}'.format(img_size, dataset_size))

In [None]:
# view some data
# Plot the first batch from the visualisation loader and print its target.
for batch_idx, batch in enumerate(dataloader_viz):
    data, target = batch
    data, target = data.to(device), target.to(device)
    plot_imgbatch(data)
    print(target)
    break  # only the first batch is shown

In [None]:
def estimate_intrinsic_dim(dataset, estimator, batchsize=10024):
    """
    Estimate the intrinsic dimension of `dataset`.

    Imports code from https://github.com/ppope/dimensions, unmodified.
    That code is expected in a `dimensions` directory below the current
    working directory. The function changes into that directory while the
    estimator runs and always restores the previous working directory
    afterwards, even when the estimator raises.

    Note: the JSON save path is built from the notebook-level `dataset_name`
    variable, which must be defined before this function is called.

    Args:
        dataset: dataset object, handed unchanged to `run_mle`.
        estimator: name of the estimator; only "mle" is supported.
        batchsize: forwarded to the estimator as its `bsize` argument.

    Returns:
        Whatever `run_mle(args, dataset)` returns.

    Raises:
        NotImplementedError: if `estimator` is not "mle".
    """
    # Reject unsupported estimators up front, before the working directory
    # is touched, so a bad argument cannot leave the kernel inside `dimensions`.
    if estimator != "mle":
        raise NotImplementedError(
            "unsupported estimator: {!r} (only 'mle' is implemented)".format(estimator)
        )

    from argparse import Namespace

    args = Namespace(
        estimator=estimator,
        k1=25,
        k2=55,# default
        single_k=True,
        eval_every_k=True,
        average_inverse=True,
        max_num_samples=1000,
        # relative path; presumably resolved by the estimator while the
        # working directory is `dimensions` -- TODO confirm
        save_path='logs/{}_{}_log.json'.format(dataset_name, estimator),

        # likely will not need to change these from default vals:
        anchor_samples=0, # 0 for using all samples from the training set
        anchor_ratio=0, # 0 for using all samples from the training set
        bsize=batchsize, #batch size for previous images
        n_workers=1,

        # GeoMLE args
        nb_iter1=1,
        nb_iter2=20,
        inv_mle=False
    )

    cur_dir = os.getcwd()
    os.chdir('dimensions')
    try:
        # `main` lives in the `dimensions` checkout, hence the import after chdir.
        from main import run_mle
        results = run_mle(args, dataset)
    finally:
        # Always return to the caller's directory, including on errors.
        os.chdir(cur_dir)
    return results

In [None]:
# Estimators to evaluate; only the first entry of the list is actually run.
estimators = ['mle']
selected_estimators = estimators[:1]
for estimator in selected_estimators:
    results = estimate_intrinsic_dim(dataset=dataset, estimator=estimator, batchsize=10000)
    # log the stringified estimator output
    logger.write_msg(str(results))