# Intrinsic dimension estimation experiments

In [None]:
import math_helpers as mh
import estimators
import numpy as np
import matplotlib.pyplot as plt

## How does the size of the noise affect the accuracy?

In [None]:
# Experiment configuration.

# -- Manifold geometry --
# Intrinsic dimension of the manifold (the value the estimator should recover).
d = 2
# Ambient dimension: the manifolds here are spheres, so they sit in R^(d+1).
D = d + 1
# Radius of the sphere.
R_sphere = 20

# -- Sampling --
# Number of points in the data set, sampled (noisily) from the manifold.
m = 1000
# Bound on the size of the noise around the manifold.
s = 1
# Seed handed to the sampler.
seed = 32

# -- Dimension estimation --
# Threshold parameter for the dimension estimate.
eta = 1 / (2.001 * D)
# Radius of the open ball used as the neighbourhood when estimating dimension.
r = 5

In [None]:
# Show the threshold with its name attached, matching the labelled
# `name=value` output used for the higher-dimensional experiment.
print(f"{eta=}")

In [None]:
# Sweep the noise bound over num_tests evenly spaced values from 0 up to the
# neighbourhood radius r and record, for each value, the fraction of the m
# sample points whose estimated dimension equals the true dimension d.
num_tests = 10
ss = np.linspace(0, r, num_tests)
prop_correct = np.zeros(num_tests)

# The loop variable is deliberately not named `s`: that name holds the noise
# bound chosen in the parameter cell and must not be overwritten by the sweep.
for i, noise_bound in enumerate(ss):
    # The same seed is passed at every noise level.
    X = mh.noisy_sphere(d, R_sphere, noise_bound, m, seed)
    results = estimators.tgt_and_dim_estimates(X, r, eta)
    # Entry 1 of each result is used as the dimension estimate
    # (entry 0 is presumably the tangent-space estimate - confirm in estimators).
    estimated_dimensions = np.array([res[1] for res in results])
    prop_correct[i] = np.count_nonzero(estimated_dimensions == d) / m

In [None]:
# Scatter the proportion of correct estimates against the noise bound divided
# by the sphere radius. The axis is labelled s/tau; presumably tau is the
# reach, which for a sphere equals its radius - confirm.
fig, ax = plt.subplots(figsize=(6, 6), dpi=300)

ax.scatter(ss / R_sphere, prop_correct)
ax.set_xlabel(r"$s/\tau$")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy drops as noise level increases.")

plt.show()

## Higher dimensions

In [None]:
# Configuration for the higher-dimensional experiment.

# -- Manifold geometry --
d = 3                       # intrinsic dimension
D = d + 1                   # ambient dimension
R_sphere = 20               # radius of the sphere

# -- Dimension estimation --
eta = 1 / (2.001 * D)       # threshold parameter for dimension estimation
r = 5                       # neighbourhood radius

# -- Sampling --
# The surface area scales by approximately R_sphere for each dimension added,
# so the number of sample points is scaled up by the same factor.
m = 1000 * R_sphere
# Noise bound, set as a fraction of the sphere radius.
s = 0.2 * R_sphere

In [None]:
# Echo the noise bound and the threshold, one labelled value per line.
print(f"{s=}\n{eta=}")

In [None]:
# Draw the noisy sample with the current parameters and write it to disk under
# the file name that the external estimation script is given as its input.
X = mh.noisy_sphere(d, R_sphere, s, m, seed)
np.save(file="X_S3.npy", arr=X)

The next step has to be run outside the notebook so that the computation can be parallelised. Open a terminal in this folder and run the dimension estimation script with `python run_estimation_script.py X_S3.npy <r> <eta> dim_estims_S3.npy [<verbose>]`, replacing `<r>` and `<eta>` with the actual values used above and setting `<verbose>` to `True` if you want to see more detailed logging while the script is running. Once the script has finished, continue with the cells below.

In [None]:
# Load the dimension estimates written by run_estimation_script.py. That script
# is run by hand outside the notebook, so a missing file almost always means
# the manual step was skipped; report that instead of a bare path error.
try:
    dim_estimates = np.load("dim_estims_S3.npy")
except FileNotFoundError as err:
    raise FileNotFoundError(
        "dim_estims_S3.npy not found: run "
        "'python run_estimation_script.py X_S3.npy <r> <eta> dim_estims_S3.npy' "
        "in this folder first, then re-run this cell."
    ) from err

In [None]:
# Fraction of the loaded estimates equal to 3, the intrinsic dimension of the
# sphere sampled for this experiment. Taking np.mean of the boolean mask is the
# vectorised equivalent of count / length and avoids iterating over the array
# element by element with the builtin sum.
prop_correct_S3 = np.mean(dim_estimates == 3)
print("Proportion of correct estimates:", prop_correct_S3)