# Dimension analysis:
a) Repeat Example 2.10 (on the next page) using CorrDim, and compare the results.

In [1]:
import pyEDAkit as kit

# Print the signature/docstring of pyEDAkit's correlation-dimension estimator.
help(kit.IntrinsicDimensionality.corr_dim)

Help on function corr_dim in module pyEDAkit.IntrinsicDimensionality:

corr_dim(X)



In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
from joblib import Memory
memory = Memory("../cache", verbose=0)

@memory.cache
def genLDdata(seed: int | None = 1):
  if seed is not None:
    np.random.seed(seed)
  # Sample from the surface of a sphere
  X1, X2, X3 = np.random.randn(3, 1000)
  lambda_ = np.sqrt(X1**2 + X2**2 + X3**2)
  X1, X2, X3 = X1 / lambda_, X2 / lambda_, X3 / lambda_
  X = np.column_stack((X1, X2, X3))
  
  # Sample from a cube
  X1, X2, X3 = np.random.rand(3, 1000) + 2
  XX = np.column_stack((X1, X2, X3))
  
  # Sample from lines attached to a sphere
  L1 = np.column_stack((np.zeros(1000), np.zeros(1000), 2 * np.random.rand(1000) + 1))
  L2 = np.column_stack((np.zeros(1000), np.zeros(1000), -2 * np.random.rand(1000) - 1))
  L3 = np.column_stack((np.zeros(1000), 2 * np.random.rand(1000) + 1, np.zeros(1000)))
  L4 = np.column_stack((np.zeros(1000), -2 * np.random.rand(1000) - 1, np.zeros(1000)))
  
  A = np.vstack((X, XX, L1, L2, L3, L4))
  return A

# Build the example data set and show it as an interactive 3-D scatter,
# coloured by the z coordinate.
A = genLDdata()
axis_names = ["x", "y", "z"]
A_df = pd.DataFrame(A, columns=axis_names)
fig = px.scatter_3d(A_df, x="x", y="y", z="z", color="z")
fig.update_traces(marker={"size": 1.5})  # small markers: 6000 points overlap otherwise
fig.show()

In [3]:
# Global (whole data set) intrinsic-dimension estimate from the MLE estimator.
global_mle = kit.IntrinsicDimensionality.MLE(A)
print("global intrinsic dimensionality (MLE):", global_mle)

global intrinsic dimensionality (MLE): 1.522632055740565


In [4]:
import scipy.spatial.distance as dist

@memory.cache
def compute_local_intrinsic_dimensionality(A, method, k=100):
  """Estimate a local intrinsic dimension at every row of ``A``.

  For each row, the ``k`` rows with the smallest Euclidean distance to it
  (this normally includes the row itself, at distance 0) are passed to
  ``method``. Estimates above 3 are lumped into class 4; all other estimates
  are rounded up to the next integer.

  Args:
    A: 2-D array with one observation per row.
    method: Callable taking a 2-D array of neighbouring rows and returning a
      scalar dimension estimate.
    k: Number of nearest rows handed to ``method``. If ``A`` has fewer than
      ``k`` rows, all rows are used.

  Returns:
    Tuple ``(Ldim, tabulation_df)``: ``Ldim`` is an integer array with one
    class per row of ``A``; ``tabulation_df`` is a DataFrame with columns
    ``Dimension``, ``Count`` and ``Percentage`` (rounded to 3 decimals).
  """
  n_points = A.shape[0]

  # Full pairwise Euclidean distance matrix (n_points x n_points)
  pairwise = dist.squareform(dist.pdist(A))

  # Only the neighbour ranking is needed, so sort once and keep the first k
  # column indices per row (the sorted distances themselves are never used).
  nearest = np.argsort(pairwise, axis=1)[:, :k]

  # Local estimate from each row's k-neighbourhood
  Ldim = np.zeros(n_points)
  for row, neighbour_idx in enumerate(nearest):
    Ldim[row] = method(A[neighbour_idx, :])

  # Estimates above 3 become class 4; everything else is rounded up
  Ldim[Ldim > 3] = 4
  Ldim = np.ceil(Ldim).astype(int)

  # Tabulate how many rows fall into each class
  unique, counts = np.unique(Ldim, return_counts=True)
  percentages = (counts / n_points) * 100
  tabulation_df = pd.DataFrame({'Dimension': unique, 'Count': counts, 'Percentage': np.round(percentages, 3)})

  return Ldim, tabulation_df
# Local intrinsic dimension of every point, estimated with MLE on its
# default-size neighbourhood; show the per-class summary table.
Ldim, tabulation_df = compute_local_intrinsic_dimensionality(
  A,
  kit.IntrinsicDimensionality.MLE,
)
tabulation_df

Unnamed: 0,Dimension,Count,Percentage
0,1,2798,46.633
1,2,1922,32.033
2,3,1257,20.95
3,4,23,0.383


In [5]:
import plotly.graph_objects as go
# 3-D scatter of the data with one trace (colour + legend entry) per
# local-dimension class.
colors = {1: 'red', 2: 'green', 3: 'blue', 4: 'black'}
labels = [1, 2, 3, 4]
fig = go.Figure()
for label in labels:
    in_class = Ldim == label
    if not in_class.any():
        continue  # no points in this class: skip so the legend has no empty entry
    pts = A[in_class]
    fig.add_trace(go.Scatter3d(
        x=pts[:, 0], y=pts[:, 1], z=pts[:, 2],
        mode='markers',
        marker={'color': colors[label], 'size': 2},
        name=f"Dim {label}",
    ))

fig.update_layout(
    title="Intrinsic Dimensionality Scatterplot with MLE",
    scene={'xaxis_title': 'X', 'yaxis_title': 'Y', 'zaxis_title': 'Z'},
)
fig.show()

In [6]:
# Global estimates side by side: (MLE, correlation dimension).
global_mle = kit.IntrinsicDimensionality.MLE(A)
global_corr_dim = kit.IntrinsicDimensionality.corr_dim(A)
(global_mle, global_corr_dim)

(np.float64(1.522632055740565), np.float64(0.9994435007154894))

In [7]:
# Same local analysis as before, now with the correlation-dimension estimator.
# NOTE: this overwrites the MLE-based Ldim / tabulation_df from the earlier cell.
Ldim, tabulation_df = compute_local_intrinsic_dimensionality(
  A,
  kit.IntrinsicDimensionality.corr_dim,
)
tabulation_df

Unnamed: 0,Dimension,Count,Percentage
0,1,2749,45.817
1,2,2170,36.167
2,3,1080,18.0
3,4,1,0.017


In [8]:
import plotly.graph_objects as go
# 3-D scatter of the data with one trace (colour + legend entry) per
# local-dimension class, using the corr_dim-based Ldim.
colors = {1: 'red', 2: 'green', 3: 'blue', 4: 'black'}
labels = [1, 2, 3, 4]
fig = go.Figure()
for label in labels:
    in_class = Ldim == label
    if not in_class.any():
        continue  # no points in this class: skip so the legend has no empty entry
    pts = A[in_class]
    fig.add_trace(go.Scatter3d(
        x=pts[:, 0], y=pts[:, 1], z=pts[:, 2],
        mode='markers',
        marker={'color': colors[label], 'size': 2},
        name=f"Dim {label}",
    ))

fig.update_layout(
    title="Intrinsic Dimensionality Scatterplot with corr_dim",
    scene={'xaxis_title': 'X', 'yaxis_title': 'Y', 'zaxis_title': 'Z'},
)
fig.show()

b) Modify the code in genLDdata.m so that:

- The sphere will be replaced by an ellipsoid, e.g.
$$
\frac{x_1^2}{4^2} + \frac{x_2^2}{5^2} + \frac{x_3^2}{6^2} = 1
$$

- The vertical segment will be replaced by the curve,

$$
x_1 = \frac{t \cos(t^2)}{1 + t^2}, \quad 
x_2 = \frac{t \sin(t^2)}{1 + t^2}, \quad 
x_3 = t, \quad -2\pi \leq t \leq 2\pi
$$

- The horizontal segment will be replaced by a segment connecting the ellipsoid and the cube.

The task does not say exactly what "a segment connecting the ellipsoid and the cube" means, so I connect one randomly chosen point on the ellipsoid to one randomly chosen point in the cube (the two endpoints are arbitrary, not chosen by distance).

In [9]:
@memory.cache
def genLDdata_mod(seed: int | None = 1):
  if seed is not None:
    np.random.seed(seed)
  
  # Sample from the surface of an ellipsoid
  X1, X2, X3 = np.random.randn(3, 1000)
  lambda_ = np.sqrt((X1/4)**2 + (X2/5)**2 + (X3/6)**2)
  X1, X2, X3 = X1 / lambda_, X2 / lambda_, X3 / lambda_
  X = np.column_stack((X1, X2, X3))
  
  # Sample from a cube
  X1, X2, X3 = np.random.rand(3, 1000) + 2
  XX = np.column_stack((X1, X2, X3))
  
  # Sample of the curve
  t = np.random.uniform(-2*np.pi, 2*np.pi, 1000)
  X1 = (t * np.cos(t))/(1 + t**2)
  X2 = (t * np.sin(t))/(1 + t**2)
  X3 = t
  X_curve = np.column_stack((X1, X2, X3))
  
  # Sample from segment connecting ellipsoid and the cube
  point1 = X[np.random.randint(len(X))]
  point2 = XX[np.random.randint(len(XX))]
  t = np.linspace(0, 1, 1000)
  # linear interpolation
  X1 = point1[0] + t * (point2[0] - point1[0])
  X2 = point1[1] + t * (point2[1] - point1[1])
  X3 = point1[2] + t * (point2[2] - point1[2])
  X_segment = np.column_stack((X1, X2, X3))
  
  A = np.vstack((X, XX, X_curve, X_segment))
  return A


# Build the modified data set and show it as an interactive 3-D scatter,
# coloured by the z coordinate.
# NOTE: from here on A refers to the modified data, not the sphere example.
A = genLDdata_mod()
axis_names = ["x", "y", "z"]
A_df = pd.DataFrame(A, columns=axis_names)
fig = px.scatter_3d(A_df, x="x", y="y", z="z", color="z")
fig.update_traces(marker={"size": 1.5})  # small markers keep the 4000 points readable
fig.show()

Then study the intrinsic dimension with _PackingNumbers_, and also the local dimension.

In [10]:
# Print the signature/docstring of pyEDAkit's packing-numbers estimator.
help(kit.IntrinsicDimensionality.packing_numbers)

Help on function packing_numbers in module pyEDAkit.IntrinsicDimensionality:

packing_numbers(X)



In [11]:
# Global packing-numbers estimate for the modified data set (A was reassigned
# by the genLDdata_mod cell above).
kit.IntrinsicDimensionality.packing_numbers(A)

np.float64(0.6936865065488824)

In [14]:
# Local analysis of the modified data with the packing-numbers estimator.
# NOTE: this overwrites Ldim / tabulation_df from the earlier cells.
Ldim, tabulation_df = compute_local_intrinsic_dimensionality(
  A,
  kit.IntrinsicDimensionality.packing_numbers,
)
tabulation_df

Unnamed: 0,Dimension,Count,Percentage
0,1,2936,73.4
1,2,665,16.625
2,3,399,9.975


In [13]:
import plotly.graph_objects as go
# Scatter plot with color map: one trace (colour + legend entry) per
# local-dimension class of the modified data set.
colors = {1: 'red', 2: 'green', 3: 'blue', 4: 'black'}
fig = go.Figure()
labels = [1, 2, 3, 4]
for label in labels:
    indices = np.where(Ldim == label)[0]
    # Skip classes with no points so the legend has no empty entries
    if len(indices) > 0:
        fig.add_trace(go.Scatter3d(
            x=A[indices, 0], y=A[indices, 1], z=A[indices, 2],
            mode='markers',
            marker=dict(color=colors[label], size=2),
            name=f"Dim {label}"
        ))

# Ldim here comes from packing_numbers, so the title names that estimator
# (it previously said "corr_dim", copied from the earlier plot cell).
fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'), title="Intrinsic Dimensionality Scatterplot with packing_numbers")
fig.show()