Example exam problem from notebook 13 - high dimensionality.

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider

# Do not rename the function below; just replace what is marked XXX
def random_projection(X,d,k,rng=None):
    '''Perform a random projection of the dataset X from
    d-dimensions into k-dimensions.

    A (d, k) matrix with independent standard-normal entries is drawn, each
    of its columns is rescaled to unit Euclidean length, and X is multiplied
    by that matrix from the right.

    Parameters
    ----------
    X : array-like of shape (n_samples, d)
        Data to project, one sample per row.
    d : int
        Input dimension; must equal the number of columns of X.
    k : int
        Number of output dimensions.
    rng : None, int, or numpy.random.Generator, optional
        Source of randomness. With None (the default) the numbers are drawn
        from NumPy's global ``np.random`` state, so ``np.random.seed`` still
        controls the result. Any other value is handed to
        ``np.random.default_rng`` so that a single call can be made
        reproducible without touching global state.

    Returns
    -------
    The projected data, of shape (n_samples, k).

    Raises
    ------
    AssertionError
        If X is not two-dimensional with exactly d columns.
    '''
    # np.ndim / np.shape also work on nested lists and turn a 1-D input into
    # the documented AssertionError instead of an unrelated IndexError.
    assert np.ndim(X) == 2 and np.shape(X)[1] == d, "The array X does not have the shape (n_samples,d)"

    # Fall back to the global legacy state when no generator is requested so
    # that existing callers (and np.random.seed) behave exactly as before.
    sampler = np.random if rng is None else np.random.default_rng(rng)

    R = sampler.normal(0, 1, size=(d, k))
    # Rescale every column (one random direction per output coordinate)
    # to unit length.
    R = R / np.linalg.norm(R, axis=0)

    f = X@R

    return f

# Draw 200 uniform random samples in 1000 dimensions and project them to 10.
# The input dimension is read off X so it cannot drift from the data's shape.
X = np.random.rand(200, 1000)
X_projected = random_projection(X, d=X.shape[1], k=10)

print(X_projected)

[[-0.19201899 -0.28668523  0.87315508 ... -0.85674057  0.4503255
   0.17738849]
 [-0.10113037 -0.30530471  0.92933661 ...  0.19111759  0.30048915
   0.65478168]
 [ 0.30333869 -0.52253684  1.19864907 ... -0.12763389  0.32304158
   0.63944475]
 ...
 [ 0.57374731  0.14230777  0.71275657 ...  0.38570811  0.55228939
   1.25061172]
 [ 0.30761452  0.01229702  0.80700594 ... -0.06182759  0.93265816
   0.75816139]
 [ 0.200113    0.10683814  0.95071797 ... -0.04223969  1.20221133
   0.81223284]]


In [6]:
# Smoke test: only the shape of the projection is checked, not its values.
import numpy as np
X_test = np.array([[1, 1], [2, 1], [3, 1], [4, 1]])
try:
    # Projecting 4 samples from 2 dimensions to 1 must give a (4, 1) array.
    assert random_projection(X_test, 2, 1).shape == (X_test.shape[0], 1)
except AssertionError:
    print("Try again! and make sure you are producing an output of (n_samples,k)")
else:
    print("You are outputting the correct dimension, hopefully it is correct.")

You are outputting the correct dimension, hopefully it is correct.


Below is taken straight from the notebook, just for testing purposes

In [4]:
# Let's simulate

@interact
def _(d=IntSlider(100,100,2000,100),k=IntSlider(2,2,100,1)):
    """Histogram the relative length error of Gaussian random projections.

    A fixed unit vector v in d dimensions is multiplied by 300 independent
    (k, d) standard-normal matrices. For each product f the relative
    deviation of |f| from |v|*sqrt(k) is recorded, and the 300 deviations
    are shown as a histogram. The global NumPy seed is reset to 1 on every
    call, so a given (d, k) always produces the same picture.
    """
    np.random.seed(1)
    direction = np.random.normal(size=d)
    v = direction / np.linalg.norm(direction)

    print("v has length: %.2f" % np.linalg.norm(v))

    num_simulations = 300
    # Reference length that each projected vector is compared against.
    reference = np.linalg.norm(v) * np.sqrt(k)

    def relative_error():
        # One fresh (k, d) Gaussian matrix per trial, applied to the same v.
        projected = np.random.normal(size=(k, d)) @ v
        return abs(np.linalg.norm(projected) - reference) / reference

    errors = [relative_error() for _trial in range(num_simulations)]
    plt.hist(errors)

interactive(children=(IntSlider(value=100, description='d', max=2000, min=100, step=100), IntSlider(value=2, d…