In [2]:
import numpy as np
#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D

In [3]:
# Toy 4 x 3 input matrix used throughout the notebook;
# each row is plotted further down as one point in 3-D.
A = np.array(
    [
        [-2,  2, 3],
        [ 4,  5, 6],
        [ 5, -1, 6],
        [ 3,  2, 3],
    ]
)

## SVD on A (n x m) matrix

Complexity: O(min(n * m^2, n^2 * m)), i.e. O(n * m^2) when n >= m and O(n^2 * m) otherwise

In [4]:
# Full SVD of A: U and Vt are square, s is the 1-D vector of singular values
U, s, Vt = np.linalg.svd(A, full_matrices=True)

# Print every factor followed by its shape
for factor in (U, s, Vt):
    print(factor, f"{factor.shape}\n")

[[ 0.15705129 -0.59885217 -0.7606344   0.19531073]
 [ 0.70126166 -0.42291818  0.36377979 -0.44388802]
 [ 0.58117952  0.67998046 -0.43815005 -0.0887776 ]
 [ 0.38183418 -0.01195262  0.31164741  0.87002051]] (4, 4)

[12.06340941  4.94481239  2.83248721] (3,)

[[ 0.54232962  0.33182159  0.77186334]
 [ 0.58042164 -0.81220087 -0.05865554]
 [ 0.6074449   0.47981682 -0.63307702]] (3, 3)



In [5]:
print("Proof U * S * V")
# s is 1-D, so embed it on the diagonal of a zero matrix with A's own shape.
# That keeps the product (n x n) @ (n x m) @ (m x m) valid for any n x m input,
# instead of padding with a hard-coded [0] and slicing the first 3 columns.
Sigma = np.zeros(A.shape)
Sigma[:len(s), :len(s)] = np.diag(s)
# Last expression of the cell: should reproduce A up to floating-point error
U @ Sigma @ Vt

Proof U * S * V


array([[-2.,  2.,  3.],
       [ 4.,  5.,  6.],
       [ 5., -1.,  6.],
       [ 3.,  2.,  3.]])

In [6]:
# Keep only the first k singular values and their matching vectors
k = 2

# First k columns of U, first k singular values, first k rows of Vt
Uk, sk, Vtk = U[:, :k], s[:k], Vt[:k]

# Print every truncated factor followed by its shape
for part in (Uk, sk, Vtk):
    print(part, f"{part.shape}\n")

[[ 0.15705129 -0.59885217]
 [ 0.70126166 -0.42291818]
 [ 0.58117952  0.67998046]
 [ 0.38183418 -0.01195262]] (4, 2)

[12.06340941  4.94481239] (2,)

[[ 0.54232962  0.33182159  0.77186334]
 [ 0.58042164 -0.81220087 -0.05865554]] (2, 3)



In [7]:
import plotly.graph_objects as go

#### Original points (matrix)

In [8]:
# Show the original matrix; its rows are the 3-D points plotted in the next cell
A

array([[-2,  2,  3],
       [ 4,  5,  6],
       [ 5, -1,  6],
       [ 3,  2,  3]])

In [9]:
# 3-D scatter of the rows of A: columns 0, 1, 2 become x, y, z
fig = go.Figure()
fig.add_trace(
    go.Scatter3d(
        x=A[:, 0],
        y=A[:, 1],
        z=A[:, 2],
        mode='markers',
        name='Original',
    )
)
fig.show()

#### Reconstruction

In [10]:
print(f"Reconstruction using {k=}")
# Rank-k approximation of A: multiply the truncated factors back together
A_rec = Uk @ np.diag(sk) @ Vtk
A_rec

Reconstruction using k=2


array([[-0.69126773,  3.03375921,  1.63604366],
       [ 3.3740878 ,  4.50559598,  6.65232357],
       [ 5.75387218, -0.40452121,  5.21431697],
       [ 2.46378573,  1.5764478 ,  3.55884069]])

In [11]:
# Overlay the original rows of A and their rank-k reconstruction in one 3-D scatter
fig = go.Figure()
for points, label in ((A, 'Original'), (A_rec, 'Reconstructed')):
    fig.add_trace(
        go.Scatter3d(
            x=points[:, 0],
            y=points[:, 1],
            z=points[:, 2],
            mode='markers',
            name=label,
        )
    )
fig.show()

#### Projection (2d)

In [12]:
print("Projection")
# Coordinates of every row of A in the k-dimensional reduced space: Uk scaled by sk
A_proj = Uk @ np.diag(sk)
A_proj

Projection


array([[ 1.89457397, -2.96121163],
       [ 8.45960647, -2.09125103],
       [ 7.01100655,  3.36237581],
       [ 4.60622206, -0.05910345]])

In [13]:
# 2-D scatter of the projected rows (first vs. second reduced coordinate)
fig = go.Figure(
    data=[
        go.Scatter(
            x=A_proj[:, 0],
            y=A_proj[:, 1],
            name='Projection',
            mode='markers',
        )
    ]
)
# Last expression of the cell, so the figure is rendered as the cell output
fig

In [14]:
# Overlay three point sets in one 3-D figure: the original rows of A, their
# rank-k reconstruction, and the 2-D projection coordinates.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=A[:, 0],
            y=A[:, 1],
            z=A[:, 2],
            mode='markers',
            name='Original'
        ),
        go.Scatter3d(
            x=A_rec[:, 0],
            y=A_rec[:, 1],
            z=A_rec[:, 2],
            mode='markers',
            name='Reconstructed'
        ),
        go.Scatter3d(
            x=A_proj[:, 0],
            y=A_proj[:, 1],
            # The projection only has two coordinates, so draw it on the z = 0 plane.
            # Explicit zeros sized from A_proj replace the former `A_rec[:, 1]*0`,
            # which depended on an unrelated array and produced signed zeros.
            z=np.zeros(len(A_proj)),
            mode='markers',
            name=f'Projection for {k=}'
        )
    ]
)
fig.show()

#### Adding a new point to the original 3d space

In [15]:
# Unseen 3-D sample, chosen close to [4, 5, 6] (the 2nd row of A)
new_data_vector = np.asarray([5, 4, 5])

In [16]:
# 3-D scatter of the original rows, their reconstruction, and the new sample
fig = go.Figure()

# Point clouds that share the same column -> axis mapping
for points, label in ((A, 'Original'), (A_rec, 'Reconstructed')):
    fig.add_trace(
        go.Scatter3d(
            x=points[:, 0],
            y=points[:, 1],
            z=points[:, 2],
            mode='markers',
            name=label,
        )
    )

# The new sample is a single point, so each coordinate is wrapped in a list
fig.add_trace(
    go.Scatter3d(
        x=[new_data_vector[0]],
        y=[new_data_vector[1]],
        z=[new_data_vector[2]],
        mode='markers',
        name='New point',
    )
)
fig.show()

#### Project new data vector onto reduced space

In [17]:
# Treat the new sample as a 1 x 3 row and map it onto the k retained right-singular
# directions (the rows of Vtk), giving its 1 x k coordinates in the reduced space.
projected_vector = new_data_vector.reshape(1, -1) @ Vtk.T
projected_vector

array([[ 7.89825116, -0.63997299]])

In [18]:
# 2-D scatter: projected rows of A together with the projected new sample
fig = go.Figure(
    data=[
        go.Scatter(
            x=A_proj[:, 0],
            y=A_proj[:, 1],
            name='Projections',
            mode='markers',
        ),
        go.Scatter(
            x=projected_vector[:, 0],
            y=projected_vector[:, 1],
            name='Projection of new point',
            mode='markers',
        ),
    ]
)
# Last expression of the cell, so the figure is rendered as the cell output
fig

In [19]:
# Combined 3-D view: original rows, their rank-k reconstruction, the 2-D projection
# coordinates (drawn on the z = 0 plane), the new sample, and the new sample's projection.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=A[:, 0],
            y=A[:, 1],
            z=A[:, 2],
            mode='markers',
            name='Original 3d'
        ),
        go.Scatter3d(
            x=A_rec[:, 0],
            y=A_rec[:, 1],
            z=A_rec[:, 2],
            mode='markers',
            name='Reconstructed 3d'
        ),
        go.Scatter3d(
            x=A_proj[:, 0],
            y=A_proj[:, 1],
            # Explicit zeros sized from A_proj replace the former `A_rec[:, 1]*0`,
            # which depended on an unrelated array and produced signed zeros.
            z=np.zeros(len(A_proj)),
            mode='markers',
            name=f'Projection for {k=}'
        ),
        go.Scatter3d(
            x=[new_data_vector[0]],
            y=[new_data_vector[1]],
            z=[new_data_vector[2]],
            mode='markers',
            name='New point 3d'
        ),
        go.Scatter3d(
            # Fix: x must be the FIRST reduced coordinate. The previous code used
            # index [0, 1] for both x and y, plotting the point at (y, y).
            x=[projected_vector[0, 0]],
            y=[projected_vector[0, 1]],
            z=[0],
            mode='markers',
            name=f'New point projected for {k=}'
        )
    ]
)
fig.show()

## KDTree

In [20]:
from sklearn.neighbors import KDTree

In [21]:
# Build a KD-tree over the rows of A_proj (the projected points) so the
# nearest projected row can be looked up with tree.query in the next cell.
tree = KDTree(A_proj)

In [22]:
# Look up the single nearest neighbour of the projected new point among the rows of A_proj
dist, ind = tree.query(projected_vector, k=1)

# One query and one neighbour were requested, so the hit sits at position [0, 0]
closest_point_reduced = A_proj[ind[0, 0]]
closest_point_reduced

array([ 8.45960647, -2.09125103])

In [23]:
# Map the closest point found in the projected space back to 3-D through Vtk.
# Note: this gives the rank-k reconstruction of that row, not the exact row of A.
closest_point_original = closest_point_reduced @ Vtk

In [24]:
# Summary: inputs first, then the rank-k reconstruction, the projections,
# and finally the nearest neighbour found in the reduced space.
print("New data point:", new_data_vector)
print("\nOriginal matrix:\n", A)

print("\nReconstructed matrix:\n", A_rec.round(1))

print("\nNew data point projected:", projected_vector[0].round(1))
print("\nProjection matrix:\n", A_proj.round(1))


# Nearest stored point in the reduced space, then the same point mapped back to 3-D
print("\nOriginal vector to reduced (2d) dim space:", closest_point_reduced.round(1))
print("Original vector to reconstructed (3d) space", closest_point_original.round(1))


New data point: [5 4 5]

Original matrix:
 [[-2  2  3]
 [ 4  5  6]
 [ 5 -1  6]
 [ 3  2  3]]

Reconstructed matrix:
 [[-0.7  3.   1.6]
 [ 3.4  4.5  6.7]
 [ 5.8 -0.4  5.2]
 [ 2.5  1.6  3.6]]

New data point projected: [ 7.9 -0.6]

Projection matrix:
 [[ 1.9 -3. ]
 [ 8.5 -2.1]
 [ 7.   3.4]
 [ 4.6 -0.1]]

Original vector to reduced (2d) dim space: [ 8.5 -2.1]
Original vector to reconstructed (3d) space [3.4 4.5 6.7]
