In [1]:
import numpy as np
import pandas as pd

from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA

import plotly.plotly as py
from plotly.graph_objs import *

In [2]:
# Empty table of 3-D points: one column per axis, no rows yet.
axis_names = list('xyz')
coords = pd.DataFrame(columns=axis_names)

In [3]:
# Start with some coordinates.
#
# The whole table is built in one constructor call instead of enlarging the
# frame one `.loc` row at a time: re-running the cell always yields the same
# frame, and the column dtypes come straight from the integer literals rather
# than from however pandas infers dtypes when enlarging an empty frame.
coords = pd.DataFrame(
    [
        [0, 0, 0],      # A
        [12, 15, 18],   # B
        [25, 35, 28],   # C
        [21, 12, 29],   # F
        [21, 14, 15],   # E
        [22, 32, 27],   # J
    ],
    index=['A', 'B', 'C', 'F', 'E', 'J'],
    columns=['x', 'y', 'z'],
)

In [4]:
# Show the table of input points (rows: point labels, columns: x / y / z).
coords

Unnamed: 0,x,y,z
A,0,0,0
B,12,15,18
C,25,35,28
F,21,12,29
E,21,14,15
J,22,32,27


In [5]:
# Compute the distance matrix.
# Note: despite its name, `similarities` holds pairwise Euclidean *distances*
# between the rows of `coords`, labelled by point on both axes.
similarities = pd.DataFrame(
    data=euclidean_distances(coords.values),
    index=coords.index,
    columns=coords.index,
)

In [6]:
# Show the pairwise Euclidean distance matrix computed from `coords`.
similarities

Unnamed: 0,A,B,C,F,E,J
A,0.0,26.324893,51.32251,37.762415,29.359837,47.296934
B,26.324893,0.0,25.865034,14.525839,9.539392,21.679483
C,51.32251,25.865034,0.0,23.366643,25.019992,4.358899
F,37.762415,14.525839,23.366643,0.0,14.142136,20.124612
E,29.359837,9.539392,25.019992,14.142136,0.0,21.656408
J,47.296934,21.679483,4.358899,20.124612,21.656408,0.0


In [7]:
# Multi-dimensional scaling
# distance matrix ----> coordinates on some basis
#
# `similarities` holds pairwise distances, which is why the estimator is told
# the input is a precomputed dissimilarity matrix.
#
# MDS starts from a random initial configuration, so without a fixed seed the
# embedding (and every number derived from it further down) changes on each
# kernel restart. Pinning `random_state` makes the notebook reproducible.
MDS_SEED = 0

mds = manifold.MDS(n_components=3, max_iter=3000, eps=1e-9,
                   dissimilarity='precomputed', n_jobs=1,
                   random_state=MDS_SEED)

# Embedded coordinates, one row per point, in the same order as the rows of
# the distance matrix.
pos = mds.fit(similarities.values).embedding_

In [8]:
# Rotate both sets of coordinates to the same basis in order to compare them.
#
# The MDS embedding is only determined up to a rigid rotation/reflection, so
# each point set is projected onto its own principal axes.

clf = PCA(n_components=3)
coords_pca = clf.fit_transform(coords.values)
pos_pca = clf.fit_transform(pos)  # refits `clf`; it now describes `pos`

# The two fits are independent and a principal axis is only defined up to its
# sign, so an axis of one set may come out mirrored relative to the other.
# Both arrays list the points in the same order, so the sign of the per-axis
# dot product says whether an axis has to be flipped. This is a no-op when
# the signs already agree.
axis_signs = np.sign((coords_pca * pos_pca).sum(axis=0))
axis_signs[axis_signs == 0] = 1  # degenerate axis: leave it untouched
pos_pca = pos_pca * axis_signs

coords_rotated = pd.DataFrame(coords_pca,
                              index=coords.index,
                              columns=coords.columns)

pos_rotated = pd.DataFrame(pos_pca,
                           index=coords.index,
                           columns=coords.columns)

In [9]:
# Original points, expressed in the basis produced by the PCA transform
coords_rotated

Unnamed: 0,x,y,z
A,31.152687,-4.051572,-0.840229
B,5.161337,-0.362556,-2.800048
C,-20.153147,-4.657603,0.319169
F,-3.29474,11.396498,-1.695772
E,3.275963,1.763572,6.306266
J,-16.142101,-4.088339,-1.289386


In [10]:
# Points obtained from multi-dimensional scaling (MDS), after their own PCA
# transform, for comparison with the rotated original points
pos_rotated

Unnamed: 0,x,y,z
A,31.152716,-4.051502,-0.840006
B,5.161292,-0.362393,-2.800622
C,-20.153397,-4.657092,0.318458
F,-3.294788,11.396541,-1.695452
E,3.276087,1.763302,6.305963
J,-16.14191,-4.088856,-1.288341


In [11]:
# Pairwise Euclidean distances between the original points after rotation,
# labelled by point on both axes.
pd.DataFrame(
    data=euclidean_distances(coords_rotated),
    index=coords.index,
    columns=coords.index,
)

Unnamed: 0,A,B,C,F,E,J
A,0.0,26.324893,51.32251,37.762415,29.359837,47.296934
B,26.324893,0.0,25.865034,14.525839,9.539392,21.679483
C,51.32251,25.865034,0.0,23.366643,25.019992,4.358899
F,37.762415,14.525839,23.366643,0.0,14.142136,20.124612
E,29.359837,9.539392,25.019992,14.142136,0.0,21.656408
J,47.296934,21.679483,4.358899,20.124612,21.656408,0.0


In [12]:
# Pairwise Euclidean distances between the MDS points after rotation,
# labelled by point on both axes.
pd.DataFrame(
    data=euclidean_distances(pos_rotated),
    index=coords.index,
    columns=coords.index,
)

Unnamed: 0,A,B,C,F,E,J
A,0.0,26.325038,51.322762,37.762472,29.359551,47.296765
B,26.325038,0.0,25.865161,14.525812,9.539522,21.679482
C,51.322762,25.865161,0.0,23.366379,25.020239,4.358523
F,37.762472,14.525812,23.366379,0.0,14.142077,20.124905
E,29.359551,9.539522,25.020239,14.142077,0.0,21.655942
J,47.296765,21.679482,4.358523,20.124905,21.655942,0.0


In [13]:
# Visualise the two sets of coordinates

# Hover text for every marker: the point labels.
label = coords.index

# Original points in the rotated (PCA) basis, drawn with small markers.
trace0 = Scatter3d(
    x=coords_rotated['x'],
    y=coords_rotated['y'],
    z=coords_rotated['z'],
    mode='markers',
    opacity=.5,
    text=label,
    marker=dict(size=2),
    name="original")

# Points recovered from the distance matrix, in the rotated (PCA) basis.
# No marker size is set, so plotly's default is used -- presumably so that the
# small "original" markers remain visible where the two sets coincide.
trace1 = Scatter3d(
    x=pos_rotated['x'],
    y=pos_rotated['y'],
    z=pos_rotated['z'],
    mode='markers',
    opacity=.3,
    text=label,
    name='from distance matrix')

# Original points in their raw, unrotated basis. This trace is NOT part of
# the figure below; append it to `data` to overlay it on the rotated sets.
trace2 = Scatter3d(
    x=coords['x'],
    y=coords['y'],
    z=coords['z'],
    mode='markers',
    opacity=.5,
    text=label,
    marker=dict(size=2, color='red'),
    name='original (unrotated)')

# A plain list of traces replaces the deprecated `Data` wrapper.
data = [trace0, trace1]

# NOTE(review): `plotly.plotly` was moved to the separate `chart_studio`
# package in plotly 4 -- confirm against the installed plotly version.
py.iplot(data)