In [6]:
import pandas as pd
import numpy as np
from sklearn.manifold import MDS

# Configure multi-dimensional scaling for an input that is already a
# pairwise-dissimilarity matrix (not raw feature vectors).
mds = MDS(
    n_components=2,               # library default, spelled out: embed into the plane
    dissimilarity="precomputed",  # fit/fit_transform receives the distance matrix itself
    normalized_stress="auto",
    random_state=0,               # fixed seed so re-running gives the same layout
)

# MDS Algorithm

1. Compute the distance matrix D
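2. Compute the matrix $B = -\tfrac{1}{2} D^{(2)}$, where $D^{(2)}$ is the element-wise square of D (each entry is $d_{ij}^2$, not the matrix product $D \cdot D$)
3. Apply double centering to B: $B \leftarrow J B J$ with $J = I - \tfrac{1}{n}\mathbf{1}\mathbf{1}^\top$ (i.e. subtract each row mean and each column mean, then add back the grand mean)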
4. Compute the eigenvalues and eigenvectors of B
5. Choose the k largest eigenvalues and their corresponding eigenvectors
6. Construct the matrix $X = E_k \Lambda_k^{1/2}$: the k chosen eigenvectors as columns, each scaled by the square root of its eigenvalue
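7. Return X

Note: the steps above describe *classical* (Torgerson) MDS. scikit-learn's `MDS` class used in this notebook instead minimizes stress iteratively with the SMACOF algorithm, so its coordinates are not obtained from this eigendecomposition and may differ from the classical solution.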

In [7]:
# Labels for the rows/columns of the distance table, in matrix order.
cities = [
    "London", "Berlin", "Oslo", "Moscow", "Paris",
    "Rome", "Beijing", "Istanbul", "Gibraltar", "Reykjavik",
]

# Symmetric pairwise city-to-city distance matrix with a zero diagonal.
# NOTE(review): the unit (miles vs. km) is not stated anywhere — confirm with the data source.
data = np.array([
    [   0,  570,  710, 1550,  210,  890, 5050, 1550, 1090, 1170],
    [ 570,    0,  520, 1000,  540,  730, 4570, 1080, 1450, 1480],
    [ 710,  520,    0, 1020,  830, 1240, 4360, 1520, 1790, 1080],
    [1550, 1000, 1020,    0, 1540, 1470, 3600, 1090, 2410, 2060],
    [ 210,  540,  830, 1540,    0,  680, 5100, 1040,  960, 1380],
    [ 890,  730, 1240, 1470,  680,    0, 5050,  850, 1030, 2040],
    [5050, 4570, 4360, 3600, 5100, 5050,    0, 4380, 6010, 4900],
    [1550, 1080, 1520, 1090, 1040,  850, 4380,    0, 1870, 2560],
    [1090, 1450, 1790, 2410,  960, 1030, 6010, 1870,    0, 2050],
    [1170, 1480, 1080, 2060, 1380, 2040, 4900, 2560, 2050,    0],
])

# Attach the city names to both axes at construction time so the table is
# self-describing when displayed.
df = pd.DataFrame(data, index=cities, columns=cities)
df

Unnamed: 0,London,Berlin,Oslo,Moscow,Paris,Rome,Beijing,Istanbul,Gibraltar,Reykjavik
London,0,570,710,1550,210,890,5050,1550,1090,1170
Berlin,570,0,520,1000,540,730,4570,1080,1450,1480
Oslo,710,520,0,1020,830,1240,4360,1520,1790,1080
Moscow,1550,1000,1020,0,1540,1470,3600,1090,2410,2060
Paris,210,540,830,1540,0,680,5100,1040,960,1380
Rome,890,730,1240,1470,680,0,5050,850,1030,2040
Beijing,5050,4570,4360,3600,5100,5050,0,4380,6010,4900
Istanbul,1550,1080,1520,1090,1040,850,4380,0,1870,2560
Gibraltar,1090,1450,1790,2410,960,1030,6010,1870,0,2050
Reykjavik,1170,1480,1080,2060,1380,2040,4900,2560,2050,0


In [10]:
# Run MDS on the labelled distance table. The result is a plain NumPy array
# with one row per city (same order as `df`) and one column per embedding axis.
fit_data = mds.fit_transform(df) # fit data into 2D space
fit_data

array([[  238.78595987,  -787.77592661],
       [  -40.05306339,  -286.86861619],
       [  438.79006279,   -95.51606666],
       [ -224.92894556,   699.22061076],
       [  -30.75465147,  -784.47945943],
       [ -647.18479938,  -723.46637742],
       [  223.45931712,  4255.24234373],
       [-1041.51105416,    -5.40496647],
       [ -311.80660026, -1714.41989195],
       [ 1395.20377444,  -556.53164976]])

In [11]:
# Tabulate the embedding: name the two coordinate columns and add the city
# label for each row (rows of `fit_data` follow the order of `cities`).
fit_df = pd.DataFrame(fit_data, columns=["x", "y"]).assign(cities=cities)
fit_df

Unnamed: 0,x,y,cities
0,238.78596,-787.775927,London
1,-40.053063,-286.868616,Berlin
2,438.790063,-95.516067,Oslo
3,-224.928946,699.220611,Moscow
4,-30.754651,-784.479459,Paris
5,-647.184799,-723.466377,Rome
6,223.459317,4255.242344,Beijing
7,-1041.511054,-5.404966,Istanbul
8,-311.8066,-1714.419892,Gibraltar
9,1395.203774,-556.53165,Reykjavik


In [13]:
# Spot-check the first row by position: its x/y coordinates and city label.
fit_df.iloc[0]

x          238.78596
y        -787.775927
cities        London
Name: 0, dtype: object

In [14]:
import plotly.express as px

In [15]:
# Plot the 2-D MDS embedding, one labelled point per city.
fig = px.scatter(
    fit_df,
    x="x",
    y="y",
    text="cities",
    hover_name="cities",
    title="MDS embedding of city-to-city distances",
)
# Draw each label above its marker instead of directly on top of it.
fig.update_traces(textposition="top center")
# Only inter-point distances are meaningful in an MDS layout, so force one
# unit on the y axis to span the same screen length as one unit on the x axis;
# otherwise the figure's aspect ratio distorts the distances being shown.
fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig

In [None]:
# MDS reduces the dimensionality of a dataset by finding a lower-dimensional representation in which the pairwise distances between data points are preserved as closely as possible.