In [None]:
######################################################
## MDS - MultiDimensional Scaling
## 
## I think of MDS as more of an unsupervised visualization technique
## Reduce the dimensions of our data (almost always to 2D)
## The objective is to keep similar objects close to each other
## ------------------------------------
## brings back the concept of distance
## ------------------------------------
## 
## My first exposure years ago: distances between airports - reconstruct the map (metric = true observed distances, the default in sklearn)
## 
##
## Inputs
## a feature/attribute dataset (traditional) or distance matrix (if we pass this, set dissimilarity='precomputed')
##
## Why MDS?
## visual representation of the objects to help understand relationships
## Applications
## Survey research = ask respondents to rate various items
##                   visually represent the items spatially and their perceived sim/diffs
##                   helps get a sense of the dataset or to show how certain items are related



In [None]:
# Bring in the "classic" iris dataset.
# NOTE(review): load_iris is presumably sklearn.datasets.load_iris, imported
# in an earlier cell that is not visible here -- confirm.
iris = load_iris()

In [None]:
# Check what kind of object load_iris() returned before pulling fields off it.
type(iris)

In [None]:
# Feature matrix and per-row class codes, taken from the loaded dataset.
X, y = iris.data, iris.target

# Translate every class code into its name via iris.target_names so the
# rows carry readable labels.
ylabs = []
for class_code in y:
    ylabs.append(iris.target_names[class_code])

In [None]:
# Project the iris features (X) down to 2 dimensions with MDS.
# sklearn's MDS uses a randomly initialised iterative fit by default, so
# random_state is pinned to make the embedding -- and the plot built from
# it -- reproducible after a kernel restart.
mds = MDS(n_components=2, random_state=42)
im = mds.fit_transform(X)

In [None]:
# Scatter the two MDS coordinates, coloured by class label.
# Title and axis labels are set so the figure can be read on its own;
# assigning the Axes and ending with `;` keeps the repr out of the output.
ax = sns.scatterplot(x=im[:, 0], y=im[:, 1], hue=ylabs)
ax.set(
    title="Iris: 2D MDS embedding",
    xlabel="MDS dimension 1",
    ylabel="MDS dimension 2",
);

In [None]:
# You can see how MDS is able to put the data into a 2D space, and
# even though it's a well-discussed dataset, it helps take a 4D dataset
# into 2D and helps visualize the grouping of the categories

# This is one of the goals of the UML techniques we will wrap up with.
# reduce the space, but keep natural ordering, mostly through the use of distance!



In [None]:
####### MDS Hands-on exercise
##
## grab the mtcars dataset from Big Query
## questrom.datasets.mtcars
## apply mds to get 2 new coordinates
## plot the new 2d space
## BONUS if you can overlay the labels!

# Pull the whole mtcars table from BigQuery, billed to the "questrom" project.
cars = pd.read_gbq(
    "select * from `questrom.datasets.mtcars`",
    project_id="questrom",
)



In [None]:
# Move the car name into the row index so that (a) MDS is fitted on the
# remaining columns only and (b) the names are available through cars.index
# when the points are labelled in the plot.
# The guard keeps this cell idempotent: once `model` is the index, re-running
# the cell is a no-op instead of raising a KeyError.
# NOTE(review): assumes the table has a `model` column holding the car name --
# confirm against the BigQuery schema.
if "model" in cars.columns:
    cars = cars.set_index("model")

# Pin random_state so the 2D embedding is reproducible between runs.
mds = MDS(n_components=2, random_state=42)
ce = mds.fit_transform(cars)

In [None]:
# The labels below are matched to points by position, so there must be
# exactly one row of coordinates per row of `cars`.
assert ce.shape[0] == len(cars.index)

# Plot the 2D embedding and write each row's index label next to its point.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=ce[:, 0], y=ce[:, 1], ax=ax)
for label, (x_pos, y_pos) in zip(cars.index, ce[:, :2]):
    ax.text(x=x_pos, y=y_pos, s=label)
ax.set(
    title="mtcars: 2D MDS embedding",
    xlabel="MDS dimension 1",
    ylabel="MDS dimension 2",
)
plt.show()

In [None]:
###################### MDS summary
# other examples - applications
# 
# example of how "relationships/distances" are retained ->
#     https://ars.els-cdn.com/content/image/1-s2.0-S092234879680030X-gr1.gif
# 
# example in consumer research ->
#     https://46gyn61z4i0t1u1pnq2bbk2e-wpengine.netdna-ssl.com/wp-content/uploads/2018/07/MDS-non-metric.png
#     https://www.displayr.com/what-is-multidimensional-scaling-mds/
#
# Our square datasets tend to talk about distance and summaries of rows (pdist)
# but invert it! Transpose the dataset to think about column values, especially when we dummy-encode categories.
# In short, think about what you want to summarize, because it's all about the axis, not just rows!
# 
