In [1]:
from dstaster import *

<h2>Visualizing metrics</h2>

A useful way to understand different metrics is to look at their <i>unit circles</i>: the set of all points that lie at distance one from the origin. For the Euclidean distance, this is simply a circle of radius one&mdash;but for other metrics, the unit &lsquo;circle&rsquo; takes on a different shape.

In the plot below we measure the distance from the gray disks to the origin (marked green). The disks that are at distance one are coloured pink, those that are almost at distance one are coloured blue, and the ones whose distance is much larger or much smaller than one are kept gray.

<div class="task">
    <div class="no">1</div>
    <div class="text">
        Use the interactive plot below to investigate what the unit circles of the four different metrics look like.
    </div>
</div>


In [2]:
def vis_metric(metric):
    """Plot a grid of disks coloured by their distance to the origin.

    A staggered 40x40 grid of points is generated (odd rows are shifted
    half a grid step to the right), spanning -2 to 1.95 on the x axis and
    -2 to 1.9 on the y axis. Each point's distance to the origin is computed
    under the selected metric and passed, halved, to ``cmap_distance`` to
    obtain the scatter colours.

    Parameters
    ----------
    metric : str
        One of 'Euclidean', 'Manhattan', 'Hamming' or 'Chebyshev'.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    # Distance from (x, y) to the origin for every supported metric.
    distance_to_origin = {
        'Euclidean': lambda x, y: np.sqrt(x**2 + y**2),
        'Manhattan': lambda x, y: np.abs(x) + np.abs(y),
        # Hamming counts the coordinates that differ from the origin's.
        'Hamming': lambda x, y: 1*(x != 0) + 1*(y != 0),
        'Chebyshev': lambda x, y: np.maximum(np.abs(x), np.abs(y)),
    }
    # Validate up front so a bad name fails with a clear message instead of
    # an unbound-variable error further down.
    if metric not in distance_to_origin:
        raise ValueError(
            f"Unknown metric {metric!r}; expected one of "
            f"{sorted(distance_to_origin)}"
        )

    # Flattened row-major grid indices: row = k // 40, col = k % 40.
    row, col = np.divmod(np.arange(40 * 40), 40)
    # Odd rows are offset by half a step to get a staggered layout.
    xs = (col + 0.5 * (row % 2)) / 10 - 2
    ys = row / 10 - 2

    dist = distance_to_origin[metric](xs, ys)
    # Distance is halved before the colour lookup, so distance one maps to
    # 0.5 -- presumably the highlighted centre of the colour map; confirm
    # against cmap_distance.
    cols = cmap_distance(dist/2)

    fig, ax = plt.subplots(dpi=300, figsize=(4.5,4.5))
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_bounds(-1.5,1.5)
    ax.spines['bottom'].set_bounds(-1.5,1.5)
    ax.set_xticks(np.arange(-1.5, 1.51, 0.5))
    ax.set_yticks(np.arange(-1.5, 1.51, 0.5))

    ax.scatter(xs, ys, s=20, c=cols)
    # Equal scaling on both axes so the unit 'circles' are not distorted.
    ax.axis('equal')

# Settings for the radio-button selector that picks the metric.
config = dict(
    options=['Euclidean', 'Manhattan', 'Hamming', 'Chebyshev'],
    value='Euclidean',
    description='Metrics',
)

layout = dict(width='60%')

choice = widgets.RadioButtons(layout=layout, **config)

# Hook the selector up to vis_metric. The trailing semicolon keeps the cell
# from echoing the value of the last expression.
interact(vis_metric, metric=choice);

interactive(children=(RadioButtons(description='Metrics', layout=Layout(width='60%'), options=('Euclidean', 'M…

A common use of metrics is to group or partition data points according to their distance to a few selected points, often called <b>centres</b>. As you can imagine from the previous example, the choice of metric has an influence on what this partition looks like, in particular for those data points that are close to more than one centre.

In the output below, you should see five points marked with <b>X</b>s; these are our centres. The circles around them are our data, and we assign each circle to the centre it is closest to according to the selected metric. The partition is indicated by the colour of the circles.

<div class="task">
    <div class="no">2</div>
    <div class="text">
        Use the interactive plot below to investigate how the different metrics influence the partition of the data.
    </div>
</div>


In [3]:
import random

# Seed Python's RNG. Nothing in this cell draws random numbers; the seed is
# kept in case later cells rely on it.
random.seed(42)

# The (x, y) centres that the data points are assigned to.
centroids = [(0,0), (1,4), (-3,1),(-2,-4),(4,0)]

# Staggered 40x40 grid of data points. Flattened row-major indices give
# row = k // 40 and col = k % 40; odd rows are shifted half a step right.
_row, _col = np.divmod(np.arange(40 * 40), 40)
xs = (_col + 0.5 * (_row % 2)) / 4 - 5
ys = _row / 4 - 5

# Centre coordinates as separate arrays, used to draw the centre markers.
cxs = np.array([x for x,_ in centroids])
cys = np.array([y for _,y in centroids])

# One colour per centre, in the same order as `centroids`.
ccols = np.array([colors[n] for n in ['blue', 'green', 'yellow', 'purple','pink']])

def vis_metric(metric):
    """Plot the data grid partitioned by nearest centre under ``metric``.

    Every data point is assigned to the centre with the smallest distance
    under the selected metric (ties go to the centre listed first) and is
    drawn in that centre's colour; the centres are drawn as 'X' markers.

    Note: this redefines the ``vis_metric`` from the earlier cell. It reads
    the notebook-level variables ``xs``, ``ys``, ``centroids``, ``cxs``,
    ``cys`` and ``ccols``.

    Parameters
    ----------
    metric : str
        One of 'Euclidean', 'Manhattan' or 'Chebyshev'.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    # Distance as a function of the per-axis offsets to a centre.
    distance_from_offsets = {
        'Euclidean': lambda dx, dy: np.sqrt(dx**2 + dy**2),
        'Manhattan': lambda dx, dy: np.abs(dx) + np.abs(dy),
        'Chebyshev': lambda dx, dy: np.maximum(np.abs(dx), np.abs(dy)),
    }
    # Validate up front so a bad name fails with a clear message instead of
    # an unbound-variable error inside the distance computation.
    if metric not in distance_from_offsets:
        raise ValueError(
            f"Unknown metric {metric!r}; expected one of "
            f"{sorted(distance_from_offsets)}"
        )

    # Broadcast points against centres: offsets have one row per centre and
    # one column per data point.
    centres = np.asarray(centroids, dtype=float)
    dx = xs - centres[:, 0:1]
    dy = ys - centres[:, 1:2]
    dists = distance_from_offsets[metric](dx, dy)

    # Index of the closest centre for every data point.
    cols = np.argmin(dists, axis=0)

    fig, ax = plt.subplots(dpi=300, figsize=(4.5,4.5))
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_bounds(-4,4)
    ax.spines['bottom'].set_bounds(-4,4)
    ax.set_xticks(np.arange(-4, 4.01, 1))
    ax.set_yticks(np.arange(-4, 4.01, 1))

    ax.scatter(xs, ys, s=20, c=ccols[cols])
    ax.scatter(cxs, cys, c=ccols, s=120, marker='X', lw=1, ec='black')
    ax.axis('equal')

# Settings for the radio-button selector that picks the metric.
config = dict(
    options=['Euclidean', 'Manhattan', 'Chebyshev'],
    value='Euclidean',
    description='Metrics',
)

layout = dict(width='60%')

choice = widgets.RadioButtons(layout=layout, **config)

# Hook the selector up to vis_metric. The trailing semicolon keeps the cell
# from echoing the value of the last expression.
interact(vis_metric, metric=choice);

interactive(children=(RadioButtons(description='Metrics', layout=Layout(width='60%'), options=('Euclidean', 'M…