# Lab 1 - Color Search

The listings in this notebook are for the RGB color similarity exercises

In [None]:
import json
import math
from IPython.display import display, HTML

In [None]:
def show_colors(colors,annotate=False):
    """Render a sequence of color entries as a row of labelled swatches.

    Each entry is a dict. Its display info (a dict with "name" and "hex") is
    read from entry["payload"] when that key exists, otherwise from
    entry["result"]. If the entry has a "distance" key, that value is shown
    in small italics after the name.

    Args:
        colors: iterable of color entries as described above.
        annotate: when true, entry["vector"] is printed inside the swatch;
            otherwise the swatch only contains a non-breaking space.
    """
    swatches = []
    for entry in colors:
        # Raw documents keep their info under "payload"; search hits use "result".
        if "payload" in entry:
            info = entry["payload"]
        else:
            info = entry["result"]

        if "distance" in entry:
            score_markup = f'<br/><em style="font-size:0.88em;">{entry["distance"]}</em>'
        else:
            score_markup = ""

        label = entry["vector"] if annotate else "&nbsp;"

        swatches.append(f'''
            <div style="float:left;margin:10px;">
                <strong>{info["name"]}</strong>{score_markup}
                <div style="width:120px;height:120px;text-align:center;padding-top:50px;background-color:{info["hex"]}">{label}</div>
            </div>''')

    # One display call for the whole row so the swatches share a single output.
    markup = '\n'.join(swatches)
    display(HTML(markup))

def show_color(result):
    """Display one color: its name as a heading above a 120x120 swatch.

    Args:
        result: dict providing "name" (heading text) and "hex" (used as the
            swatch's CSS background-color).
    """
    heading = f'<h3>{result["name"]}</h3>'
    swatch = f'<div style="width:120px;height:120px;background-color:{result["hex"]}">&nbsp;</div>'
    display(HTML(heading + swatch))

# Listing 1.1
Load the color vector documents

In [None]:
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open('color_vectors.json', encoding='utf-8') as fd:
    vectors = json.load(fd)
# Trailing expression: preview the first three loaded documents as cell output.
vectors[0:3]

# Listing 1.2
Show the first 12 alphabetically ordered colors

In [None]:
# Render the first 12 entries of `vectors` as swatches.
show_colors(vectors[:12])

# Listing 1.3
Euclidean distance between two embeddings, each with three dimensions

In [None]:
def euclidean_distance_3d(a,b):
    """Return the Euclidean distance between two 3-dimensional vectors.

    Only indices 0, 1 and 2 of each argument are read; any further
    components are ignored.

    Args:
        a: indexable sequence of numbers with at least three components.
        b: indexable sequence of numbers with at least three components.

    Returns:
        The distance as a float. Identical vectors give 0.0, and a NaN
        component propagates as NaN instead of being reported as 0.

    Raises:
        IndexError: if a list/tuple argument has fewer than three components.
    """
    squared = float((a[0]-b[0])**2 + (a[1]-b[1])**2 + (a[2]-b[2])**2)
    # math.sqrt(0.0) is exactly 0.0, so no zero special case is needed; dropping
    # it keeps the return type a float and stops NaN from collapsing to 0.
    return math.sqrt(squared)

# Listing 1.4
The distance between color id=0 and color id=1, followed by the distances from color id=0 to color id=2 and to color id=3

In [None]:
# Distance between the vectors of the entries at index 0 and index 1.
euclidean_distance_3d(vectors[0]["vector"],vectors[1]["vector"])

In [None]:
# Distance between the vectors of the entries at index 0 and index 2.
euclidean_distance_3d(vectors[0]["vector"],vectors[2]["vector"])

In [None]:
# Distance between the vectors of the entries at index 0 and index 3.
euclidean_distance_3d(vectors[0]["vector"],vectors[3]["vector"])

# Listing 1.5
Get the top 'k' nearest colors given an embedding 'a'

In [None]:
def nearest_euclidean_3d(a,k=3):
    """Return the `k` entries of the global `vectors` closest to `a`.

    Every entry in `vectors` is scored with euclidean_distance_3d against its
    "vector"; entries are then ranked by ascending distance, with ties keeping
    their original order in `vectors`.

    Args:
        a: query vector, passed as the first argument to euclidean_distance_3d.
        k: number of results to keep (slice bound on the ranked list).

    Returns:
        A list of dicts, nearest first, each with "result" (the matching
        entry's "payload") and "distance" (its distance to `a`).
    """
    scored = [
        (euclidean_distance_3d(a, entry["vector"]), position)
        for position, entry in enumerate(vectors)
    ]
    # sorted() is stable, so equal distances stay in index order.
    ranked = sorted(scored, key=lambda pair: pair[0])
    return [
        {"result": vectors[position]["payload"], "distance": dist}
        for dist, position in ranked[0:k]
    ]

# Listing 1.6
The three nearest colors to color id=1

In [None]:
# Query with the vector of the entry at index 1 and keep the 3 closest matches.
nearest_euclidean_3d(vectors[1]["vector"],k=3)

# Listing 1.7
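Shows nearest colors as swatches: first the 3 nearest colors to color id=1 (the query from Listing 1.6), then the 12 nearest colors to color id=505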

In [None]:
# Render the 3 nearest matches for the entry at index 1 as swatches.
show_colors(nearest_euclidean_3d(vectors[1]["vector"],k=3))

In [None]:
# Render the 12 nearest matches for the entry at index 505 as swatches.
show_colors(nearest_euclidean_3d(vectors[505]["vector"],k=12))

# Listing 1.8
Shows the 12 nearest colors to rgb=[150,0,200], and then the 12 nearest colors to rgb=[255,255,255]

In [None]:
# Query with a literal vector instead of a stored entry: the 12 nearest matches to [150,0,200].
show_colors(nearest_euclidean_3d([150,0,200],k=12))

In [None]:
# Same query style with the literal vector [255,255,255]: the 12 nearest matches.
show_colors(nearest_euclidean_3d([255,255,255],k=12))

# The wrong metric

What happens when we use a different metric, for example the dot product, in this Euclidean RGB space?

In [None]:
# Dot product
def dot_product(a,b):
    """Return the sum of the pairwise products of the components of `a` and `b`.

    Components are paired with zip, so if the inputs differ in length the
    extra components of the longer one are ignored. Empty input gives 0.
    """
    total = 0
    for left, right in zip(a, b):
        total = total + left * right
    return total

In [None]:
# Dot products of a few 2-D vector pairs, printed one result per line.
print(
    dot_product([9,1],[9,1]),
    dot_product([7,2],[9,1]),
    dot_product([7,2],[4,8]),
    dot_product([4,8],[9,1]),
    dot_product([4,8],[2,1]),
    sep="\n",
)

In [None]:
def nearest_dot_3d(a,k=3):
    """Return the `k` entries of the global `vectors` with the largest dot product against `a`.

    Every entry in `vectors` is scored with dot_product against its "vector";
    entries are then ranked by descending score, with ties keeping their
    original order in `vectors`.

    Args:
        a: query vector, passed as the first argument to dot_product.
        k: number of results to keep (slice bound on the ranked list).

    Returns:
        A list of dicts, highest score first, each with "result" (the matching
        entry's "payload") and "distance". Note that "distance" here holds the
        dot product, so larger values rank first.
    """
    scored = [
        (dot_product(a, entry["vector"]), position)
        for position, entry in enumerate(vectors)
    ]
    # reverse=True keeps the sort stable: equal scores stay in index order.
    ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
    return [
        {"result": vectors[position]["payload"], "distance": score}
        for score, position in ranked[0:k]
    ]

In [None]:
# Render the 12 highest dot-product matches for the entry at index 505.
show_colors(nearest_dot_3d(vectors[505]["vector"],k=12))

In [None]:
# Render the 12 highest dot-product matches for the entry at index 1010
# (requires `vectors` to hold at least 1011 entries).
show_colors(nearest_dot_3d(vectors[1010]["vector"],k=12))

## Appendix

The cells below were used to create images in the slides

In [None]:
# Three hand-written entries in the same shape as the loaded documents:
# a "vector" of three numbers plus a "payload" with "name" and "hex".
examples = [
    {"vector": [red, green, blue], "payload": {"name": name, "hex": hex_code}}
    for name, hex_code, (red, green, blue) in (
        ("red", "#FF3333", (255, 51, 51)),
        ("pink", "#FF3399", (255, 51, 153)),
        ("green", "#66FF00", (102, 255, 0)),
    )
]

In [None]:
# Render the example entries with each entry's vector printed inside its swatch.
show_colors(examples,annotate=True)

In [None]:
# Pairwise distances between the three example vectors (0-1, 0-2, 1-2),
# printed one per line.
print(
    euclidean_distance_3d(examples[0]["vector"],examples[1]["vector"]),
    euclidean_distance_3d(examples[0]["vector"],examples[2]["vector"]),
    euclidean_distance_3d(examples[1]["vector"],examples[2]["vector"]),
    sep="\n",
)