# Lab 1 - Color Search

The listings in this notebook are for the RGB color similarity exercises.

In [None]:
require 'json'
require 'erb'

In [None]:
# Renders a row of color swatches as HTML in the notebook output.
#
# Each element of +colors+ is a Hash whose color details (a Hash with "name"
# and "hex" keys) are stored under "payload" or, failing that, under "result".
# When the element has a "distance" value it is printed below the name.
#
# Every interpolated value is HTML-escaped so that a name such as
# "Black & Tan" or a stray quote in a hex string cannot break the markup.
#
# @param colors [Array<Hash>] color documents or search results
# @param annotate [Boolean] when true, print each element's "vector" inside its swatch
# @return the value returned by IRuby.display
def show_colors(colors, annotate: false)
  swatches = colors.map do |color|
    details = color["payload"] || color["result"]
    name = ERB::Util.html_escape(details["name"])
    hex = ERB::Util.html_escape(details["hex"])
    score = if color["distance"]
              "<br/><em style=\"font-size:0.88em;\">#{ERB::Util.html_escape(color["distance"])}</em>"
            else
              ""
            end
    # "&nbsp;" is intentional markup (keeps the swatch box from collapsing), so it stays raw.
    anno = annotate ? ERB::Util.html_escape(color["vector"]) : "&nbsp;"
    <<~HTML
      <div style="float:left;margin:10px;">
        <strong>#{name}</strong>#{score}
        <div style="width:120px;height:120px;text-align:center;padding-top:50px;background-color:#{hex}">#{anno}</div>
      </div>
    HTML
  end
  IRuby.display IRuby.html(swatches.join("\n"))
end

# Renders a single color as a heading plus a 120x120 swatch.
#
# The name and hex value are HTML-escaped before interpolation so special
# characters in either cannot break the generated markup.
#
# @param result [Hash] color details with "name" and "hex" keys
# @return the value returned by IRuby.display
def show_color(result)
  name = ERB::Util.html_escape(result["name"])
  hex = ERB::Util.html_escape(result["hex"])
  html = <<~HTML
    <h3>#{name}</h3>
    <div style="width:120px;height:120px;background-color:#{hex}">&nbsp;</div>
  HTML
  IRuby.display IRuby.html(html)
end

# Listing 1.1
Load the color vector documents

In [None]:
# Parse the color documents from disk, then evaluate to the first three entries.
vectors = File.open('color_vectors.json') { |file| JSON.parse(file.read) }
vectors[0, 3]

# Listing 1.2
Show the first 12 alphabetically ordered colors

In [None]:
# Display swatches for the first twelve color documents.
show_colors(vectors.first(12))

# Listing 1.3
Euclidean distance between two embeddings, each with three dimensions

In [None]:
# Euclidean (straight-line) distance between two three-component vectors.
#
# Only indices 0, 1 and 2 of each argument are read.
#
# @param a [Array<Numeric>] first vector
# @param b [Array<Numeric>] second vector
# @return [Float] the distance; 0.0 when the two vectors are identical
def euclidean_distance_3d(a, b)
  squared = (a[0] - b[0])**2 + (a[1] - b[1])**2 + (a[2] - b[2])**2
  # A sum of squares is never negative, so Math.sqrt needs no guard. Calling it
  # unconditionally keeps the return type a Float (0.0 rather than Integer 0)
  # and lets a NaN input surface as NaN instead of being reported as 0.
  Math.sqrt(squared)
end

# Listing 1.4
The distance between color id=0 and color id=1, followed by the distances from color id=0 to colors id=2 and id=3

In [None]:
# Distance between the vectors of the first and second color documents.
euclidean_distance_3d(vectors[0]["vector"], vectors[1]["vector"])

In [None]:
# Distance between the vectors of the first and third color documents.
euclidean_distance_3d(vectors[0]["vector"], vectors[2]["vector"])

In [None]:
# Distance between the vectors of the first and fourth color documents.
euclidean_distance_3d(vectors[0]["vector"], vectors[3]["vector"])

# Listing 1.5
Get the top 'k' nearest colors given an embedding 'a'

In [None]:
# Finds the +k+ stored colors whose vectors lie closest to +a+.
#
# Every entry's "vector" is compared with +a+ via euclidean_distance_3d, the
# entries are ordered by ascending distance, and the leading +k+ are returned.
#
# @param vectors [Array<Hash>] documents with "vector" and "payload" keys
# @param a [Array<Numeric>] query vector
# @param k [Integer] number of hits to return
# @return [Array<Hash>] hashes of the form {"result" => payload, "distance" => distance}
def nearest_euclidean_3d(vectors, a, k = 3)
  scored = vectors.map do |entry|
    [euclidean_distance_3d(a, entry["vector"]), entry["payload"]]
  end

  closest_first = scored.sort_by { |distance, _payload| distance }

  closest_first[0...k].map do |distance, payload|
    { "result" => payload, "distance" => distance }
  end
end

# Listing 1.6
The three nearest colors to color id=1

In [None]:
# k is a positional parameter. Writing `k=3` inside the call would be an
# assignment expression that also creates a stray local `k`.
nearest_euclidean_3d(vectors, vectors[1]["vector"], 3)

# Listing 1.7
Shows the three nearest colors to color id=1, then the 12 nearest colors to color id=505

In [None]:
# k is passed positionally; `k=3` in a call is an assignment, not a keyword argument.
show_colors(nearest_euclidean_3d(vectors, vectors[1]["vector"], 3))

In [None]:
# k is passed positionally; `k=12` in a call is an assignment, not a keyword argument.
show_colors(nearest_euclidean_3d(vectors, vectors[505]["vector"], 12))

# Listing 1.8
Shows the 12 nearest colors to rgb=[150,0,200], then the 12 nearest colors to white, rgb=[255,255,255]

In [None]:
# k is passed positionally; `k=12` in a call is an assignment, not a keyword argument.
show_colors(nearest_euclidean_3d(vectors, [150, 0, 200], 12))

In [None]:
# k is passed positionally; `k=12` in a call is an assignment, not a keyword argument.
show_colors(nearest_euclidean_3d(vectors, [255, 255, 255], 12))

# The wrong metric

What happens when we use a different metric (for example, the dot product) in this Euclidean RGB space?

In [None]:
# Dot product of two vectors: the sum of the products of paired components.
#
# Components are paired by position using the length of +a+.
#
# @param a [Array<Numeric>] first vector
# @param b [Array<Numeric>] second vector
# @return [Numeric] the sum of the pairwise products (0 when +a+ is empty)
def dot_product(a, b)
  products = a.zip(b).map { |left, right| left * right }
  products.sum
end

In [None]:
# Print the dot products of a few 2-D vector pairs, one result per line.
puts(
  dot_product([9, 1], [9, 1]),
  dot_product([7, 2], [9, 1]),
  dot_product([7, 2], [4, 8]),
  dot_product([4, 8], [9, 1]),
  dot_product([4, 8], [2, 1])
)

In [None]:
# Finds the +k+ stored colors whose vectors have the largest dot product with +a+.
#
# Every entry's "vector" is scored against +a+ via dot_product, the entries
# are ordered from highest to lowest score, and the leading +k+ are returned.
# The score is reported under the "distance" key.
#
# @param vectors [Array<Hash>] documents with "vector" and "payload" keys
# @param a [Array<Numeric>] query vector
# @param k [Integer] number of hits to return
# @return [Array<Hash>] hashes of the form {"result" => payload, "distance" => score}
def nearest_dot_3d(vectors, a, k = 3)
  scored = vectors.map do |entry|
    [dot_product(a, entry["vector"]), entry["payload"]]
  end

  # Negating the key orders the scores from highest to lowest.
  highest_first = scored.sort_by { |score, _payload| -score }

  highest_first[0...k].map do |score, payload|
    { "result" => payload, "distance" => score }
  end
end

In [None]:
# k is passed positionally; `k=12` in a call is an assignment, not a keyword argument.
show_colors(nearest_dot_3d(vectors, vectors[505]["vector"], 12))

In [None]:
# k is passed positionally; `k=12` in a call is an assignment, not a keyword argument.
show_colors(nearest_dot_3d(vectors, vectors[1010]["vector"], 12))

## Appendix

The cells below were used to create images in the slides

In [None]:
# Three hand-picked colors, built from (name, hex, rgb) rows into the same
# {"vector" => ..., "payload" => {"name" => ..., "hex" => ...}} document shape.
examples = [
  ["red",   "#FF3333", [255, 51, 51]],
  ["pink",  "#FF3399", [255, 51, 153]],
  ["green", "#66FF00", [102, 255, 0]]
].map do |name, hex, rgb|
  { "vector" => rgb, "payload" => { "name" => name, "hex" => hex } }
end

In [None]:
# Display the example colors with each swatch labelled by its vector.
show_colors(examples, annotate: true)

In [None]:
# Print the pairwise distances between the three example colors, one per line:
# first vs second, first vs third, then second vs third.
puts([[0, 1], [0, 2], [1, 2]].map { |i, j| euclidean_distance_3d(examples[i]["vector"], examples[j]["vector"]) })