Data cleaning and acquisition for colors
1. Download the github repo https://github.com/rkc007/KNN_color_recognition/tree/717490e62ab581aa6962588f4711d485cbb04dc1
2. Unzip
3. Copy `training_dataset` into the ML-Python folder where the notebooks are
4. Rename `training_dataset` to `colors`

1. List all the subfolders of `colors`
2. Go into each, and open each image file inside
    * extract the RGB values of the center pixel of each image (the top-left pixel proved unreliable because of border artifacts)
3. Build a mapping from color names to lists of RGB triples

In [28]:
import os #part of the Python standard library, comes with Python
from PIL import Image # separate from the standard library, but came with Anaconda

In [29]:
# Build a mapping from color name (the sub-folder name) to a list of
# (R, G, B) triples, one triple per usable image in that sub-folder.
mapping = {}  # initially the mapping is empty

# os.listdir() returns entries in arbitrary, filesystem-dependent order;
# sorting keeps the sample order (and therefore colors.csv) reproducible.
for color_subdir in sorted(os.listdir('colors')):
    # need to ignore hidden entries such as .DS_Store
    if color_subdir.startswith('.'):
        continue

    mapping[color_subdir] = []
    color_dir = os.path.join('colors', color_subdir)

    for image_name in sorted(os.listdir(color_dir)):
        # hidden files can show up inside the color folders too,
        # and they are not images
        if image_name.startswith('.'):
            continue

        # construct full path to the image file
        image_path = os.path.join(color_dir, image_name)

        # load the image; the context manager closes the file handle
        # as soon as the pixel has been read
        with Image.open(image_path) as image:
            width, height = image.size

            # extract the CENTER pixel rather than the top-left one:
            # the images have artifacts along their borders
            pixel = image.getpixel((width // 2, height // 2))

        # data cleaning: getpixel() only returns a tuple for multi-band
        # images; skip bare values and tuples too short to hold R, G and B
        if not isinstance(pixel, tuple) or len(pixel) < 3:
            continue

        # drop the alpha component of any RGBA values to convert to pure RGB
        mapping[color_subdir].append(pixel[:3])

In [30]:
# Problem: the color images have odd artifacts along their borders.
# Solution: take the middle pixel instead of a corner pixel.
# Need to look at the Pillow docs for the Image
# class to get the width and height of the image.
# Dividing both by two gives the coordinates of the center pixel.
mapping

{'black': [(47, 47, 47),
  (7, 13, 11),
  (0, 0, 0),
  (6, 6, 6),
  (24, 29, 32),
  (49, 54, 57),
  (29, 29, 29),
  (37, 37, 37),
  (42, 41, 47),
  (35, 30, 34)],
 'blue': [(0, 0, 254),
  (3, 91, 188),
  (3, 4, 148),
  (0, 0, 255),
  (0, 48, 143),
  (1, 119, 193)],
 'green': [(37, 202, 38),
  (0, 255, 0),
  (35, 67, 17),
  (64, 189, 85),
  (159, 217, 140),
  (0, 166, 82),
  (0, 128, 1),
  (33, 83, 54),
  (125, 194, 75),
  (123, 252, 1),
  (125, 232, 88)],
 'orange': [(255, 128, 0),
  (255, 122, 1),
  (255, 102, 0),
  (254, 101, 33),
  (255, 153, 0),
  (252, 79, 19),
  (255, 127, 0)],
 'red': [(61, 13, 3),
  (254, 0, 2),
  (254, 0, 0),
  (210, 7, 36),
  (209, 23, 23),
  (207, 22, 1),
  (221, 0, 12),
  (176, 33, 27),
  (128, 24, 24)],
 'violet': [(130, 0, 220),
  (127, 0, 255),
  (145, 0, 255),
  (145, 0, 255),
  (148, 0, 210),
  (127, 0, 255),
  (145, 0, 255),
  (125, 0, 240),
  (148, 0, 211)],
 'white': [(253, 253, 255),
  (248, 249, 254),
  (255, 255, 244),
  (253, 245, 230),
  (255, 

In [33]:
import csv
import sys

In [36]:
# Write one "color,r,g,b" row per sample, closing the file by hand.
# newline='' is what the csv module documentation asks for: without it,
# platforms that write \r\n line endings get an extra \r on every row.
file = open('colors.csv', 'w', newline='')
try:
    writer = csv.writer(file)
    for color_name, rgb_values in mapping.items():
        for r, g, b in rgb_values:
            writer.writerow([color_name, r, g, b])
finally:
    # close the file even if writing a row raised
    file.close()

In [35]:
# Alternative way of opening files, with automatic closure: the `with`
# block closes the file when it ends, even if an exception is raised.
# newline='' is what the csv module documentation asks for, so that the
# writer alone controls the line endings.
with open('colors.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for color_name, rgb_values in mapping.items():
        for r, g, b in rgb_values:
            writer.writerow([color_name, r, g, b])

        

black,47,47,47
black,7,13,11
black,0,0,0
black,6,6,6
black,24,29,32
black,49,54,57
black,29,29,29
black,37,37,37
black,42,41,47
black,35,30,34
blue,0,0,254
blue,3,91,188
blue,3,4,148
blue,0,0,255
blue,0,48,143
blue,1,119,193
green,37,202,38
green,0,255,0
green,35,67,17
green,64,189,85
green,159,217,140
green,0,166,82
green,0,128,1
green,33,83,54
green,125,194,75
green,123,252,1
green,125,232,88
orange,255,128,0
orange,255,122,1
orange,255,102,0
orange,254,101,33
orange,255,153,0
orange,252,79,19
orange,255,127,0
red,61,13,3
red,254,0,2
red,254,0,0
red,210,7,36
red,209,23,23
red,207,22,1
red,221,0,12
red,176,33,27
red,128,24,24
violet,130,0,220
violet,127,0,255
violet,145,0,255
violet,145,0,255
violet,148,0,210
violet,127,0,255
violet,145,0,255
violet,125,0,240
violet,148,0,211
white,253,253,255
white,248,249,254
white,255,255,244
white,253,245,230
white,255,237,231
white,243,239,227
white,229,224,221
white,242,233,228
white,23

k-NN colors

In [38]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import csv
import pandas as pd

In [46]:
# colors.csv contains only data rows (no header line), so the column
# names are supplied here instead of being read from the file.
df = pd.read_csv('colors.csv', names=['color', 'r', 'g', 'b'])

In [47]:
# Display the DataFrame to check that the CSV was read as expected.
df

Unnamed: 0,color,r,g,b
0,black,47,47,47
1,black,7,13,11
2,black,0,0,0
3,black,6,6,6
4,black,24,29,32
...,...,...,...,...
66,yellow,255,183,9
67,yellow,255,255,0
68,yellow,255,166,0
69,yellow,249,217,94


In [49]:
#convert to numpy array
# Convert the whole DataFrame (label column and r, g, b columns) to a
# NumPy array. The column selection printed further down shows
# dtype=object for this array.
array = df.to_numpy()

In [50]:
#select a column in numpy array
# Select the first column of the NumPy array (all rows, column 0):
# these are the color names.
array[:, 0]

# Alternative (v2): df.color selects the same column from the DataFrame.

array(['black', 'black', 'black', 'black', 'black', 'black', 'black',
       'black', 'black', 'black', 'blue', 'blue', 'blue', 'blue', 'blue',
       'blue', 'green', 'green', 'green', 'green', 'green', 'green',
       'green', 'green', 'green', 'green', 'green', 'orange', 'orange',
       'orange', 'orange', 'orange', 'orange', 'orange', 'red', 'red',
       'red', 'red', 'red', 'red', 'red', 'red', 'red', 'violet',
       'violet', 'violet', 'violet', 'violet', 'violet', 'violet',
       'violet', 'violet', 'white', 'white', 'white', 'white', 'white',
       'white', 'white', 'white', 'white', 'white', 'yellow', 'yellow',
       'yellow', 'yellow', 'yellow', 'yellow', 'yellow', 'yellow',
       'yellow'], dtype=object)

In [51]:
#get numbers
# Split the array into labels and features.
# y: the color names (column 0), used as class labels.
y = array[:, 0]
# X: the r, g, b columns. `array` has dtype=object because it also holds
# the string labels, so cast the feature columns to integers explicitly
# instead of relying on an implicit conversion later on.
X = array[:, 1:].astype(int)

In [54]:
# STep 0: train-test-split; set aside data for an 'exam' for our model at the end
# test_size=0.1 holds out 10% of the samples for the final evaluation;
# the remaining 90% are used for training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)  # random_state=0 <-- fixes the shuffle so everyone who runs this gets the exact same split

In [55]:
# Step 1: Initiate the Classifier
# n_neighbors=5: each prediction is decided by the 5 nearest training samples.
classifier = KNeighborsClassifier(n_neighbors=5)

In [57]:
# Step 2: Train the classifier
# Only the training split is used here; the test split stays unseen
# until the evaluation below.
classifier.fit(X_train, y_train)


KNeighborsClassifier()

In [58]:
# Step 3: Predict a color name for every sample in the held-out test set
y_predicted = classifier.predict(X_test)

In [60]:
# Step 4: Score the model -- the fraction of test samples whose predicted
# label equals the true label (1.0 means every test sample was right).
# NOTE(review): with test_size=0.1 the test set is very small, so this
# number is only a rough indication.
accuracy_score(y_test, y_predicted)

1.0

In [None]:
# Let's try a few arbitrary colors picked from an online RGB color codes chart

In [62]:
# predict() expects a list of samples, so the single (R, G, B) triple is
# wrapped in a list.
# (255, 74, 235) is a pink/magenta tone; pink is not among the training
# labels, so the model can only answer with one of the colors it knows.
classifier.predict([(255, 74, 235)])

array(['violet'], dtype=object)

In [63]:
# A medium green, passed as a one-sample list.
classifier.predict([(50, 168, 82)])

array(['green'], dtype=object)

In [64]:
# A medium blue, passed as a one-sample list.
classifier.predict([(50, 98, 168)])

array(['blue'], dtype=object)

In [68]:
# An olive / dark-yellow tone, passed as a one-sample list.
classifier.predict([(168, 158, 50)])

array(['yellow'], dtype=object)