# Read in the images (X) and their hair color labels (Y)

In [12]:
from skimage import io as skimage_io
from skimage.transform import resize
import os, io, glob

X =[]
Y = []


# these hair color pics were downloaded from IMDB's list of actresses that were
# "blonde", "brunette", and "red heads"
# example https://www.imdb.com/list/ls058386015/  -- a list of blonde actresses
folders = ["pics/blonde/blonde*jpg", "pics/brunette/brown*jpg", "pics/redheads/red*jpg"]

# assign jpg_files all of the hair-colored jpgs
jpg_files = []
for folder in folders:
    jpg_files.extend (glob.glob (folder))

training_data = []

IMG_PX_SIZE = 20 # pixels. All pictures will be resized to IMG_PX_SIZE x IMG_PX_SIZE

for jpg_file in jpg_files:
    
    # assign the hair color of this image to label.
    # e.g. filename "red1.jpg" becomes red, "brown2.jpg" becomes brown
    label = os.path.basename (jpg_file).replace (".jpg", "")
    label = ''.join([i for i in label if not i.isdigit()]) 

    # read the image into rgb_img
    rgb_img = skimage_io.imread (jpg_file)
    
    # resize the image, so all images have the same size
    rgb_img = resize (rgb_img, (IMG_PX_SIZE,IMG_PX_SIZE))

    # store the picture information in X, and the hair color in Y
    # we will try to predict Y based on X (predict hair color based on a picture)
    X.append (rgb_img.reshape (1, IMG_PX_SIZE*IMG_PX_SIZE*3)[0]) # flatten into a 1-D array
    Y.append (label)
    
print ("read in " + str (len(X)) + " images")

read in 33 images


# Create a classifier

This is a machine learning method that creates groups based on the labels we provide it. In this case, we are creating 3 groups (blonde, red, brunette). The model will learn from the blonde, red, brunette pictures (X) and will make a classifier based on the labels provided (Y). 


In [30]:
from sklearn.neighbors import KNeighborsClassifier

num_groups = 3  # blonde, red, brunette

# initialize the model with KNeighborsClassifier and the number of groups
model = KNeighborsClassifier(n_neighbors=num_groups)

# make the classifier based on X (pictures) and Y (their hair color labels)
model.fit (X, Y)



KNeighborsClassifier(n_neighbors=3)

# Calculate classifier's performance

Given an image (X), what does the model predict
Compare the model's predictions with the true labels (Y)

In [31]:
from sklearn.metrics import accuracy_score

predicted_labels = model.predict (X)

# get accuracy of K-neighbors classifier
accuracy_score (Y,predicted_labels )

0.9393939393939394

The classifier is 94% accurate, not 100%. Let's see where is it getting it wrong?

In [21]:
print ("Predicted Hair Color\tFilename")
for i, jpg_file in enumerate (jpg_files):
    print (predicted_labels[i] + "\t" + jpg_file)


Predicted Hair Color	Filename
blonde	pics/blonde/blonde4.jpg
blonde	pics/blonde/blonde10.jpg
blonde	pics/blonde/blonde2.jpg
blonde	pics/blonde/blonde5.jpg
blonde	pics/blonde/blonde6.jpg
red	pics/blonde/blonde9.jpg
blonde	pics/blonde/blonde1.jpg
blonde	pics/blonde/blonde8.jpg
red	pics/blonde/blonde7.jpg
blonde	pics/blonde/blonde3.jpg
brown	pics/brunette/brown7.jpg
brown	pics/brunette/brown2.jpg
brown	pics/brunette/brown3.jpg
brown	pics/brunette/brown9.jpg
brown	pics/brunette/brown5.jpg
brown	pics/brunette/brown6.jpg
brown	pics/brunette/brown4.jpg
brown	pics/brunette/brown1.jpg
brown	pics/brunette/brown10.jpg
brown	pics/brunette/brown8.jpg
red	pics/redheads/red10.jpg
red	pics/redheads/red9.jpg
red	pics/redheads/red5.jpg
red	pics/redheads/red7.jpg
red	pics/redheads/red6.jpg
red	pics/redheads/red11.jpg
red	pics/redheads/red1.jpg
red	pics/redheads/red3.jpg
red	pics/redheads/red4.jpg
red	pics/redheads/red2.jpg
red	pics/redheads/red8.jpg
red	pics/redheads/red13.jpg
red	pics/redheads/red12.jpg

There are 2 blondes that are classified as red. (You can look at blonde7.jpg and blonde9.jpg, and see if this is reasonable)

# Save the hair color classifier

After training the machine learning model, we want to save the classifier for future use.

In [33]:
import pickle

#save the hair-color classifier
pickle.dump (model, open ('data/hair_color.pkl', 'wb'))
