In [1]:
# imports

import os
import numpy as np
import pandas as pd
import cv2
from sklearn.cluster import KMeans
from collections import Counter
import colorsys


In [2]:
# given a numpy array with dimensions (height, width, 4)
# convert it to a numpy array with dimensions (height, width, 10)
# each four-ple of the input array is (b, g, r, a)
# b, g, r range from 0 to 255
# each ten-ple of the output array is (r, g, b, h, l, s, a, row, col, dist)
# r, g, b, h, l, s, a range from 0 to 1

def augment(im):
    """Expand a BGRA image into a ten-feature-per-pixel float array.

    Parameters
    ----------
    im : numpy array of shape (height, width, 4)
        Pixels stored as (b, g, r, a) with each channel in 0..255.

    Returns
    -------
    numpy array of shape (height, width, 10)
        Each pixel holds (r, g, b, h, l, s, a, row, col, dist), where the
        colour and alpha values are scaled to 0..1 and ``dist`` is the
        Manhattan distance from the pixel to the image centre.
    """

    (height, width, _) = im.shape

    # rows run along the height and columns along the width, so the centre
    # row comes from the height and the centre column from the width
    (center_row, center_col) = (height // 2, width // 2)

    augmented = np.zeros((height, width, 10)) # ten features total
    for row in range(height):
        for col in range(width):

            # scale every channel to 0..1 (float() avoids uint8 arithmetic)
            (b, g, r, a) = (float(channel) / 255 for channel in im[row, col])
            (h, l, s) = colorsys.rgb_to_hls(r, g, b)

            # compare like with like: row against centre row, col against centre col
            dist = abs(center_row - row) + abs(center_col - col)
            augmented[row, col] = [r, g, b, h, l, s, a, row, col, dist]

    return augmented


In [3]:
# given a numpy array with dimensions (rows, 10)
# convert it to a pandas dataframe with dimensions (?, 10)
# cols: r, g, b, h, l, s, a, row, col, dist
# r, g, b, h, l, s, a range from 0 to 1
# keep only the rows where a > 0.5 (rows with a <= 0.5 are dropped)

def make_df(im):
    """Turn a flat (pixels, 10) feature array into a data frame of opaque pixels.

    The first ten columns of ``im`` are labelled
    r, g, b, h, l, s, a, row, col, center (in that order) and only the rows
    whose alpha column ``a`` is strictly greater than 0.5 are returned.
    The surviving rows keep their original positional index.
    """

    column_names = ("r", "g", "b", "h", "l", "s", "a", "row", "col", "center")

    # one plain list per feature column, keyed by its name
    columns = {
        name: list(im[:, position])
        for position, name in enumerate(column_names)
    }

    # build the full frame, then drop everything that is not mostly opaque
    opaque_only = pd.DataFrame(columns).query("a > 0.5")
    return opaque_only


In [4]:
# an object!!
# this could technically be a tuple or something, since there aren't actual methods
# however it is nice to hide all the initialization behind an __init__

class Face():
    """Bundle an image with every derived view the clustering code needs.

    Attributes set by ``__init__``:
      im, augmented                      -- the input image and its ten-feature version
      patch, patch_augmented             -- a square window around the image centre
      flat, augmented_flat,
      patch_flat, patch_augmented_flat   -- the same arrays reshaped to (pixels, channels)
      df, patch_df                       -- data frames (via make_df) of the augmented
                                            full image and patch; only rows with a > 0.5
    """

    def __init__(self, image):

        # keep the og image
        self.im = image

        # add a bunch of features
        self.augmented = augment(self.im)

        # store a center patch
        # axis 0 of the array is rows (height) and axis 1 is columns (width),
        # so the row slice must be built from the height and the column slice
        # from the width; mixing them up misplaces the patch on non-square images
        (height, width, _) = self.im.shape
        (center_row, center_col) = (height // 2, width // 2)
        dim = min(center_row, center_col) // 2
        rows = slice(center_row - dim, center_row + dim)
        cols = slice(center_col - dim, center_col + dim)
        self.patch = self.im[rows, cols]
        self.patch_augmented = self.augmented[rows, cols]

        # save flat versions of everything: one row per pixel, row-major order
        def my_flatten(a, channels):
            return a.reshape(-1, channels)

        self.flat = my_flatten(self.im, 4)
        self.augmented_flat = my_flatten(self.augmented, 10)
        self.patch_flat = my_flatten(self.patch, 4)
        self.patch_augmented_flat = my_flatten(self.patch_augmented, 10)

        # make data frames of each-- these include only the "valuable" (opaque) pixels
        self.df = make_df(self.augmented_flat)
        self.patch_df = make_df(self.patch_augmented_flat)
        

In [5]:
# given an object, and hyperparameters
# fit a kmeans model
# determine which label corresponds to skin
# determine which pixels are skin and which aren't

def fit_model(face, features, n_clusters):
    """Cluster the opaque pixels of a face and decide which cluster is skin.

    Parameters
    ----------
    face : Face
        Provides ``df`` (all opaque pixels) and ``patch_df`` (opaque pixels of
        the centre patch).
    features : list of str
        Column names of ``face.df`` to cluster on.
    n_clusters : int
        Number of k-means clusters.

    Returns
    -------
    skin_label
        The cluster label that is most common among the patch pixels.
    label_dict : dict
        Maps (row, col) of every pixel in ``face.df`` to its cluster label.
    labels : boolean numpy array
        One entry per row of ``face.df``; True where the pixel's label equals
        ``skin_label``.

    Raises
    ------
    ValueError
        If the centre patch contains no opaque pixels, so no skin label can
        be chosen.
    """

    # use the data frame of the full image to fit a model
    df = face.df[features]
    k_means = KMeans(n_clusters=n_clusters)
    k_means.fit(df)

    # store the labels (easier image building later on)
    # read "row" and "col" by name: unpacking iterrows() positionally only
    # works when the columns happen to be in alphabetical order, which is
    # not the case when pandas preserves the dict insertion order
    all_labels = k_means.predict(df)
    pixel_keys = zip(face.df["row"], face.df["col"])
    label_dict = dict(zip(pixel_keys, all_labels))

    # predict labels for the patch df to get the skin label
    patch_df = face.patch_df[features]
    if len(patch_df) == 0:
        raise ValueError("center patch has no opaque pixels; cannot pick a skin label")
    patch_labels = k_means.predict(patch_df)
    skin_label = Counter(patch_labels).most_common(1)[0][0]

    # which pixels are skin?
    labels = (all_labels == skin_label)
    return skin_label, label_dict, labels


In [6]:
# given an object, and information about the labels
# create two images
# white = transparent
# light blue = classified as non skin
# dark blue OR original pixel = classified as skin

def make_images(face, skin_label, label_dict):
    """Render the clustering result as two 3-channel uint8 images.

    Both images have the height and width of ``face.im``. Pixels whose alpha
    is at most 0.5 are painted [255, 255, 255] in both images. Every other
    pixel is looked up in ``label_dict`` by its (row, col) features:

      * label == skin_label: ``skin_image`` gets the pixel's own colour
        (written as b, g, r scaled back to 0..255) and ``classified_image``
        gets [255, 0, 0]
      * any other label: both images get [255, 240, 180]

    Returns ``(skin_image, classified_image)``.
    """

    transparent_color = [255, 255, 255]
    non_skin_color = [255, 240, 180]
    skin_marker_color = [255, 0, 0]

    # blank canvases, same height and width as the source image
    n_rows, n_cols, _ = face.im.shape
    skin_image = np.zeros((n_rows, n_cols, 3), dtype=np.uint8)
    classified_image = np.zeros((n_rows, n_cols, 3), dtype=np.uint8)

    # visit every pixel in row-major order
    for i, j in np.ndindex(n_rows, n_cols):
        red, green, blue, _h, _l, _s, alpha, row, col, _dist = face.augmented[i, j]

        # thresholded-out pixels: same colour in both images, nothing to look up
        if alpha <= 0.5:
            skin_image[i, j] = transparent_color
            classified_image[i, j] = transparent_color
            continue

        # opaque pixel: colour depends on whether its cluster is the skin cluster
        if label_dict[(row, col)] == skin_label:
            skin_image[i, j] = [round(blue * 255), round(green * 255), round(red * 255)]
            classified_image[i, j] = skin_marker_color
        else:
            skin_image[i, j] = non_skin_color
            classified_image[i, j] = non_skin_color

    return skin_image, classified_image


In [7]:
# given an object, and information about the labels
# get the median color

def get_median_color(face, labels):
    """Build a 100x100 uint8 swatch filled with the median skin colour.

    ``labels`` is a boolean mask over the rows of ``face.df``; the per-channel
    median of the selected rows is taken over the columns b, g, r (in that
    order), scaled by 255 and written into every pixel of the swatch.
    """

    # select the skin rows, then the channels in b, g, r order
    channel_order = ["b", "g", "r"]
    skin_pixels = face.df[labels][channel_order]

    # per-channel median as a plain numpy vector
    median_np = np.array(skin_pixels.median(axis=0))

    # fill a solid block with that colour
    color_block = np.zeros((100, 100, 3), dtype=np.uint8)
    color_block[:, :] = median_np * 255
    return color_block

In [8]:
# Default model grid for cluster_face: each entry is
# (list of feature column names to cluster on, number of k-means clusters).
# cluster_face names each model's outputs by joining the feature names and
# appending the cluster count, e.g. (["h", "s"], 3) -> "hs3".
MODELS = [
    (["g", "l"], 3),
    (["h"], 2),
    (["h", "s"], 2),
    (["h", "s"], 3),
    (["r"], 3),
    (["r", "g", "b"], 2),
    (["r", "g", "b"], 3),
    (["r", "g", "b", "h"], 3),
    (["r", "g", "b", "h", "s"], 2),
    (["r", "g", "b", "h", "s", "l"], 2),
    (["r", "g", "b", "h", "s", "l"], 3),
    (["r", "h"], 2),
    (["r", "h", "s"], 2),
    (["s"], 2)
]

In [9]:
# given a PNG image (ie, four channels) that has been read in with cv2,
# identify the skin pixels using the models given,
# save images for each model
# save a single csv describing the pixels

def cluster_face(image, directory, models=MODELS):
    """Run every model on one face image and save the results in ``directory``.

    Parameters
    ----------
    image : numpy array of shape (height, width, 4)
        A four-channel image as read by cv2.
    directory : str
        Existing directory that receives all the output files.
    models : list of (features, n_clusters) tuples
        The k-means configurations to fit; defaults to MODELS.

    Files written into ``directory``:
      0face.png              -- the input image
      0valid/                -- an (empty) sub-directory
      <model>_skin.jpg       -- skin pixels in their own colour
      <model>_cluster.jpg    -- skin / non-skin classification map
      <model>_color.jpg      -- block of the median skin colour
      0pixels.csv            -- r, g, b, row, col of every opaque pixel plus one
                                boolean "is skin" column per model
    """

    face = Face(image)
    cv2.imwrite(os.path.join(directory, "0face.png"), image)

    # exist_ok so that re-running into the same directory does not crash
    os.makedirs(os.path.join(directory, "0valid"), exist_ok=True)

    # explicit copy: the per-model columns are added to this frame only
    output = face.df.loc[:, ["r", "g", "b", "row", "col"]].copy()

    for (features, n_clusters) in models:

        # do all the fitting
        skin_label, label_dict, labels = fit_model(face, features, n_clusters)
        skin_image, classified_image = make_images(face, skin_label, label_dict)
        median_color = get_median_color(face, labels)

        # save all the info
        model_name = "".join(features) + str(n_clusters)
        print(model_name, end=" ")
        model_path = os.path.join(directory, model_name + "_{}.jpg")
        cv2.imwrite(model_path.format("skin"), skin_image)
        cv2.imwrite(model_path.format("cluster"), classified_image)
        cv2.imwrite(model_path.format("color"), median_color)
        output[model_name] = labels

    # save the csv
    output.to_csv(os.path.join(directory, "0pixels.csv"), index=False)

    return
        

In [17]:
# go through all the faces, and cluster each one

# names of the output directories that already existed when this cell was run
so_far = set(os.listdir("all_pictures/median"))

def cluster_faces(face_dir):
    """Cluster every face image in ``face_dir`` that has no output directory yet.

    For a file named ``<prefix>_<rest>.<ext>`` the output directory is
    ``all_pictures/median/<rest>``. Files whose directory is listed in
    ``so_far`` (or already exists on disk) are skipped, as are files that
    cannot be read as a four-channel image.

    NOTE: an output directory is created before clustering starts, so a run
    that is interrupted part-way leaves a directory that later runs will skip.
    """

    # sorted so that the processing order (and the progress log) is repeatable
    image_names = sorted(os.listdir(face_dir))
    total = len(image_names)

    # loop through all faces
    for count, image in enumerate(image_names):

        print("\n{}/{}...".format(count, total), end=" ")

        # determine the name of the directory
        vol_issue_yr_face = image.split("_")[1 :]
        joined = "_".join(vol_issue_yr_face)
        splitted = joined.split(".")[0]

        # a file name without an underscore yields no usable directory name
        if (not splitted):
            continue

        # decide whether to skip before paying for the image decode;
        # the isdir check covers directories created after so_far was built
        directory = "all_pictures/median/" + splitted
        if (splitted in so_far or os.path.isdir(directory)):
            continue

        # image paths from os.listdir don't include the name of the directory
        # so it needs to be added in
        image_path = "{}/{}".format(face_dir, image)

        # read the image; cv2.imread returns None when the file can't be decoded
        face = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        if (face is None or face.ndim != 3 or face.shape[2] != 4):
            print("skipped (not a readable 4-channel image)", end=" ")
            continue

        print(directory, end=" ")
        os.mkdir(directory)

        # cluster!
        cluster_face(face, directory)

In [18]:
# cluster every image found in all_pictures/faces_nobg; one progress line is printed per file
cluster_faces("all_pictures/faces_nobg")


0/262... 
1/262... 
2/262... 
3/262... 
4/262... 
5/262... 
6/262... 
7/262... 
8/262... 
9/262... 
10/262... 
11/262... all_pictures/median/190_11_2000_0 gl3 h2 hs2 hs3 r3 rgb2 rgb3 rgbh3 rgbhs2 rgbhsl2 rgbhsl3 rh2 rhs2 s2 
12/262... 
13/262... 
14/262... 
15/262... 
16/262... 
17/262... 
18/262... 
19/262... 
20/262... 
21/262... 
22/262... 
23/262... 
24/262... 
25/262... 
26/262... 
27/262... 
28/262... 
29/262... 
30/262... 
31/262... 
32/262... 
33/262... 
34/262... 
35/262... 
36/262... 
37/262... 
38/262... 
39/262... 
40/262... 
41/262... 
42/262... 
43/262... 
44/262... 
45/262... 
46/262... 
47/262... 
48/262... 
49/262... 
50/262... 
51/262... 
52/262... 
53/262... 
54/262... 
55/262... 
56/262... 
57/262... 
58/262... 
59/262... 
60/262... 
61/262... 
62/262... 
63/262... 
64/262... 
65/262... 
66/262... 
67/262... 
68/262... 
69/262... 
70/262... 
71/262... 
72/262... 
73/262... 
74/262... 
75/262... 
76/262... 
77/262... 
78/262... 
79/262... 
80/262... 
81/262... 
82/2

In [27]:
def combine(df, dims, out_dir="all_pictures/median/190_01_2000_1", min_votes=10):
    """Combine the per-model skin votes of one face into a consensus result.

    Parameters
    ----------
    df : pandas DataFrame
        Per-pixel table with the columns r, g, b, row, col followed by one
        boolean "is skin" column per model. It is not modified.
    dims : tuple
        Shape of the output image, e.g. (500, 500, 3).
    out_dir : str
        Directory that receives ``median_face.jpg`` and ``median.jpg``.
    min_votes : int
        A pixel counts as skin when strictly more than this many models
        voted for it.

    Writes ``median_face.jpg`` (consensus pixels painted in the median colour
    on a black image) and ``median.jpg`` (a 100x100 block of that colour).
    """

    # every column that is not pixel data is a model vote; a leftover "summed"
    # column from an earlier run must not be counted as a vote
    non_vote_columns = {"r", "g", "b", "row", "col", "summed"}
    vote_columns = [name for name in df.columns if name not in non_vote_columns]

    # count votes without writing a new column into the caller's frame
    votes = df[vote_columns].sum(axis=1)
    consensus = df.loc[votes > min_votes]

    # median colour of the consensus pixels, in b, g, r order for cv2;
    # with no consensus pixels there is no median, so fall back to black
    if consensus.empty:
        median_color = np.zeros(3)
    else:
        median_color = np.array(consensus[["b", "g", "r"]].median(axis=0)) * 255

    # paint every consensus pixel in one indexed assignment
    image = np.zeros(dims, dtype=np.uint8)
    pixel_rows = consensus["row"].to_numpy().astype(int)
    pixel_cols = consensus["col"].to_numpy().astype(int)
    image[pixel_rows, pixel_cols] = median_color

    # make a color block
    color_block = np.zeros((100, 100, 3), dtype=np.uint8)
    color_block[:, :] = median_color

    # save image
    cv2.imwrite(out_dir + "/median_face.jpg", image)
    cv2.imwrite(out_dir + "/median.jpg", color_block)

    return
            

In [28]:
# load the per-pixel table of one face, preview it, and build its combined median image
# NOTE(review): cluster_face writes its table as "0pixels.csv" but this reads
# "pixels.csv" -- confirm which file name is the intended one
df = pd.read_csv("all_pictures/median/190_01_2000_1/pixels.csv")
print(df.head())
combine(df, (500, 500, 3))

          r         g         b  row   col    gl3    h2   hs2   hs3     r3  \
0  0.494118  0.286275  0.176471  0.0   3.0  False  True  True  True  False   
1  0.537255  0.294118  0.196078  0.0   4.0  False  True  True  True  False   
2  0.670588  0.372549  0.282353  0.0   5.0  False  True  True  True  False   
3  0.952941  0.419608  0.235294  0.0  11.0  False  True  True  True   True   
4  0.980392  0.411765  0.231373  0.0  12.0  False  True  True  True   True   

   ...    rgb2   rgb3  rgbh3  rgbhs2  rgbhsl2  rgbhsl3   rh2  rhs2  \
0  ...   False  False  False   False    False    False  True  True   
1  ...   False  False  False   False    False    False  True  True   
2  ...   False  False  False   False    False    False  True  True   
3  ...    True  False   True    True     True     True  True  True   
4  ...    True  False   True    True     True     True  True  True   

   rhscenter3    s2  
0       False  True  
1       False  True  
2       False  True  
3       False  True  
