# Comparative Analysis of Color and Outline in Andy Warhol's "Shot Marilyns" Series

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import skimage.io as skio
from skimage.color import rgb2gray, rgba2rgb, hsv2rgb
from tqdm import notebook as tqdm
from skimage.transform import resize
import os
import gdown

from tqdm.notebook import tqdm


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Collect the path of every file inside the 'shot marilyns' folder.
# NOTE(review): glob() returns paths in arbitrary order, so which painting ends
# up as A..E below depends on the filesystem - consider sorting; confirm.
filepaths = glob('shot marilyns/*')

# filepaths = glob('Shot Marilyn/*')

print(filepaths)

In [None]:
# Load and preview the first listed image (called image A from here on).
img_A = skio.imread(filepaths[0])
skio.imshow(img_A)

In [None]:
# Load and preview the second listed image (image B).
img_B = skio.imread(filepaths[1])
skio.imshow(img_B)

In [None]:
# Load and preview the third listed image (image C).
img_C = skio.imread(filepaths[2])
skio.imshow(img_C)

In [None]:
# Load and preview the fourth listed image (image D).
img_D = skio.imread(filepaths[3])
skio.imshow(img_D)

In [None]:
# Load and preview the fifth listed image (image E).
img_E = skio.imread(filepaths[4])
skio.imshow(img_E)

In [None]:
# Inspect the array shape and a small patch of pixel values of image A.
print(f'Image dimension is {img_A.shape}')
print(f'Few pixels: \n {img_A[1:3, 1:5,:]}')

In [None]:
# The image arrays carry an extra entry along their last axis
# (presumably an alpha channel - confirm against the shape printed above).
# It holds no information needed here, so keep only the first three channels (RGB).
img_A = img_A[:, :, :3]
img_B = img_B[:, :, :3]
img_C = img_C[:, :, :3]
img_D = img_D[:, :, :3]
img_E = img_E[:, :, :3]

In [None]:
# Resize every image to a common size (the notebook calls this with 512 by 512).
def resize_img(filepaths, height=1000, width=1000, save_dir='shot marilyns', do_zip = False):
    """
    Resize every image in ``filepaths`` and save the result into ``save_dir``.

    Parameters
    ----------
    filepaths : iterable of str
        Paths of the images to resize.
    height, width : int
        Target size in pixels passed to ``skimage.transform.resize``.
    save_dir : str
        Folder the resized images are written to; created if missing. Each
        image keeps its own file name, so when ``save_dir`` is the folder the
        inputs come from, the files are overwritten in place.
    do_zip : bool
        When True, ``save_dir`` is additionally packed into ``<save_dir>.zip``
        next to the folder.

    Returns
    -------
    None
    """
    import shutil  # local import: only needed for the optional archive step

    os.makedirs(save_dir, exist_ok=True)
    for filepath in tqdm(filepaths, desc='Resizing'):
        img_ = skio.imread(filepath)
        img_ = resize(img_, (height, width))
        # basename() works for nested folders and for either path separator,
        # unlike splitting the path on '/'.
        plt.imsave(os.path.join(save_dir, os.path.basename(filepath)), img_)
    if do_zip:
        # Archive the folder that was actually written to, without going
        # through a shell (no quoting/escaping issues with spaces in names).
        target = os.path.normpath(save_dir)
        shutil.make_archive(
            base_name=target,
            format='zip',
            root_dir=os.path.dirname(target) or os.curdir,
            base_dir=os.path.basename(target),
        )

In [None]:
# Common (height, width) for every image; later cells reuse img_shape when
# reshaping the flattened pixel tables back into images.
img_shape = 512, 512
# Write the resized images into 'shot marilyns' and zip the folder.
resize_img(filepaths, height=img_shape[0], width=img_shape[1], save_dir='shot marilyns', do_zip=True)

In [None]:
def plot_shot_marilyns(filepaths):
    """
    Show the images found at ``filepaths`` on a 2x3 grid of subplots.

    The figure is 25 wide and 20 high. Images fill the grid row by row in the
    order given, and the bottom-right cell is always switched off.
    The grid has six cells, so at most six paths can be shown.
    """
    n_cols = 3
    fig, grid = plt.subplots(2, n_cols)
    fig.set_figheight(20)
    fig.set_figwidth(25)
    for position, path in enumerate(filepaths):
        picture = skio.imread(path)
        # Row-major placement: 0,1,2 on the first row, 3,4,5 on the second.
        grid_row, grid_col = divmod(position, n_cols)
        grid[grid_row, grid_col].imshow(picture)
    grid[1, 2].set_axis_off()

In [None]:
# Display the paintings together on one grid.
plot_shot_marilyns(filepaths)

In [None]:
def get_channels(filepaths):
    """
    Build one dataframe holding the RGB channel values of every image.

    Each image is read, cut down to its first three channels and flattened to
    one row per pixel (row-major). Its columns are named ``R<i>``, ``G<i>``
    and ``B<i>``, where ``<i>`` is the position of the image in ``filepaths``.
    The per-image tables are placed side by side.
    """
    def _pixel_table(idx, path):
        pixels = skio.imread(path)[:, :, :3]
        n_rows, n_cols, n_channels = pixels.shape
        flat = pixels.reshape((n_rows * n_cols, n_channels))
        names = [channel + str(idx) for channel in ['R', 'G', 'B']]
        return pd.DataFrame(flat, columns=names)

    tables = [_pixel_table(idx, path) for idx, path in enumerate(filepaths)]
    return pd.concat(tables, axis=1)

In [None]:
# One table with the R/G/B columns of every image (one row per pixel).
rgb_df = get_channels(filepaths)    
rgb_df

In [None]:
def get_pixel_val(file, row_, col_, img_indx=None, img_width=None):
    """
    Get the RGB value of the pixel at (``row_``, ``col_``).

    Parameters
    ----------
    file : pandas.DataFrame or str
        Either the channel dataframe produced by ``get_channels`` (one row per
        pixel in row-major order, three columns per image) or the path of an
        image file.
    row_, col_ : int
        Pixel coordinates.
    img_indx : int, optional
        Position of the image inside the dataframe; required when ``file`` is
        a dataframe.
    img_width : int, optional
        Number of pixel columns of the flattened images. Needed to turn
        (row, col) into a row of the dataframe. Defaults to the notebook-wide
        ``img_shape[1]``.

    Returns
    -------
    The three channel values, or None when the lookup fails (the error is
    printed instead of raised).
    """
    try:
        if isinstance(file, pd.DataFrame) and img_indx is not None:
            if img_width is None:
                # The images were resized to img_shape, so its width is the
                # stride of the row-major flattening.
                img_width = img_shape[1]
            first_col = img_indx * 3
            res = file.iloc[row_ * img_width + col_, first_col:first_col + 3]
        else:
            img_ = skio.imread(file)[:, :, :3]
            res = img_[row_, col_, :]
        return res
    except Exception as e:
        # Deliberately best-effort: report the problem and return None.
        print(e)
        print('File parameter must be either a dataframe or an image file path')
        print('If you provided a dataframe, make sure to provide the image index parameter')
        return None

In [None]:
# Look up the same pixel (row 2, column 0) of the first image twice:
# once from the image file and once from the channel dataframe.
print(get_pixel_val(file = filepaths[0], row_=2, col_=0))
print(get_pixel_val(file = get_channels(filepaths) , row_=2, col_=0, img_indx=0))

In [None]:
def axis_mask(val, base):
    """
    Return the ``(lower, upper)`` bounds of the ``base``-wide interval that
    ``val`` is assigned to along one axis.

    Exact multiples of ``base`` belong to the interval that ends at them
    (e.g. 20 with base 10 gives (10, 20)); 0 belongs to the first interval
    (0, base). Any other value is bracketed by the multiples of ``base`` just
    below and just above it. Both bounds are returned as Python ints.
    """
    if val == 0 or val == base:
        # The origin and the first multiple both fall in the first interval.
        lower, upper = 0, base
    elif val % base == 0:
        lower, upper = val - base, val
    else:
        ratio = val / base
        lower = base * np.floor(ratio)  # closest multiple below val
        upper = base * np.ceil(ratio)   # closest multiple above val
    return (int(lower), int(upper))


def get_bounding_cubes(data, step = 3):
    """
    Assign every row of ``data`` to a cube of side ``step`` and return the
    cube bounds and centres.

    Parameters
    ----------
    data : sequence of rows
        Only the first three entries of each row are used (one per axis).
    step : int
        Side length of the cubes, passed to ``axis_mask`` as ``base``.

    Returns
    -------
    tuple
        ``(data, data_masks, mask_center)`` where ``data`` is the input,
        ``data_masks[i]`` holds the three ``(lower, upper)`` bounds of row
        ``i`` and ``mask_center[i]`` the three integer centres (midpoints
        truncated towards zero).
    """
    def _midpoint(bounds):
        # The median of the integers lower..upper is exactly their midpoint,
        # so there is no need to build the range for every pixel.
        lower, upper = bounds
        return int((lower + upper) / 2)

    data_masks = []
    mask_center = []
    for row in tqdm(data, total=len(data)):
        masks = tuple(axis_mask(val=row[axis], base=step) for axis in range(3))
        data_masks.append(masks)
        mask_center.append(tuple(_midpoint(bounds) for bounds in masks))

    return (data, data_masks, mask_center)

In [None]:
def make_center_df(df, cols=None, img_shape=(1000, 1000), cube_width = 3, to_plot=False, return_result=True):
    """
    Return a dataframe pairing each pixel's RGB value with the centre of the
    RGB cube it falls in (i.e. a colour-reduced version of the image).

    Parameters
    ----------
    df : pandas.DataFrame
        Pixel table with one row per pixel.
    cols : list, optional
        The three channel columns of ``df`` to use. ``None`` or an empty list
        means "use all columns of ``df``" (which must then be exactly three).
    img_shape : tuple
        ``(height, width)`` used to reshape the pixel rows back into images
        when plotting.
    cube_width : int
        Side length of the cubes, forwarded to ``get_bounding_cubes``.
    to_plot : bool
        When True, show the original image (left) next to the image rebuilt
        from the cube centres (right).
    return_result : bool
        When False, nothing is returned.

    Returns
    -------
    pandas.DataFrame or None
        The selected columns plus one ``'<column> center'`` column per
        channel.

    Raises
    ------
    TypeError
        If ``cols`` is neither None nor a list.
    ValueError
        If the selection does not contain exactly three columns.
    """
    if cols is None:
        cols = []
    if not isinstance(cols, list):
        raise TypeError("cols must be a list of columns from the dataframe")
    df_ = df[cols].copy() if len(cols) > 0 else df.copy()
    if df_.shape[1] != 3:
        raise ValueError("exactly three channel columns are required, got {}".format(df_.shape[1]))

    _, _, centers = get_bounding_cubes(df_.values, step=cube_width)

    # Reuse df_'s index so the side-by-side concat cannot misalign rows.
    center_df = pd.DataFrame(
        centers,
        index=df_.index,
        columns=['{} center'.format(col) for col in df_.columns],
    )
    res = pd.concat([df_, center_df], axis=1)
    if to_plot:
        height, width = img_shape[0], img_shape[1]
        original_img = df_.to_numpy().reshape([height, width, 3])
        center_img = center_df.to_numpy().reshape([height, width, 3])

        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.set_figheight(15)
        fig.set_figwidth(15)

        ax1.imshow(original_img)
        ax1.set_title('Original')
        ax2.imshow(center_img)
        ax2.set_title('Cube centers')
    if return_result:
        return res

In [None]:
# Build, for image A, the table holding each pixel's RGB value together with the
# centre of the width-10 RGB cube it falls in.
# to_plot=True also shows the original image and the centre image.
center_df_A = make_center_df(df=rgb_df, cols=list(rgb_df.columns[:3]), img_shape=img_shape, cube_width = 10, to_plot=True)

In [None]:
# Same for image B (columns 3-5 of rgb_df).
center_df_B = make_center_df(df=rgb_df, cols=list(rgb_df.columns[3:6]), img_shape=img_shape, cube_width = 10, to_plot=True)

In [None]:
# Same for image C (columns 6-8 of rgb_df).
center_df_C = make_center_df(df=rgb_df, cols=list(rgb_df.columns[6:9]), img_shape=img_shape, cube_width = 10, to_plot=True)

In [None]:
# Same for image D (columns 9-11 of rgb_df).
center_df_D = make_center_df(df=rgb_df, cols=list(rgb_df.columns[9:12]), img_shape=img_shape, cube_width = 10, to_plot=True)

In [None]:
# Same for image E (columns 12-14 of rgb_df).
center_df_E = make_center_df(df=rgb_df, cols=list(rgb_df.columns[12:15]), img_shape=img_shape, cube_width = 10, to_plot=True)

In [None]:
#center_df_A.head()
#center_df_B.head()
#center_df_C.head()
#center_df_D.head()
#pd.set_option('display.max_rows', None)
#center_df_E.tail(1000)

In [None]:
# Recompute the bounding cubes of image E on their own for inspection.
# get_bounding_cubes only reads the first three values of each row.
df_ = center_df_E.copy()
boxes = get_bounding_cubes(df_.values, step = 10)

In [None]:
#boxes

## Clusters

In [None]:
import scipy.cluster.hierarchy as sch
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.axes_grid1 import make_axes_locatable


def rgb2hex(rgb):
    """
    Convert an RGB triple to a ``#rrggbb`` hexadecimal colour string.

    Parameters
    ----------
    rgb : iterable of three numbers
        Red, green and blue values; each is truncated to an int before
        formatting.

    Returns
    -------
    str or None
        The hex string, or None when ``rgb`` cannot be converted (not an
        iterable of exactly three int-convertible values). In that case a
        message is printed instead of raising, so bulk conversions keep going.
    """
    try:
        r, g, b = tuple(rgb)
        return "#{:02x}{:02x}{:02x}".format(int(r), int(g), int(b))
    except (TypeError, ValueError, OverflowError):
        # TypeError: not iterable / not a number; ValueError: wrong length or
        # NaN; OverflowError: infinite value.
        print(f'failed rgb {rgb}')
        return None

def make_plot_dendogram(df, rgb_cols, cluster_color_col, link):
    """
    Plot the dendrogram of ``link`` with two colour strips underneath it.

    The lower strip shows, leaf by leaf, the colour stored in
    ``cluster_color_col``; the strip above it shows the leaf's own colour
    built from ``rgb_cols``. Leaves are looked up in ``df`` by position, so
    the rows of ``df`` are assumed to be in the same order as the
    observations that produced ``link`` - TODO confirm at the call site.
    """
    df_ = df.copy()
    clustered_leaf_labels = df_[cluster_color_col].values
    unclustered_leaf_labels = list(map(rgb2hex, df_[rgb_cols].values))

    plt.clf()
    fig = plt.figure(figsize=(25, 10))
    dn = sch.dendrogram(link )
    ax = plt.gca()

    # Tick positions and labels of the dendrogram's x axis; each label text is
    # parsed below as an integer row position into the two colour lists.
    x_points = ax.get_xticks()
    y_points = np.zeros(len(x_points))
    xlbls = ax.get_xmajorticklabels()
    # print(ax.get_position())
    color_idx = []
    clustered_colors_list = []
    unclustered_colors_list = []

    # retrieve clustered colors and unclustered colors in left-to-right leaf order
    for lbl in xlbls:
        color_idx.append(int(lbl.get_text()))
        clustered_colors_list.append(clustered_leaf_labels[int(lbl.get_text())])
        unclustered_colors_list.append(unclustered_leaf_labels[int(lbl.get_text())])
    # get axis position [x0,y0,width,height]
    ax_bbox = ax.get_position()
    # Lower strip: same horizontal extent as the dendrogram, near the figure bottom.
    cax1 = fig.add_axes([ax_bbox.x0, 0.01, ax_bbox.width, 0.05])

    # create custom colorbar: a colormap with one entry per leaf colour,
    # attached to a scatter so that plt.colorbar has a mappable to draw from
    cm1 = LinearSegmentedColormap.from_list('custom_colormap', clustered_colors_list, N=len(clustered_colors_list))
    sc1 = plt.scatter(x_points, y_points, c=color_idx, cmap=cm1)
    clustered_cb = plt.colorbar(sc1, cax=cax1,  orientation='horizontal')

    # set the size of the next axis [x0,y0,width,height]; this strip sits just above the first
    cax2 = fig.add_axes([ax_bbox.x0, 0.065, ax_bbox.width, 0.05])
    cm2 = LinearSegmentedColormap.from_list('custom_colormap', unclustered_colors_list, N=len(unclustered_colors_list))
    sc2 = plt.scatter(x_points, y_points, c=color_idx, cmap=cm2)
    unclustered_cb = plt.colorbar(sc2, cax=cax2,  orientation='horizontal', pad = 0.3)

    # remove ticks and labels
    ax.set_xticklabels([])

    # clustered_cb.ax.set_ylabel(ylabel='Clustered', rotation=270, loc='center', fontsize=20, labelpad=25)
    # unclustered_cb.ax.set_ylabel(ylabel='Unclustered', rotation=270, loc='center', fontsize=20, labelpad=25)
    clustered_cb.ax.tick_params(size=0)
    unclustered_cb.ax.tick_params(size=0)
    clustered_cb.ax.set_xticklabels([])
    unclustered_cb.ax.set_xticklabels([])
    plt.show()

def ColorsSequence3DScatterPerCluster(df):
    """
    Return a copy of ``df`` with a ``ClusterColor`` column added.

    For every cluster label 0 .. k-1 (k = number of distinct values in the
    ``Clusters`` column) the representative colour is the rounded mean of the
    first three columns of the cluster's rows, stored as a hex string.
    """
    out = df.copy()
    n_clusters = out['Clusters'].drop_duplicates().shape[0]
    for label in range(n_clusters):
        members = out.query('Clusters == {}'.format(label))
        # Cast to float first: once ClusterColor exists the values array is
        # of object dtype.
        mean_rgb = np.mean(members.values[:, 0:3], axis=0).astype(float)
        representative = np.around(mean_rgb).astype(int)
        out.loc[out['Clusters'] == label, 'ClusterColor'] = rgb2hex(representative)

    return out

def mapClusters(df, link, n_clusters = 2, height = None):
    """
    Return a copy of ``df`` with a ``Clusters`` column holding the flat
    cluster label of every row.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per observation used to build ``link``, in the same order.
    link : ndarray
        Linkage matrix (as returned by ``scipy.cluster.hierarchy.linkage``).
    n_clusters : int
        Number of clusters to cut the tree into; used only when ``height``
        is None.
    height : float, optional
        When given, the tree is cut at this height instead.
    """
    df_ = df.copy()
    if height is None:
        clusters = sch.cut_tree(Z=link, n_clusters=n_clusters)
    else:
        clusters = sch.cut_tree(Z=link, height=height)
    # cut_tree returns one column per requested cut; flatten the single
    # column into a plain 1-D label vector.
    df_['Clusters'] = clusters.ravel()
    return df_

def cluster_center(df, center_cols, n_cluster, plot_dendogram = True):
    """
    Cluster the distinct cube centres of an image and attach the result to
    every pixel.

    The unique rows of ``center_cols`` are linked with Ward's method, cut into
    ``n_cluster`` clusters and given a representative ``ClusterColor``. The
    returned dataframe is ``df`` left-joined with those labels on
    ``center_cols``. When ``plot_dendogram`` is True the dendrogram of the
    unique centres is drawn as well.
    """
    pixels = df.copy()
    unique_centers = pixels[center_cols].drop_duplicates()
    link = sch.linkage(unique_centers, method='ward')

    labelled = mapClusters(unique_centers, link, n_cluster)
    labelled = ColorsSequence3DScatterPerCluster(labelled)

    # One row per unique centre, in the order of `labelled`, for the dendrogram.
    per_center = pd.merge(pixels, labelled, how='right', on=center_cols)
    per_center = per_center.drop_duplicates(subset=center_cols)
    # Every pixel with the cluster and colour of its centre.
    per_pixel = pd.merge(pixels, labelled, how='left', on=center_cols)

    if plot_dendogram:
        make_plot_dendogram(df=per_center, rgb_cols=center_cols, cluster_color_col='ClusterColor', link=link)
    return per_pixel

In [None]:
# Plot the dendrogram of the unique centers of image A and their representative colors,
# using 15 clusters.
# The representative color is computed by ColorsSequence3DScatterPerCluster.
cluster_center_df_A = cluster_center(df=center_df_A, center_cols=['R0 center', 'G0 center', 'B0 center'], n_cluster=15)

In [None]:
# 15 clusters for image B.
cluster_center_df_B = cluster_center(df=center_df_B, center_cols=['R1 center', 'G1 center', 'B1 center'], n_cluster=15)

In [None]:
# 15 clusters for image C.
cluster_center_df_C = cluster_center(df=center_df_C, center_cols=['R2 center', 'G2 center', 'B2 center'], n_cluster=15)

In [None]:
# 15 clusters for image D.
cluster_center_df_D = cluster_center(df=center_df_D, center_cols=['R3 center', 'G3 center', 'B3 center'], n_cluster=15)

In [None]:
# 15 clusters for image E.
cluster_center_df_E = cluster_center(df=center_df_E, center_cols=['R4 center', 'G4 center', 'B4 center'], n_cluster=15)

In [None]:
# Repeat with a single cluster, so every pixel of image A shares one representative color.
cluster_center_df_A_1 = cluster_center(df=center_df_A, center_cols=['R0 center', 'G0 center', 'B0 center'], n_cluster=1)

In [None]:
# Single cluster for image B.
cluster_center_df_B_1 = cluster_center(df=center_df_B, center_cols=['R1 center', 'G1 center', 'B1 center'], n_cluster=1)

In [None]:
# Single cluster for image C.
cluster_center_df_C_1 = cluster_center(df=center_df_C, center_cols=['R2 center', 'G2 center', 'B2 center'], n_cluster=1)

In [None]:
# Single cluster for image D.
cluster_center_df_D_1 = cluster_center(df=center_df_D, center_cols=['R3 center', 'G3 center', 'B3 center'], n_cluster=1)

In [None]:
# Single cluster for image E.
cluster_center_df_E_1 = cluster_center(df=center_df_E, center_cols=['R4 center', 'G4 center', 'B4 center'], n_cluster=1)

In [None]:
# Image A: rounded mean of the center columns over all pixels, then the
# single-cluster representative color with its pixel count.
print(cluster_center_df_A[['R0 center', 'G0 center', 'B0 center']].mean().round().astype(int))
print(cluster_center_df_A_1["ClusterColor"].value_counts())

In [None]:
# Same two summaries for image B.
print(cluster_center_df_B[['R1 center', 'G1 center', 'B1 center']].mean().round().astype(int))
print(cluster_center_df_B_1["ClusterColor"].value_counts())

In [None]:
# Same two summaries for image C.
print(cluster_center_df_C[['R2 center', 'G2 center', 'B2 center']].mean().round().astype(int))
print(cluster_center_df_C_1["ClusterColor"].value_counts())

In [None]:
# Same two summaries for image D.
print(cluster_center_df_D[['R3 center', 'G3 center', 'B3 center']].mean().round().astype(int))
print(cluster_center_df_D_1["ClusterColor"].value_counts())

In [None]:
# Same two summaries for image E.
print(cluster_center_df_E[['R4 center', 'G4 center', 'B4 center']].mean().round().astype(int))
print(cluster_center_df_E_1["ClusterColor"].value_counts())

## 3D Plot

In [None]:
def plot_3D_centers(df, center_cols, color_col, cluster_col):
    """
    Show an interactive 3D scatter of the colour centres.

    ``center_cols`` names the three columns used as x, y and z. Each marker is
    coloured with the value in ``color_col``, and the hover box shows the
    coordinates together with the value in ``cluster_col``.
    """
    frame = df.copy()
    x_name, y_name, z_name = center_cols

    hover_text = ''.join([
        '<b>x</b>: %{x}<br>',
        '<b>y</b>: %{y}<br>',
        '<b>z</b>: %{z}<br>',
        '<b>cluster</b>: %{customdata[0]}<br>',
        '<extra></extra>',
    ])
    points = go.Scatter3d(
        x=frame[x_name],
        y=frame[y_name],
        z=frame[z_name],
        # customdata carries the cluster label into the hover template
        customdata=frame[[cluster_col]].values,
        mode='markers',
        opacity=0.7,
        marker=dict(size=8, color=frame[color_col]),
        hovertemplate=hover_text,
    )

    fig = go.Figure()
    fig.add_trace(points)
    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
    fig.show()

In [None]:
# Hard-coded average RGB per image plus an identifier (1-5) stored in "Clusters".
# NOTE(review): presumably the rounded per-image means printed in the cells
# above - confirm the values are still current after re-running.
# A plain ndarray is used instead of np.matrix, which NumPy no longer recommends.
avg_RGB = np.array([[154, 177, 142, 1],
                    [169, 103, 59, 2],
                    [191, 133, 100, 3],
                    [137, 130, 135, 4],
                    [161, 151, 126, 5]])
avg_RGB_df = pd.DataFrame(avg_RGB, columns=["avg_R", "avg_G", "avg_B", "Clusters"])

In [None]:
# One hex color string per row, built from the averaged RGB values.
# (A comprehension avoids shadowing the builtin `hex` and converting the
# dataframe to an array once per row.)
ClusterColor = [
    rgb2hex(rgb_row)
    for rgb_row in avg_RGB_df[['avg_R', 'avg_G', 'avg_B']].to_numpy()
]

avg_RGB_df["ClusterColor"] = ClusterColor

In [None]:
# 3D scatter of the five average colors, each drawn in its own color.
plot_3D_centers(df=avg_RGB_df, center_cols=['avg_R', 'avg_G', 'avg_B'], color_col='ClusterColor', cluster_col='Clusters')

In [None]:
avg_RGB_df

In [None]:
# 3D scatter of image A's centers, colored by their cluster's representative color.
plot_3D_centers(df=cluster_center_df_A, center_cols=['R0 center', 'G0 center', 'B0 center'], color_col='ClusterColor', cluster_col='Clusters')

In [None]:
# Same for image B.
plot_3D_centers(df=cluster_center_df_B, center_cols=['R1 center', 'G1 center', 'B1 center'], color_col='ClusterColor', cluster_col='Clusters')

In [None]:
# Same for image C.
plot_3D_centers(df=cluster_center_df_C, center_cols=['R2 center', 'G2 center', 'B2 center'], color_col='ClusterColor', cluster_col='Clusters')

In [None]:
# Same for image D.
plot_3D_centers(df=cluster_center_df_D, center_cols=['R3 center', 'G3 center', 'B3 center'], color_col='ClusterColor', cluster_col='Clusters')

In [None]:
 plot_3D_centers(df=cluster_center_df_E, center_cols=['R4 center', 'G4 center', 'B4 center'], color_col='ClusterColor', cluster_col='Clusters')

## Comparisons

In [None]:
import matplotlib.patches as matpatches

def plotRGB(rgb_value):
    """
    Draw a single filled rectangle in the given RGB color.

    Parameters
    ----------
    rgb_value : iterable of three numbers
        Red, green and blue values; converted to a hex color via ``rgb2hex``.
    """
    # Normalise other iterables (e.g. arrays) to a tuple. The original test
    # ("not a list OR not a tuple") was true for every input.
    if not isinstance(rgb_value, (list, tuple)):
        rgb_value = tuple(rgb_value)
    swatch = matpatches.Rectangle((0, 0), 10, 10, color=rgb2hex(rgb_value))
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.add_patch(swatch)
    plt.show()

# Preview the purple used below to highlight selected pixels.
plotRGB(rgb_value=[106,13,173])

In [None]:
def changeRGB4Image(df, rgb_cols, center_cols, cluster_color_dict, cluster_col, img_shape):
    """
    Show the original image next to a copy in which selected groups of pixels
    are repainted.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pixel (row-major), containing ``rgb_cols``,
        ``center_cols`` and ``cluster_col``.
    rgb_cols : list
        The three columns holding the original RGB values.
    center_cols : list
        The three columns holding the cube-centre colors; the second image is
        built from these.
    cluster_color_dict : dict
        Maps a value of ``cluster_col`` to the RGB color its pixels are
        painted with.
    cluster_col : str
        Column whose values select the pixels to repaint.
    img_shape : tuple
        ``(height, width)`` of the image.

    Returns
    -------
    dict
        ``{key: [number_of_matching_pixels, index_of_matching_rows]}``.
    """
    df_ = df.copy()
    height, width = img_shape[0], img_shape[1]
    img = np.array(df_[rgb_cols]).reshape([height, width, 3])

    # Private copy of the centre colors that the loop is free to overwrite.
    channels_arr = df_[center_cols].to_numpy(copy=True)

    len_cluster_match = {}
    for key, val in cluster_color_dict.items():
        # A boolean mask selects rows by position, so this also works when
        # df does not have a default 0..n-1 index.
        mask = (df_[cluster_col] == key).to_numpy()
        indx = df_.index[mask]
        len_cluster_match[key] = [len(indx), indx]
        channels_arr[mask] = val

    new_img = np.copy(img)
    if cluster_color_dict:
        # As before, the centre image is only substituted when at least one
        # cluster was requested; otherwise the original is shown twice.
        new_img[:, :, 0:3] = channels_arr[:, 0:3].reshape([height, width, 3])
    DATA = np.array([img, new_img])
    fig = px.imshow(DATA, facet_col=0)

    fig.show()
    return len_cluster_match

In [None]:
# Redefine function to find the representative cluster color
def ColorsSequence3DScatterPerCluster(df):
    """
    Return a copy of ``df`` with a ``ClusterColor`` column added.

    For every cluster label 0 .. k-1 (k = number of distinct values in the
    ``Clusters`` column) one row of the cluster is drawn at random with a
    fixed seed, and its first three values, converted to a hex string, become
    the representative color of the whole cluster.
    """
    out = df.copy()
    n_clusters = out['Clusters'].drop_duplicates().shape[0]
    for label in range(n_clusters):
        members = out.query('Clusters == {}'.format(label))
        sampled_row = members.sample(n=1, random_state = 260).values[0]
        out.loc[out['Clusters'] == label, 'ClusterColor'] = rgb2hex(sampled_row[0:3])

    return out

In [None]:
# Cluster the centers of image C into 10 groups.
cluster_center_df_C_10 = cluster_center(df=center_df_C, center_cols=['R2 center', 'G2 center', 'B2 center'], n_cluster=10)

In [None]:
# Representative colors of the 10 clusters.
cluster_center_df_C_10['ClusterColor'].unique()

In [None]:
# Number of pixels per representative color.
cluster_center_df_C_10['ClusterColor'].value_counts()

In [None]:
# For each of the 10 clusters, show image C next to a copy in which the
# pixels of that cluster are painted purple.
for cluster_indx in range(10):
    clusterIndColorPairs = { cluster_indx:[106,13,173]} #color the point that matches the cluster with purple
    len_cluster_match = changeRGB4Image(df=cluster_center_df_C_10, rgb_cols=['R2', 'G2', 'B2'], 
                                        center_cols=['R2 center', 'G2 center', 'B2 center'],
                                        cluster_color_dict=clusterIndColorPairs, cluster_col='Clusters', img_shape=img_shape)

In [None]:
cluster_center_df_C_10.loc[(cluster_center_df_C_10["Clusters"] == 1)].head() #hair #cdaf37

In [None]:
cluster_center_df_C_10.loc[(cluster_center_df_C_10["Clusters"] == 9)] #lip #b9372d

### Hair

In [None]:
# Summary statistics of cluster 1 of image C (noted above as the hair cluster).
cluster_center_df_C_10.loc[(cluster_center_df_C_10["Clusters"] == 1)].describe()

In [None]:
# Image C: mark pixels whose center lies inside a fixed RGB box
# (R 175-235, G 145-195, B 55-135) as Class 1; all others keep 999.
# NOTE(review): the box is presumably chosen from the describe() output above - confirm.
cluster_center_df_C["Class"] = 999
cluster_center_df_C.loc[((cluster_center_df_C["R2 center"] >= 175) & (cluster_center_df_C["R2 center"] <= 235)) & 
                        ((cluster_center_df_C["G2 center"] >= 145) & (cluster_center_df_C["G2 center"] <= 195)) & 
                        ((cluster_center_df_C["B2 center"] >= 55) & (cluster_center_df_C["B2 center"] <= 135)), "Class"] = 1
cluster_center_df_C["Class"].value_counts()

In [None]:
# Show image C with the Class 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_C, rgb_cols=['R2', 'G2', 'B2'], 
                                    center_cols=['R2 center', 'G2 center', 'B2 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class', img_shape=img_shape)

In [None]:
# Image A: same RGB box as for image C.
cluster_center_df_A["Class"] = 999
cluster_center_df_A.loc[((cluster_center_df_A["R0 center"] >= 175) & (cluster_center_df_A["R0 center"] <= 235)) & 
                        ((cluster_center_df_A["G0 center"] >= 145) & (cluster_center_df_A["G0 center"] <= 195)) & 
                        ((cluster_center_df_A["B0 center"] >= 55) & (cluster_center_df_A["B0 center"] <= 135)), "Class"] = 1
cluster_center_df_A["Class"].value_counts()

In [None]:
# Show image A with the Class 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_A, rgb_cols=['R0', 'G0', 'B0'], 
                                    center_cols=['R0 center', 'G0 center', 'B0 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class', img_shape=img_shape)

In [None]:
# Image B: same RGB box as for image C.
cluster_center_df_B["Class"] = 999
cluster_center_df_B.loc[((cluster_center_df_B["R1 center"] >= 175) & (cluster_center_df_B["R1 center"] <= 235)) & 
                        ((cluster_center_df_B["G1 center"] >= 145) & (cluster_center_df_B["G1 center"] <= 195)) & 
                        ((cluster_center_df_B["B1 center"] >= 55) & (cluster_center_df_B["B1 center"] <= 135)), "Class"] = 1
cluster_center_df_B["Class"].value_counts()

In [None]:
# Show image B with the Class 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_B, rgb_cols=['R1', 'G1', 'B1'], 
                                    center_cols=['R1 center', 'G1 center', 'B1 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class', img_shape=img_shape)

In [None]:
# Image D: same RGB box, but pixels whose cluster color is "#c07d54" are left out.
cluster_center_df_D["Class"] = 999
cluster_center_df_D.loc[((cluster_center_df_D["R3 center"] >= 175) & (cluster_center_df_D["R3 center"] <= 235)) & 
                        ((cluster_center_df_D["G3 center"] >= 145) & (cluster_center_df_D["G3 center"] <= 195)) & 
                        ((cluster_center_df_D["B3 center"] >= 55) & (cluster_center_df_D["B3 center"] <= 135)) &
                        (cluster_center_df_D["ClusterColor"] != "#c07d54"), "Class"] = 1
cluster_center_df_D["Class"].value_counts()

In [None]:
# Show image D with the Class 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_D, rgb_cols=['R3', 'G3', 'B3'], 
                                    center_cols=['R3 center', 'G3 center', 'B3 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class', img_shape=img_shape)

In [None]:
# Image E: same RGB box as for image C.
cluster_center_df_E["Class"] = 999
cluster_center_df_E.loc[((cluster_center_df_E["R4 center"] >= 175) & (cluster_center_df_E["R4 center"] <= 235)) & 
                        ((cluster_center_df_E["G4 center"] >= 145) & (cluster_center_df_E["G4 center"] <= 195)) & 
                        ((cluster_center_df_E["B4 center"] >= 55) & (cluster_center_df_E["B4 center"] <= 135)), "Class"] = 1
cluster_center_df_E["Class"].value_counts()

In [None]:
# Show image E with the Class 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_E, rgb_cols=['R4', 'G4', 'B4'], 
                                    center_cols=['R4 center', 'G4 center', 'B4 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class', img_shape=img_shape)

In [None]:
# 3D plot for images C, A and D

In [None]:
# Pixels of image C that belong to cluster 1 of the 10-cluster run.
hair_df_C = cluster_center_df_C_10.loc[(cluster_center_df_C_10["Clusters"] == 1)] #yellow

In [None]:
hair_df_C.head() ##cdaf37

In [None]:
# Cluster image A's centers into 10 groups.
cluster_center_df_A_10 = cluster_center(df=center_df_A, center_cols=['R0 center', 'G0 center', 'B0 center'], n_cluster=10)

In [None]:
# NOTE(review): despite the "_10" suffix, image D is cut into 13 clusters here.
cluster_center_df_D_10 = cluster_center(df=center_df_D, center_cols=['R3 center', 'G3 center', 'B3 center'], n_cluster=13)

In [None]:
cluster_center_df_A_10['ClusterColor'].unique() ##f5e119

In [None]:
cluster_center_df_D_10['ClusterColor'].unique() ##e1b969

In [None]:
# Select the pixels of images A and D by the representative color noted in the two cells above.
hair_df_A = cluster_center_df_A_10.loc[(cluster_center_df_A_10["ClusterColor"]=="#f5e119")]
hair_df_D = cluster_center_df_D_10.loc[(cluster_center_df_D_10["ClusterColor"]=="#e1b969")]

In [None]:
hair_df_C.head(1)

In [None]:
hair_df_A.head(1)

In [None]:
hair_df_D.head(1)

In [None]:
# Give the three hair tables identical column names so they can be stacked,
# and drop the raw per-pixel RGB columns.
# The tables are slices of larger dataframes, so they are rebuilt by
# re-binding instead of being modified in place (avoids pandas'
# SettingWithCopy warning); errors="ignore" keeps the cell re-runnable.
hair_df_C = hair_df_C.rename(columns={"R2 center":"R center", "G2 center":"G center", "B2 center":"B center"})
hair_df_A = hair_df_A.rename(columns={"R0 center":"R center", "G0 center":"G center", "B0 center":"B center"})
hair_df_D = hair_df_D.rename(columns={"R3 center":"R center", "G3 center":"G center", "B3 center":"B center"})

hair_df_C = hair_df_C.drop(columns=["R2", "G2", "B2"], errors="ignore")
hair_df_A = hair_df_A.drop(columns=["R0", "G0", "B0"], errors="ignore")
hair_df_D = hair_df_D.drop(columns=["R3", "G3", "B3"], errors="ignore")

In [None]:
# Stack the hair tables row-wise: C with A, C with D, and all three together.
hair_df_CA = pd.concat([hair_df_C, hair_df_A], axis=0, ignore_index=True)
hair_df_CD = pd.concat([hair_df_C, hair_df_D], axis=0, ignore_index=True)
hair_df_CAD = pd.concat([hair_df_CA, hair_df_D], axis=0, ignore_index=True)

In [None]:
# Cluster labels present in the combined table (labels come from separate clustering runs).
hair_df_CAD["Clusters"].unique()

In [None]:
 plot_3D_centers(df=hair_df_CA, center_cols=['R center', 'G center', 'B center'], color_col='ClusterColor', cluster_col='Clusters')

In [None]:
# 3D scatter of the hair centers of images C and D together.
plot_3D_centers(df=hair_df_CD, center_cols=['R center', 'G center', 'B center'], color_col='ClusterColor', cluster_col='Clusters')

### Lip

In [None]:
# Summary statistics of cluster 9 of image C (noted above as the lip cluster).
cluster_center_df_C_10.loc[(cluster_center_df_C_10["Clusters"] == 9)].describe()

In [None]:
# Image C: mark pixels whose center lies inside a fixed RGB box
# (R 135-195, G 35-85, B 25-85) as Class2 = 1; all others keep 999.
# NOTE(review): the box is presumably chosen from the describe() output above - confirm.
cluster_center_df_C["Class2"] = 999
cluster_center_df_C.loc[((cluster_center_df_C["R2 center"] >= 135) & (cluster_center_df_C["R2 center"] <= 195)) & 
                        ((cluster_center_df_C["G2 center"] >= 35) & (cluster_center_df_C["G2 center"] <= 85)) & 
                        ((cluster_center_df_C["B2 center"] >= 25) & (cluster_center_df_C["B2 center"] <= 85)), "Class2"] = 1
cluster_center_df_C["Class2"].value_counts()

In [None]:
# Show image C with the Class2 = 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_C, rgb_cols=['R2', 'G2', 'B2'], 
                                    center_cols=['R2 center', 'G2 center', 'B2 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class2', img_shape=img_shape)

In [None]:
# Image A: same RGB box as for image C.
cluster_center_df_A["Class2"] = 999
cluster_center_df_A.loc[((cluster_center_df_A["R0 center"] >= 135) & (cluster_center_df_A["R0 center"] <= 195)) & 
                        ((cluster_center_df_A["G0 center"] >= 35) & (cluster_center_df_A["G0 center"] <= 85)) & 
                        ((cluster_center_df_A["B0 center"] >= 25) & (cluster_center_df_A["B0 center"] <= 85)), "Class2"] = 1
cluster_center_df_A["Class2"].value_counts()

In [None]:
# Image B: same RGB box as for image C.
cluster_center_df_B["Class2"] = 999
cluster_center_df_B.loc[((cluster_center_df_B["R1 center"] >= 135) & (cluster_center_df_B["R1 center"] <= 195)) & 
                        ((cluster_center_df_B["G1 center"] >= 35) & (cluster_center_df_B["G1 center"] <= 85)) & 
                        ((cluster_center_df_B["B1 center"] >= 25) & (cluster_center_df_B["B1 center"] <= 85)), "Class2"] = 1
cluster_center_df_B["Class2"].value_counts()

In [None]:
# Image D: same RGB box as for image C.
cluster_center_df_D["Class2"] = 999
cluster_center_df_D.loc[((cluster_center_df_D["R3 center"] >= 135) & (cluster_center_df_D["R3 center"] <= 195)) & 
                        ((cluster_center_df_D["G3 center"] >= 35) & (cluster_center_df_D["G3 center"] <= 85)) & 
                        ((cluster_center_df_D["B3 center"] >= 25) & (cluster_center_df_D["B3 center"] <= 85)), "Class2"] = 1
cluster_center_df_D["Class2"].value_counts()

In [None]:
# Image E: same RGB box as for image C.
cluster_center_df_E["Class2"] = 999
cluster_center_df_E.loc[((cluster_center_df_E["R4 center"] >= 135) & (cluster_center_df_E["R4 center"] <= 195)) & 
                        ((cluster_center_df_E["G4 center"] >= 35) & (cluster_center_df_E["G4 center"] <= 85)) & 
                        ((cluster_center_df_E["B4 center"] >= 25) & (cluster_center_df_E["B4 center"] <= 85)), "Class2"] = 1
cluster_center_df_E["Class2"].value_counts()

In [None]:
# Show image A with the Class2 = 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_A, rgb_cols=['R0', 'G0', 'B0'], 
                                    center_cols=['R0 center', 'G0 center', 'B0 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class2', img_shape=img_shape)

In [None]:
# Show image B with the Class2 = 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_B, rgb_cols=['R1', 'G1', 'B1'], 
                                    center_cols=['R1 center', 'G1 center', 'B1 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class2', img_shape=img_shape)

In [None]:
# Show image D with the Class2 = 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_D, rgb_cols=['R3', 'G3', 'B3'], 
                                    center_cols=['R3 center', 'G3 center', 'B3 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class2', img_shape=img_shape)

In [None]:
# Show image E with the Class2 = 1 pixels painted purple.
clusterIndColorPairs = {1:[106,13,173]} #color the point that matches the cluster with purple
len_cluster_match = changeRGB4Image(df=cluster_center_df_E, rgb_cols=['R4', 'G4', 'B4'], 
                                    center_cols=['R4 center', 'G4 center', 'B4 center'],
                                    cluster_color_dict=clusterIndColorPairs, 
                                    cluster_col='Class2', img_shape=img_shape)

In [None]:
# Hex color of every pixel's center in image C.
# The center columns are converted to an array once (the original converted
# the whole dataframe again for every row) and the builtin `hex` is no
# longer shadowed.
color = [
    rgb2hex(center_rgb)
    for center_rgb in cluster_center_df_C[['R2 center', 'G2 center', 'B2 center']].to_numpy()
]

In [None]:
# Store the per-pixel center color as a hex string column.
cluster_center_df_C["Colors"] = color

In [None]:
# Number of pixels per distinct center color.
cluster_center_df_C["Colors"].value_counts()

In [None]:
# Reset "Class" (this overwrites the RGB-box labelling made earlier for image C)
# and mark only the pixels whose center color is exactly "#cdaf41".
cluster_center_df_C["Class"] = 999
cluster_center_df_C.loc[(cluster_center_df_C["Colors"] == "#cdaf41"), "Class"] = 1

In [None]:
cluster_center_df_C["Class"].value_counts()