In [16]:

# imports

import numpy as np
import cv2
import os
import pandas as pd
from sklearn.cluster import KMeans
from collections import Counter
import colorsys

In [29]:
# use a openCV cascade to find all the faces in an image
# input:  image (already read in with cv2)
#         cascade file
# output: LIST of tuples, (x, y, width, height) for a box that encases each face

CASCADE = "insumos-python/haarcascade_frontalface_default.xml"

def detect_faces(image, cascade_path=CASCADE):
    
    # read in the cascade
    face_cascade = cv2.CascadeClassifier(cascade_path)
    
    # convert the image to grayscale-- this makes processing easier
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # detect the faces with adjust params
    faces = face_cascade.detectMultiScale(
            grayscale_image,
            scaleFactor=1.1,
            minNeighbors=5)
    
    # output a LIST of tuples, each representing a face
    return faces

In [30]:
# find the faces in a single image, and crop the photo down around them
# input:  image path
# output: None
#         saves the cropped images to the "faces" directory

OUTPUT = "fotos/caras/{}_{}.jpg"

def get_faces_in_image(image_path, output_format=OUTPUT):
    
    # read in the image
    # path is in the form: "all_pictures/covers/vvv_ii_yyyy.jpg
    image_name = image_path.split("/")[1].split(".")[0]
    image = cv2.imread(image_path)
    print(image_name)
    faces = [detect_faces(image)[0]]
        
    # save each face as its own image
    face_number = 0
    for (x, y, width, height) in faces:
        
        # openCV treats each image as a numpy array, so you can "slice" the pixels
        cropped = image[y : y + height, x : x + width]
        
        # record how many faces are in each picture
        file_name = output_format.format(image_name, face_number)
        cv2.imwrite(file_name, cropped)
        face_number += 1
    
    # were there any faces found?
    return (face_number != 0)

In [31]:

DIR = "photos"
    
def get_all_faces(directory=DIR):
    
    # loop through all cover images
    for image in os.listdir(directory):
        
        # image paths from os.listdir don't include the name of the directory
        # so it needs to be added in
        image_path = "{}/{}".format(DIR, image)
        
        # save the cropped faces of each image
        get_faces_in_image(image_path)

In [32]:
get_all_faces()


1608__ERNESTO_NUNEZ_AGUILAR__SIGAMOS_HACIENDO_HISTORIA
0503__THEODOROS_KALIONCHIZ_DE_LA_FUENTE__FUERZA_Y_CORAZÓN_POR_MÉXICO
2104__JUAN_ANTONIO_GONZALEZ_HERNANDEZ__SIGAMOS_HACIENDO_HISTORIA
3007__MONICA_HERRERA_VILLAVICENCIO__SIGAMOS_HACIENDO_HISTORIA
0704__JOAQUÍN_ZEBADÚA_ALVA__SIGAMOS_HACIENDO_HISTORIA
3018__Benito_Aguas_Atlahua__MORENA
2107__CLAUDIA_RIVERA_VIVANCO__SIGAMOS_HACIENDO_HISTORIA
0204__SOCORRO_IRMA_ANDAZOLA_GÓMEZ__Morena
0702__KARINA_MARGARITA_DEL_RIO_ZENTENO__SIGAMOS_HACIENDO_HISTORIA
1515__JOSEFINA_ANAYA_MARTINEZ__SIGAMOS_HACIENDO_HISTORIA
2108__Carlos_Ignacio_Mier_BaÃ±uelos__PVEM
3006__JAIME_HUMBERTO_PEREZ_BERNABE__SIGAMOS_HACIENDO_HISTORIA
1801__ANY_MARILU_PORRAS_BAYLON__SIGAMOS_HACIENDO_HISTORIA
1611__VANESSA_LÓPEZ_CARRILLO__SIGAMOS_HACIENDO_HISTORIA
1105__ECTOR_JAIME_RAMIREZ_BARBA__FUERZA_Y_CORAZON_POR_MEXICO
1402__TECUTLI_JOSÉ_GUADALUPE_GÓMEZ_VILLALOBOS__MC
1532__LUIS_ENRIQUE_MARTINEZ_VENTURA__SIGAMOS_HACIENDO_HISTORIA
0913__Francisco_Javier_SÃ¡nchez_Cervantes__PT
1

# Obteniending la piel

In [58]:
def rgb_to_hex(rgb):
    return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]), int(rgb[1]), int(rgb[2]))



In [59]:
rgb_to_hex([10,10,10])

'#0a0a0a'

In [65]:
def cluster_face(ruta,archivo):
    
    image = cv2.imread(ruta+archivo)
    
    # save a patch of pixels from the center of the pic to identify the skin label
    (height, width, three) = image.shape
    (center_x, center_y) = (width // 2, height // 2)
    patch = image[center_x - 10 : center_x + 10, center_y - 10 : center_y + 10]
    flattened_patch = patch.transpose(2, 0, 1). reshape(3, -1).transpose()
    
    # remove white pixels, and fit a classifier
    flattened = image.transpose(2, 0, 1).reshape(3, -1).transpose()
    pixeles = np.array([pixel for pixel in flattened ])
    k_means = KMeans(n_clusters=2)
    k_means.fit(pixeles)
    
    # identify the skin label
    patch_labels = k_means.predict(flattened_patch)
    skin_label = Counter(patch_labels).most_common()[0][0]
    
    # save the average skin pixel
    all_labels = k_means.predict(pixeles)
    skin_mask = (all_labels == skin_label)
    skin_pixels = pixeles[skin_mask]
    para_exportar = [all_labels]

    mascara = np.array([np.array(pixeles[i_pix]) if skin_mask[i_pix] else np.array([255,255,225]) for i_pix in range(len(pixeles)) ]).reshape(width,height,3)
    average_color = np.average(skin_pixels, axis=0)
    print(archivo,average_color)

    mascara[:int(width/3),:int(height/3)] = average_color
    cv2.imwrite("fotos/color_promedio/"+archivo,mascara)
    return (rgb_to_hex(average_color),archivo.split("_")[0])

In [66]:
ruta = "fotos/caras/"
archivos = os.listdir(ruta)
diccionario_cvedis_hex={}
for archivo in archivos:
    
    color, cvedis = cluster_face(ruta,archivo)
    diccionario_cvedis_hex[cvedis] = color

0807__JESUS_ROBERTO_CORRAL_ORDONEZ__SIGAMOS_HACIENDO_HISTORIA_0.jpg [146.30355651 162.4468874  201.79261542]
3008__Jorge_Alberto_Mier_Acolt__MORENA_0.jpg [ 92.49411343 114.72334501 184.88709366]
3204__ANA_LUISA_DEL_MURO_GARCÍA__SIGAMOS_HACIENDO_HISTORIA_0.jpg [113.37536574 138.42680146 210.62582481]
1525__LEIDE_AVILES_DOMINGUEZ__SIGAMOS_HACIENDO_HISTORIA_0.jpg [104.35317604 131.52945307 188.68755267]
1607__MARCELA_VELAZQUEZ_VAZQUEZ__SIGAMOS_HACIENDO_HISTORIA_0.jpg [150.29056281 157.54802376 178.1400592 ]
1512__ARMANDO_CORONA_ARVIZU__SIGAMOS_HACIENDO_HISTORIA_0.jpg [ 97.21711742 131.37730692 191.35856522]
2005__CAROL_ANTONIO_ALTAMIRANO__MORENA_0.jpg [153.28136501 161.75225828 204.05202409]
1408__PAOLA_MILAGROS_ESPINOSA_SANCHEZ__FUERZA_Y_CORAZON_POR_MEXICO_0.jpg [167.19272072 173.34108765 191.66323011]
1103__FERNANDO_TORRES_GRACIANO__FUERZA_Y_CORAZÓN_POR_MÉXICO_0.jpg [152.9407314  169.63458036 207.00434356]
0917__CARLOS_ARTURO_MADRAZO_SILVA__SIGAMOS_HACIENDO_HISTORIA_0.jpg [174.05461165 

In [79]:
ladata = pd.read_csv("resources/datos/distdata_base.csv")
ladata.CVEDIS = ladata.CVEDIS.apply(lambda x : str(x).zfill(4))
ladata["TONO_PIEL_KMEDIAS"] = ladata.CVEDIS.map(diccionario_cvedis_hex)
ladata.set_index("CVEDIS").to_csv("resources/datos/distdata_base.csv")