In [8]:
from ultralytics import YOLO
from deepface import DeepFace
from PIL import Image
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from semdiffusers import SemanticEditPipeline
import torch

# generating images
#
# Generate portraits with the semantic-editing Stable Diffusion pipeline and
# keep only those in which the YOLO face detector finds a confident face.
# For every kept image the full picture is written to `save_path_orig` and the
# face crop to `save_path`, both as image{idx}.jpg where idx is the seed used.

torch.cuda.empty_cache()

device = 'cuda'
pipe = SemanticEditPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4").to(device)
gen = torch.Generator(device=device)

save_path = "images/python_generated/"
save_path_orig = "images/python_generated_orig/"
prompt = "a portrait photo of a doctor"

num_images = 5
face_conf_threshold = 0.5  # minimum detector confidence for a face to count

# Image.save() does not create missing folders, so make sure they exist.
os.makedirs(save_path, exist_ok=True)
os.makedirs(save_path_orig, exist_ok=True)

# Load the face detector once; it does not change between iterations.
model = YOLO('yolov8n-face.pt')

idx, num_generated = 0, 0
while num_generated < num_images:
    # idx doubles as the seed, so every attempt is reproducible.
    gen.manual_seed(idx)
    params = {'guidance_scale': 7.5,
              'seed': idx,
              'prompt': prompt,
              'negative_prompt': "deformed, blurry",
              'num_images_per_prompt': 1
             }
    out = pipe(**params, generator=gen)
    image = out.images[0].convert('RGB')

    # if there is a face
    results = model(image)
    # A single image is passed in, so only the first result is relevant.
    boxes = results[0].boxes if len(results) > 0 else None
    box_amount = len(boxes) if boxes is not None else 0

    if box_amount > 0:
        confidences = boxes.conf.cpu().numpy()
        xyxy_coordinates = boxes.xyxy.cpu().numpy()  # one (left, top, right, bottom) row per box

        # Keep exactly one crop per image (the most confident face) so that an
        # image with several faces neither overwrites its own crop file nor is
        # counted more than once towards num_images.
        best = int(confidences.argmax())
        if confidences[best] > face_conf_threshold:
            image.save(f"{save_path_orig}image{idx}.jpg")
            im_new = image.crop(tuple(xyxy_coordinates[best].tolist()))
            im_new.save(f"{save_path}image{idx}.jpg")
            num_generated += 1
    idx += 1

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


  0%|          | 0/51 [00:00<?, ?it/s]


0: 640x640 1 face, 5.6ms
Speed: 1.7ms preprocess, 5.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)


  0%|          | 0/51 [00:00<?, ?it/s]


0: 640x640 1 face, 5.4ms
Speed: 1.7ms preprocess, 5.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)


  0%|          | 0/51 [00:00<?, ?it/s]


0: 640x640 1 face, 5.4ms
Speed: 1.8ms preprocess, 5.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)


  0%|          | 0/51 [00:00<?, ?it/s]


0: 640x640 1 face, 5.4ms
Speed: 1.8ms preprocess, 5.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)


  0%|          | 0/51 [00:00<?, ?it/s]


0: 640x640 1 face, 5.4ms
Speed: 1.8ms preprocess, 5.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)


In [9]:
# Folder with the images to classify; the same location is also used as the
# cropped-image folder handed to classify_images().
img_directory = "images/python_generated/"
cropped_image_save_path = img_directory

# Per-file classifier scores, keyed by file name (filled by classify_images()).
output = dict()
    
def classify_images(cropped_path, classifier_output):
    '''
    For each image in directory, run DeepFace classifier and store output in row of dataframe.

    Every regular file in `cropped_path` is analysed for gender and race. The
    per-class scores are also recorded in the module-level `output` dict
    (keyed by file name). The combined table is sorted by file name and
    written once to "pipeline_output.csv" in the current working directory.

        cropped_path: path to cropped images
        classifier_output: dataframe that stores classifier results; it is not
            modified in place, the new rows are appended to a copy

        returns: dataframe with the rows of `classifier_output` plus one row
            per analysed image, sorted by 'file'
    '''
    records = []
    # sorted() only makes the processing order deterministic; the final table
    # is sorted by file name anyway.
    for file in sorted(os.listdir(cropped_path)):
        img_path = os.path.join(cropped_path, file)
        if not os.path.isfile(img_path):
            # skip sub-directories such as .ipynb_checkpoints
            continue

        objs = DeepFace.analyze(img_path = img_path,
                actions = ['gender', 'race'], enforce_detection = False
        )
        face = objs[0]  # only the first analysed face is used
        gender, race = face['gender'], face['race']

        scores = {"Man": gender['Man'],
                  "Woman": gender['Woman'],
                  "white": race['white'],
                  "latino hispanic": race['latino hispanic'],
                  "asian": race['asian'],
                  "black": race['black'],
                  "middle eastern": race['middle eastern'],
                  "indian": race['indian']}
        output[file] = scores

        # one flat record per image; the frame is built once after the loop
        records.append({"file": file,
                        "dominant_gender": face['dominant_gender'],
                        "Man": scores["Man"],
                        "Woman": scores["Woman"],
                        "dominant_race": face['dominant_race'],
                        "white": scores["white"],
                        "latino hispanic": scores["latino hispanic"],
                        "asian": scores["asian"],
                        "black": scores["black"],
                        "middle eastern": scores["middle eastern"],
                        "indian": scores["indian"]})

    new_rows = pd.DataFrame(records)
    if new_rows.empty:
        combined = classifier_output.copy()
    elif classifier_output.empty:
        # Avoid concatenating with an empty frame; just adopt its column order.
        ordered = list(classifier_output.columns)
        ordered += [col for col in new_rows.columns if col not in ordered]
        combined = new_rows.reindex(columns=ordered)
    else:
        combined = pd.concat([classifier_output, new_rows], ignore_index=True)

    # export results to csv (once, after all images have been processed)
    if 'file' in combined.columns:
        combined = combined.sort_values(by = 'file')
    combined.to_csv("pipeline_output.csv", index = False)
    return combined
        
def main():
    '''
    Classify the cropped images in `cropped_image_save_path`.

    Builds an empty results dataframe with the expected columns and passes it,
    together with the image folder, to classify_images().
    '''
    # set up dataframe to store results
    classifier_output = pd.DataFrame(columns = ['file','dominant_gender', 'Man', 'Woman',
                                 'dominant_race', 'white', 'latino hispanic', 'asian',
                                 'black', 'middle eastern', 'indian'])

    classify_images(cropped_image_save_path, classifier_output)

main()

Action: race: 100%|███████████████████████████████| 2/2 [00:02<00:00,  1.35s/it]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.27it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.26it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.27it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.28it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.28it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.26it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.19it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.26it/s]
Action: race: 100%|███████████████████████████████| 2/2 [00:00<00:00,  2.18it/s]
