In [None]:
# !pip install requests
# !pip install Pillow
# !pip install deepface
# !pip install pandas

In [None]:
import os
import numpy as np
from deepface import DeepFace
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import threading

# Load the table of names; it must provide "Gender" and "Name" columns
df = pd.read_csv("input.csv")

# Split the rows by the value of the "Gender" column
df_male = df[df["Gender"].eq("Male")]
df_female = df[df["Gender"].eq("Female")]

# Output folders, one per gender (created on demand)
dir_male, dir_female = "images/male/", "images/female/"
for directory in (dir_male, dir_female):
    os.makedirs(directory, exist_ok=True)

# Index of the next unused name in each per-gender dataframe
male_counter = female_counter = 0

# Each counter gets its own lock so worker threads can update it safely
male_lock, female_lock = threading.Lock(), threading.Lock()

# Ask for the inclusive age window an image must fall into to be kept
min_age = int(input("Enter the minimum acceptable age: "))
max_age = int(input("Enter the maximum acceptable age: "))


def _claim_next_target(predicted_gender):
    """Reserve the next unused name for ``predicted_gender``.

    The bounds check, the name lookup and the counter increment all happen
    while the gender's lock is held, so two threads can neither claim the
    same row nor index past the end of the dataframe.

    Args:
        predicted_gender: ``"Man"`` or ``"Woman"``.

    Returns:
        A ``(file_name, full_file_name)`` tuple for the reserved row, or
        ``None`` when every name for that gender has already been handed out.
    """
    global male_counter
    global female_counter

    if predicted_gender == "Man":
        with male_lock:
            if male_counter >= len(df_male):
                return None
            file_name = df_male.iloc[male_counter]["Name"]
            male_counter += 1
        return file_name, dir_male + file_name + ".jpg"

    with female_lock:
        if female_counter >= len(df_female):
            return None
        file_name = df_female.iloc[female_counter]["Name"]
        female_counter += 1
    return file_name, dir_female + file_name + ".jpg"


def process_image():
    """Download one generated face, classify it, and save it if it qualifies.

    The image is fetched from thispersondoesnotexist.com, cropped by 20 pixels
    on every side, and analysed with DeepFace for gender and age. When the
    predicted age lies within ``[min_age, max_age]`` and a name is still
    available for the predicted gender, the cropped image is written to that
    gender's directory as ``<Name>.jpg``.

    Download and processing failures are reported with ``print`` and never
    raised, so one bad attempt does not abort the surrounding thread pool.

    Returns:
        None.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
    }

    # A timeout keeps a stalled connection from blocking a worker forever.
    try:
        response = requests.get(
            "https://www.thispersondoesnotexist.com", headers=headers, timeout=30)
    except requests.RequestException as e:
        print(f"An error occurred while downloading the image: {e}")
        return

    # .get() avoids a KeyError when the server omits the Content-Type header.
    content_type = response.headers.get("Content-Type", "")
    if response.status_code != 200 or "image" not in content_type:
        print(
            f"Unexpected response (status {response.status_code}, content type {content_type!r}). Skipping."
        )
        return

    try:
        # Normalise to RGB so the array always has three channels and the
        # image can be written as JPEG.
        img = Image.open(BytesIO(response.content)).convert("RGB")

        # Crop the image
        width, height = img.size
        img = img.crop((20, 20, width - 20, height - 20))

        # DeepFace documents numpy input as BGR, while PIL produces RGB:
        # reverse the channel axis (and make the result contiguous).
        img_np = np.ascontiguousarray(np.array(img)[:, :, ::-1])

        # Predict gender and age using DeepFace
        predictions = DeepFace.analyze(img_np, actions=['gender', 'age'])

        # Determine the gender based on the higher probability
        gender_scores = predictions[0]['gender']
        predicted_gender = "Man" if gender_scores['Man'] > gender_scores['Woman'] else "Woman"

        # Check if the predicted age is within the acceptable range
        predicted_age = predictions[0]['age']
        if not (min_age <= predicted_age <= max_age):
            print(
                f"Image is predicted to be a {predicted_gender} of age {predicted_age}, which is outside the acceptable range. Not saving the image."
            )
            return

        # Reserve a name only after the image has been accepted.
        target = _claim_next_target(predicted_gender)
        if target is None:
            print(
                f"Image is predicted to be a {predicted_gender}, but no unused names remain for that gender. Not saving the image."
            )
            return

        file_name, full_file_name = target
        img.save(full_file_name)
        print(
            f"Image {file_name} is predicted to be a {predicted_gender} of age {predicted_age} and saved to {full_file_name}"
        )
    except Exception as e:
        # Best-effort worker: DeepFace, decoding and file errors are reported
        # and the attempt is dropped.
        print(f"An error occurred while processing the image: {e}")


# Ask how many worker threads the pool should use
num_threads = int(input("How many threads do you want to run? "))

# Fan the download attempts out over a thread pool: one attempt per row of
# whichever gender group is larger
with ThreadPoolExecutor(max_workers=num_threads) as executor:
    tasks = set()
    for _ in range(max(len(df_male), len(df_female))):
        tasks.add(executor.submit(process_image))

    # Drive a progress bar as attempts finish; result() re-raises any
    # exception that escaped the worker
    finished = as_completed(tasks)
    for task in tqdm(finished, total=len(tasks)):
        task.result()