In [153]:
import pandas as pd
import itertools
import random
# Seed the global `random` generator so the hobby and age draws made further
# down with random.sample / random.randint are repeatable on a fresh run.
random.seed(0)

In [154]:
# Twenty concrete activities per hobby category. List order is significant:
# random.sample() picks by position, so reordering changes seeded results.
_sport_hobbies = [
    "Tennis", "Soccer", "Basketball", "Baseball",
    "Volleyball", "Swimming", "Cycling", "Running",
    "Hiking", "Surfing", "Snowboarding", "Skiing",
    "Golf", "Boxing", "Martial Arts", "Rock Climbing",
    "Horseback Riding", "Bowling", "Yoga", "Pilates",
]
_music_hobbies = [
    "Playing Guitar", "Playing Piano", "Playing Drums", "Singing",
    "Composing Music", "DJing", "Producing Music", "Writing Songs",
    "Attending Concerts", "Listening to Music", "Music Theory Study", "Playing Violin",
    "Playing Saxophone", "Karaoke", "Music Critique", "Playing Flute",
    "Playing Trumpet", "Playing Cello", "Music Collecting", "Choir Singing",
]
_art_hobbies = [
    "Painting", "Drawing", "Sculpting", "Photography",
    "Graphic Design", "Animation", "Calligraphy", "Printmaking",
    "Fashion Design", "Interior Design", "Ceramics", "Woodworking",
    "Jewelry Making", "Digital Art", "Street Art", "Mosaics",
    "Film Making", "Theater", "Dance", "Architecture",
]
_travel_hobbies = [
    "Backpacking", "Cruising", "Road Tripping", "Mountain Climbing",
    "Cultural Tourism", "Eco-Tourism", "Adventure Tourism", "Volunteer Tourism",
    "Historical Tourism", "Luxury Travel", "Camping", "Beach Holidays",
    "City Breaks", "Safari", "Ski Trips", "Food Tourism",
    "Island Hopping", "Scuba Diving", "Spa Visits", "Business Travel",
]

# Category name -> list of specific hobbies belonging to that category.
hobbies_dict = {
    "Sport": _sport_hobbies,
    "Music": _music_hobbies,
    "Art": _art_hobbies,
    "Travel": _travel_hobbies,
}


In [155]:
# Value lists for every profile attribute that is fully crossed.
ethnicities = ["White", "South Asian", "East Asian", "Black", "Hispanic"]
genders = ["Man", "Woman", "Androgynous Person"]
hobbies = ["Sport", "Music", "Art", "Travel"]
age_ranges = ["20-30", "31-40", "41-50"]
tone = ["fun", "serious"]
size = ["thin", "mid-size", "plus-size"]

# Column label -> values. The insertion order fixes both the column order and
# the nesting order of the Cartesian product (the last entry varies fastest).
_profile_axes = {
    "Ethnicity": ethnicities,
    "Gender": genders,
    "Hobby": hobbies,
    "Age Range": age_ranges,
    "Size": size,
    "Tone": tone,
}

# One row per combination of all attribute values.
combinations = list(itertools.product(*_profile_axes.values()))
df = pd.DataFrame(combinations, columns=list(_profile_axes))
# Disabled: sequential 1-based identifier column.
# df['Profile_ID'] = range(1, len(df) + 1)

In [156]:
def get_two_specific_hobbies(hobby_category):
    """Pick two distinct specific hobbies from one hobby category.

    Args:
        hobby_category: A key of ``hobbies_dict`` (e.g. "Sport").

    Returns:
        A list of two different entries from that category's hobby list,
        sampled without replacement using the global ``random`` generator.

    Raises:
        KeyError: If ``hobby_category`` is not a key of ``hobbies_dict``.
    """
    return random.sample(hobbies_dict[hobby_category], 2)

# Draw one pair of specific hobbies per row (same row order, hence the same
# sequence of random draws), then expand the pairs into two columns in a single
# DataFrame construction. This avoids building a pd.Series per row inside
# Series.apply, which is slow and whose expansion into a DataFrame is
# deprecated in recent pandas releases.
_hobby_pairs = df['Hobby'].map(get_two_specific_hobbies)
df[['SpecificHobby1', 'SpecificHobby2']] = pd.DataFrame(_hobby_pairs.tolist(), index=df.index)


def get_random_age_from_range(age_range, rng=None):
    """Draw a random integer age from an inclusive "start-end" range string.

    Args:
        age_range: Text of the form "<start>-<end>", e.g. "20-30". Both bounds
            are inclusive and must be non-negative integers with start <= end.
        rng: Optional object exposing ``randint(a, b)`` (for example a
            ``random.Random`` instance). When omitted, the module-level
            ``random`` generator is used, exactly as before.

    Returns:
        An int ``age`` with ``start <= age <= end``.

    Raises:
        ValueError: If ``age_range`` is not two integers separated by a single
            "-", or if the start bound is greater than the end bound.
    """
    try:
        low_text, high_text = age_range.split('-')
        low, high = int(low_text), int(high_text)
    except ValueError as exc:
        raise ValueError(
            f"age_range must look like 'start-end' with integer bounds, got {age_range!r}"
        ) from exc

    # Fail with a readable message instead of randrange's "empty range" error.
    if low > high:
        raise ValueError(
            f"age_range start must not exceed end, got {age_range!r}"
        )

    generator = random if rng is None else rng
    return generator.randint(low, high)

# One random integer age per row, drawn from that row's "Age Range" string.
df['Age'] = df['Age Range'].map(get_random_age_from_range)

In [157]:
# --- Colour palettes, keyed by palette name ---------------------------------
eye_colors = dict(
    default=["brown", "green", "hazel"],
    White=["brown", "blue", "green", "hazel"],
)
hair_colors = dict(
    default=["black", "brown"],
    White=["black", "blonde", "brown", "red"],
)
dyed_hair_colors = ["dyed blonde", "dyed red"]

# --- Hair and clothing --------------------------------------------------------
hair_lengths = ["short", "medium length", "long"]
hair_texture = ["curly", "wavy", "straight"]
clothing_styles = ["casual", "formal", "sporty", "vintage"]

# --- Facial feature sizes -----------------------------------------------------
nose_sizes = ["small", "medium", "large"]
eye_sizes = ["small", "medium", "large"]
lip_sizes = ["narrow", "medium", "full"]

# Re-seed the global generator so the random feature draws that follow start
# from a known state regardless of how many draws happened earlier.
random.seed(0)

def assign_features(row):
    """Randomly choose appearance attributes for one profile row.

    Rows whose 'Ethnicity' is 'Black', 'East Asian' or 'South Asian' take their
    hair colour from ``dyed_hair_colors`` and their eye colour from
    ``eye_colors['default']``. Every other row uses the 'White' entries of
    ``hair_colors`` and ``eye_colors``. The remaining attributes are drawn
    uniformly from their option lists for all rows.

    Args:
        row: A mapping-like row that provides an 'Ethnicity' value.

    Returns:
        pd.Series indexed by 'Eye Color', 'Hair Color', 'Hair Length',
        'Hair Texture', 'Clothing Style', 'Nose Size', 'Eye Size', 'Lip Size'.
    """
    # NOTE(review): hair_colors['default'] is not read in this function, and
    # 'Hispanic' rows fall through to the 'White' lists -- confirm both are
    # intended.
    if row['Ethnicity'] in ('Black', 'East Asian', 'South Asian'):
        hair_pool, eye_pool = dyed_hair_colors, eye_colors['default']
    else:
        hair_pool, eye_pool = hair_colors['White'], eye_colors['White']

    # The draw order (hair colour, eye colour, then the rest top to bottom)
    # determines which values a given seed produces, so it must stay fixed.
    hair_color = random.choice(hair_pool)
    eye_color = random.choice(eye_pool)

    features = {'Eye Color': eye_color, 'Hair Color': hair_color}
    remaining_options = (
        ('Hair Length', hair_lengths),
        ('Hair Texture', hair_texture),
        ('Clothing Style', clothing_styles),
        ('Nose Size', nose_sizes),
        ('Eye Size', eye_sizes),
        ('Lip Size', lip_sizes),
    )
    for column, options in remaining_options:
        features[column] = random.choice(options)

    return pd.Series(features)

# Columns filled by assign_features, in the order of the Series it returns.
_feature_columns = [
    'Eye Color', 'Hair Color', 'Hair Length', 'Hair Texture',
    'Clothing Style', 'Nose Size', 'Eye Size', 'Lip Size',
]
df[_feature_columns] = df.apply(assign_features, axis=1)

def create_detailed_prompt(row):
    """Compose the image-generation prompt text for one profile row.

    Args:
        row: A mapping-like row providing 'Size', 'Age', 'Ethnicity', 'Gender',
            'Eye Size', 'Eye Color', 'Hair Length', 'Hair Color',
            'Hair Texture', 'Nose Size', 'Lip Size' and 'Clothing Style'.
            'Gender' must be a string; it is lower-cased in the prompt.

    Returns:
        A single-string prompt describing the person, their outfit and the
        backdrop.
    """
    subject = f"{row['Size']}, {row['Age']} year-old {row['Ethnicity']} {row['Gender'].lower()}"
    eyes = f"{row['Eye Size']} {row['Eye Color']} eyes"
    hair = f"{row['Hair Length']} {row['Hair Color']} {row['Hair Texture']} hair"
    face = f"a {row['Nose Size']} nose, and {row['Lip Size']} lips"
    outfit = f"dressed in {row['Clothing Style']} style"

    return (
        f"Create a photo-realistic profile picture for a {subject} with "
        f"{eyes}, {hair}, {face}, {outfit}, "
        "standing in front of a white backdrop. "
        "Make the image look like a realistic person, not following beauty standards."
    )

# Build the prompt text for every profile row and store it on the frame.
_dalle_prompts = df.apply(create_detailed_prompt, axis='columns')
df['Dalle Prompt'] = _dalle_prompts

# Display the finished profile table.
df

Unnamed: 0,Ethnicity,Gender,Hobby,Age Range,Size,Tone,SpecificHobby1,SpecificHobby2,Age,Eye Color,Hair Color,Hair Length,Hair Texture,Clothing Style,Nose Size,Eye Size,Lip Size,Dalle Prompt
0,White,Man,Sport,20-30,thin,fun,Golf,Boxing,21,hazel,red,short,wavy,vintage,medium,medium,medium,Create a photo-realistic profile picture for a...
1,White,Man,Sport,20-30,thin,serious,Soccer,Hiking,28,blue,brown,long,curly,sporty,small,small,full,Create a photo-realistic profile picture for a...
2,White,Man,Sport,20-30,mid-size,fun,Horseback Riding,Rock Climbing,23,blue,brown,medium length,curly,casual,large,medium,medium,Create a photo-realistic profile picture for a...
3,White,Man,Sport,20-30,mid-size,serious,Golf,Surfing,22,green,black,medium length,wavy,formal,large,medium,medium,Create a photo-realistic profile picture for a...
4,White,Man,Sport,20-30,plus-size,fun,Rock Climbing,Skiing,30,brown,brown,long,curly,casual,large,medium,full,Create a photo-realistic profile picture for a...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,Hispanic,Androgynous Person,Travel,41-50,thin,serious,Mountain Climbing,Island Hopping,42,blue,black,short,curly,sporty,medium,medium,medium,Create a photo-realistic profile picture for a...
1076,Hispanic,Androgynous Person,Travel,41-50,mid-size,fun,Road Tripping,Spa Visits,41,brown,red,long,wavy,formal,small,medium,full,Create a photo-realistic profile picture for a...
1077,Hispanic,Androgynous Person,Travel,41-50,mid-size,serious,Cultural Tourism,Food Tourism,42,blue,blonde,medium length,curly,sporty,large,large,narrow,Create a photo-realistic profile picture for a...
1078,Hispanic,Androgynous Person,Travel,41-50,plus-size,fun,Cruising,Island Hopping,46,green,red,short,straight,vintage,large,small,medium,Create a photo-realistic profile picture for a...


In [159]:
# Spot-check: full prompt text of the row at position 1000.
df["Dalle Prompt"].iloc[1000]

'Create a photo-realistic profile picture for a plus-size, 40 year-old Hispanic woman with large hazel eyes, short red curly hair, a large nose, and narrow lips, dressed in sporty style, standing in front of a white backdrop. Make the image look like a realistic person, not following beauty standards.'