In [1]:
import pandas as pd
import random
import duckdb 
from faker import Faker 

fake = Faker()

# Seed every random source used by the notebook so that Restart Kernel -> Run All
# regenerates exactly the same flock (ids, generations, names, colours, parents).
SEED = 42
random.seed(SEED)
Faker.seed(SEED)  # class-level seed shared by all Faker instances

# Number of chickens
n = 1000

def generate_ids(num_chickens, low=10000000, high=99999999):
    """Return ``num_chickens`` distinct random integer ids in ``[low, high]``.

    Args:
        num_chickens: how many unique ids to draw; zero or a negative number
            yields an empty list.
        low: smallest id that may be generated (inclusive).
        high: largest id that may be generated (inclusive).

    Returns:
        A list of unique integers, in random order.

    Raises:
        ValueError: if more ids are requested than the range can supply
            (a rejection loop would never terminate in that case).
    """
    if num_chickens <= 0:
        return []

    id_space = range(low, high + 1)
    if num_chickens > len(id_space):
        raise ValueError(
            f"cannot draw {num_chickens} unique ids from a range of {len(id_space)} values"
        )

    # random.sample draws without replacement, so uniqueness is guaranteed
    # without retrying on collisions.
    return random.sample(id_space, num_chickens)

# One row per chicken; the unique id is the only column at this stage.
chickens = generate_ids(n)
chicken_df = pd.DataFrame({'chicken_id': chickens})

In [2]:
# To build a lineage, the chickens are split into three generations; generation 3 is the most recent.
# Gen 3's parents come from gen 2, and gen 2's parents come from gen 1.
# Each hen is assumed to hatch many chicks, so the flock is split 10% / 30% / 60% across gens 1 through 3.

# Head-count per generation; gen 3 absorbs whatever the integer rounding leaves over.
num_gen1, num_gen2 = int(n * 0.1), int(n * 0.3)
num_gen3 = n - num_gen1 - num_gen2

# One label per chicken, shuffled so generations are not contiguous blocks of ids.
generation_list = [
    generation
    for generation, head_count in ((1, num_gen1), (2, num_gen2), (3, num_gen3))
    for _ in range(head_count)
]
random.shuffle(generation_list)
chicken_df['generation'] = generation_list

chicken_df

Unnamed: 0,chicken_id,generation
0,59721732,3
1,11681797,3
2,40562694,3
3,16771078,3
4,84971526,2
...,...,...
995,98177004,2
996,74330097,2
997,82098166,1
998,50833399,3


In [3]:
def assign_gender_with_split(chickens, gender_split, gender1, gender2):
    """Label the first ``gender_split`` share of rows ``gender1`` and the rest ``gender2``.

    The frame is modified in place (a ``gender`` column is added or overwritten)
    and also returned for convenience.

    Args:
        chickens: DataFrame to label; rows are split by position.
        gender_split: fraction of rows, between 0 and 1, that receive ``gender1``.
            The count is rounded down, so 0.5 on an odd-sized frame gives the
            extra row to ``gender2``.
        gender1: label for the leading share of rows.
        gender2: label for the remaining rows.

    Returns:
        The same ``chickens`` DataFrame with the ``gender`` column filled in.

    Raises:
        ValueError: if ``gender_split`` is outside ``[0, 1]``.
    """
    if not 0 <= gender_split <= 1:
        raise ValueError(f"gender_split must be between 0 and 1, got {gender_split!r}")

    total = len(chickens)
    first_count = int(total * gender_split)
    # Positional assignment: independent of the index labels, which are the
    # leftover labels of a filtered frame when called from the notebook.
    chickens['gender'] = [gender1] * first_count + [gender2] * (total - first_count)
    return chickens

# Split each generation 50/50 between roosters and hens. Every generation is
# copied out of chicken_df first so the labelling never touches a view.
gen1_df, gen2_df, gen3_df = (
    assign_gender_with_split(
        chicken_df[chicken_df['generation'] == generation].copy(), 0.5, 'Rooster', 'Hen'
    )
    for generation in (1, 2, 3)
)

# Stack the generations back together with a fresh 0..n-1 index.
chicken_df = pd.concat([gen1_df, gen2_df, gen3_df], ignore_index=True)
chicken_df

Unnamed: 0,chicken_id,generation,gender
0,86956099,1,Rooster
1,27424865,1,Rooster
2,42530930,1,Rooster
3,54378654,1,Rooster
4,41015487,1,Rooster
...,...,...,...
995,20131807,3,Hen
996,42010596,3,Hen
997,87465960,3,Hen
998,50833399,3,Hen


In [4]:
# Validate the split: each generation should show equal rooster and hen counts.
# ORDER BY makes the row order deterministic; GROUP BY alone guarantees none.
duckdb.query("select generation, gender, count(*) as count from chicken_df group by 1,2 order by 1,2").to_df()

Unnamed: 0,generation,gender,count
0,1,Rooster,50
1,1,Hen,50
2,2,Rooster,150
3,2,Hen,150
4,3,Rooster,300
5,3,Hen,300


In [5]:
# Give every chicken a Faker-generated full name whose first name matches its gender.
fake = Faker()


def _chicken_name(gender):
    """Return "<first> <last>": a male first name for roosters, female otherwise."""
    if gender == 'Rooster':
        first_name = fake.first_name_male()
    else:
        first_name = fake.first_name_female()
    return f"{first_name} {fake.last_name()}"


chicken_df['name'] = [_chicken_name(gender) for gender in chicken_df['gender']]

chicken_df

Unnamed: 0,chicken_id,generation,gender,name
0,86956099,1,Rooster,John Johnson
1,27424865,1,Rooster,Matthew Robinson
2,42530930,1,Rooster,Bradley Simmons
3,54378654,1,Rooster,Johnathan Oconnor
4,41015487,1,Rooster,Benjamin Ramirez
...,...,...,...,...
995,20131807,3,Hen,Sarah Mccarty
996,42010596,3,Hen,Cynthia Tanner
997,87465960,3,Hen,Heather Diaz
998,50833399,3,Hen,Margaret Booth


In [6]:
def random_feather_color():
    """Pick one feather colour uniformly at random from the fixed six-colour palette."""
    palette = ('white', 'black', 'brown', 'red', 'gray', 'gold')
    return random.choice(palette)

# One independent colour draw per row.
chicken_df['feather_color'] = [random_feather_color() for _ in chicken_df.index]

chicken_df

Unnamed: 0,chicken_id,generation,gender,name,feather_color
0,86956099,1,Rooster,John Johnson,gold
1,27424865,1,Rooster,Matthew Robinson,red
2,42530930,1,Rooster,Bradley Simmons,gray
3,54378654,1,Rooster,Johnathan Oconnor,gray
4,41015487,1,Rooster,Benjamin Ramirez,gray
...,...,...,...,...,...
995,20131807,3,Hen,Sarah Mccarty,brown
996,42010596,3,Hen,Cynthia Tanner,black
997,87465960,3,Hen,Heather Diaz,black
998,50833399,3,Hen,Margaret Booth,red


In [7]:
# using a free API to get the billboard top 10 songs from a given date
import os

import requests

url = "https://billboard-api2.p.rapidapi.com/hot-100"

querystring = {"date": "2023-03-01", "range": "1-10"}

# Credentials must never live in the notebook: the RapidAPI key is read from the
# RAPIDAPI_KEY environment variable (set it before starting the kernel).
# NOTE(review): a key was previously committed in this cell; it should be revoked.
headers = {
    "X-RapidAPI-Key": os.environ["RAPIDAPI_KEY"],
    "X-RapidAPI-Host": "billboard-api2.p.rapidapi.com",
}

# The timeout stops Run All from hanging on a stalled connection, and
# raise_for_status reports HTTP errors here rather than as a KeyError downstream.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

top_10 = response.json()

def create_song_list(json_data):
    """Turn the API payload into a list of "<title> by <artist>" strings.

    ``json_data['content']`` is read as a mapping whose values are dicts with
    ``'title'`` and ``'artist'`` keys; the output follows the mapping's order.
    """
    formatted = []
    for song in json_data['content'].values():
        formatted.append(f"{song['title']} by {song['artist']}")
    return formatted

song_list = create_song_list(top_10)

# Every chicken independently picks a favourite from the chart.
chicken_df['favorite_song'] = [random.choice(song_list) for _ in chicken_df.index]

chicken_df


Unnamed: 0,chicken_id,generation,gender,name,feather_color,favorite_song
0,86956099,1,Rooster,John Johnson,gold,Unholy by Sam Smith & Kim Petras
1,27424865,1,Rooster,Matthew Robinson,red,"Boy's A Liar, Pt. 2 by PinkPantheress & Ice Spice"
2,42530930,1,Rooster,Bradley Simmons,gray,Die For You by The Weeknd
3,54378654,1,Rooster,Johnathan Oconnor,gray,Anti-Hero by Taylor Swift
4,41015487,1,Rooster,Benjamin Ramirez,gray,Die For You by The Weeknd
...,...,...,...,...,...,...
995,20131807,3,Hen,Sarah Mccarty,brown,Die For You by The Weeknd
996,42010596,3,Hen,Cynthia Tanner,black,Kill Bill by SZA
997,87465960,3,Hen,Heather Diaz,black,Cuff It by Beyonce
998,50833399,3,Hen,Margaret Booth,red,Anti-Hero by Taylor Swift


In [8]:
def get_parents(chicken_df, current_generation):
    """Return ``(hen_ids, rooster_ids)`` of the generation just before ``current_generation``.

    Both values are plain lists of ``chicken_id``; they are empty when the
    previous generation has no rows of that gender.
    """
    in_parent_generation = chicken_df['generation'] == current_generation - 1
    is_hen = chicken_df['gender'] == 'Hen'
    is_rooster = chicken_df['gender'] == 'Rooster'

    hens = chicken_df.loc[in_parent_generation & is_hen, 'chicken_id'].tolist()
    roosters = chicken_df.loc[in_parent_generation & is_rooster, 'chicken_id'].tolist()
    return hens, roosters

def assign_parents(chicken_df, generation, hens, roosters):
    """Give every chicken of ``generation`` a random hen and a random rooster as parents.

    Writes the ``parent_hen_id`` and ``parent_rooster_id`` columns of
    ``chicken_df`` in place, for the rows of that generation only, and returns
    nothing. All hens are drawn first, then all roosters.
    """
    in_generation = chicken_df['generation'] == generation
    child_count = int(in_generation.sum())

    for column, candidates in (('parent_hen_id', hens), ('parent_rooster_id', roosters)):
        chicken_df.loc[in_generation, column] = [
            random.choice(candidates) for _ in range(child_count)
        ]

# Candidate parents come from the generation directly above: gen 1 for gen 2, gen 2 for gen 3.
gen2_hen_parents, gen2_rooster_parents = get_parents(chicken_df, 2)
gen3_hen_parents, gen3_rooster_parents = get_parents(chicken_df, 3)

# Draw a random hen and rooster for every chicken in generations 2 and 3.
for generation, hens, roosters in (
    (2, gen2_hen_parents, gen2_rooster_parents),
    (3, gen3_hen_parents, gen3_rooster_parents),
):
    assign_parents(chicken_df, generation, hens, roosters)

# Generation 1 has no parents, which leaves NaN in both columns; store -1 there
# so the ids can be kept as plain int64.
parent_columns = ['parent_hen_id', 'parent_rooster_id']
chicken_df[parent_columns] = chicken_df[parent_columns].fillna(-1).astype('int64')

chicken_df

Unnamed: 0,chicken_id,generation,gender,name,feather_color,favorite_song,parent_hen_id,parent_rooster_id
0,86956099,1,Rooster,John Johnson,gold,Unholy by Sam Smith & Kim Petras,-1,-1
1,27424865,1,Rooster,Matthew Robinson,red,"Boy's A Liar, Pt. 2 by PinkPantheress & Ice Spice",-1,-1
2,42530930,1,Rooster,Bradley Simmons,gray,Die For You by The Weeknd,-1,-1
3,54378654,1,Rooster,Johnathan Oconnor,gray,Anti-Hero by Taylor Swift,-1,-1
4,41015487,1,Rooster,Benjamin Ramirez,gray,Die For You by The Weeknd,-1,-1
...,...,...,...,...,...,...,...,...
995,20131807,3,Hen,Sarah Mccarty,brown,Die For You by The Weeknd,59393838,18090883
996,42010596,3,Hen,Cynthia Tanner,black,Kill Bill by SZA,79275226,26935504
997,87465960,3,Hen,Heather Diaz,black,Cuff It by Beyonce,26908222,28428402
998,50833399,3,Hen,Margaret Booth,red,Anti-Hero by Taylor Swift,69619260,93129404


In [9]:
def _cousin_ids_via_parent(parent_id, df, unknown_id=-1):
    """Return ids of chickens that have a full sibling of ``parent_id`` as a parent."""
    parent_rows = df.loc[df['chicken_id'] == parent_id]
    if parent_rows.empty:
        return []

    parent = parent_rows.iloc[0]
    grand_hen = parent['parent_hen_id']
    grand_rooster = parent['parent_rooster_id']
    # -1 marks an unknown ancestor. Two chickens whose parents are both unknown
    # are not known to be siblings, so this side contributes no cousins.
    if grand_hen == unknown_id or grand_rooster == unknown_id:
        return []

    # Full siblings of the parent: same hen AND same rooster, excluding the parent.
    sibling_ids = df.loc[
        (df['parent_hen_id'] == grand_hen)
        & (df['parent_rooster_id'] == grand_rooster)
        & (df['chicken_id'] != parent_id),
        'chicken_id',
    ]

    has_sibling_parent = (
        df['parent_hen_id'].isin(sibling_ids) | df['parent_rooster_id'].isin(sibling_ids)
    )
    return df.loc[has_sibling_parent, 'chicken_id'].tolist()


def find_cousins(chicken_id, df):
    """Return the ids of the first cousins of ``chicken_id``.

    A first cousin is a chicken one of whose parents is a full sibling (same hen
    and same rooster) of one of this chicken's parents.

    Args:
        chicken_id: id of the chicken to look up.
        df: DataFrame with ``chicken_id``, ``parent_hen_id`` and
            ``parent_rooster_id`` columns, where -1 marks an unknown parent.

    Returns:
        A list of unique cousin ids: rooster-side cousins first, then hen-side,
        each in ``df`` row order. The chicken itself is never included. The list
        is empty when the chicken is not in ``df`` or nothing is known about its
        grandparents.
    """
    chicken_rows = df.loc[df['chicken_id'] == chicken_id]
    if chicken_rows.empty:
        return []

    chicken = chicken_rows.iloc[0]
    # Each side is resolved independently, so a missing parent on one side no
    # longer discards the cousins found on the other.
    rooster_side = _cousin_ids_via_parent(chicken['parent_rooster_id'], df)
    hen_side = _cousin_ids_via_parent(chicken['parent_hen_id'], df)

    # De-duplicate on the id only, keeping order. Dropping duplicates on whole
    # rows fails once df carries list-valued columns such as 'cousins', which
    # made the calling cell impossible to re-run. The chicken itself is removed:
    # it matches when its own parents are full siblings.
    return [cid for cid in dict.fromkeys(rooster_side + hen_side) if cid != chicken_id]

def _cousins_for_row(row):
    """Cousin ids for a gen-3 row; older generations get an empty list."""
    # Gen 2's grandparents are not recorded, so cousins are only derivable for gen 3.
    if row['generation'] != 3:
        return []
    return find_cousins(row['chicken_id'], chicken_df)


chicken_df['cousins'] = chicken_df.apply(_cousins_for_row, axis=1)

chicken_df

Unnamed: 0,chicken_id,generation,gender,name,feather_color,favorite_song,parent_hen_id,parent_rooster_id,cousins
0,86956099,1,Rooster,John Johnson,gold,Unholy by Sam Smith & Kim Petras,-1,-1,[]
1,27424865,1,Rooster,Matthew Robinson,red,"Boy's A Liar, Pt. 2 by PinkPantheress & Ice Spice",-1,-1,[]
2,42530930,1,Rooster,Bradley Simmons,gray,Die For You by The Weeknd,-1,-1,[]
3,54378654,1,Rooster,Johnathan Oconnor,gray,Anti-Hero by Taylor Swift,-1,-1,[]
4,41015487,1,Rooster,Benjamin Ramirez,gray,Die For You by The Weeknd,-1,-1,[]
...,...,...,...,...,...,...,...,...,...
995,20131807,3,Hen,Sarah Mccarty,brown,Die For You by The Weeknd,59393838,18090883,[]
996,42010596,3,Hen,Cynthia Tanner,black,Kill Bill by SZA,79275226,26935504,"[73605553, 30284402, 27377219, 91877141, 52252..."
997,87465960,3,Hen,Heather Diaz,black,Cuff It by Beyonce,26908222,28428402,[]
998,50833399,3,Hen,Margaret Booth,red,Anti-Hero by Taylor Swift,69619260,93129404,[]


In [10]:
# Pick one cousin per chicken for use on name tags; -1 means "no cousin known".
def _pick_random_cousin(cousins):
    """Return a random element of ``cousins``, or None when the list is empty."""
    return random.choice(cousins) if cousins else None


chicken_df['random_cousin'] = (
    chicken_df['cousins']
    .apply(_pick_random_cousin)
    .fillna(-1)
    .astype('int64')
)

chicken_df

Unnamed: 0,chicken_id,generation,gender,name,feather_color,favorite_song,parent_hen_id,parent_rooster_id,cousins,random_cousin
0,86956099,1,Rooster,John Johnson,gold,Unholy by Sam Smith & Kim Petras,-1,-1,[],-1
1,27424865,1,Rooster,Matthew Robinson,red,"Boy's A Liar, Pt. 2 by PinkPantheress & Ice Spice",-1,-1,[],-1
2,42530930,1,Rooster,Bradley Simmons,gray,Die For You by The Weeknd,-1,-1,[],-1
3,54378654,1,Rooster,Johnathan Oconnor,gray,Anti-Hero by Taylor Swift,-1,-1,[],-1
4,41015487,1,Rooster,Benjamin Ramirez,gray,Die For You by The Weeknd,-1,-1,[],-1
...,...,...,...,...,...,...,...,...,...,...
995,20131807,3,Hen,Sarah Mccarty,brown,Die For You by The Weeknd,59393838,18090883,[],-1
996,42010596,3,Hen,Cynthia Tanner,black,Kill Bill by SZA,79275226,26935504,"[73605553, 30284402, 27377219, 91877141, 52252...",27377219
997,87465960,3,Hen,Heather Diaz,black,Cuff It by Beyonce,26908222,28428402,[],-1
998,50833399,3,Hen,Margaret Booth,red,Anti-Hero by Taylor Swift,69619260,93129404,[],-1
