In [3]:
import os
import time
import json
from dotenv import load_dotenv
import pandas as pd
from google import genai


In [4]:
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key="AIzaSyArPg8FJF3YFab64UZvdgWLB_MhOZZn32Y")


In [None]:

# uses exponential backoff when calling gemini api
def generate_text(prompt, model="gemini-2.0-flash-lite-preview-02-05", max_retries=5, initial_wait=2):

    attempt = 0
    wait_time = initial_wait

    while attempt < max_retries:
        try:
            response = client.models.generate_content(
                model=model,
                contents=[prompt]
            )
            return response.text
        except Exception as e:
            print(f"Error encountered: {e}. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            wait_time *= 2  # backoff
            attempt += 1

    raise Exception("Max retries reached. The request could not be completed.")

def validate_profile(profile):
    required_keys = ["bio", "movies", "tv", "religion", "music", "sports", "books", "politics"]
    
    # checking all keys present or not
    for key in required_keys:
        if key not in profile:
            return False
    
    # checking range of numeric entries
    for key in ["movies", "tv", "religion", "music", "sports", "books", "politics"]:
        value = profile[key]
        if not isinstance(value, int):
            return False
        if value < 1 or value > 10:
            return False

    if not isinstance(profile["bio"], str):
        return False

    return True

def generate_profiles(n_profiles=500):
    prompt = f"""
You are a profile generator. Create {n_profiles} unique dating profiles.
Return them as a valid JSON array of objects. Each object has these fields:
    "bio": (string) a short creative biography (1-2 sentences),
    "movies": (integer 1-10),
    "tv": (integer 1-10),
    "religion": (integer 1-10),
    "music": (integer 1-10),
    "sports": (integer 1-10),
    "books": (integer 1-10),
    "politics": (integer 1-10).
Example for one item in the array:
{{
    "bio": "Coffee fanatic. Amateur traveler. Always up for a new adventure!",
    "movies": 7,
    "tv": 5,
    "religion": 2,
    "music": 9,
    "sports": 4,
    "books": 8,
    "politics": 3
}}
"""
    response_text = generate_text(prompt)
    
    try:
        # converting JSON string into list of dictionaries
        raw_profiles = json.loads(response_text)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}. Raw Response: {response_text}")
        return []
    
    valid_profiles = []
    for p in raw_profiles:
        if validate_profile(p):
            valid_profiles.append(p)

    print(f"Generated {len(valid_profiles)} valid profiles out of requested {n_profiles}.")
    return valid_profiles

if __name__ == "__main__":
    total_needed = 500
    batch_size = 250

    all_profiles = []
    total_collected = 0
 
    while total_collected < total_needed:
        needed_now = min(batch_size, total_needed - total_collected)
        batch = generate_profiles(needed_now)
        all_profiles.extend(batch)
        total_collected = len(all_profiles)
        
        time.sleep(2)
    
    df = pd.DataFrame(all_profiles)
    df.rename(
        columns={
            "bio": "Bios",
            "movies": "Movies",
            "tv": "TV",
            "religion": "Religion",
            "music": "Music",
            "sports": "Sports",
            "books": "Books",
            "politics": "Politics"
        },
        inplace=True
    )
    
    print(df.head(10))  
    print(f"\nTotal valid profiles generated: {len(df)}")
    
    df.to_csv("dating_profiles.csv", index=False)
    print("\nSaved to 'dating_profiles.csv'.")


Error decoding JSON: Expecting value: line 1 column 1 (char 0). Raw Response: ```json
[
  {
    "bio": "Lover of vintage finds and cozy nights in. Currently binge-watching documentaries.",
    "movies": 6,
    "tv": 8,
    "religion": 4,
    "music": 7,
    "sports": 2,
    "books": 9,
    "politics": 5
  },
  {
    "bio": "Aspiring chef with a passion for travel and a quirky sense of humor. Let's explore new places and flavors together!",
    "movies": 9,
    "tv": 6,
    "religion": 7,
    "music": 8,
    "sports": 3,
    "books": 5,
    "politics": 6
  },
  {
    "bio": "Tech enthusiast and outdoor adventurer. Seeking someone who appreciates both intellectual conversations and nature's beauty.",
    "movies": 7,
    "tv": 5,
    "religion": 3,
    "music": 6,
    "sports": 8,
    "books": 7,
    "politics": 4
  },
  {
    "bio": "Passionate about art, music, and deep conversations. Enjoying life's simple pleasures and searching for meaningful connections.",
    "movies": 8,
    "tv"