In [6]:
import os
import json
from typing import List
from tqdm import tqdm
from pydantic import BaseModel, Field

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Credentials: keep a GOOGLE_API_KEY that is already exported in the
# environment and fall back to the "XYZ" placeholder only when none is set.
# An unconditional assignment here would clobber a real key with the
# placeholder. Supply the real key through the environment instead of
# committing it to this file.
os.environ.setdefault("GOOGLE_API_KEY", "XYZ")

# ---------- schema ----------

# One driver entry; DrivingUsage.primary_drivers holds a list of these.
# Documented with comments rather than a docstring so the schema derived
# from this model is left unchanged.
class Driver(BaseModel):
    role: str  # free-form text; no set of allowed values is enforced
    age: int  # integer with no range validation (presumably years -- confirm)

# How the vehicle is used; nested under Persona.driving_usage_profile.
# Apart from business_use and primary_drivers every field is a free-form
# string: the schema enforces no enumerations or numeric ranges.
class DrivingUsage(BaseModel):
    # NOTE(review): typed as str despite the "_km" suffix -- presumably a
    # band or textual description rather than a number; confirm intent.
    commute_distance_km: str
    driving_environment: str
    daily_parking: str
    business_use: bool
    annual_mileage_band: str
    # Nested Driver models (role + age), one per listed driver.
    primary_drivers: List[Driver]

# Customer-segment attributes; nested under Persona.customer_segment_profile.
# All fields are free-form strings with no allowed-value list enforced.
class SegmentProfile(BaseModel):
    price_sensitivity: str
    buyer_type: str
    switching_reason: str
    # presumably describes past claims experience -- confirm with the author
    claims_trauma: str

# Service/interaction preferences; nested under Persona.service_preferences.
# All fields are free-form strings with no allowed-value list enforced.
class ServicePreferences(BaseModel):
    communication_style: str
    decision_style: str
    information_format: str
    preferred_channel: str

# Product/coverage preferences; nested under Persona.product_preferences.
class ProductPreferences(BaseModel):
    coverage_goal: str
    deductible_appetite: str
    # List of free-form strings; the schema does not restrict its contents.
    valued_addons: List[str]

# A single generated customer persona: two identifying strings plus four
# nested profile sections.
class Persona(BaseModel):
    # Both fields below are required by the schema, but generate_all
    # overwrites them after generation (sequential "P_###" id and the
    # requested archetype), so the model-produced values are discarded.
    persona_id: str
    archetype: str
    driving_usage_profile: DrivingUsage
    customer_segment_profile: SegmentProfile
    service_preferences: ServicePreferences
    product_preferences: ProductPreferences


# Top-level wrapper used as the structured-output schema
# (llm.with_structured_output(PersonaList)) so a single call can return
# several personas; generate_all reads them from `.items`.
class PersonaList(BaseModel):
    items: List[Persona]

# ---------- model ----------

# Gemini chat model client with explicit sampling settings.
# NOTE(review): temperature=1.1 / top_p=0.95 are presumably chosen to get
# varied personas -- confirm they are intentional for this model.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro", 
    temperature=1.1,
    top_p=0.95,
)

# Bind the PersonaList schema so the model's reply is returned as a
# PersonaList instance (generate_all reads `.items` from it).
structured_llm = llm.with_structured_output(PersonaList)


# ---------- archetypes ----------

# Archetype labels iterated by generate_all: each one is substituted into
# the prompt's {arch} placeholder and written back onto every persona
# generated for it. List order determines the order of persona ids.
ARCHETYPES = [
    "Young urban professional",
    "Student / early career youth",
    "Family household with kids",
    "Self-employed / small business owner",
    "Gig worker / delivery driver",
    "Corporate executive / high income owner",
    "Homemaker / occasional driver",
    "Senior citizen / retired",
    "Rural or semi-urban resident",
    "Budget-conscious daily commuter"
]


# ---------- prompt ----------

# Two-message chat prompt. The human message has two template variables:
#   {n}    -- number of personas to request (generate_all passes per_type)
#   {arch} -- archetype label (one entry of ARCHETYPES)
prompt = ChatPromptTemplate.from_messages([
    ("system",
     "Generate realistic and diverse Indian motor insurance customer personas."),
    ("human",
     "Generate exactly {n} personas for archetype: {arch}. "
     "Fill all fields realistically and vary behavior.")
])

# Pipeline: format the prompt, then call the structured-output model.
# Invoked with a dict providing "n" and "arch".
chain = prompt | structured_llm



def generate_all(per_type=1, out_file="personas.json"):
    """Generate personas for every archetype and save them as JSON.

    The module-level ``chain`` is invoked once per entry of ``ARCHETYPES``,
    asking for ``per_type`` personas. Every returned persona is given a
    sequential ``persona_id`` (``P_001``, ``P_002``, ...) and its
    ``archetype`` is overwritten with the requested label, so neither field
    depends on what the model produced.

    Generation is best-effort: when an archetype fails, the error is printed
    and the loop moves on to the next archetype. The output file is always
    written, even if it ends up containing fewer personas than requested.

    Args:
        per_type: Number of personas requested per archetype.
        out_file: Path of the JSON file to write (overwritten if it exists).

    Returns:
        The list of persona dicts that was written to ``out_file``.
    """
    all_personas = []

    print(f"Generating {per_type} personas per archetype...")

    for arch in tqdm(ARCHETYPES):
        try:
            result_wrapper = chain.invoke({"n": per_type, "arch": arch})

            # Structured-output parsing may yield None when the reply holds
            # no parsable payload; report that explicitly instead of letting
            # it surface as an opaque AttributeError message.
            if result_wrapper is None:
                print(f"Error generating {arch}: model returned no structured output")
                continue

            personas = result_wrapper.items

            # The prompt asks for "exactly n" but nothing enforces it, so
            # surface a mismatch rather than silently accepting it. The
            # personas are still kept so no generated data is thrown away.
            if len(personas) != per_type:
                print(
                    f"Warning: requested {per_type} personas for {arch}, "
                    f"got {len(personas)}"
                )

            for p in personas:
                # Ids are numbered by position in the combined output, so
                # they stay contiguous even when an archetype is skipped.
                p.persona_id = f"P_{len(all_personas) + 1:03d}"
                p.archetype = arch
                all_personas.append(p.model_dump())

        except Exception as e:
            # Deliberate best-effort boundary: one failing archetype (API
            # error, validation failure, ...) must not abort the whole run.
            print(f"Error generating {arch}: {e}")

    # Explicit UTF-8 plus ensure_ascii=False keeps any non-ASCII text
    # readable in the file and independent of the platform default encoding.
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(all_personas, f, indent=2, ensure_ascii=False)

    print(f"saved {len(all_personas)} personas to {out_file}")
    return all_personas


# Entry point: when executed directly, generate one persona per archetype
# and write them to the default output file ("personas.json").
if __name__ == "__main__":
    generate_all(per_type=1)

Generating 1 personas per archetype...


100%|██████████| 10/10 [02:08<00:00, 12.89s/it]

saved 10 personas to personas.json



