In [3]:
#!/usr/bin/env python3

"""
This script generates a synthetic SaaS customer dataset
and saves it to a JSON file.
"""

import json
import random
import datetime

def generate_saas_data(num_customers, seed=None):
    """
    Generates a synthetic SaaS customer dataset.

    Each record has a sequential ``customer_id`` starting at 1, a
    subscription start date 1-1000 days before today, a monthly recurring
    revenue drawn uniformly between 1000 and 5000, a randomly chosen plan
    type, and an optional churn date that never lies in the future.

    Args:
        num_customers (int): The number of customer records to generate.
            Values below 1 produce an empty list.
        seed (int, optional): Seed for a private random generator, which
            makes the output reproducible for a given calendar day. When
            None (the default), the module-level ``random`` state is used.
    Returns:
        A list of dictionaries representing the customer data. Dates are
        'YYYY-MM-DD' strings; ``churn_date`` is None for active customers.
    """
    # A dedicated generator keeps seeded runs independent of global state;
    # without a seed we fall back to the shared module-level generator.
    rng = random if seed is None else random.Random(seed)
    plan_types = ['Basic', 'Pro', 'Enterprise']

    # Take "today" once so every record is measured against the same
    # reference date, even if the run crosses midnight.
    today = datetime.date.today()

    customers = []
    for customer_id in range(1, num_customers + 1):
        start_date = today - datetime.timedelta(days=rng.randint(1, 1000))
        mrr = rng.uniform(1000, 5000)

        # About 20% of customers are churn candidates, churning 90-300 days
        # after they subscribed.
        churn_date = None
        if rng.random() < 0.2:
            candidate = start_date + datetime.timedelta(days=rng.randint(90, 300))
            # A churn that would fall after today has not happened yet, so
            # that customer is still active and keeps churn_date = None.
            if candidate <= today:
                churn_date = candidate.strftime('%Y-%m-%d')

        plan_type = rng.choice(plan_types)

        customers.append({
            "customer_id": customer_id,
            "subscription_start_date": start_date.strftime('%Y-%m-%d'),
            "monthly_recurring_revenue": mrr,
            "churn_date": churn_date,
            "plan_type": plan_type,
        })

    return customers

def main(num_records=5000, file_name="raw_saas_data.json"):
    """
    Main function to orchestrate data generation and saving.

    Generates the dataset with generate_saas_data() and writes it to a JSON
    file, reporting progress and any write failure on standard output.

    Args:
        num_records (int): The number of customer records to generate.
        file_name (str): Path of the JSON file to write.
    """
    print(f"Generating a synthetic dataset of {num_records} records...")
    saas_data = generate_saas_data(num_records)

    try:
        # Write UTF-8 explicitly rather than relying on the platform's
        # default text encoding.
        with open(file_name, 'w', encoding='utf-8') as output_file:
            json.dump(saas_data, output_file, indent=4)
    except (OSError, TypeError, ValueError) as e:
        # OSError: the file could not be created or written.
        # TypeError / ValueError: json.dump could not serialize the data.
        # Anything else is a programming error and is allowed to propagate.
        print(f"An error occurred: {e}")
    else:
        print(f"Data successfully generated and saved to '{file_name}'.")

# Run the generator only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Generating a synthetic dataset of 5000 records...
Data successfully generated and saved to 'raw_saas_data.json'.
