In [0]:
# %sql
# -- Create volume for JSON file storage
# CREATE VOLUME IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_NAME}.customer_json_files;


In [0]:
# Generate synthetic JSON files that simulate real-time customer events
import json
import uuid
import random
from datetime import datetime, timedelta

def generate_customer_json():
    """Build one synthetic customer event as a JSON-serializable dict.

    Returns:
        dict: A record holding a random UUID4 ``Customer_ID``, a
        ``Last_Login_Date`` (``YYYY-MM-DD``) between today and 60 days ago,
        randomized usage and subscription fields, and an ``event_timestamp``
        with the current time in ISO-8601 format.
    """
    plan_types = ["Premium", "Standard", "Basic"]
    disciplines = ["Science", "Arts", "Engineering", "Business"]

    # Fields are filled in key order, so the random draws happen in that order too.
    customer = {}
    customer["Customer_ID"] = str(uuid.uuid4())

    last_login = datetime.today() - timedelta(days=random.randint(0, 60))
    customer["Last_Login_Date"] = last_login.strftime("%Y-%m-%d")

    customer["Ebooks_Downloaded_6_Months"] = random.randint(0, 50)
    customer["Average_Session_Time"] = round(random.uniform(0.5, 60.0), 2)
    customer["Subscription_Plan_Type"] = random.choice(plan_types)
    customer["Primary_Discipline"] = random.choice(disciplines)

    # Drawn independently of Last_Login_Date.
    customer["Days_Since_Last_Activity"] = round(random.uniform(0, 60), 2)
    customer["event_timestamp"] = datetime.now().isoformat()
    return customer

# Generate multiple JSON files (simulating file drops)
def create_batch_files(num_files=3, records_per_file=10):
    """Write batches of synthetic customer records to the volume as JSON Lines files.

    Each file is named ``customer_batch_<YYYYmmdd_HHMMSS>_<index>.json`` and
    holds one JSON object per line. All files written by a single call share
    the same timestamp; ``<index>`` runs from 0 to ``num_files - 1``.

    Args:
        num_files: Number of files to write. Defaults to 3.
        records_per_file: Number of records in each file. Defaults to 10.

    Raises:
        ValueError: If ``num_files`` or ``records_per_file`` is negative.
        OSError: If a file cannot be opened for writing (for example, the
            target directory does not exist).
    """
    if num_files < 0 or records_per_file < 0:
        raise ValueError("num_files and records_per_file must be non-negative")

    # Built once so the write path and the summary message cannot drift apart.
    volume_dir = f"/Volumes/{CATALOG_NAME}/{SCHEMA_NAME}/customer_json_files"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    for file_num in range(num_files):
        # Serialize the whole batch before opening the file so a failure while
        # generating records never leaves a partially written file behind.
        lines = [
            json.dumps(generate_customer_json()) + '\n'
            for _ in range(records_per_file)
        ]

        file_path = f"{volume_dir}/customer_batch_{timestamp}_{file_num}.json"

        # JSON Lines format (one JSON per line). Explicit UTF-8 so the output
        # does not depend on the platform's default encoding.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)

    # Report the counts actually written rather than hard-coded numbers.
    total_records = num_files * records_per_file
    print(f"✅ Generated {num_files} JSON files with {total_records} total records in {volume_dir}/")
    print("DLT pipeline will auto-detect and process these files!")

# Execute this to generate files: each run of this cell writes a new batch of
# JSON files whose names carry the timestamp taken at the start of the run.
create_batch_files()


✅ Generated 3 JSON files with 30 total records in /Volumes/{CATALOG_NAME}/{SCHEMA_NAME}/customer_json_files/
DLT pipeline will auto-detect and process these files!
