# Fake Data Generator 
This notebook generates fake user data with evolving schemas to demonstrate:
- Schema evolution over time
- Nested JSON structures
- Optional fields appearing in different batches
- Array and object data types


In [1]:
# Install faker if needed
%pip install faker==23.0.0 -q


Note: you may need to restart the kernel to use updated packages.


In [2]:
from pyspark.sql import SparkSession
from faker import Faker
import json
import random
from datetime import datetime, timedelta

# =============================================================================
# CONFIGURATION PARAMETERS
# Edit these to point the notebook at your own Unity Catalog location.
# They are interpolated into the USE CATALOG / USE SCHEMA / CREATE VOLUME
# statements and into the /Volumes/<catalog>/<schema>/<volume> path used later.
# =============================================================================
CATALOG = "pavan_naidu"                    # Unity Catalog name
SCHEMA = "json"                            # Schema name within the catalog
VOLUME = "raw_data"                        # Volume name for storing data
# =============================================================================

def get_spark() -> SparkSession:
    """Return a Spark session, preferring Databricks Connect when it is usable.

    The Databricks Connect import and session creation are attempted first.
    If either step raises, a regular ``SparkSession`` is obtained instead.

    Returns:
        The session produced by whichever builder succeeded.
    """
    try:
        from databricks.connect import DatabricksSession as _ConnectSession
        session = _ConnectSession.builder.getOrCreate()
    except Exception:
        # Best-effort fallback: any failure above drops back to plain Spark
        session = SparkSession.builder.getOrCreate()
    return session

spark = get_spark()
fake = Faker()

# Seed both random sources so the generated values are repeatable from a fresh
# kernel. Timestamps are drawn relative to 'now'/'today' by the generator
# functions, so those still shift with the date the notebook is run.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Set catalog and schema
spark.sql(f"USE CATALOG {CATALOG}")
spark.sql(f"USE SCHEMA {SCHEMA}")

# Echo the active catalog/schema back from Spark to confirm the USE statements
print("✅ Spark session initialized")
print(f"Current catalog: {spark.sql('SELECT current_catalog()').collect()[0][0]}")
print(f"Current schema: {spark.sql('SELECT current_schema()').collect()[0][0]}")
print(f"Configuration: CATALOG={CATALOG}, SCHEMA={SCHEMA}, VOLUME={VOLUME}")


✅ Spark session initialized


HBox(children=(IntProgress(value=0, bar_style='success'), Label(value='')))

Current catalog: pavan_naidu


HBox(children=(IntProgress(value=0, bar_style='success'), Label(value='')))

Current schema: json
Configuration: CATALOG=pavan_naidu, SCHEMA=json, VOLUME=raw_data


## Setup Volume and Folder

Create Unity Catalog volume and folder for storing evolving data.


In [3]:
# Create or verify volume exists (IF NOT EXISTS keeps this safe to re-run)
try:
    spark.sql(f"CREATE VOLUME IF NOT EXISTS {CATALOG}.{SCHEMA}.{VOLUME}")
    print(f"✅ Volume '{VOLUME}' is ready")
except Exception as e:
    print(f"Volume might already exist or error: {e}")

# Get the volume path
volume_path = f"/Volumes/{CATALOG}/{SCHEMA}/{VOLUME}"
print(f"Volume path: {volume_path}")

# Create a users folder in the volume
users_folder = f"{volume_path}/users_stream"
try:
    dbutils.fs.mkdirs(users_folder)
    print(f"✅ Created folder: {users_folder}")
except Exception as e:
    print(f"Folder might already exist: {e}")

# Clean existing data so the run starts from an empty folder.
# Still best-effort (the notebook keeps going), but the failure is reported
# instead of being silently discarded, and KeyboardInterrupt/SystemExit are
# no longer trapped.
try:
    dbutils.fs.rm(users_folder, recurse=True)
    dbutils.fs.mkdirs(users_folder)
    print("🧹 Cleaned existing data")
except Exception as e:
    print(f"⚠️  Could not clean existing data: {e}")

print(f"\n📂 Data will be saved to: {users_folder}")


✅ Volume 'raw_data' is ready
Volume path: /Volumes/pavan_naidu/json/raw_data
✅ Created folder: /Volumes/pavan_naidu/json/raw_data/users_stream
🧹 Cleaned existing data

📂 Data will be saved to: /Volumes/pavan_naidu/json/raw_data/users_stream


## Phase 1: Generate Basic Schema Data (Records 0-299)

The first batch of users uses the **basic schema**, which includes:
- Core user fields: user_id, username, email, name, age, created_at, last_login
- Nested profile: bio, occupation, company, interests, skills
- Nested address: street, city, state, country, postal_code, coordinates
- Nested preferences: newsletter, notifications, privacy
- Optional fields (present on only some records): phone, referral_code, referred_by


In [4]:
def generate_phase1_users(num_records=300):
    """Build Phase 1 user records (basic schema only).

    Every record carries the core identity fields plus nested ``profile``,
    ``address`` and ``preferences`` objects. ``phone`` and the
    ``referral_code``/``referred_by`` pair are attached to only a random
    subset of records. No ``social_media``, ``subscription`` or ``metrics``
    keys are produced.

    Args:
        num_records: Number of user dicts to generate (default 300).

    Returns:
        A list of ``num_records`` dicts made of plain JSON-compatible values.
    """
    def coin_flip():
        # Uniform True/False draw used for every boolean flag
        return bool(random.choice([True, False]))

    print(f"🔨 Generating Phase 1: Basic schema ({num_records} records)...")

    records = []
    for _ in range(num_records):
        record = {
            # Core identity fields
            "user_id": str(fake.uuid4()),
            "username": fake.user_name(),
            "email": fake.email(),
            "name": fake.name(),
            "age": random.randint(18, 80),
            "created_at": str(fake.date_time_between(start_date='-5y', end_date='now')),
            "last_login": str(fake.date_time_between(start_date='-30d', end_date='now')),
            # Nested profile, including two variable-length arrays
            "profile": {
                "bio": fake.text(max_nb_chars=200),
                "occupation": fake.job(),
                "company": fake.company(),
                "interests": [fake.word() for _ in range(random.randint(1, 5))],
                "skills": [fake.job() for _ in range(random.randint(0, 3))],
            },
            # Nested address with a second nesting level for coordinates
            "address": {
                "street": fake.street_address(),
                "city": fake.city(),
                "state": fake.state(),
                "country": fake.country(),
                "postal_code": fake.postcode(),
                "coordinates": {
                    "latitude": float(fake.latitude()),
                    "longitude": float(fake.longitude()),
                },
            },
            # Nested preference flags
            "preferences": {
                "newsletter": coin_flip(),
                "notifications": {
                    "email": coin_flip(),
                    "sms": coin_flip(),
                    "push": coin_flip(),
                    "frequency": random.choice(["daily", "weekly", "monthly", "never"]),
                },
                "privacy": {
                    "profile_visible": coin_flip(),
                    "show_email": coin_flip(),
                    "show_location": coin_flip(),
                },
            },
        }

        # Sparse optional fields: roughly 30% get a phone number ...
        if random.random() > 0.7:
            record["phone"] = fake.phone_number()

        # ... and roughly 20% get referral info, whose referrer may be null
        if random.random() > 0.8:
            record["referral_code"] = fake.bothify(text='REF-####-????')
            if random.random() > 0.5:
                record["referred_by"] = str(fake.uuid4())
            else:
                record["referred_by"] = None

        records.append(record)

    return records

# Build the Phase 1 records in memory
phase1_users = generate_phase1_users(300)
print(f"✅ Generated {len(phase1_users)} Phase 1 users")

# Pretty-print the first record as a sample
print("\n📄 Sample Phase 1 record:")
print(json.dumps(phase1_users[0], indent=2))

# Slice the records into fixed-size batches; batch_size is reused by later phases
batch_size = 100
phase1_batches = []
for start in range(0, len(phase1_users), batch_size):
    phase1_batches.append(phase1_users[start:start + batch_size])

# Write each batch as one JSON-lines file (one record per line)
print(f"\n💾 Saving Phase 1: {len(phase1_batches)} batches...")
for idx, batch in enumerate(phase1_batches):
    target_path = f"{users_folder}/phase1_batch_{idx:03d}.json"
    payload = "\n".join(json.dumps(user) for user in batch)
    dbutils.fs.put(target_path, payload, overwrite=True)
    print(f"  ✓ Saved batch {idx}: {len(batch)} records")

print(f"\n✅ Phase 1 complete: {len(phase1_users)} records saved")


🔨 Generating Phase 1: Basic schema (300 records)...
✅ Generated 300 Phase 1 users

📄 Sample Phase 1 record:
{
  "user_id": "3d378e8d-1f04-4371-82f5-65a996805bb7",
  "username": "jonesnicole",
  "email": "josephwilson@example.com",
  "name": "Julie Anderson",
  "age": 38,
  "created_at": "2025-04-04 09:37:33.823373",
  "last_login": "2025-09-26 04:09:33.067595",
  "profile": {
    "bio": "Speech respond money base list. Movement race never clear.\nPlace attack especially of baby she. Offer his structure knowledge. Travel side image modern behavior attack.",
    "occupation": "Public relations officer",
    "company": "Young PLC",
    "interests": [
      "company",
      "pattern",
      "over",
      "middle"
    ],
    "skills": [
      "Programmer, systems"
    ]
  },
  "address": {
    "street": "93602 Hahn Mountains Apt. 505",
    "city": "Meganshire",
    "state": "Ohio",
    "country": "Malaysia",
    "postal_code": "88162",
    "coordinates": {
      "latitude": 31.14642,
      

## Phase 2: Add Social Media Fields (Records 300-599)

The second batch adds a **social_media** object with:
- twitter handle
- linkedin URL
- github profile

Each of these values may be null. This demonstrates **schema evolution**: a new nested object appears!


In [6]:
def generate_phase2_users(num_records=300):
    """Build Phase 2 user records: the Phase 1 layout plus ``social_media``.

    The added ``social_media`` object always has the keys ``twitter``,
    ``linkedin`` and ``github``; each value is either a generated string or
    ``None``. ``phone`` and the ``referral_code``/``referred_by`` pair remain
    optional and appear on only a random subset of records.

    Args:
        num_records: Number of user dicts to generate (default 300).

    Returns:
        A list of ``num_records`` dicts made of plain JSON-compatible values.
    """
    def coin_flip():
        # Uniform True/False draw used for every boolean flag
        return bool(random.choice([True, False]))

    print(f"🔨 Generating Phase 2: Adding social_media fields ({num_records} records)...")

    records = []
    for _ in range(num_records):
        record = {
            # Core identity fields (same as Phase 1)
            "user_id": str(fake.uuid4()),
            "username": fake.user_name(),
            "email": fake.email(),
            "name": fake.name(),
            "age": random.randint(18, 80),
            "created_at": str(fake.date_time_between(start_date='-5y', end_date='now')),
            "last_login": str(fake.date_time_between(start_date='-30d', end_date='now')),
            # Profile, address, preferences (same as Phase 1)
            "profile": {
                "bio": fake.text(max_nb_chars=200),
                "occupation": fake.job(),
                "company": fake.company(),
                "interests": [fake.word() for _ in range(random.randint(1, 5))],
                "skills": [fake.job() for _ in range(random.randint(0, 3))],
            },
            "address": {
                "street": fake.street_address(),
                "city": fake.city(),
                "state": fake.state(),
                "country": fake.country(),
                "postal_code": fake.postcode(),
                "coordinates": {
                    "latitude": float(fake.latitude()),
                    "longitude": float(fake.longitude()),
                },
            },
            "preferences": {
                "newsletter": coin_flip(),
                "notifications": {
                    "email": coin_flip(),
                    "sms": coin_flip(),
                    "push": coin_flip(),
                    "frequency": random.choice(["daily", "weekly", "monthly", "never"]),
                },
                "privacy": {
                    "profile_visible": coin_flip(),
                    "show_email": coin_flip(),
                    "show_location": coin_flip(),
                },
            },
            # 🆕 NEW FIELD: social media object introduced in this phase
            "social_media": {
                "twitter": f"@{fake.user_name()}" if random.random() > 0.3 else None,
                "linkedin": fake.url() if random.random() > 0.5 else None,
                "github": f"github.com/{fake.user_name()}" if random.random() > 0.7 else None,
            },
        }

        # Sparse optional fields
        if random.random() > 0.7:
            record["phone"] = fake.phone_number()

        if random.random() > 0.8:
            record["referral_code"] = fake.bothify(text='REF-####-????')
            if random.random() > 0.5:
                record["referred_by"] = str(fake.uuid4())
            else:
                record["referred_by"] = None

        records.append(record)

    return records

# Build the Phase 2 records in memory
phase2_users = generate_phase2_users(300)
print(f"✅ Generated {len(phase2_users)} Phase 2 users")

# Print a trimmed view of the first record that highlights the new field
print("\n📄 Sample Phase 2 record (notice 'social_media' field):")
sample = phase2_users[0]
sample_preview = {
    "user_id": sample["user_id"],
    "name": sample["name"],
    "email": sample["email"],
    "social_media": sample["social_media"],  # NEW FIELD!
    "...": "other fields..."
}
print(json.dumps(sample_preview, indent=2))

# Slice into batches using batch_size from the Phase 1 cell
phase2_batches = []
for start in range(0, len(phase2_users), batch_size):
    phase2_batches.append(phase2_users[start:start + batch_size])

# Write each batch as one JSON-lines file (one record per line)
print(f"\n💾 Saving Phase 2: {len(phase2_batches)} batches...")
for idx, batch in enumerate(phase2_batches):
    target_path = f"{users_folder}/phase2_batch_{idx:03d}.json"
    payload = "\n".join(json.dumps(user) for user in batch)
    dbutils.fs.put(target_path, payload, overwrite=True)
    print(f"  ✓ Saved batch {idx}: {len(batch)} records")

print(f"\n✅ Phase 2 complete: {len(phase2_users)} records saved")
print("🔄 Schema evolved: Added 'social_media' object!")


🔨 Generating Phase 2: Adding social_media fields (300 records)...
✅ Generated 300 Phase 2 users

📄 Sample Phase 2 record (notice 'social_media' field):
{
  "user_id": "5b6fc3d5-83d9-4d32-ad64-7b7520e09071",
  "name": "Jennifer Webb",
  "email": "lbeasley@example.net",
  "social_media": {
    "twitter": "@yburke",
    "linkedin": null,
    "github": "github.com/kbrooks"
  },
  "...": "other fields..."
}

💾 Saving Phase 2: 3 batches...
  ✓ Saved batch 0: 100 records
  ✓ Saved batch 1: 100 records
  ✓ Saved batch 2: 100 records

✅ Phase 2 complete: 300 records saved
🔄 Schema evolved: Added 'social_media' object!


## Phase 3: Add Subscription & Metrics Fields (Records 600-999)

Third batch adds even more fields:
- **subscription** object: tier, start_date, auto_renew
- **metrics** object: login_count, posts_created, comments_made, last_activity

This demonstrates **multiple schema evolutions** over time!


In [7]:
def generate_phase3_users(num_records=400):
    """Build Phase 3 user records with the fully evolved schema.

    On top of the Phase 1 fields and the Phase 2 ``social_media`` object,
    every record gains a ``subscription`` object (tier, start_date,
    auto_renew) and a ``metrics`` object (login_count, posts_created,
    comments_made, last_activity). ``phone`` and the
    ``referral_code``/``referred_by`` pair remain optional.

    Args:
        num_records: Number of user dicts to generate (default 400).

    Returns:
        A list of ``num_records`` dicts made of plain JSON-compatible values.
    """
    def coin_flip():
        # Uniform True/False draw used for every boolean flag
        return bool(random.choice([True, False]))

    print(f"🔨 Generating Phase 3: Adding subscription & metrics ({num_records} records)...")

    records = []
    for _ in range(num_records):
        record = {
            # Core identity fields
            "user_id": str(fake.uuid4()),
            "username": fake.user_name(),
            "email": fake.email(),
            "name": fake.name(),
            "age": random.randint(18, 80),
            "created_at": str(fake.date_time_between(start_date='-5y', end_date='now')),
            "last_login": str(fake.date_time_between(start_date='-30d', end_date='now')),
            # Profile, address, preferences
            "profile": {
                "bio": fake.text(max_nb_chars=200),
                "occupation": fake.job(),
                "company": fake.company(),
                "interests": [fake.word() for _ in range(random.randint(1, 5))],
                "skills": [fake.job() for _ in range(random.randint(0, 3))],
            },
            "address": {
                "street": fake.street_address(),
                "city": fake.city(),
                "state": fake.state(),
                "country": fake.country(),
                "postal_code": fake.postcode(),
                "coordinates": {
                    "latitude": float(fake.latitude()),
                    "longitude": float(fake.longitude()),
                },
            },
            "preferences": {
                "newsletter": coin_flip(),
                "notifications": {
                    "email": coin_flip(),
                    "sms": coin_flip(),
                    "push": coin_flip(),
                    "frequency": random.choice(["daily", "weekly", "monthly", "never"]),
                },
                "privacy": {
                    "profile_visible": coin_flip(),
                    "show_email": coin_flip(),
                    "show_location": coin_flip(),
                },
            },
            # Social media (from Phase 2)
            "social_media": {
                "twitter": f"@{fake.user_name()}" if random.random() > 0.3 else None,
                "linkedin": fake.url() if random.random() > 0.5 else None,
                "github": f"github.com/{fake.user_name()}" if random.random() > 0.7 else None,
            },
            # 🆕 NEW FIELD: subscription info
            "subscription": {
                "tier": random.choice(["free", "basic", "premium", "enterprise"]),
                "start_date": str(fake.date_between(start_date='-2y', end_date='today')),
                "auto_renew": coin_flip(),
            },
            # 🆕 NEW FIELD: usage metrics
            "metrics": {
                "login_count": random.randint(1, 1000),
                "posts_created": random.randint(0, 500),
                "comments_made": random.randint(0, 2000),
                "last_activity": str(fake.date_time_between(start_date='-7d', end_date='now')),
            },
        }

        # Sparse optional fields
        if random.random() > 0.7:
            record["phone"] = fake.phone_number()

        if random.random() > 0.8:
            record["referral_code"] = fake.bothify(text='REF-####-????')
            if random.random() > 0.5:
                record["referred_by"] = str(fake.uuid4())
            else:
                record["referred_by"] = None

        records.append(record)

    return records

# Build the Phase 3 records in memory
phase3_users = generate_phase3_users(400)
print(f"✅ Generated {len(phase3_users)} Phase 3 users")

# Print a trimmed view of the first record that highlights the new fields
print("\n📄 Sample Phase 3 record (notice 'subscription' and 'metrics' fields):")
sample = phase3_users[0]
sample_preview = {
    "user_id": sample["user_id"],
    "name": sample["name"],
    "social_media": sample["social_media"],
    "subscription": sample["subscription"],  # NEW FIELD!
    "metrics": sample["metrics"],  # NEW FIELD!
    "...": "other fields..."
}
print(json.dumps(sample_preview, indent=2))

# Slice into batches using batch_size from the Phase 1 cell
phase3_batches = []
for start in range(0, len(phase3_users), batch_size):
    phase3_batches.append(phase3_users[start:start + batch_size])

# Write each batch as one JSON-lines file (one record per line)
print(f"\n💾 Saving Phase 3: {len(phase3_batches)} batches...")
for idx, batch in enumerate(phase3_batches):
    target_path = f"{users_folder}/phase3_batch_{idx:03d}.json"
    payload = "\n".join(json.dumps(user) for user in batch)
    dbutils.fs.put(target_path, payload, overwrite=True)
    print(f"  ✓ Saved batch {idx}: {len(batch)} records")

print(f"\n✅ Phase 3 complete: {len(phase3_users)} records saved")
print("🔄 Schema evolved again: Added 'subscription' and 'metrics' objects!")


🔨 Generating Phase 3: Adding subscription & metrics (400 records)...
✅ Generated 400 Phase 3 users

📄 Sample Phase 3 record (notice 'subscription' and 'metrics' fields):
{
  "user_id": "fab6e9b8-6585-42e0-aa7b-31bd44a5b583",
  "name": "Robin Humphrey",
  "social_media": {
    "twitter": "@brocktyler",
    "linkedin": "http://middleton.com/",
    "github": null
  },
  "subscription": {
    "tier": "free",
    "start_date": "2024-03-18",
    "auto_renew": false
  },
  "metrics": {
    "login_count": 425,
    "posts_created": 234,
    "comments_made": 1081,
    "last_activity": "2025-10-14 17:21:51.871194"
  },
  "...": "other fields..."
}

💾 Saving Phase 3: 4 batches...
  ✓ Saved batch 0: 100 records
  ✓ Saved batch 1: 100 records
  ✓ Saved batch 2: 100 records
  ✓ Saved batch 3: 100 records

✅ Phase 3 complete: 400 records saved
🔄 Schema evolved again: Added 'subscription' and 'metrics' objects!


## Summary: Schema Evolution Complete

View all generated files and schema evolution summary.


In [8]:
# Overall record count across the three phases
total_records = sum(len(phase) for phase in (phase1_users, phase2_users, phase3_users))

# Horizontal rule reused by every section banner below
rule = "=" * 70

print(rule)
print("📊 SCHEMA EVOLUTION SUMMARY")
print(rule)
print(f"\n✅ Total records generated: {total_records}")
print(f"   - Phase 1 (Basic schema): {len(phase1_users)} records")
print(f"   - Phase 2 (+ social_media): {len(phase2_users)} records")
print(f"   - Phase 3 (+ subscription + metrics): {len(phase3_users)} records")

# List what was actually written to the volume folder
print(f"\n📂 Data location: {users_folder}")
print("\n📁 Files created:")
files = dbutils.fs.ls(users_folder)
display(files)

print(f"\n✅ Total files: {len(files)}")

# Static description of which fields each phase's files contain
timeline_text = """
Phase 1 Schema (phase1_batch_*.json):
  ├── user_id, username, email, name, age
  ├── created_at, last_login
  ├── profile {bio, occupation, company, interests[], skills[]}
  ├── address {street, city, state, country, postal_code, coordinates{}}
  ├── preferences {newsletter, notifications{}, privacy{}}
  └── [optional] phone, referral_code, referred_by

Phase 2 Schema (phase2_batch_*.json):
  ├── All Phase 1 fields
  └── 🆕 social_media {twitter, linkedin, github}

Phase 3 Schema (phase3_batch_*.json):
  ├── All Phase 1 & 2 fields
  ├── 🆕 subscription {tier, start_date, auto_renew}
  └── 🆕 metrics {login_count, posts_created, comments_made, last_activity}
"""

print("\n" + rule)
print("🔄 SCHEMA EVOLUTION TIMELINE")
print(rule)
print(timeline_text)

# Pointers for the follow-up pipeline notebook
next_steps_text = """
1. Run the Lakeflow Declarative Pipeline (pipeline.ipynb)
2. Pipeline will automatically infer schema from Phase 1 data
3. As it processes Phase 2 & 3, schema will evolve automatically
4. Watch the pipeline handle schema changes without failing!

Pipeline will demonstrate:
  ✓ Automatic schema inference
  ✓ Schema evolution with addNewColumns mode
  ✓ Rescue mode for strict schema control
  ✓ Schema hints for type guidance
"""

print(rule)
print("🎯 NEXT STEPS")
print(rule)
print(next_steps_text)


📊 SCHEMA EVOLUTION SUMMARY

✅ Total records generated: 1000
   - Phase 1 (Basic schema): 300 records
   - Phase 2 (+ social_media): 300 records
   - Phase 3 (+ subscription + metrics): 400 records

📂 Data location: /Volumes/pavan_naidu/json/raw_data/users_stream

📁 Files created:


[FileInfo(path='/Volumes/pavan_naidu/json/raw_data/users_stream/phase1_batch_000.json', name='phase1_batch_000.json', size=101186, modificationTime=1760591855000),
 FileInfo(path='/Volumes/pavan_naidu/json/raw_data/users_stream/phase1_batch_001.json', name='phase1_batch_001.json', size=101191, modificationTime=1760591856000),
 FileInfo(path='/Volumes/pavan_naidu/json/raw_data/users_stream/phase1_batch_002.json', name='phase1_batch_002.json', size=101687, modificationTime=1760591856000),
 FileInfo(path='/Volumes/pavan_naidu/json/raw_data/users_stream/phase2_batch_000.json', name='phase2_batch_000.json', size=110114, modificationTime=1760592281000),
 FileInfo(path='/Volumes/pavan_naidu/json/raw_data/users_stream/phase2_batch_001.json', name='phase2_batch_001.json', size=109828, modificationTime=1760592281000),
 FileInfo(path='/Volumes/pavan_naidu/json/raw_data/users_stream/phase2_batch_002.json', name='phase2_batch_002.json', size=110114, modificationTime=1760592282000),
 FileInfo(path='


✅ Total files: 10

🔄 SCHEMA EVOLUTION TIMELINE

Phase 1 Schema (phase1_batch_*.json):
  ├── user_id, username, email, name, age
  ├── created_at, last_login
  ├── profile {bio, occupation, company, interests[], skills[]}
  ├── address {street, city, state, country, postal_code, coordinates{}}
  ├── preferences {newsletter, notifications{}, privacy{}}
  └── [optional] phone, referral_code, referred_by

Phase 2 Schema (phase2_batch_*.json):
  ├── All Phase 1 fields
  └── 🆕 social_media {twitter, linkedin, github}

Phase 3 Schema (phase3_batch_*.json):
  ├── All Phase 1 & 2 fields
  ├── 🆕 subscription {tier, start_date, auto_renew}
  └── 🆕 metrics {login_count, posts_created, comments_made, last_activity}

🎯 NEXT STEPS

1. Run the Lakeflow Declarative Pipeline (pipeline.ipynb)
2. Pipeline will automatically infer schema from Phase 1 data
3. As it processes Phase 2 & 3, schema will evolve automatically
4. Watch the pipeline handle schema changes without failing!

Pipeline will demonstrate

## Cleanup: Remove All Generated Data

Run this cell to clean up all generated files and folders.  
⚠️ **Warning**: This will permanently delete all data generated in this notebook!


In [9]:
# Cleanup script to remove all generated data
import time

print("=" * 70)
print("🧹 CLEANUP: Removing all generated data")
print("=" * 70)

try:
    # List files before cleanup so the summary can report how many were removed
    print(f"\n📂 Current folder: {users_folder}")
    files_before = dbutils.fs.ls(users_folder)
    print(f"   Found {len(files_before)} files to delete")

    # Remove all files and the folder
    print(f"\n🗑️  Deleting folder: {users_folder}")
    dbutils.fs.rm(users_folder, recurse=True)

    # Verify deletion: a failing listing is taken to mean the folder is gone
    try:
        remaining_files = dbutils.fs.ls(users_folder)
        folder_removed = False
    except Exception:
        folder_removed = True

    # Only report success when the verification agrees; previously the
    # "Cleanup complete" summary was printed even if the folder still existed.
    if folder_removed:
        print("   ✅ Successfully deleted all files and folder")
        print("   ✅ Folder completely removed")

        print("\n✅ Cleanup complete!")
        print(f"   - Removed {len(files_before)} data files")
        print(f"   - Removed folder: {users_folder}")
    else:
        print(f"   ⚠️  Warning: {len(remaining_files)} files still exist")
        print("\n⚠️  Cleanup incomplete: the folder is still present")

except Exception as e:
    print(f"\n❌ Error during cleanup: {e}")
    print("   The folder may not exist or may already be deleted")

print("\n" + "=" * 70)
print("💡 TIP: You can re-run the data generation cells to create fresh data")
print("=" * 70)


🧹 CLEANUP: Removing all generated data

📂 Current folder: /Volumes/pavan_naidu/json/raw_data/users_stream
   Found 10 files to delete

🗑️  Deleting folder: /Volumes/pavan_naidu/json/raw_data/users_stream
   ✅ Successfully deleted all files and folder
   ✅ Folder completely removed

✅ Cleanup complete!
   - Removed 10 data files
   - Removed folder: /Volumes/pavan_naidu/json/raw_data/users_stream

💡 TIP: You can re-run the data generation cells to create fresh data
