In [1]:
import uuid
import random
from datetime import datetime, timedelta

# --- Configuration ---
# Number of request/response pairs that share the same request id.
NUM_MATCHED_RECORDS = 100_000
# Number of requests written without a response, and responses without a request.
NUM_ORPHANED_REQUESTS = 5_000
NUM_ORPHANED_RESPONSES = 5_000
# Base instant from which every generated timestamp is offset.
START_TIME = datetime(year=2025, month=9, day=7, hour=12)

# HTTP status code -> relative sampling weight; the two public lists below are
# derived from this mapping so a code can never be separated from its weight.
_STATUS_DISTRIBUTION = {200: 80, 201: 5, 404: 10, 500: 4, 503: 1}
STATUS_CODES = list(_STATUS_DISTRIBUTION)
STATUS_WEIGHTS = list(_STATUS_DISTRIBUTION.values())

def _format_utc_timestamp(moment):
    """Render *moment* as ISO 8601 with six fractional digits and a trailing 'Z'."""
    # timespec="microseconds" keeps the fractional part even when it is zero.
    # Plain isoformat() omits it in that case, which would mix two different
    # timestamp layouts inside the same log file.
    return moment.isoformat(timespec="microseconds") + "Z"


def generate_log_files(
    *,
    num_matched=None,
    num_orphaned_requests=None,
    num_orphaned_responses=None,
    start_time=None,
    status_codes=None,
    status_weights=None,
    requests_path='requests.log',
    responses_path='responses.log',
):
    """Generate a shuffled request log and response log for a log-joining exercise.

    Each request line is ``<timestamp>,<request id>,<user id>`` and each
    response line is ``<timestamp>,<request id>,<status code>``. Three groups
    of rows are produced:

    * matched records: one request and one response sharing a request id,
      with the response 50-2000 ms after the request;
    * orphaned requests: request ids (``req-orphan-...``) with no response;
    * orphaned responses: request ids (``resp-orphan-...``) with no request.

    Both line lists are shuffled before being written, so the files are not
    ordered by time.

    Args:
        num_matched: Number of matched request/response pairs. Defaults to
            ``NUM_MATCHED_RECORDS``.
        num_orphaned_requests: Number of requests without a response.
            Defaults to ``NUM_ORPHANED_REQUESTS``.
        num_orphaned_responses: Number of responses without a request.
            Defaults to ``NUM_ORPHANED_RESPONSES``.
        start_time: Naive ``datetime`` used as the base instant; a literal
            ``Z`` suffix is appended to every timestamp, so pass a naive
            value. Defaults to ``START_TIME``.
        status_codes: Status codes to sample from. Defaults to
            ``STATUS_CODES``.
        status_weights: Relative weights matching ``status_codes``. Defaults
            to ``STATUS_WEIGHTS``.
        requests_path: Destination of the request log.
        responses_path: Destination of the response log.

    Returns:
        None. The two files are written and progress is printed to stdout.
    """
    # Fall back to the module-level configuration only for values the caller
    # did not supply, so a bare generate_log_files() behaves as before.
    if num_matched is None:
        num_matched = NUM_MATCHED_RECORDS
    if num_orphaned_requests is None:
        num_orphaned_requests = NUM_ORPHANED_REQUESTS
    if num_orphaned_responses is None:
        num_orphaned_responses = NUM_ORPHANED_RESPONSES
    if start_time is None:
        start_time = START_TIME
    if status_codes is None:
        status_codes = STATUS_CODES
    if status_weights is None:
        status_weights = STATUS_WEIGHTS

    print("Generating log data... this may take a moment.")

    request_lines = []
    response_lines = []

    # --- 1. Generate Matched Records ---
    for i in range(num_matched):
        req_id = f"req-{uuid.uuid4()}"
        user_id = f"user-{random.randint(1, 1000)}"

        # One request per second, jittered by up to 999 ms.
        sent_at = start_time + timedelta(seconds=i, milliseconds=random.randint(0, 999))
        duration_ms = random.randint(50, 2000)
        answered_at = sent_at + timedelta(milliseconds=duration_ms)

        status_code = random.choices(status_codes, weights=status_weights, k=1)[0]

        request_lines.append(f"{_format_utc_timestamp(sent_at)},{req_id},{user_id}\n")
        response_lines.append(f"{_format_utc_timestamp(answered_at)},{req_id},{status_code}\n")

    # --- 2. Generate Orphaned Requests (no matching response) ---
    for i in range(num_orphaned_requests):
        req_id = f"req-orphan-{uuid.uuid4()}"
        user_id = f"user-orphan-{random.randint(1, 500)}"
        sent_at = start_time + timedelta(minutes=i)
        request_lines.append(f"{_format_utc_timestamp(sent_at)},{req_id},{user_id}\n")

    # --- 3. Generate Orphaned Responses (no matching request) ---
    for i in range(num_orphaned_responses):
        req_id = f"resp-orphan-{uuid.uuid4()}"
        status_code = random.choices(status_codes, weights=status_weights, k=1)[0]
        answered_at = start_time + timedelta(hours=i)
        response_lines.append(f"{_format_utc_timestamp(answered_at)},{req_id},{status_code}\n")

    # --- 4. Shuffle the lines to create the "unordered" challenge ---
    print("Shuffling log entries...")
    random.shuffle(request_lines)
    random.shuffle(response_lines)

    # --- 5. Write to files ---
    # An explicit encoding keeps the output independent of the platform locale.
    print(f"Writing {requests_path}...")
    with open(requests_path, 'w', encoding='utf-8') as f:
        f.writelines(request_lines)

    print(f"Writing {responses_path}...")
    with open(responses_path, 'w', encoding='utf-8') as f:
        f.writelines(response_lines)

    print(f"\n✅ Log files '{requests_path}' and '{responses_path}' created successfully!")
    print(f"Total lines in requests.log: {len(request_lines)}")
    print(f"Total lines in responses.log: {len(response_lines)}")

# Generate the log files only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    generate_log_files()

Generating log data... this may take a moment.
Shuffling log entries...
Writing requests.log...
Writing responses.log...

✅ Log files 'requests.log' and 'responses.log' created successfully!
Total lines in requests.log: 105000
Total lines in responses.log: 105000
